mirror of
https://github.com/mandiant/capa.git
synced 2026-07-04 19:57:12 -07:00
Merge pull request #986 from mandiant/feature-981
add Address abstraction
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
- add unmanaged call characteristic for dotnet files #1023 @mike-hunhoff
|
||||
- add mixed mode characteristic feature extraction for dotnet files #1024 @mike-hunhoff
|
||||
- emit class and namespace features for dotnet files #1030 @mike-hunhoff
|
||||
- render: support Addresses that aren't simple integers, like .NET token+offset #981 @williballenthin
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
@@ -21,6 +22,8 @@
|
||||
- Python 3.7 is now the minimum supported Python version #866 @williballenthin
|
||||
- remove /x32 and /x64 flavors of number and operand features #932 @williballenthin
|
||||
- the tool now accepts multiple paths to rules, and JSON doc updated accordingly @williballenthin
|
||||
- extractors must use handles to identify functions/basic blocks/instructions #981 @williballenthin
|
||||
- the freeze file format schema was updated, including format version bump to v2 #986 @williballenthin
|
||||
|
||||
### New Rules (7)
|
||||
|
||||
|
||||
+7
-6
@@ -13,6 +13,7 @@ from typing import TYPE_CHECKING, Set, Dict, List, Tuple, Mapping, Iterable
|
||||
import capa.perf
|
||||
import capa.features.common
|
||||
from capa.features.common import Result, Feature
|
||||
from capa.features.address import Address
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# circular import, otherwise
|
||||
@@ -26,7 +27,7 @@ if TYPE_CHECKING:
|
||||
# to collect the locations of a feature, do: `features[Number(0x10)]`
|
||||
#
|
||||
# aliased here so that the type can be documented and xref'd.
|
||||
FeatureSet = Dict[Feature, Set[int]]
|
||||
FeatureSet = Dict[Feature, Set[Address]]
|
||||
|
||||
|
||||
class Statement:
|
||||
@@ -257,10 +258,10 @@ class Subscope(Statement):
|
||||
# inspect(match_details)
|
||||
#
|
||||
# aliased here so that the type can be documented and xref'd.
|
||||
MatchResults = Mapping[str, List[Tuple[int, Result]]]
|
||||
MatchResults = Mapping[str, List[Tuple[Address, Result]]]
|
||||
|
||||
|
||||
def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations: Iterable[int]):
|
||||
def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations: Iterable[Address]):
|
||||
"""
|
||||
record into the given featureset that the given rule matched at the given locations.
|
||||
|
||||
@@ -277,7 +278,7 @@ def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations:
|
||||
namespace, _, _ = namespace.rpartition("/")
|
||||
|
||||
|
||||
def match(rules: List["capa.rules.Rule"], features: FeatureSet, va: int) -> Tuple[FeatureSet, MatchResults]:
|
||||
def match(rules: List["capa.rules.Rule"], features: FeatureSet, addr: Address) -> Tuple[FeatureSet, MatchResults]:
|
||||
"""
|
||||
match the given rules against the given features,
|
||||
returning an updated set of features and the matches.
|
||||
@@ -315,10 +316,10 @@ def match(rules: List["capa.rules.Rule"], features: FeatureSet, va: int) -> Tupl
|
||||
# sanity check
|
||||
assert bool(res) is True
|
||||
|
||||
results[rule.name].append((va, res))
|
||||
results[rule.name].append((addr, res))
|
||||
# we need to update the current `features`
|
||||
# because subsequent iterations of this loop may use newly added features,
|
||||
# such as rule or namespace matches.
|
||||
index_rule_matches(features, rule, [va])
|
||||
index_rule_matches(features, rule, [addr])
|
||||
|
||||
return (features, results)
|
||||
|
||||
@@ -0,0 +1,110 @@
|
||||
import abc
|
||||
|
||||
from dncil.clr.token import Token
|
||||
|
||||
|
||||
class Address(abc.ABC):
    """
    abstract base for the locations at which features are found.

    every concrete address type must support equality, ordering,
    hashing, and a debug rendering, so all four special methods
    are declared abstract here.
    """

    @abc.abstractmethod
    def __eq__(self, other):
        """compare this address with another for equality."""

    @abc.abstractmethod
    def __lt__(self, other):
        """order addresses, so that they can be sorted from low to high."""

    @abc.abstractmethod
    def __hash__(self):
        """hash the address, so that it can be used in sets and dicts."""

    @abc.abstractmethod
    def __repr__(self):
        """render the address, to help during debugging."""
|
||||
|
||||
|
||||
class AbsoluteVirtualAddress(int, Address):
    """
    an absolute memory address.

    this is an `int` subclass: only construction and `repr` are customized here,
    everything else (comparison, hashing, arithmetic) is inherited from `int`.
    """

    def __new__(cls, v):
        # absolute addresses are never negative.
        assert v >= 0
        return super().__new__(cls, v)

    def __repr__(self):
        return f"absolute(0x{self:x})"
|
||||
|
||||
|
||||
class RelativeVirtualAddress(int, Address):
    """
    a memory address relative to a base address.

    this is an `int` subclass that only customizes `repr`;
    the value is not range checked on construction.
    """

    def __repr__(self):
        return f"relative(0x{self:x})"
|
||||
|
||||
|
||||
class FileOffsetAddress(int, Address):
    """
    an address relative to the start of a file.

    this is an `int` subclass: only construction and `repr` are customized here,
    everything else (comparison, hashing, arithmetic) is inherited from `int`.
    """

    def __new__(cls, v):
        # an offset into a file is never negative.
        assert v >= 0
        return super().__new__(cls, v)

    def __repr__(self):
        return f"file(0x{self:x})"
|
||||
|
||||
|
||||
class DNTokenAddress(Address):
    """
    a .NET token.

    instances compare, sort, and hash by the `.value` of the wrapped token.

    args:
      token: the token that identifies the object; stored as `.token`.
    """

    def __init__(self, token: Token):
        self.token = token

    def __eq__(self, other):
        try:
            other_value = other.token.value
        except AttributeError:
            # `other` is not a token-based address (e.g. an integer address).
            # defer to the reflected comparison instead of crashing here.
            return NotImplemented
        return self.token.value == other_value

    def __lt__(self, other):
        try:
            other_value = other.token.value
        except AttributeError:
            # unsupported operand: let Python try the reflected operation,
            # and raise the conventional TypeError if there is none.
            return NotImplemented
        return self.token.value < other_value

    def __hash__(self):
        return hash(self.token.value)

    def __repr__(self):
        return f"token(0x{self.token.value:x})"
|
||||
|
||||
|
||||
class DNTokenOffsetAddress(Address):
    """
    an offset into an object specified by a .NET token.

    instances compare, sort, and hash by the pair (token value, offset).

    args:
      token: the token that identifies the containing object; stored as `.token`.
      offset: the non-negative offset into that object; stored as `.offset`.
    """

    def __init__(self, token: Token, offset: int):
        assert offset >= 0
        self.token = token
        self.offset = offset

    def __eq__(self, other):
        try:
            other_key = (other.token.value, other.offset)
        except AttributeError:
            # `other` is not a token+offset address (e.g. an integer address).
            # defer to the reflected comparison instead of crashing here.
            return NotImplemented
        return (self.token.value, self.offset) == other_key

    def __lt__(self, other):
        try:
            other_key = (other.token.value, other.offset)
        except AttributeError:
            # unsupported operand: let Python try the reflected operation,
            # and raise the conventional TypeError if there is none.
            return NotImplemented
        return (self.token.value, self.offset) < other_key

    def __hash__(self):
        return hash((self.token.value, self.offset))

    def __repr__(self):
        return f"token(0x{self.token.value:x})+(0x{self.offset:x})"
|
||||
|
||||
|
||||
class _NoAddress(Address):
    """
    placeholder address for things that have no meaningful location.

    a single shared instance is exposed as `NO_ADDRESS`.
    """

    def __eq__(self, other):
        # NOTE(review): this reports equality with *any* operand,
        # not only with other `_NoAddress` instances. kept as-is for compatibility.
        return True

    def __lt__(self, other):
        # never sorts before anything else.
        return False

    def __gt__(self, other):
        # reached through the reflected path for `other < NO_ADDRESS`
        # when `other` (e.g. an int-based address) cannot answer itself.
        # without this, sorting a mix of real addresses and `NO_ADDRESS`
        # raises TypeError depending on operand order.
        return other is not self

    def __hash__(self):
        return hash(0)

    def __repr__(self):
        return "no address"


NO_ADDRESS = _NoAddress()
|
||||
@@ -10,18 +10,11 @@ from capa.features.common import Feature
|
||||
|
||||
|
||||
class BasicBlock(Feature):
|
||||
def __init__(self):
|
||||
super(BasicBlock, self).__init__(None)
|
||||
def __init__(self, description=None):
|
||||
super(BasicBlock, self).__init__(None, description=description)
|
||||
|
||||
def __str__(self):
|
||||
return "basic block"
|
||||
|
||||
def get_value_str(self):
|
||||
return ""
|
||||
|
||||
def freeze_serialize(self):
|
||||
return (self.__class__.__name__, [])
|
||||
|
||||
@classmethod
|
||||
def freeze_deserialize(cls, args):
|
||||
return cls()
|
||||
|
||||
+24
-45
@@ -11,7 +11,7 @@ import abc
|
||||
import codecs
|
||||
import logging
|
||||
import collections
|
||||
from typing import TYPE_CHECKING, Set, Dict, List, Union
|
||||
from typing import TYPE_CHECKING, Set, Dict, List, Union, Optional, Sequence
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# circular import, otherwise
|
||||
@@ -20,6 +20,7 @@ if TYPE_CHECKING:
|
||||
import capa.perf
|
||||
import capa.features
|
||||
import capa.features.extractors.elf
|
||||
from capa.features.address import Address
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
MAX_BYTES_FEATURE_SIZE = 0x100
|
||||
@@ -70,20 +71,13 @@ class Result:
|
||||
success: bool,
|
||||
statement: Union["capa.engine.Statement", "Feature"],
|
||||
children: List["Result"],
|
||||
locations=None,
|
||||
locations: Optional[Set[Address]] = None,
|
||||
):
|
||||
"""
|
||||
args:
|
||||
success (bool)
|
||||
statement (capa.engine.Statement or capa.features.Feature)
|
||||
children (list[Result])
|
||||
locations (iterable[VA])
|
||||
"""
|
||||
super(Result, self).__init__()
|
||||
self.success = success
|
||||
self.statement = statement
|
||||
self.children = children
|
||||
self.locations = locations if locations is not None else ()
|
||||
self.locations = locations if locations is not None else set()
|
||||
|
||||
def __eq__(self, other):
|
||||
if isinstance(other, bool):
|
||||
@@ -98,7 +92,7 @@ class Result:
|
||||
|
||||
|
||||
class Feature(abc.ABC):
|
||||
def __init__(self, value: Union[str, int, bytes], description=None):
|
||||
def __init__(self, value: Union[str, int, float, bytes], description=None):
|
||||
"""
|
||||
Args:
|
||||
value (any): the value of the feature, such as the number or string.
|
||||
@@ -116,6 +110,15 @@ class Feature(abc.ABC):
|
||||
def __eq__(self, other):
|
||||
return self.name == other.name and self.value == other.value
|
||||
|
||||
def __lt__(self, other):
|
||||
# TODO: this is a huge hack!
|
||||
import capa.features.freeze.features
|
||||
|
||||
return (
|
||||
capa.features.freeze.features.feature_from_capa(self).json()
|
||||
< capa.features.freeze.features.feature_from_capa(other).json()
|
||||
)
|
||||
|
||||
def get_value_str(self) -> str:
|
||||
"""
|
||||
render the value of this feature, for use by `__str__` and friends.
|
||||
@@ -137,27 +140,10 @@ class Feature(abc.ABC):
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
|
||||
def evaluate(self, ctx: Dict["Feature", Set[int]], **kwargs) -> Result:
|
||||
def evaluate(self, ctx: Dict["Feature", Set[Address]], **kwargs) -> Result:
|
||||
capa.perf.counters["evaluate.feature"] += 1
|
||||
capa.perf.counters["evaluate.feature." + self.name] += 1
|
||||
return Result(self in ctx, self, [], locations=ctx.get(self, []))
|
||||
|
||||
def freeze_serialize(self):
|
||||
return (self.__class__.__name__, [self.value])
|
||||
|
||||
@classmethod
|
||||
def freeze_deserialize(cls, args):
|
||||
# as you can see below in code,
|
||||
# if the last argument is a dictionary,
|
||||
# consider it to be kwargs passed to the feature constructor.
|
||||
if len(args) == 1:
|
||||
return cls(*args)
|
||||
elif isinstance(args[-1], dict):
|
||||
kwargs = args[-1]
|
||||
args = args[:-1]
|
||||
return cls(*args, **kwargs)
|
||||
else:
|
||||
return cls(*args)
|
||||
return Result(self in ctx, self, [], locations=ctx.get(self, set()))
|
||||
|
||||
|
||||
class MatchedRule(Feature):
|
||||
@@ -230,7 +216,7 @@ class Substring(String):
|
||||
# instead, return a new instance that has a reference to both the substring and the matched values.
|
||||
return Result(True, _MatchedSubstring(self, matches), [], locations=locations)
|
||||
else:
|
||||
return Result(False, _MatchedSubstring(self, None), [])
|
||||
return Result(False, _MatchedSubstring(self, {}), [])
|
||||
|
||||
def __str__(self):
|
||||
return "substring(%s)" % self.value
|
||||
@@ -244,11 +230,11 @@ class _MatchedSubstring(Substring):
|
||||
note: this type should only ever be constructed by `Substring.evaluate()`. it is not part of the public API.
|
||||
"""
|
||||
|
||||
def __init__(self, substring: Substring, matches):
|
||||
def __init__(self, substring: Substring, matches: Dict[str, Set[Address]]):
|
||||
"""
|
||||
args:
|
||||
substring (Substring): the substring feature that matches.
|
||||
match (Dict[string, List[int]]|None): mapping from matching string to its locations.
|
||||
substring: the substring feature that matches.
|
||||
match: mapping from matching string to its locations.
|
||||
"""
|
||||
super(_MatchedSubstring, self).__init__(str(substring.value), description=substring.description)
|
||||
# we want this to collide with the name of `Substring` above,
|
||||
@@ -327,7 +313,7 @@ class Regex(String):
|
||||
# see #262.
|
||||
return Result(True, _MatchedRegex(self, matches), [], locations=locations)
|
||||
else:
|
||||
return Result(False, _MatchedRegex(self, None), [])
|
||||
return Result(False, _MatchedRegex(self, {}), [])
|
||||
|
||||
def __str__(self):
|
||||
return "regex(string =~ %s)" % self.value
|
||||
@@ -341,11 +327,11 @@ class _MatchedRegex(Regex):
|
||||
note: this type should only ever be constructed by `Regex.evaluate()`. it is not part of the public API.
|
||||
"""
|
||||
|
||||
def __init__(self, regex: Regex, matches):
|
||||
def __init__(self, regex: Regex, matches: Dict[str, Set[Address]]):
|
||||
"""
|
||||
args:
|
||||
regex (Regex): the regex feature that matches.
|
||||
match (Dict[string, List[int]]|None): mapping from matching string to its locations.
|
||||
regex: the regex feature that matches.
|
||||
matches: mapping from matching string to its locations.
|
||||
"""
|
||||
super(_MatchedRegex, self).__init__(str(regex.value), description=regex.description)
|
||||
# we want this to collide with the name of `Regex` above,
|
||||
@@ -389,13 +375,6 @@ class Bytes(Feature):
|
||||
def get_value_str(self):
|
||||
return hex_string(bytes_to_str(self.value))
|
||||
|
||||
def freeze_serialize(self):
|
||||
return (self.__class__.__name__, [bytes_to_str(self.value).upper()])
|
||||
|
||||
@classmethod
|
||||
def freeze_deserialize(cls, args):
|
||||
return cls(*[codecs.decode(x, "hex") for x in args])
|
||||
|
||||
|
||||
# other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types
|
||||
ARCH_I386 = "i386"
|
||||
|
||||
@@ -7,23 +7,60 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import abc
|
||||
from typing import Tuple, Iterator, SupportsInt
|
||||
import dataclasses
|
||||
from typing import Any, Dict, Tuple, Union, Iterator
|
||||
from dataclasses import dataclass
|
||||
|
||||
import capa.features.address
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
|
||||
# feature extractors may reference functions, BBs, insns by opaque handle values.
|
||||
# the only requirement of these handles are that they support `__int__`,
|
||||
# so that they can be rendered as addresses.
|
||||
# you can use the `.address` property to get and render the address of the feature.
|
||||
#
|
||||
# these handles are only consumed by routines on
|
||||
# the feature extractor from which they were created.
|
||||
#
|
||||
# int(FunctionHandle) -> function start address
|
||||
# int(BBHandle) -> BasicBlock start address
|
||||
# int(InsnHandle) -> instruction address
|
||||
FunctionHandle = SupportsInt
|
||||
BBHandle = SupportsInt
|
||||
InsnHandle = SupportsInt
|
||||
|
||||
|
||||
@dataclasses.dataclass
class FunctionHandle:
    """handle to a function recognized by a feature extractor.

    Attributes:
        address: the address of the function.
        inner: extractor-specific data.
        ctx: a context object for the extractor; a new empty dict unless provided.
    """

    address: Address
    inner: Any
    ctx: Dict[str, Any] = dataclasses.field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class BBHandle:
    """handle to a basic block recognized by a feature extractor.

    Attributes:
        address: the start address of the basic block.
        inner: extractor-specific data.
    """

    address: Address
    inner: Any
|
||||
|
||||
|
||||
@dataclasses.dataclass
class InsnHandle:
    """handle to an instruction recognized by a feature extractor.

    Attributes:
        address: the address of the instruction.
        inner: extractor-specific data.
    """

    address: Address
    inner: Any
|
||||
|
||||
|
||||
class FeatureExtractor:
|
||||
@@ -53,14 +90,18 @@ class FeatureExtractor:
|
||||
super(FeatureExtractor, self).__init__()
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_base_address(self) -> int:
|
||||
def get_base_address(self) -> Union[AbsoluteVirtualAddress, capa.features.address._NoAddress]:
|
||||
"""
|
||||
fetch the preferred load address at which the sample was analyzed.
|
||||
|
||||
when the base address is `NO_ADDRESS`, then the loader has no concept of a preferred load address.
|
||||
such as: shellcode, .NET modules, etc.
|
||||
in these scenarios, RelativeVirtualAddresses aren't used.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_global_features(self) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract features found at every scope ("global").
|
||||
|
||||
@@ -71,12 +112,12 @@ class FeatureExtractor:
|
||||
print('0x%x: %s', va, feature)
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: feature and its location
|
||||
Tuple[Feature, Address]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_file_features(self) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract file-scope features.
|
||||
|
||||
@@ -87,7 +128,7 @@ class FeatureExtractor:
|
||||
print('0x%x: %s', va, feature)
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: feature and its location
|
||||
Tuple[Feature, Address]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@@ -99,32 +140,33 @@ class FeatureExtractor:
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def is_library_function(self, va: int) -> bool:
|
||||
def is_library_function(self, addr: Address) -> bool:
|
||||
"""
|
||||
is the given address a library function?
|
||||
the backend may implement its own function matching algorithm, or none at all.
|
||||
we accept a VA here, rather than function object, to handle addresses identified in instructions.
|
||||
we accept an address here, rather than function object,
|
||||
to handle addresses identified in instructions.
|
||||
|
||||
this information is used to:
|
||||
- filter out matches in library functions (by default), and
|
||||
- recognize when to fetch symbol names for called (non-API) functions
|
||||
|
||||
args:
|
||||
va (int): the virtual address of a function.
|
||||
addr (Address): the address of a function.
|
||||
|
||||
returns:
|
||||
bool: True if the given address is the start of a library function.
|
||||
"""
|
||||
return False
|
||||
|
||||
def get_function_name(self, va: int) -> str:
|
||||
def get_function_name(self, addr: Address) -> str:
|
||||
"""
|
||||
fetch any recognized name for the given address.
|
||||
this is only guaranteed to return a value when the given function is a recognized library function.
|
||||
we accept a VA here, rather than function object, to handle addresses identified in instructions.
|
||||
|
||||
args:
|
||||
va (int): the virtual address of a function.
|
||||
addr (Address): the address of a function.
|
||||
|
||||
returns:
|
||||
str: the function name
|
||||
@@ -132,10 +174,10 @@ class FeatureExtractor:
|
||||
raises:
|
||||
KeyError: when the given function does not have a name.
|
||||
"""
|
||||
raise KeyError(va)
|
||||
raise KeyError(addr)
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_function_features(self, f: FunctionHandle) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_function_features(self, f: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract function-scope features.
|
||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||
@@ -144,14 +186,14 @@ class FeatureExtractor:
|
||||
|
||||
extractor = VivisectFeatureExtractor(vw, path)
|
||||
for function in extractor.get_functions():
|
||||
for feature, va in extractor.extract_function_features(function):
|
||||
print('0x%x: %s', va, feature)
|
||||
for feature, address in extractor.extract_function_features(function):
|
||||
print('0x%x: %s', address, feature)
|
||||
|
||||
args:
|
||||
f [FunctionHandle]: an opaque value previously fetched from `.get_functions()`.
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: feature and its location
|
||||
Tuple[Feature, Address]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@@ -164,7 +206,7 @@ class FeatureExtractor:
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract basic block-scope features.
|
||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||
@@ -174,15 +216,15 @@ class FeatureExtractor:
|
||||
extractor = VivisectFeatureExtractor(vw, path)
|
||||
for function in extractor.get_functions():
|
||||
for bb in extractor.get_basic_blocks(function):
|
||||
for feature, va in extractor.extract_basic_block_features(function, bb):
|
||||
print('0x%x: %s', va, feature)
|
||||
for feature, address in extractor.extract_basic_block_features(function, bb):
|
||||
print('0x%x: %s', address, feature)
|
||||
|
||||
args:
|
||||
f [FunctionHandle]: an opaque value previously fetched from `.get_functions()`.
|
||||
bb [BBHandle]: an opaque value previously fetched from `.get_basic_blocks()`.
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: feature and its location
|
||||
Tuple[Feature, Address]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@@ -195,7 +237,9 @@ class FeatureExtractor:
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_insn_features(self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_insn_features(
|
||||
self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract instruction-scope features.
|
||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||
@@ -206,8 +250,8 @@ class FeatureExtractor:
|
||||
for function in extractor.get_functions():
|
||||
for bb in extractor.get_basic_blocks(function):
|
||||
for insn in extractor.get_instructions(function, bb):
|
||||
for feature, va in extractor.extract_insn_features(function, bb, insn):
|
||||
print('0x%x: %s', va, feature)
|
||||
for feature, address in extractor.extract_insn_features(function, bb, insn):
|
||||
print('0x%x: %s', address, feature)
|
||||
|
||||
args:
|
||||
f [FunctionHandle]: an opaque value previously fetched from `.get_functions()`.
|
||||
@@ -215,123 +259,6 @@ class FeatureExtractor:
|
||||
insn [InsnHandle]: an opaque value previously fetched from `.get_instructions()`.
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: feature and its location
|
||||
Tuple[Feature, Address]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
class NullFeatureExtractor(FeatureExtractor):
|
||||
"""
|
||||
An extractor that extracts some user-provided features.
|
||||
The structure of the single parameter is demonstrated in the example below.
|
||||
|
||||
This is useful for testing, as we can provide expected values and see if matching works.
|
||||
Also, this is how we represent features deserialized from a freeze file.
|
||||
|
||||
example::
|
||||
|
||||
extractor = NullFeatureExtractor({
|
||||
'base address: 0x401000,
|
||||
'global features': [
|
||||
(0x0, capa.features.Arch('i386')),
|
||||
(0x0, capa.features.OS('linux')),
|
||||
],
|
||||
'file features': [
|
||||
(0x402345, capa.features.Characteristic('embedded pe')),
|
||||
],
|
||||
'functions': {
|
||||
0x401000: {
|
||||
'features': [
|
||||
(0x401000, capa.features.Characteristic('nzxor')),
|
||||
],
|
||||
'basic blocks': {
|
||||
0x401000: {
|
||||
'features': [
|
||||
(0x401000, capa.features.Characteristic('tight-loop')),
|
||||
],
|
||||
'instructions': {
|
||||
0x401000: {
|
||||
'features': [
|
||||
(0x401000, capa.features.Characteristic('nzxor')),
|
||||
],
|
||||
},
|
||||
0x401002: ...
|
||||
}
|
||||
},
|
||||
0x401005: ...
|
||||
}
|
||||
},
|
||||
0x40200: ...
|
||||
}
|
||||
)
|
||||
"""
|
||||
|
||||
def __init__(self, features):
|
||||
super(NullFeatureExtractor, self).__init__()
|
||||
self.features = features
|
||||
|
||||
def get_base_address(self):
|
||||
return self.features["base address"]
|
||||
|
||||
def extract_global_features(self):
|
||||
for p in self.features.get("global features", []):
|
||||
va, feature = p
|
||||
yield feature, va
|
||||
|
||||
def extract_file_features(self):
|
||||
for p in self.features.get("file features", []):
|
||||
va, feature = p
|
||||
yield feature, va
|
||||
|
||||
def get_functions(self):
|
||||
for va in sorted(self.features["functions"].keys()):
|
||||
yield va
|
||||
|
||||
def extract_function_features(self, f):
|
||||
for p in self.features.get("functions", {}).get(f, {}).get("features", []): # noqa: E127 line over-indented
|
||||
va, feature = p
|
||||
yield feature, va
|
||||
|
||||
def get_basic_blocks(self, f):
|
||||
for va in sorted(
|
||||
self.features.get("functions", {}) # noqa: E127 line over-indented
|
||||
.get(f, {})
|
||||
.get("basic blocks", {})
|
||||
.keys()
|
||||
):
|
||||
yield va
|
||||
|
||||
def extract_basic_block_features(self, f, bb):
|
||||
for p in (
|
||||
self.features.get("functions", {}) # noqa: E127 line over-indented
|
||||
.get(f, {})
|
||||
.get("basic blocks", {})
|
||||
.get(bb, {})
|
||||
.get("features", [])
|
||||
):
|
||||
va, feature = p
|
||||
yield feature, va
|
||||
|
||||
def get_instructions(self, f, bb):
|
||||
for va in sorted(
|
||||
self.features.get("functions", {}) # noqa: E127 line over-indented
|
||||
.get(f, {})
|
||||
.get("basic blocks", {})
|
||||
.get(bb, {})
|
||||
.get("instructions", {})
|
||||
.keys()
|
||||
):
|
||||
yield va
|
||||
|
||||
def extract_insn_features(self, f, bb, insn):
|
||||
for p in (
|
||||
self.features.get("functions", {}) # noqa: E127 line over-indented
|
||||
.get(f, {})
|
||||
.get("basic blocks", {})
|
||||
.get(bb, {})
|
||||
.get("instructions", {})
|
||||
.get(insn, {})
|
||||
.get("features", [])
|
||||
):
|
||||
va, feature = p
|
||||
yield feature, va
|
||||
|
||||
@@ -2,36 +2,38 @@ import io
|
||||
import logging
|
||||
import binascii
|
||||
import contextlib
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
import pefile
|
||||
|
||||
import capa.features
|
||||
import capa.features.extractors.elf
|
||||
import capa.features.extractors.pefile
|
||||
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String
|
||||
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String, Feature
|
||||
from capa.features.freeze import is_freeze
|
||||
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_file_strings(buf, **kwargs):
|
||||
def extract_file_strings(buf, **kwargs) -> Iterator[Tuple[String, Address]]:
|
||||
"""
|
||||
extract ASCII and UTF-16 LE strings from file
|
||||
"""
|
||||
for s in capa.features.extractors.strings.extract_ascii_strings(buf):
|
||||
yield String(s.s), s.offset
|
||||
yield String(s.s), FileOffsetAddress(s.offset)
|
||||
|
||||
for s in capa.features.extractors.strings.extract_unicode_strings(buf):
|
||||
yield String(s.s), s.offset
|
||||
yield String(s.s), FileOffsetAddress(s.offset)
|
||||
|
||||
|
||||
def extract_format(buf):
|
||||
def extract_format(buf) -> Iterator[Tuple[Feature, Address]]:
|
||||
if buf.startswith(b"MZ"):
|
||||
yield Format(FORMAT_PE), 0x0
|
||||
yield Format(FORMAT_PE), NO_ADDRESS
|
||||
elif buf.startswith(b"\x7fELF"):
|
||||
yield Format(FORMAT_ELF), 0x0
|
||||
yield Format(FORMAT_ELF), NO_ADDRESS
|
||||
elif is_freeze(buf):
|
||||
yield Format(FORMAT_FREEZE), 0x0
|
||||
yield Format(FORMAT_FREEZE), NO_ADDRESS
|
||||
else:
|
||||
# we likely end up here:
|
||||
# 1. handling a file format (e.g. macho)
|
||||
@@ -41,7 +43,7 @@ def extract_format(buf):
|
||||
return
|
||||
|
||||
|
||||
def extract_arch(buf):
|
||||
def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
|
||||
if buf.startswith(b"MZ"):
|
||||
yield from capa.features.extractors.pefile.extract_file_arch(pe=pefile.PE(data=buf))
|
||||
|
||||
@@ -53,7 +55,7 @@ def extract_arch(buf):
|
||||
logger.debug("unsupported arch: %s", arch)
|
||||
return
|
||||
|
||||
yield Arch(arch), 0x0
|
||||
yield Arch(arch), NO_ADDRESS
|
||||
|
||||
else:
|
||||
# we likely end up here:
|
||||
@@ -70,9 +72,9 @@ def extract_arch(buf):
|
||||
return
|
||||
|
||||
|
||||
def extract_os(buf):
|
||||
def extract_os(buf) -> Iterator[Tuple[Feature, Address]]:
|
||||
if buf.startswith(b"MZ"):
|
||||
yield OS(OS_WINDOWS), 0x0
|
||||
yield OS(OS_WINDOWS), NO_ADDRESS
|
||||
elif buf.startswith(b"\x7fELF"):
|
||||
with contextlib.closing(io.BytesIO(buf)) as f:
|
||||
os = capa.features.extractors.elf.detect_elf_os(f)
|
||||
@@ -81,7 +83,7 @@ def extract_os(buf):
|
||||
logger.debug("unsupported os: %s", os)
|
||||
return
|
||||
|
||||
yield OS(os), 0x0
|
||||
yield OS(os), NO_ADDRESS
|
||||
|
||||
else:
|
||||
# we likely end up here:
|
||||
|
||||
@@ -8,17 +8,17 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, List, Tuple
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from capa.features.common import Feature
|
||||
from typing import List, Tuple, Iterator
|
||||
|
||||
import dnfile
|
||||
from dncil.clr.token import Token
|
||||
|
||||
import capa.features.extractors
|
||||
import capa.features.extractors.dnfile.file
|
||||
import capa.features.extractors.dnfile.insn
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import NO_ADDRESS, Address, DNTokenAddress, DNTokenOffsetAddress, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
|
||||
from capa.features.extractors.dnfile.helpers import get_dotnet_managed_method_bodies
|
||||
|
||||
|
||||
@@ -28,12 +28,12 @@ class DnfileFeatureExtractor(FeatureExtractor):
|
||||
self.pe: dnfile.dnPE = dnfile.dnPE(path)
|
||||
|
||||
# pre-compute these because we'll yield them at *every* scope.
|
||||
self.global_features: List[Tuple[Feature, int]] = []
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe))
|
||||
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe))
|
||||
|
||||
def get_base_address(self):
|
||||
return 0x0
|
||||
return NO_ADDRESS
|
||||
|
||||
def extract_global_features(self):
|
||||
yield from self.global_features
|
||||
@@ -41,30 +41,31 @@ class DnfileFeatureExtractor(FeatureExtractor):
|
||||
def extract_file_features(self):
|
||||
yield from capa.features.extractors.dnfile.file.extract_features(self.pe)
|
||||
|
||||
def get_functions(self):
|
||||
# data structure shared across functions yielded here.
|
||||
# useful for caching analysis relevant across a single workspace.
|
||||
ctx = {}
|
||||
ctx["pe"] = self.pe
|
||||
|
||||
for f in get_dotnet_managed_method_bodies(self.pe):
|
||||
setattr(f, "ctx", ctx)
|
||||
yield f
|
||||
def get_functions(self) -> Iterator[FunctionHandle]:
|
||||
for token, f in get_dotnet_managed_method_bodies(self.pe):
|
||||
yield FunctionHandle(address=DNTokenAddress(Token(token)), inner=f, ctx={"pe": self.pe})
|
||||
|
||||
def extract_function_features(self, f):
|
||||
# TODO
|
||||
yield from []
|
||||
|
||||
def get_basic_blocks(self, f):
|
||||
def get_basic_blocks(self, f) -> Iterator[BBHandle]:
|
||||
# each dotnet method is considered 1 basic block
|
||||
yield f
|
||||
yield BBHandle(
|
||||
address=f.address,
|
||||
inner=f.inner,
|
||||
)
|
||||
|
||||
def extract_basic_block_features(self, f, bb):
|
||||
def extract_basic_block_features(self, fh, bbh):
|
||||
# we don't support basic block features
|
||||
yield from []
|
||||
|
||||
def get_instructions(self, f, bb):
|
||||
yield from f.instructions
|
||||
def get_instructions(self, fh, bbh):
|
||||
for insn in bbh.inner.instructions:
|
||||
yield InsnHandle(
|
||||
address=DNTokenOffsetAddress(bbh.address.token, insn.offset - (fh.inner.offset + fh.inner.header_size)),
|
||||
inner=insn,
|
||||
)
|
||||
|
||||
def extract_insn_features(self, f, bb, insn):
|
||||
yield from capa.features.extractors.dnfile.insn.extract_features(f, bb, insn)
|
||||
def extract_insn_features(self, fh, bbh, ih) -> Iterator[Tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.dnfile.insn.extract_features(fh, bbh, ih)
|
||||
|
||||
@@ -8,55 +8,48 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Tuple, Iterator
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import dnfile
|
||||
from capa.features.common import (
|
||||
Feature,
|
||||
Format,
|
||||
String,
|
||||
Characteristic,
|
||||
Namespace,
|
||||
Class,
|
||||
)
|
||||
import dnfile
|
||||
|
||||
import capa.features.extractors
|
||||
import capa.features.extractors.dotnetfile
|
||||
from capa.features.file import Import, FunctionName
|
||||
from capa.features.common import Class, Format, String, Feature, Namespace, Characteristic
|
||||
from capa.features.address import Address
|
||||
|
||||
|
||||
def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, int]]:
|
||||
def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, Address]]:
|
||||
yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe=pe)
|
||||
|
||||
|
||||
def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, int]]:
|
||||
def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, Address]]:
|
||||
yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe)
|
||||
|
||||
|
||||
def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[Tuple[FunctionName, int]]:
|
||||
def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[Tuple[FunctionName, Address]]:
|
||||
yield from capa.features.extractors.dotnetfile.extract_file_function_names(pe=pe)
|
||||
|
||||
|
||||
def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, int]]:
|
||||
def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, Address]]:
|
||||
yield from capa.features.extractors.dotnetfile.extract_file_strings(pe=pe)
|
||||
|
||||
|
||||
def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[Tuple[Characteristic, int]]:
|
||||
def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[Tuple[Characteristic, Address]]:
|
||||
yield from capa.features.extractors.dotnetfile.extract_file_mixed_mode_characteristic_features(pe=pe)
|
||||
|
||||
|
||||
def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[Tuple[Namespace, int]]:
|
||||
def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[Tuple[Namespace, Address]]:
|
||||
yield from capa.features.extractors.dotnetfile.extract_file_namespace_features(pe=pe)
|
||||
|
||||
|
||||
def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[Tuple[Class, int]]:
|
||||
def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[Tuple[Class, Address]]:
|
||||
yield from capa.features.extractors.dotnetfile.extract_file_class_features(pe=pe)
|
||||
|
||||
|
||||
def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for (feature, token) in file_handler(pe):
|
||||
yield feature, token
|
||||
for (feature, address) in file_handler(pe):
|
||||
yield feature, address
|
||||
|
||||
|
||||
FILE_HANDLERS = (
|
||||
|
||||
@@ -196,9 +196,12 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnMethod]:
|
||||
yield DnMethod(token, row.TypeNamespace, row.TypeName, index.row.Name)
|
||||
|
||||
|
||||
def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody]:
|
||||
def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]:
|
||||
"""get managed methods from MethodDef table"""
|
||||
for row in iter_dotnet_table(pe, "MethodDef"):
|
||||
if not hasattr(pe.net.mdtables, "MethodDef"):
|
||||
return
|
||||
|
||||
for (rid, row) in enumerate(pe.net.mdtables.MethodDef):
|
||||
if not row.ImplFlags.miIL or any((row.Flags.mdAbstract, row.Flags.mdPinvokeImpl)):
|
||||
# skip methods that do not have a method body
|
||||
continue
|
||||
@@ -207,7 +210,8 @@ def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody]
|
||||
if body is None:
|
||||
continue
|
||||
|
||||
yield body
|
||||
token: int = calculate_dotnet_token_value(dnfile.enums.MetadataTables.MethodDef.value, rid + 1)
|
||||
yield token, body
|
||||
|
||||
|
||||
def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod]:
|
||||
|
||||
@@ -8,20 +8,19 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Dict, Tuple, Union, Iterator, Optional
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from dncil.cil.instruction import Instruction
|
||||
from dncil.cil.body import CilMethodBody
|
||||
from capa.features.common import Feature
|
||||
from typing import Any, Dict, Tuple, Union, Iterator, Optional
|
||||
|
||||
import dnfile
|
||||
from dncil.cil.body import CilMethodBody
|
||||
from dncil.clr.token import Token, StringToken, InvalidToken
|
||||
from dncil.cil.opcode import OpCodes
|
||||
from dncil.cil.instruction import Instruction
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
from capa.features.insn import API, Number
|
||||
from capa.features.common import Class, String, Namespace, Characteristic
|
||||
from capa.features.common import Class, String, Feature, Namespace, Characteristic
|
||||
from capa.features.address import Address
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||
from capa.features.extractors.dnfile.helpers import (
|
||||
DnClass,
|
||||
DnMethod,
|
||||
@@ -70,47 +69,47 @@ def get_callee(ctx: Dict, token: int) -> Union[DnMethod, DnUnmanagedMethod, None
|
||||
return callee
|
||||
|
||||
|
||||
def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[API, int]]:
|
||||
def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse instruction API features"""
|
||||
insn: Instruction = ih.inner
|
||||
|
||||
if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
|
||||
return
|
||||
|
||||
callee: Union[DnMethod, DnUnmanagedMethod, None] = get_callee(f.ctx, insn.operand.value)
|
||||
callee: Union[DnMethod, DnUnmanagedMethod, None] = get_callee(fh.ctx, insn.operand.value)
|
||||
if callee is None:
|
||||
return
|
||||
|
||||
if isinstance(callee, DnUnmanagedMethod):
|
||||
# like kernel32.CreateFileA
|
||||
for name in capa.features.extractors.helpers.generate_symbols(callee.modulename, callee.methodname):
|
||||
yield API(name), insn.offset
|
||||
yield API(name), ih.address
|
||||
else:
|
||||
# like System.IO.File::Delete
|
||||
yield API(str(callee)), insn.offset
|
||||
yield API(str(callee)), ih.address
|
||||
|
||||
|
||||
def extract_insn_class_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[Class, int]]:
|
||||
def extract_insn_class_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Class, Address]]:
|
||||
"""parse instruction class features"""
|
||||
if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
|
||||
if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
|
||||
return
|
||||
|
||||
row: Any = resolve_dotnet_token(f.ctx["pe"], Token(insn.operand.value))
|
||||
row: Any = resolve_dotnet_token(fh.ctx["pe"], Token(ih.inner.operand.value))
|
||||
|
||||
if not isinstance(row, dnfile.mdtable.MemberRefRow):
|
||||
return
|
||||
if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow, dnfile.mdtable.TypeDefRow)):
|
||||
return
|
||||
|
||||
yield Class(DnClass.format_name(row.Class.row.TypeNamespace, row.Class.row.TypeName)), insn.offset
|
||||
yield Class(DnClass.format_name(row.Class.row.TypeNamespace, row.Class.row.TypeName)), ih.address
|
||||
|
||||
|
||||
def extract_insn_namespace_features(
|
||||
f: CilMethodBody, bb: CilMethodBody, insn: Instruction
|
||||
) -> Iterator[Tuple[Namespace, int]]:
|
||||
def extract_insn_namespace_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Namespace, Address]]:
|
||||
"""parse instruction namespace features"""
|
||||
if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
|
||||
if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
|
||||
return
|
||||
|
||||
row: Any = resolve_dotnet_token(f.ctx["pe"], Token(insn.operand.value))
|
||||
row: Any = resolve_dotnet_token(fh.ctx["pe"], Token(ih.inner.operand.value))
|
||||
|
||||
if not isinstance(row, dnfile.mdtable.MemberRefRow):
|
||||
return
|
||||
@@ -119,55 +118,58 @@ def extract_insn_namespace_features(
|
||||
if not row.Class.row.TypeNamespace:
|
||||
return
|
||||
|
||||
yield Namespace(row.Class.row.TypeNamespace), insn.offset
|
||||
yield Namespace(row.Class.row.TypeNamespace), ih.address
|
||||
|
||||
|
||||
def extract_insn_number_features(
|
||||
f: CilMethodBody, bb: CilMethodBody, insn: Instruction
|
||||
) -> Iterator[Tuple[Number, int]]:
|
||||
def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse instruction number features"""
|
||||
insn: Instruction = ih.inner
|
||||
|
||||
if insn.is_ldc():
|
||||
yield Number(insn.get_ldc()), insn.offset
|
||||
yield Number(insn.get_ldc()), ih.address
|
||||
|
||||
|
||||
def extract_insn_string_features(
|
||||
f: CilMethodBody, bb: CilMethodBody, insn: Instruction
|
||||
) -> Iterator[Tuple[String, int]]:
|
||||
def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse instruction string features"""
|
||||
f: CilMethodBody = fh.inner
|
||||
insn: Instruction = ih.inner
|
||||
|
||||
if not insn.is_ldstr():
|
||||
return
|
||||
|
||||
if not isinstance(insn.operand, StringToken):
|
||||
return
|
||||
|
||||
user_string: Optional[str] = read_dotnet_user_string(f.ctx["pe"], insn.operand)
|
||||
user_string: Optional[str] = read_dotnet_user_string(fh.ctx["pe"], insn.operand)
|
||||
if user_string is None:
|
||||
return
|
||||
|
||||
yield String(user_string), insn.offset
|
||||
yield String(user_string), ih.address
|
||||
|
||||
|
||||
def extract_unmanaged_call_characteristic_features(
|
||||
f: CilMethodBody, bb: CilMethodBody, insn: Instruction
|
||||
) -> Iterator[Tuple[Characteristic, int]]:
|
||||
fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Characteristic, Address]]:
|
||||
insn: Instruction = ih.inner
|
||||
if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
|
||||
return
|
||||
|
||||
token: Any = resolve_dotnet_token(f.ctx["pe"], insn.operand)
|
||||
token: Any = resolve_dotnet_token(fh.ctx["pe"], insn.operand)
|
||||
if isinstance(token, InvalidToken):
|
||||
return
|
||||
if not isinstance(token, dnfile.mdtable.MethodDefRow):
|
||||
return
|
||||
|
||||
if any((token.Flags.mdPinvokeImpl, token.ImplFlags.miUnmanaged, token.ImplFlags.miNative)):
|
||||
yield Characteristic("unmanaged call"), insn.offset
|
||||
yield Characteristic("unmanaged call"), ih.address
|
||||
|
||||
|
||||
def extract_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""extract instruction features"""
|
||||
for inst_handler in INSTRUCTION_HANDLERS:
|
||||
for (feature, offset) in inst_handler(f, bb, insn):
|
||||
yield feature, offset
|
||||
for (feature, addr) in inst_handler(fh, bbh, ih):
|
||||
assert isinstance(addr, Address)
|
||||
yield feature, addr
|
||||
|
||||
|
||||
INSTRUCTION_HANDLERS = (
|
||||
|
||||
@@ -0,0 +1,116 @@
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
import dnfile
|
||||
import pefile
|
||||
|
||||
from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature
|
||||
from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_file_format(**kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||
yield Format(FORMAT_DOTNET), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_os(**kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||
yield OS(OS_ANY), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_arch(pe, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||
# to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020
|
||||
# .NET 4.5 added option: any CPU, 32-bit preferred
|
||||
if pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE:
|
||||
yield Arch(ARCH_I386), NO_ADDRESS
|
||||
elif not pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE_PLUS:
|
||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||
else:
|
||||
yield Arch(ARCH_ANY), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, address in file_handler(pe=pe): # type: ignore
|
||||
yield feature, address
|
||||
|
||||
|
||||
FILE_HANDLERS = (
|
||||
# extract_file_export_names,
|
||||
# extract_file_import_names,
|
||||
# extract_file_section_names,
|
||||
# extract_file_strings,
|
||||
# extract_file_function_names,
|
||||
extract_file_format,
|
||||
)
|
||||
|
||||
|
||||
def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
|
||||
for handler in GLOBAL_HANDLERS:
|
||||
for feature, addr in handler(pe=pe): # type: ignore
|
||||
yield feature, addr
|
||||
|
||||
|
||||
GLOBAL_HANDLERS = (
|
||||
extract_file_os,
|
||||
extract_file_arch,
|
||||
)
|
||||
|
||||
|
||||
class DnfileFeatureExtractor(FeatureExtractor):
|
||||
def __init__(self, path: str):
|
||||
super(DnfileFeatureExtractor, self).__init__()
|
||||
self.path: str = path
|
||||
self.pe: dnfile.dnPE = dnfile.dnPE(path)
|
||||
|
||||
def get_base_address(self) -> AbsoluteVirtualAddress:
|
||||
return AbsoluteVirtualAddress(0x0)
|
||||
|
||||
def get_entry_point(self) -> int:
|
||||
# self.pe.net.Flags.CLR_NATIVE_ENTRYPOINT
|
||||
# True: native EP: RVA
|
||||
# False: managed EP: Token
|
||||
return self.pe.net.struct.EntryPointTokenOrRva
|
||||
|
||||
def extract_global_features(self):
|
||||
yield from extract_global_features(self.pe)
|
||||
|
||||
def extract_file_features(self):
|
||||
yield from extract_file_features(self.pe)
|
||||
|
||||
def is_dotnet_file(self) -> bool:
|
||||
return bool(self.pe.net)
|
||||
|
||||
def is_mixed_mode(self) -> bool:
|
||||
return not bool(self.pe.net.Flags.CLR_ILONLY)
|
||||
|
||||
def get_runtime_version(self) -> Tuple[int, int]:
|
||||
return self.pe.net.struct.MajorRuntimeVersion, self.pe.net.struct.MinorRuntimeVersion
|
||||
|
||||
def get_meta_version_string(self) -> str:
|
||||
return self.pe.net.metadata.struct.Version.rstrip(b"\x00").decode("utf-8")
|
||||
|
||||
def get_functions(self):
|
||||
raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
|
||||
|
||||
def extract_function_features(self, f):
|
||||
raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
|
||||
|
||||
def get_basic_blocks(self, f):
|
||||
raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
|
||||
|
||||
def extract_basic_block_features(self, f, bb):
|
||||
raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
|
||||
|
||||
def get_instructions(self, f, bb):
|
||||
raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
|
||||
|
||||
def extract_insn_features(self, f, bb, insn):
|
||||
raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
|
||||
|
||||
def is_library_function(self, va):
|
||||
raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
|
||||
|
||||
def get_function_name(self, va):
|
||||
raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
|
||||
@@ -4,6 +4,7 @@ from typing import Tuple, Iterator
|
||||
|
||||
import dnfile
|
||||
import pefile
|
||||
from dncil.clr.token import Token
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
from capa.features.file import Import, FunctionName
|
||||
@@ -22,6 +23,7 @@ from capa.features.common import (
|
||||
Namespace,
|
||||
Characteristic,
|
||||
)
|
||||
from capa.features.address import NO_ADDRESS, Address, DNTokenAddress, DNTokenOffsetAddress, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
from capa.features.extractors.dnfile.helpers import (
|
||||
DnClass,
|
||||
@@ -37,27 +39,27 @@ from capa.features.extractors.dnfile.helpers import (
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_file_format(**kwargs) -> Iterator[Tuple[Format, int]]:
|
||||
yield Format(FORMAT_DOTNET), 0x0
|
||||
def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]:
|
||||
yield Format(FORMAT_DOTNET), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, int]]:
|
||||
def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, Address]]:
|
||||
for method in get_dotnet_managed_imports(pe):
|
||||
# like System.IO.File::OpenRead
|
||||
yield Import(str(method)), method.token
|
||||
yield Import(str(method)), DNTokenAddress(Token(method.token))
|
||||
|
||||
for imp in get_dotnet_unmanaged_imports(pe):
|
||||
# like kernel32.CreateFileA
|
||||
for name in capa.features.extractors.helpers.generate_symbols(imp.modulename, imp.methodname):
|
||||
yield Import(name), imp.token
|
||||
yield Import(name), DNTokenAddress(Token(imp.token))
|
||||
|
||||
|
||||
def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, int]]:
|
||||
def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, Address]]:
|
||||
for method in get_dotnet_managed_methods(pe):
|
||||
yield FunctionName(str(method)), method.token
|
||||
yield FunctionName(str(method)), DNTokenAddress(Token(method.token))
|
||||
|
||||
|
||||
def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Namespace, int]]:
|
||||
def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Namespace, Address]]:
|
||||
"""emit namespace features from TypeRef and TypeDef tables"""
|
||||
|
||||
# namespaces may be referenced multiple times, so we need to filter
|
||||
@@ -74,48 +76,50 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
|
||||
|
||||
for namespace in namespaces:
|
||||
# namespaces do not have an associated token, so we yield NO_ADDRESS
|
||||
yield Namespace(namespace), 0x0
|
||||
yield Namespace(namespace), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, int]]:
|
||||
def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]:
|
||||
"""emit class features from TypeRef and TypeDef tables"""
|
||||
for (rid, row) in enumerate(iter_dotnet_table(pe, "TypeDef")):
|
||||
token = calculate_dotnet_token_value(pe.net.mdtables.TypeDef.number, rid + 1)
|
||||
yield Class(DnClass.format_name(row.TypeNamespace, row.TypeName)), token
|
||||
yield Class(DnClass.format_name(row.TypeNamespace, row.TypeName)), DNTokenAddress(Token(token))
|
||||
|
||||
for (rid, row) in enumerate(iter_dotnet_table(pe, "TypeRef")):
|
||||
token = calculate_dotnet_token_value(pe.net.mdtables.TypeRef.number, rid + 1)
|
||||
yield Class(DnClass.format_name(row.TypeNamespace, row.TypeName)), token
|
||||
yield Class(DnClass.format_name(row.TypeNamespace, row.TypeName)), DNTokenAddress(Token(token))
|
||||
|
||||
|
||||
def extract_file_os(**kwargs) -> Iterator[Tuple[OS, int]]:
|
||||
yield OS(OS_ANY), 0x0
|
||||
def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]:
|
||||
yield OS(OS_ANY), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, int]]:
|
||||
def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address]]:
|
||||
# to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020
|
||||
# .NET 4.5 added option: any CPU, 32-bit preferred
|
||||
if pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE:
|
||||
yield Arch(ARCH_I386), 0x0
|
||||
yield Arch(ARCH_I386), NO_ADDRESS
|
||||
elif not pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE_PLUS:
|
||||
yield Arch(ARCH_AMD64), 0x0
|
||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||
else:
|
||||
yield Arch(ARCH_ANY), 0x0
|
||||
yield Arch(ARCH_ANY), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[String, int]]:
|
||||
def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[String, Address]]:
|
||||
yield from capa.features.extractors.common.extract_file_strings(pe.__data__)
|
||||
|
||||
|
||||
def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Characteristic, int]]:
|
||||
def extract_file_mixed_mode_characteristic_features(
|
||||
pe: dnfile.dnPE, **kwargs
|
||||
) -> Iterator[Tuple[Characteristic, Address]]:
|
||||
if is_dotnet_mixed_mode(pe):
|
||||
yield Characteristic("mixed mode"), 0x0
|
||||
yield Characteristic("mixed mode"), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, va in file_handler(pe=pe): # type: ignore
|
||||
yield feature, va
|
||||
for feature, addr in file_handler(pe=pe): # type: ignore
|
||||
yield feature, addr
|
||||
|
||||
|
||||
FILE_HANDLERS = (
|
||||
@@ -129,7 +133,7 @@ FILE_HANDLERS = (
|
||||
)
|
||||
|
||||
|
||||
def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
|
||||
for handler in GLOBAL_HANDLERS:
|
||||
for feature, va in handler(pe=pe): # type: ignore
|
||||
yield feature, va
|
||||
@@ -147,8 +151,8 @@ class DotnetFileFeatureExtractor(FeatureExtractor):
|
||||
self.path: str = path
|
||||
self.pe: dnfile.dnPE = dnfile.dnPE(path)
|
||||
|
||||
def get_base_address(self) -> int:
|
||||
return 0x0
|
||||
def get_base_address(self):
|
||||
return NO_ADDRESS
|
||||
|
||||
def get_entry_point(self) -> int:
|
||||
# self.pe.net.Flags.CLR_NATIVE_ENTRYPOINT
|
||||
|
||||
@@ -60,7 +60,10 @@ GNU_ABI_TAG = {
|
||||
}
|
||||
|
||||
|
||||
def detect_elf_os(f: BinaryIO) -> str:
|
||||
def detect_elf_os(f) -> str:
|
||||
"""
|
||||
f: type Union[BinaryIO, IDAIO]
|
||||
"""
|
||||
f.seek(0x0)
|
||||
file_header = f.read(0x40)
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ from elftools.elf.elffile import ELFFile, SymbolTableSection
|
||||
import capa.features.extractors.common
|
||||
from capa.features.file import Import, Section
|
||||
from capa.features.common import OS, FORMAT_ELF, Arch, Format, Feature
|
||||
from capa.features.address import NO_ADDRESS, FileOffsetAddress, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.elf import Arch as ElfArch
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
|
||||
@@ -39,15 +40,15 @@ def extract_file_import_names(elf, **kwargs):
|
||||
if symbol.name and symbol.entry.st_info.type == "STT_FUNC":
|
||||
# TODO symbol address
|
||||
# TODO symbol version info?
|
||||
yield Import(symbol.name), 0x0
|
||||
yield Import(symbol.name), FileOffsetAddress(0x0)
|
||||
|
||||
|
||||
def extract_file_section_names(elf, **kwargs):
|
||||
for section in elf.iter_sections():
|
||||
if section.name:
|
||||
yield Section(section.name), section.header.sh_addr
|
||||
yield Section(section.name), AbsoluteVirtualAddress(section.header.sh_addr)
|
||||
elif section.is_null():
|
||||
yield Section("NULL"), section.header.sh_addr
|
||||
yield Section("NULL"), AbsoluteVirtualAddress(section.header.sh_addr)
|
||||
|
||||
|
||||
def extract_file_strings(buf, **kwargs):
|
||||
@@ -58,31 +59,31 @@ def extract_file_os(elf, buf, **kwargs):
|
||||
# our current approach does not always get an OS value, e.g. for packed samples
|
||||
# for file limitation purposes, we're more lax here
|
||||
try:
|
||||
os = next(capa.features.extractors.common.extract_os(buf))
|
||||
yield os
|
||||
os_tuple = next(capa.features.extractors.common.extract_os(buf))
|
||||
yield os_tuple
|
||||
except StopIteration:
|
||||
yield OS("unknown"), 0x0
|
||||
yield OS("unknown"), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_format(**kwargs):
|
||||
yield Format(FORMAT_ELF), 0x0
|
||||
yield Format(FORMAT_ELF), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_arch(elf, **kwargs):
|
||||
# TODO merge with capa.features.extractors.elf.detect_elf_arch()
|
||||
arch = elf.get_machine_arch()
|
||||
if arch == "x86":
|
||||
yield Arch(ElfArch.I386), 0x0
|
||||
yield Arch(ElfArch.I386), NO_ADDRESS
|
||||
elif arch == "x64":
|
||||
yield Arch(ElfArch.AMD64), 0x0
|
||||
yield Arch(ElfArch.AMD64), NO_ADDRESS
|
||||
else:
|
||||
logger.warning("unsupported architecture: %s", arch)
|
||||
|
||||
|
||||
def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]:
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, va in file_handler(elf=elf, buf=buf): # type: ignore
|
||||
yield feature, va
|
||||
for feature, addr in file_handler(elf=elf, buf=buf): # type: ignore
|
||||
yield feature, addr
|
||||
|
||||
|
||||
FILE_HANDLERS = (
|
||||
@@ -97,8 +98,8 @@ FILE_HANDLERS = (
|
||||
|
||||
def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]:
|
||||
for global_handler in GLOBAL_HANDLERS:
|
||||
for feature, va in global_handler(elf=elf, buf=buf): # type: ignore
|
||||
yield feature, va
|
||||
for feature, addr in global_handler(elf=elf, buf=buf): # type: ignore
|
||||
yield feature, addr
|
||||
|
||||
|
||||
GLOBAL_HANDLERS = (
|
||||
@@ -118,21 +119,21 @@ class ElfFeatureExtractor(FeatureExtractor):
|
||||
# virtual address of the first segment with type LOAD
|
||||
for segment in self.elf.iter_segments():
|
||||
if segment.header.p_type == "PT_LOAD":
|
||||
return segment.header.p_vaddr
|
||||
return AbsoluteVirtualAddress(segment.header.p_vaddr)
|
||||
|
||||
def extract_global_features(self):
|
||||
with open(self.path, "rb") as f:
|
||||
buf = f.read()
|
||||
|
||||
for feature, va in extract_global_features(self.elf, buf):
|
||||
yield feature, va
|
||||
for feature, addr in extract_global_features(self.elf, buf):
|
||||
yield feature, addr
|
||||
|
||||
def extract_file_features(self):
|
||||
with open(self.path, "rb") as f:
|
||||
buf = f.read()
|
||||
|
||||
for feature, va in extract_file_features(self.elf, buf):
|
||||
yield feature, va
|
||||
for feature, addr in extract_file_features(self.elf, buf):
|
||||
yield feature, addr
|
||||
|
||||
def get_functions(self):
|
||||
raise NotImplementedError("ElfFeatureExtractor can only be used to extract file features")
|
||||
|
||||
@@ -8,22 +8,21 @@
|
||||
|
||||
import string
|
||||
import struct
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
import idaapi
|
||||
|
||||
import capa.features.extractors.ida.helpers
|
||||
from capa.features.common import Characteristic
|
||||
from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import Address
|
||||
from capa.features.basicblock import BasicBlock
|
||||
from capa.features.extractors.ida import helpers
|
||||
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
|
||||
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
||||
|
||||
|
||||
def get_printable_len(op):
|
||||
"""Return string length if all operand bytes are ascii or utf16-le printable
|
||||
|
||||
args:
|
||||
op (IDA op_t)
|
||||
"""
|
||||
def get_printable_len(op: idaapi.op_t) -> int:
|
||||
"""Return string length if all operand bytes are ascii or utf16-le printable"""
|
||||
op_val = capa.features.extractors.ida.helpers.mask_op_val(op)
|
||||
|
||||
if op.dtype == idaapi.dt_byte:
|
||||
@@ -37,12 +36,12 @@ def get_printable_len(op):
|
||||
else:
|
||||
raise ValueError("Unhandled operand data type 0x%x." % op.dtype)
|
||||
|
||||
def is_printable_ascii(chars):
|
||||
return all(c < 127 and chr(c) in string.printable for c in chars)
|
||||
def is_printable_ascii(chars_: bytes):
|
||||
return all(c < 127 and chr(c) in string.printable for c in chars_)
|
||||
|
||||
def is_printable_utf16le(chars):
|
||||
if all(c == 0x00 for c in chars[1::2]):
|
||||
return is_printable_ascii(chars[::2])
|
||||
def is_printable_utf16le(chars_: bytes):
|
||||
if all(c == 0x00 for c in chars_[1::2]):
|
||||
return is_printable_ascii(chars_[::2])
|
||||
|
||||
if is_printable_ascii(chars):
|
||||
return idaapi.get_dtype_size(op.dtype)
|
||||
@@ -53,12 +52,8 @@ def get_printable_len(op):
|
||||
return 0
|
||||
|
||||
|
||||
def is_mov_imm_to_stack(insn):
|
||||
"""verify instruction moves immediate onto stack
|
||||
|
||||
args:
|
||||
insn (IDA insn_t)
|
||||
"""
|
||||
def is_mov_imm_to_stack(insn: idaapi.insn_t) -> bool:
|
||||
"""verify instruction moves immediate onto stack"""
|
||||
if insn.Op2.type != idaapi.o_imm:
|
||||
return False
|
||||
|
||||
@@ -71,14 +66,10 @@ def is_mov_imm_to_stack(insn):
|
||||
return True
|
||||
|
||||
|
||||
def bb_contains_stackstring(f, bb):
|
||||
def bb_contains_stackstring(f: idaapi.func_t, bb: idaapi.BasicBlock) -> bool:
|
||||
"""check basic block for stackstring indicators
|
||||
|
||||
true if basic block contains enough moves of constant bytes to the stack
|
||||
|
||||
args:
|
||||
f (IDA func_t)
|
||||
bb (IDA BasicBlock)
|
||||
"""
|
||||
count = 0
|
||||
for insn in capa.features.extractors.ida.helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
|
||||
@@ -89,39 +80,24 @@ def bb_contains_stackstring(f, bb):
|
||||
return False
|
||||
|
||||
|
||||
def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
    """extract stackstring indicators from basic block

    yields `Characteristic("stack string")` at the basic block's address when
    `bb_contains_stackstring` accepts the wrapped IDA objects; otherwise yields nothing.

    args:
      fh: function handle; its `inner` object is passed as the function
      bbh: basic block handle; its `inner` object is passed as the basic block
    """
    if bb_contains_stackstring(fh.inner, bbh.inner):
        yield Characteristic("stack string"), bbh.address
|
||||
|
||||
|
||||
def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
    """extract tight loop indicators from a basic block

    yields `Characteristic("tight loop")` at the basic block's address when the helper
    `is_basic_block_tight_loop` accepts the wrapped IDA basic block; otherwise yields nothing.

    args:
      fh: function handle (not used by this handler)
      bbh: basic block handle; its `inner` object is the block that gets tested
    """
    if capa.features.extractors.ida.helpers.is_basic_block_tight_loop(bbh.inner):
        yield Characteristic("tight loop"), bbh.address
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
    """extract basic block features

    runs every handler registered in `BASIC_BLOCK_HANDLERS` against the block and
    relays what they yield, then always emits one `BasicBlock()` feature at the block's address.

    args:
      fh: function handle, passed through to each handler
      bbh: basic block handle, passed through to each handler
    """
    for bb_handler in BASIC_BLOCK_HANDLERS:
        for feature, addr in bb_handler(fh, bbh):
            yield feature, addr
    # emitted unconditionally, after the handler results
    yield BasicBlock(), bbh.address
|
||||
|
||||
|
||||
BASIC_BLOCK_HANDLERS = (
|
||||
@@ -132,9 +108,10 @@ BASIC_BLOCK_HANDLERS = (
|
||||
|
||||
def main():
|
||||
features = []
|
||||
for f in helpers.get_functions(skip_thunks=True, skip_libs=True):
|
||||
for fhandle in helpers.get_functions(skip_thunks=True, skip_libs=True):
|
||||
f: idaapi.func_t = fhandle.inner
|
||||
for bb in idaapi.FlowChart(f, flags=idaapi.FC_PREDS):
|
||||
features.extend(list(extract_features(f, bb)))
|
||||
features.extend(list(extract_features(fhandle, bb)))
|
||||
|
||||
import pprint
|
||||
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import List, Tuple, Iterator
|
||||
|
||||
import idaapi
|
||||
|
||||
import capa.ida.helpers
|
||||
@@ -14,57 +16,20 @@ import capa.features.extractors.ida.insn
|
||||
import capa.features.extractors.ida.global_
|
||||
import capa.features.extractors.ida.function
|
||||
import capa.features.extractors.ida.basicblock
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
|
||||
|
||||
class FunctionHandle:
|
||||
"""this acts like an idaapi.func_t but with __int__()"""
|
||||
|
||||
def __init__(self, inner):
|
||||
self._inner = inner
|
||||
|
||||
def __int__(self):
|
||||
return self.start_ea
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self._inner, name)
|
||||
|
||||
|
||||
class BasicBlockHandle:
|
||||
"""this acts like an idaapi.BasicBlock but with __int__()"""
|
||||
|
||||
def __init__(self, inner):
|
||||
self._inner = inner
|
||||
|
||||
def __int__(self):
|
||||
return self.start_ea
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self._inner, name)
|
||||
|
||||
|
||||
class InstructionHandle:
|
||||
"""this acts like an idaapi.insn_t but with __int__()"""
|
||||
|
||||
def __init__(self, inner):
|
||||
self._inner = inner
|
||||
|
||||
def __int__(self):
|
||||
return self.ea
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self._inner, name)
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
|
||||
|
||||
|
||||
class IdaFeatureExtractor(FeatureExtractor):
    """feature extractor that reads from IDA through `idaapi` and the capa IDA helper modules.

    functions, basic blocks and instructions are exchanged as handles: each handle pairs an
    `AbsoluteVirtualAddress` with the underlying IDA object, reachable via `.inner`.
    """

    def __init__(self):
        super().__init__()
        # global features are computed once here and replayed by `extract_global_features`.
        self.global_features: List[Tuple[Feature, Address]] = []
        self.global_features.extend(capa.features.extractors.ida.global_.extract_os())
        self.global_features.extend(capa.features.extractors.ida.global_.extract_arch())

    def get_base_address(self) -> AbsoluteVirtualAddress:
        """return the image base reported by `idaapi.get_imagebase()`, wrapped as an address."""
        return AbsoluteVirtualAddress(idaapi.get_imagebase())

    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
        """yield the global features collected in `__init__`."""
        yield from self.global_features

    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
        """yield file-scope features from the IDA file extractor module."""
        yield from capa.features.extractors.ida.file.extract_features()

    def get_functions(self) -> Iterator[FunctionHandle]:
        """yield a handle for each function returned by the IDA helpers."""
        import capa.features.extractors.ida.helpers as ida_helpers

        # ignore library functions and thunk functions as identified by IDA
        yield from ida_helpers.get_functions(skip_thunks=True, skip_libs=True)

    @staticmethod
    def get_function(ea: int) -> FunctionHandle:
        """build a handle for the function that `idaapi.get_func` returns for `ea`.

        the handle's address is the function's `start_ea`, not `ea` itself.
        """
        # NOTE(review): idaapi.get_func presumably returns None when `ea` is not inside a
        # function, in which case the next line raises AttributeError - confirm against callers.
        f = idaapi.get_func(ea)
        return FunctionHandle(address=AbsoluteVirtualAddress(f.start_ea), inner=f)

    def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
        """yield function-scope features for `fh`."""
        yield from capa.features.extractors.ida.function.extract_features(fh)

    def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
        """yield a handle, addressed by `start_ea`, for each basic block of `fh`."""
        import capa.features.extractors.ida.helpers as ida_helpers

        for bb in ida_helpers.get_function_blocks(fh.inner):
            yield BBHandle(address=AbsoluteVirtualAddress(bb.start_ea), inner=bb)

    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
        """yield basic-block-scope features for `bbh`."""
        yield from capa.features.extractors.ida.basicblock.extract_features(fh, bbh)

    def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
        """yield a handle, addressed by `ea`, for each instruction in the block's [start_ea, end_ea) range."""
        import capa.features.extractors.ida.helpers as ida_helpers

        for insn in ida_helpers.get_instructions_in_range(bbh.inner.start_ea, bbh.inner.end_ea):
            yield InsnHandle(address=AbsoluteVirtualAddress(insn.ea), inner=insn)

    def extract_insn_features(
        self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
    ) -> Iterator[Tuple[Feature, Address]]:
        """yield instruction-scope features for `ih`."""
        yield from capa.features.extractors.ida.insn.extract_features(fh, bbh, ih)
|
||||
|
||||
@@ -7,27 +7,26 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import struct
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
import idc
|
||||
import idaapi
|
||||
import idautils
|
||||
import ida_loader
|
||||
|
||||
import capa.features.extractors.common
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.strings
|
||||
import capa.features.extractors.ida.helpers
|
||||
from capa.features.file import Export, Import, Section, FunctionName
|
||||
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Format, String, Characteristic
|
||||
from capa.features.common import FORMAT_PE, FORMAT_ELF, Format, Feature, Characteristic
|
||||
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
|
||||
|
||||
|
||||
def check_segment_for_pe(seg):
|
||||
def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]:
|
||||
"""check segment for embedded PE
|
||||
|
||||
adapted for IDA from:
|
||||
https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19
|
||||
|
||||
args:
|
||||
seg (IDA segment_t)
|
||||
"""
|
||||
seg_max = seg.end_ea
|
||||
mz_xor = [
|
||||
@@ -60,13 +59,13 @@ def check_segment_for_pe(seg):
|
||||
continue
|
||||
|
||||
if idc.get_bytes(peoff, 2) == pex:
|
||||
yield (off, i)
|
||||
yield off, i
|
||||
|
||||
for nextres in capa.features.extractors.ida.helpers.find_byte_sequence(off + 1, seg.end_ea, mzx):
|
||||
todo.append((nextres, mzx, pex, i))
|
||||
|
||||
|
||||
def extract_file_embedded_pe():
|
||||
def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
|
||||
"""extract embedded PE features
|
||||
|
||||
IDA must load resource sections for this to be complete
|
||||
@@ -75,16 +74,16 @@ def extract_file_embedded_pe():
|
||||
"""
|
||||
for seg in capa.features.extractors.ida.helpers.get_segments(skip_header_segments=True):
|
||||
for (ea, _) in check_segment_for_pe(seg):
|
||||
yield Characteristic("embedded pe"), ea
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(ea)
|
||||
|
||||
|
||||
def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]:
    """extract function exports

    yields one `Export` feature per entry reported by `idautils.Entries()`,
    located at that entry's address.
    """
    # each entry is a 4-tuple; only the address and the name are used here.
    for _, _, ea, name in idautils.Entries():
        yield Export(name), AbsoluteVirtualAddress(ea)
|
||||
|
||||
|
||||
def extract_file_import_names():
|
||||
def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
|
||||
"""extract function imports
|
||||
|
||||
1. imports by ordinal:
|
||||
@@ -96,11 +95,12 @@ def extract_file_import_names():
|
||||
- importname
|
||||
"""
|
||||
for (ea, info) in capa.features.extractors.ida.helpers.get_file_imports().items():
|
||||
addr = AbsoluteVirtualAddress(ea)
|
||||
if info[1] and info[2]:
|
||||
# e.g. in mimikatz: ('cabinet', 'FCIAddFile', 11L)
|
||||
# extract by name here and by ordinal below
|
||||
for name in capa.features.extractors.helpers.generate_symbols(info[0], info[1]):
|
||||
yield Import(name), ea
|
||||
yield Import(name), addr
|
||||
dll = info[0]
|
||||
symbol = "#%d" % (info[2])
|
||||
elif info[1]:
|
||||
@@ -113,10 +113,10 @@ def extract_file_import_names():
|
||||
continue
|
||||
|
||||
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
|
||||
yield Import(name), ea
|
||||
yield Import(name), addr
|
||||
|
||||
|
||||
def extract_file_section_names():
|
||||
def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]:
|
||||
"""extract section names
|
||||
|
||||
IDA must load resource sections for this to be complete
|
||||
@@ -124,10 +124,10 @@ def extract_file_section_names():
|
||||
- Check 'Load resource sections' when opening binary in IDA manually
|
||||
"""
|
||||
for seg in capa.features.extractors.ida.helpers.get_segments(skip_header_segments=True):
|
||||
yield Section(idaapi.get_segm_name(seg)), seg.start_ea
|
||||
yield Section(idaapi.get_segm_name(seg)), AbsoluteVirtualAddress(seg.start_ea)
|
||||
|
||||
|
||||
def extract_file_strings():
|
||||
def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
|
||||
"""extract ASCII and UTF-16 LE strings
|
||||
|
||||
IDA must load resource sections for this to be complete
|
||||
@@ -136,37 +136,33 @@ def extract_file_strings():
|
||||
"""
|
||||
for seg in capa.features.extractors.ida.helpers.get_segments():
|
||||
seg_buff = capa.features.extractors.ida.helpers.get_segment_buffer(seg)
|
||||
|
||||
for s in capa.features.extractors.strings.extract_ascii_strings(seg_buff):
|
||||
yield String(s.s), (seg.start_ea + s.offset)
|
||||
|
||||
for s in capa.features.extractors.strings.extract_unicode_strings(seg_buff):
|
||||
yield String(s.s), (seg.start_ea + s.offset)
|
||||
yield from capa.features.extractors.common.extract_file_strings(seg_buff)
|
||||
|
||||
|
||||
def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
    """
    extract the names of statically-linked library functions.

    only functions whose IDA flags include `FUNC_LIB` are reported. a name that starts
    with `_` is reported twice: once as-is and once with the leading underscore removed.
    """
    for ea in idautils.Functions():
        if not idaapi.get_func(ea).flags & idaapi.FUNC_LIB:
            continue

        # only build the address once we know a feature will be emitted
        addr = AbsoluteVirtualAddress(ea)
        name = idaapi.get_name(ea)
        yield FunctionName(name), addr
        if name.startswith("_"):
            # some linkers may prefix linked routines with a `_` to avoid name collisions.
            # extract features for both the mangled and un-mangled representations.
            # e.g. `_fwrite` -> `fwrite`
            # see: https://stackoverflow.com/a/2628384/87207
            yield FunctionName(name[1:]), addr
|
||||
|
||||
|
||||
def extract_file_format():
|
||||
def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
|
||||
file_info = idaapi.get_inf_structure()
|
||||
|
||||
if file_info.filetype == idaapi.f_PE:
|
||||
yield Format(FORMAT_PE), 0x0
|
||||
yield Format(FORMAT_PE), NO_ADDRESS
|
||||
elif file_info.filetype == idaapi.f_ELF:
|
||||
yield Format(FORMAT_ELF), 0x0
|
||||
yield Format(FORMAT_ELF), NO_ADDRESS
|
||||
elif file_info.filetype == idaapi.f_BIN:
|
||||
# no file type to return when processing a binary file, but we want to continue processing
|
||||
return
|
||||
@@ -174,11 +170,11 @@ def extract_file_format():
|
||||
raise NotImplementedError("file format: %d" % file_info.filetype)
|
||||
|
||||
|
||||
def extract_features() -> Iterator[Tuple[Feature, Address]]:
    """extract file features

    runs every handler registered in `FILE_HANDLERS` and relays the
    (feature, address) pairs they yield, in registration order.
    """
    for file_handler in FILE_HANDLERS:
        for feature, addr in file_handler():
            yield feature, addr
|
||||
|
||||
|
||||
FILE_HANDLERS = (
|
||||
|
||||
@@ -5,31 +5,27 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
import idaapi
|
||||
import idautils
|
||||
|
||||
import capa.features.extractors.ida.helpers
|
||||
from capa.features.common import Characteristic
|
||||
from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors import loops
|
||||
from capa.features.extractors.base_extractor import FunctionHandle
|
||||
|
||||
|
||||
def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
    """extract callers to a function

    yields one `Characteristic("calls to")` per code reference that
    `idautils.CodeRefsTo` reports for the function's start address; the feature is
    located at the referencing address, not at the function itself.

    args:
      fh: function handle; `fh.inner.start_ea` is the reference target
    """
    for ea in idautils.CodeRefsTo(fh.inner.start_ea, True):
        yield Characteristic("calls to"), AbsoluteVirtualAddress(ea)
|
||||
|
||||
|
||||
def extract_function_loop(f):
|
||||
"""extract loop indicators from a function
|
||||
|
||||
args:
|
||||
f (IDA func_t)
|
||||
"""
|
||||
def extract_function_loop(fh: FunctionHandle):
|
||||
"""extract loop indicators from a function"""
|
||||
f: idaapi.func_t = fh.inner
|
||||
edges = []
|
||||
|
||||
# construct control flow graph
|
||||
@@ -41,25 +37,16 @@ def extract_function_loop(f):
|
||||
yield Characteristic("loop"), f.start_ea
|
||||
|
||||
|
||||
def extract_recursive_call(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
    """extract recursive function call

    yields `Characteristic("recursive call")` at the function's address when the helper
    `is_function_recursive` accepts the wrapped IDA function; otherwise yields nothing.

    args:
      fh: function handle; its `inner` object is the function that gets tested
    """
    if capa.features.extractors.ida.helpers.is_function_recursive(fh.inner):
        yield Characteristic("recursive call"), fh.address
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
    """extract function features

    runs every handler registered in `FUNCTION_HANDLERS` against the function and
    relays the (feature, address) pairs they yield, in registration order.

    args:
      fh: function handle, passed through to each handler
    """
    for func_handler in FUNCTION_HANDLERS:
        for feature, addr in func_handler(fh):
            yield feature, addr
|
||||
|
||||
|
||||
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call)
|
||||
@@ -68,8 +55,8 @@ FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_r
|
||||
def main():
|
||||
""" """
|
||||
features = []
|
||||
for f in capa.features.extractors.ida.get_functions(skip_thunks=True, skip_libs=True):
|
||||
features.extend(list(extract_features(f)))
|
||||
for fhandle in capa.features.extractors.ida.helpers.get_functions(skip_thunks=True, skip_libs=True):
|
||||
features.extend(list(extract_features(fhandle)))
|
||||
|
||||
import pprint
|
||||
|
||||
|
||||
@@ -1,27 +1,29 @@
|
||||
import logging
|
||||
import contextlib
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
import idaapi
|
||||
import ida_loader
|
||||
|
||||
import capa.ida.helpers
|
||||
import capa.features.extractors.elf
|
||||
from capa.features.common import OS, ARCH_I386, ARCH_AMD64, OS_WINDOWS, Arch
|
||||
from capa.features.common import OS, ARCH_I386, ARCH_AMD64, OS_WINDOWS, Arch, Feature
|
||||
from capa.features.address import NO_ADDRESS, Address
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_os():
|
||||
format_name = ida_loader.get_file_type_name()
|
||||
def extract_os() -> Iterator[Tuple[Feature, Address]]:
|
||||
format_name: str = ida_loader.get_file_type_name()
|
||||
|
||||
if "PE" in format_name:
|
||||
yield OS(OS_WINDOWS), 0x0
|
||||
yield OS(OS_WINDOWS), NO_ADDRESS
|
||||
|
||||
elif "ELF" in format_name:
|
||||
with contextlib.closing(capa.ida.helpers.IDAIO()) as f:
|
||||
os = capa.features.extractors.elf.detect_elf_os(f)
|
||||
|
||||
yield OS(os), 0x0
|
||||
yield OS(os), NO_ADDRESS
|
||||
|
||||
else:
|
||||
# we likely end up here:
|
||||
@@ -38,12 +40,12 @@ def extract_os():
|
||||
return
|
||||
|
||||
|
||||
def extract_arch():
|
||||
info = idaapi.get_inf_structure()
|
||||
def extract_arch() -> Iterator[Tuple[Feature, Address]]:
|
||||
info: idaapi.idainfo = idaapi.get_inf_structure()
|
||||
if info.procname == "metapc" and info.is_64bit():
|
||||
yield Arch(ARCH_AMD64), 0x0
|
||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||
elif info.procname == "metapc" and info.is_32bit():
|
||||
yield Arch(ARCH_I386), 0x0
|
||||
yield Arch(ARCH_I386), NO_ADDRESS
|
||||
elif info.procname == "metapc":
|
||||
logger.debug("unsupported architecture: non-32-bit nor non-64-bit intel")
|
||||
return
|
||||
|
||||
@@ -5,14 +5,18 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Any, Dict, Tuple, Iterator
|
||||
|
||||
import idc
|
||||
import idaapi
|
||||
import idautils
|
||||
import ida_bytes
|
||||
|
||||
from capa.features.address import AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import FunctionHandle
|
||||
|
||||
def find_byte_sequence(start, end, seq):
|
||||
|
||||
def find_byte_sequence(start: int, end: int, seq: bytes) -> Iterator[int]:
|
||||
"""yield all ea of a given byte sequence
|
||||
|
||||
args:
|
||||
@@ -20,32 +24,32 @@ def find_byte_sequence(start, end, seq):
|
||||
end: max virtual address
|
||||
seq: bytes to search e.g. b"\x01\x03"
|
||||
"""
|
||||
seq = " ".join(["%02x" % b for b in seq])
|
||||
seqstr = " ".join(["%02x" % b for b in seq])
|
||||
while True:
|
||||
ea = idaapi.find_binary(start, end, seq, 0, idaapi.SEARCH_DOWN)
|
||||
# TODO find_binary: Deprecated. Please use ida_bytes.bin_search() instead.
|
||||
ea = idaapi.find_binary(start, end, seqstr, 0, idaapi.SEARCH_DOWN)
|
||||
if ea == idaapi.BADADDR:
|
||||
break
|
||||
start = ea + 1
|
||||
yield ea
|
||||
|
||||
|
||||
def get_functions(
    start: int = None, end: int = None, skip_thunks: bool = False, skip_libs: bool = False
) -> Iterator[FunctionHandle]:
    """get functions, range optional

    args:
      start: min virtual address
      end: max virtual address
      skip_thunks: when true, omit functions whose flags include `FUNC_THUNK`
      skip_libs: when true, omit functions whose flags include `FUNC_LIB`

    ret:
      yield a FunctionHandle per function: `address` is the function's ea, `inner` is the IDA func_t
    """
    for ea in idautils.Functions(start=start, end=end):
        f = idaapi.get_func(ea)
        if skip_thunks and (f.flags & idaapi.FUNC_THUNK):
            continue
        if skip_libs and (f.flags & idaapi.FUNC_LIB):
            continue
        yield FunctionHandle(address=AbsoluteVirtualAddress(ea), inner=f)
|
||||
|
||||
|
||||
def get_segments(skip_header_segments=False):
|
||||
def get_segments(skip_header_segments=False) -> Iterator[idaapi.segment_t]:
|
||||
"""get list of segments (sections) in the binary image
|
||||
|
||||
args:
|
||||
@@ -57,7 +61,7 @@ def get_segments(skip_header_segments=False):
|
||||
yield seg
|
||||
|
||||
|
||||
def get_segment_buffer(seg):
|
||||
def get_segment_buffer(seg: idaapi.segment_t) -> bytes:
|
||||
"""return bytes stored in a given segment
|
||||
|
||||
decrease buffer size until IDA is able to read bytes from the segment
|
||||
@@ -75,7 +79,7 @@ def get_segment_buffer(seg):
|
||||
return buff if buff else b""
|
||||
|
||||
|
||||
def get_file_imports():
|
||||
def get_file_imports() -> Dict[int, Tuple[str, str, int]]:
|
||||
"""get file imports"""
|
||||
imports = {}
|
||||
|
||||
@@ -105,14 +109,12 @@ def get_file_imports():
|
||||
return imports
|
||||
|
||||
|
||||
def get_instructions_in_range(start, end):
|
||||
def get_instructions_in_range(start: int, end: int) -> Iterator[idaapi.insn_t]:
|
||||
"""yield instructions in range
|
||||
|
||||
args:
|
||||
start: virtual address (inclusive)
|
||||
end: virtual address (exclusive)
|
||||
yield:
|
||||
(insn_t*)
|
||||
"""
|
||||
for head in idautils.Heads(start, end):
|
||||
insn = idautils.DecodeInstruction(head)
|
||||
@@ -120,7 +122,7 @@ def get_instructions_in_range(start, end):
|
||||
yield insn
|
||||
|
||||
|
||||
def is_operand_equal(op1, op2):
|
||||
def is_operand_equal(op1: idaapi.op_t, op2: idaapi.op_t) -> bool:
|
||||
"""compare two IDA op_t"""
|
||||
if op1.flags != op2.flags:
|
||||
return False
|
||||
@@ -146,7 +148,7 @@ def is_operand_equal(op1, op2):
|
||||
return True
|
||||
|
||||
|
||||
def is_basic_block_equal(bb1, bb2):
|
||||
def is_basic_block_equal(bb1: idaapi.BasicBlock, bb2: idaapi.BasicBlock) -> bool:
|
||||
"""compare two IDA BasicBlock"""
|
||||
if bb1.start_ea != bb2.start_ea:
|
||||
return False
|
||||
@@ -160,12 +162,12 @@ def is_basic_block_equal(bb1, bb2):
|
||||
return True
|
||||
|
||||
|
||||
def basic_block_size(bb: idaapi.BasicBlock) -> int:
    """calculate size of basic block

    the size is the distance from `bb.start_ea` to `bb.end_ea`.
    """
    return bb.end_ea - bb.start_ea
|
||||
|
||||
|
||||
def read_bytes_at(ea, count):
|
||||
def read_bytes_at(ea: int, count: int) -> bytes:
|
||||
""" """
|
||||
# check if byte has a value, see get_wide_byte doc
|
||||
if not idc.is_loaded(ea):
|
||||
@@ -178,10 +180,10 @@ def read_bytes_at(ea, count):
|
||||
return idc.get_bytes(ea, count)
|
||||
|
||||
|
||||
def find_string_at(ea, min=4):
|
||||
def find_string_at(ea: int, min_: int = 4) -> str:
|
||||
"""check if ASCII string exists at a given virtual address"""
|
||||
found = idaapi.get_strlit_contents(ea, -1, idaapi.STRTYPE_C)
|
||||
if found and len(found) > min:
|
||||
if found and len(found) > min_:
|
||||
try:
|
||||
found = found.decode("ascii")
|
||||
# hacky check for IDA bug; get_strlit_contents also reads Unicode as
|
||||
@@ -195,7 +197,7 @@ def find_string_at(ea, min=4):
|
||||
return ""
|
||||
|
||||
|
||||
def get_op_phrase_info(op):
|
||||
def get_op_phrase_info(op: idaapi.op_t) -> Dict:
|
||||
"""parse phrase features from operand
|
||||
|
||||
Pretty much dup of sark's implementation:
|
||||
@@ -232,23 +234,23 @@ def get_op_phrase_info(op):
|
||||
return {"base": base, "index": index, "scale": scale, "offset": offset}
|
||||
|
||||
|
||||
def is_op_write(insn: idaapi.insn_t, op: idaapi.op_t) -> bool:
    """Check if an operand is written to (destination operand)

    args:
      insn: instruction that owns the operand; supplies the canonical feature bits
      op: operand to test; identified by its index `op.n`
    """
    return idaapi.has_cf_chg(insn.get_canon_feature(), op.n)
|
||||
|
||||
|
||||
def is_op_read(insn: idaapi.insn_t, op: idaapi.op_t) -> bool:
    """Check if an operand is read from (source operand)

    args:
      insn: instruction that owns the operand; supplies the canonical feature bits
      op: operand to test; identified by its index `op.n`
    """
    return idaapi.has_cf_use(insn.get_canon_feature(), op.n)
|
||||
|
||||
|
||||
def is_op_offset(insn: idaapi.insn_t, op: idaapi.op_t) -> bool:
    """Check if an operand has been marked as an offset (by auto-analysis or manually)

    args:
      insn: instruction that owns the operand; its `ea` selects the flags to inspect
      op: operand to test; identified by its index `op.n`
    """
    flags = idaapi.get_flags(insn.ea)
    return ida_bytes.is_off(flags, op.n)
|
||||
|
||||
|
||||
def is_sp_modified(insn):
|
||||
def is_sp_modified(insn: idaapi.insn_t) -> bool:
|
||||
"""determine if instruction modifies SP, ESP, RSP"""
|
||||
for op in get_insn_ops(insn, target_ops=(idaapi.o_reg,)):
|
||||
if op.reg == idautils.procregs.sp.reg and is_op_write(insn, op):
|
||||
@@ -257,7 +259,7 @@ def is_sp_modified(insn):
|
||||
return False
|
||||
|
||||
|
||||
def is_bp_modified(insn):
|
||||
def is_bp_modified(insn: idaapi.insn_t) -> bool:
|
||||
"""check if instruction modifies BP, EBP, RBP"""
|
||||
for op in get_insn_ops(insn, target_ops=(idaapi.o_reg,)):
|
||||
if op.reg == idautils.procregs.bp.reg and is_op_write(insn, op):
|
||||
@@ -266,12 +268,12 @@ def is_bp_modified(insn):
|
||||
return False
|
||||
|
||||
|
||||
def is_frame_register(reg: int) -> bool:
    """check if register is sp or bp

    args:
      reg: register number, compared against `idautils.procregs.sp.reg` and `idautils.procregs.bp.reg`
    """
    return reg in (idautils.procregs.sp.reg, idautils.procregs.bp.reg)
|
||||
|
||||
|
||||
def get_insn_ops(insn, target_ops=()):
|
||||
def get_insn_ops(insn: idaapi.insn_t, target_ops: Tuple[Any] = None) -> idaapi.op_t:
|
||||
"""yield op_t for instruction, filter on type if specified"""
|
||||
for op in insn.ops:
|
||||
if op.type == idaapi.o_void:
|
||||
@@ -282,12 +284,12 @@ def get_insn_ops(insn, target_ops=()):
|
||||
yield op
|
||||
|
||||
|
||||
def is_op_stack_var(ea: int, index: int) -> bool:
    """check if operand is a stack variable

    args:
      ea: address whose flags are inspected
      index: operand index at that address
    """
    return idaapi.is_stkvar(idaapi.get_flags(ea), index)
|
||||
|
||||
|
||||
def mask_op_val(op):
|
||||
def mask_op_val(op: idaapi.op_t) -> int:
|
||||
"""mask value by data type
|
||||
|
||||
necessary due to a bug in AMD64
|
||||
@@ -307,26 +309,18 @@ def mask_op_val(op):
|
||||
return masks.get(op.dtype, op.value) & op.value
|
||||
|
||||
|
||||
def is_function_recursive(f: idaapi.func_t) -> bool:
    """check if function is recursive

    a function counts as recursive when at least one code reference to its
    start address originates from an address the function itself contains.
    """
    return any(f.contains(ref) for ref in idautils.CodeRefsTo(f.start_ea, True))
|
||||
|
||||
|
||||
def is_basic_block_tight_loop(bb):
|
||||
def is_basic_block_tight_loop(bb: idaapi.BasicBlock) -> bool:
|
||||
"""check basic block loops to self
|
||||
|
||||
true if last instruction in basic block branches to basic block start
|
||||
|
||||
args:
|
||||
f (IDA func_t)
|
||||
bb (IDA BasicBlock)
|
||||
"""
|
||||
bb_end = idc.prev_head(bb.end_ea)
|
||||
if bb.start_ea < bb_end:
|
||||
@@ -336,7 +330,7 @@ def is_basic_block_tight_loop(bb):
|
||||
return False
|
||||
|
||||
|
||||
def find_data_reference_from_insn(insn, max_depth=10):
|
||||
def find_data_reference_from_insn(insn: idaapi.insn_t, max_depth: int = 10) -> int:
|
||||
"""search for data reference from instruction, return address of instruction if no reference exists"""
|
||||
depth = 0
|
||||
ea = insn.ea
|
||||
@@ -366,24 +360,18 @@ def find_data_reference_from_insn(insn, max_depth=10):
|
||||
return ea
|
||||
|
||||
|
||||
def get_function_blocks(f: idaapi.func_t) -> Iterator[idaapi.BasicBlock]:
    """yield basic blocks contained in specified function

    blocks come from an `idaapi.FlowChart` built with the `FC_PREDS` and `FC_NOEXT` flags.
    """
    # leverage idaapi.FC_NOEXT flag to ignore useless external blocks referenced by the function
    yield from idaapi.FlowChart(f, flags=(idaapi.FC_PREDS | idaapi.FC_NOEXT))
|
||||
|
||||
|
||||
def is_basic_block_return(bb: idaapi.BasicBlock) -> bool:
    """check if basic block is return block

    true when the block's `type` equals `idaapi.fcb_ret`.
    """
    return bb.type == idaapi.fcb_ret
|
||||
|
||||
|
||||
def has_sib(oper: idaapi.op_t) -> bool:
    """check the operand's `specflag1` field, which the linked answer uses to detect a SIB byte"""
    # via: https://reverseengineering.stackexchange.com/a/14300
    return oper.specflag1 == 1
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Any, Dict, Tuple, Iterator
|
||||
|
||||
import idc
|
||||
import idaapi
|
||||
@@ -13,20 +14,22 @@ import idautils
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.ida.helpers
|
||||
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||
|
||||
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
||||
# byte range within the first and returning basic blocks, this helps to reduce FP features
|
||||
SECURITY_COOKIE_BYTES_DELTA = 0x40
|
||||
|
||||
|
||||
def get_imports(ctx: Dict[str, Any]) -> Dict[int, Any]:
    """fetch the file's imports, computing them at most once per context

    the result of `get_file_imports()` is stored in `ctx` under the key "imports_cache"
    on first use; later calls with the same `ctx` return that stored object.

    args:
      ctx: mutable cache dictionary; updated in place on a cache miss
    """
    if "imports_cache" not in ctx:
        ctx["imports_cache"] = capa.features.extractors.ida.helpers.get_file_imports()
    return ctx["imports_cache"]
|
||||
|
||||
|
||||
def check_for_api_call(ctx, insn):
|
||||
def check_for_api_call(ctx: Dict[str, Any], insn: idaapi.insn_t) -> Iterator[str]:
|
||||
"""check instruction for API call"""
|
||||
info = ()
|
||||
ref = insn.ea
|
||||
@@ -55,24 +58,22 @@ def check_for_api_call(ctx, insn):
|
||||
yield "%s.%s" % (info[0], info[1])
|
||||
|
||||
|
||||
def extract_insn_api_features(f, bb, insn):
|
||||
"""parse instruction API features
|
||||
|
||||
args:
|
||||
f (IDA func_t)
|
||||
bb (IDA BasicBlock)
|
||||
insn (IDA insn_t)
|
||||
def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction API features
|
||||
|
||||
example:
|
||||
call dword [0x00473038]
|
||||
call dword [0x00473038]
|
||||
"""
|
||||
insn: idaapi.insn_t = ih.inner
|
||||
|
||||
if not insn.get_canon_mnem() in ("call", "jmp"):
|
||||
return
|
||||
|
||||
for api in check_for_api_call(f.ctx, insn):
|
||||
for api in check_for_api_call(fh.ctx, insn):
|
||||
dll, _, symbol = api.rpartition(".")
|
||||
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
|
||||
yield API(name), insn.ea
|
||||
yield API(name), ih.address
|
||||
|
||||
# extract IDA/FLIRT recognized API functions
|
||||
targets = tuple(idautils.CodeRefsFrom(insn.ea, False))
|
||||
@@ -87,26 +88,25 @@ def extract_insn_api_features(f, bb, insn):
|
||||
|
||||
if target_func.flags & idaapi.FUNC_LIB:
|
||||
name = idaapi.get_name(target_func.start_ea)
|
||||
yield API(name), insn.ea
|
||||
yield API(name), ih.address
|
||||
if name.startswith("_"):
|
||||
# some linkers may prefix linked routines with a `_` to avoid name collisions.
|
||||
# extract features for both the mangled and un-mangled representations.
|
||||
# e.g. `_fwrite` -> `fwrite`
|
||||
# see: https://stackoverflow.com/a/2628384/87207
|
||||
yield API(name[1:]), insn.ea
|
||||
yield API(name[1:]), ih.address
|
||||
|
||||
|
||||
def extract_insn_number_features(f, bb, insn):
|
||||
"""parse instruction number features
|
||||
|
||||
args:
|
||||
f (IDA func_t)
|
||||
bb (IDA BasicBlock)
|
||||
insn (IDA insn_t)
|
||||
|
||||
def extract_insn_number_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction number features
|
||||
example:
|
||||
push 3136B0h ; dwControlCode
|
||||
"""
|
||||
insn: idaapi.insn_t = ih.inner
|
||||
|
||||
if idaapi.is_ret_insn(insn):
|
||||
# skip things like:
|
||||
# .text:0042250E retn 8
|
||||
@@ -132,8 +132,8 @@ def extract_insn_number_features(f, bb, insn):
|
||||
else:
|
||||
const = op.addr
|
||||
|
||||
yield Number(const), insn.ea
|
||||
yield OperandNumber(i, const), insn.ea
|
||||
yield Number(const), ih.address
|
||||
yield OperandNumber(i, const), ih.address
|
||||
|
||||
if insn.itype == idaapi.NN_add and 0 < const < MAX_STRUCTURE_SIZE and op.type == idaapi.o_imm:
|
||||
# for pattern like:
|
||||
@@ -141,21 +141,18 @@ def extract_insn_number_features(f, bb, insn):
|
||||
# add eax, 0x10
|
||||
#
|
||||
# assume 0x10 is also an offset (imagine eax is a pointer).
|
||||
yield Offset(const), insn.ea
|
||||
yield OperandOffset(i, const), insn.ea
|
||||
yield Offset(const), ih.address
|
||||
yield OperandOffset(i, const), ih.address
|
||||
|
||||
|
||||
def extract_insn_bytes_features(f, bb, insn):
|
||||
"""parse referenced byte sequences
|
||||
|
||||
args:
|
||||
f (IDA func_t)
|
||||
bb (IDA BasicBlock)
|
||||
insn (IDA insn_t)
|
||||
|
||||
def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse referenced byte sequences
|
||||
example:
|
||||
push offset iid_004118d4_IShellLinkA ; riid
|
||||
"""
|
||||
insn: idaapi.insn_t = ih.inner
|
||||
|
||||
if idaapi.is_call_insn(insn):
|
||||
return
|
||||
|
||||
@@ -163,38 +160,38 @@ def extract_insn_bytes_features(f, bb, insn):
|
||||
if ref != insn.ea:
|
||||
extracted_bytes = capa.features.extractors.ida.helpers.read_bytes_at(ref, MAX_BYTES_FEATURE_SIZE)
|
||||
if extracted_bytes and not capa.features.extractors.helpers.all_zeros(extracted_bytes):
|
||||
yield Bytes(extracted_bytes), insn.ea
|
||||
yield Bytes(extracted_bytes), ih.address
|
||||
|
||||
|
||||
def extract_insn_string_features(f, bb, insn):
|
||||
"""parse instruction string features
|
||||
|
||||
args:
|
||||
f (IDA func_t)
|
||||
bb (IDA BasicBlock)
|
||||
insn (IDA insn_t)
|
||||
def extract_insn_string_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction string features
|
||||
|
||||
example:
|
||||
push offset aAcr ; "ACR > "
|
||||
"""
|
||||
insn: idaapi.insn_t = ih.inner
|
||||
|
||||
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
|
||||
if ref != insn.ea:
|
||||
found = capa.features.extractors.ida.helpers.find_string_at(ref)
|
||||
if found:
|
||||
yield String(found), insn.ea
|
||||
yield String(found), ih.address
|
||||
|
||||
|
||||
def extract_insn_offset_features(f, bb, insn):
|
||||
"""parse instruction structure offset features
|
||||
|
||||
args:
|
||||
f (IDA func_t)
|
||||
bb (IDA BasicBlock)
|
||||
insn (IDA insn_t)
|
||||
def extract_insn_offset_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction structure offset features
|
||||
|
||||
example:
|
||||
.text:0040112F cmp [esi+4], ebx
|
||||
"""
|
||||
insn: idaapi.insn_t = ih.inner
|
||||
|
||||
for i, op in enumerate(insn.ops):
|
||||
if op.type == idaapi.o_void:
|
||||
break
|
||||
@@ -215,8 +212,8 @@ def extract_insn_offset_features(f, bb, insn):
|
||||
# https://stackoverflow.com/questions/31853189/x86-64-assembly-why-displacement-not-64-bits
|
||||
op_off = capa.features.extractors.helpers.twos_complement(op_off, 32)
|
||||
|
||||
yield Offset(op_off), insn.ea
|
||||
yield OperandOffset(i, op_off), insn.ea
|
||||
yield Offset(op_off), ih.address
|
||||
yield OperandOffset(i, op_off), ih.address
|
||||
|
||||
if (
|
||||
insn.itype == idaapi.NN_lea
|
||||
@@ -234,12 +231,13 @@ def extract_insn_offset_features(f, bb, insn):
|
||||
# lea eax, [ebx + 1]
|
||||
#
|
||||
# assume 1 is also an offset (imagine ebx is a zero register).
|
||||
yield Number(op_off), insn.ea
|
||||
yield OperandNumber(i, op_off), insn.ea
|
||||
yield Number(op_off), ih.address
|
||||
yield OperandNumber(i, op_off), ih.address
|
||||
|
||||
|
||||
def contains_stack_cookie_keywords(s):
|
||||
"""check if string contains stack cookie keywords
|
||||
def contains_stack_cookie_keywords(s: str) -> bool:
|
||||
"""
|
||||
check if string contains stack cookie keywords
|
||||
|
||||
Examples:
|
||||
xor ecx, ebp ; StackCookie
|
||||
@@ -253,7 +251,7 @@ def contains_stack_cookie_keywords(s):
|
||||
return any(keyword in s for keyword in ("stack", "security"))
|
||||
|
||||
|
||||
def bb_stack_cookie_registers(bb):
|
||||
def bb_stack_cookie_registers(bb: idaapi.BasicBlock) -> Iterator[int]:
|
||||
"""scan basic block for stack cookie operations
|
||||
|
||||
yield registers ids that may have been used for stack cookie operations
|
||||
@@ -287,7 +285,7 @@ def bb_stack_cookie_registers(bb):
|
||||
yield op.reg
|
||||
|
||||
|
||||
def is_nzxor_stack_cookie_delta(f, bb, insn):
|
||||
def is_nzxor_stack_cookie_delta(f: idaapi.func_t, bb: idaapi.BasicBlock, insn: idaapi.insn_t) -> bool:
|
||||
"""check if nzxor exists within stack cookie delta"""
|
||||
# security cookie check should use SP or BP
|
||||
if not capa.features.extractors.ida.helpers.is_frame_register(insn.Op2.reg):
|
||||
@@ -310,7 +308,7 @@ def is_nzxor_stack_cookie_delta(f, bb, insn):
|
||||
return False
|
||||
|
||||
|
||||
def is_nzxor_stack_cookie(f, bb, insn):
|
||||
def is_nzxor_stack_cookie(f: idaapi.func_t, bb: idaapi.BasicBlock, insn: idaapi.insn_t) -> bool:
|
||||
"""check if nzxor is related to stack cookie"""
|
||||
if contains_stack_cookie_keywords(idaapi.get_cmt(insn.ea, False)):
|
||||
# Example:
|
||||
@@ -327,48 +325,49 @@ def is_nzxor_stack_cookie(f, bb, insn):
|
||||
return False
|
||||
|
||||
|
||||
def extract_insn_nzxor_characteristic_features(f, bb, insn):
|
||||
"""parse instruction non-zeroing XOR instruction
|
||||
|
||||
ignore expected non-zeroing XORs, e.g. security cookies
|
||||
|
||||
args:
|
||||
f (IDA func_t)
|
||||
bb (IDA BasicBlock)
|
||||
insn (IDA insn_t)
|
||||
def extract_insn_nzxor_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse instruction non-zeroing XOR instruction
|
||||
ignore expected non-zeroing XORs, e.g. security cookies
|
||||
"""
|
||||
insn: idaapi.insn_t = ih.inner
|
||||
|
||||
if insn.itype not in (idaapi.NN_xor, idaapi.NN_xorpd, idaapi.NN_xorps, idaapi.NN_pxor):
|
||||
return
|
||||
if capa.features.extractors.ida.helpers.is_operand_equal(insn.Op1, insn.Op2):
|
||||
return
|
||||
if is_nzxor_stack_cookie(f, bb, insn):
|
||||
if is_nzxor_stack_cookie(fh.inner, bbh.inner, insn):
|
||||
return
|
||||
yield Characteristic("nzxor"), insn.ea
|
||||
yield Characteristic("nzxor"), ih.address
|
||||
|
||||
|
||||
def extract_insn_mnemonic_features(f, bb, insn):
|
||||
"""parse instruction mnemonic features
|
||||
|
||||
args:
|
||||
f (IDA func_t)
|
||||
bb (IDA BasicBlock)
|
||||
insn (IDA insn_t)
|
||||
"""
|
||||
yield Mnemonic(idc.print_insn_mnem(insn.ea)), insn.ea
|
||||
def extract_insn_mnemonic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse instruction mnemonic features"""
|
||||
yield Mnemonic(idc.print_insn_mnem(ih.inner.ea)), ih.address
|
||||
|
||||
|
||||
def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, insn):
|
||||
def extract_insn_obfs_call_plus_5_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse call $+5 instruction from the given instruction.
|
||||
"""
|
||||
insn: idaapi.insn_t = ih.inner
|
||||
|
||||
if not idaapi.is_call_insn(insn):
|
||||
return
|
||||
|
||||
if insn.ea + 5 == idc.get_operand_value(insn.ea, 0):
|
||||
yield Characteristic("call $+5"), insn.ea
|
||||
yield Characteristic("call $+5"), ih.address
|
||||
|
||||
|
||||
def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
||||
def extract_insn_peb_access_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse instruction peb access
|
||||
|
||||
fs:[0x30] on x86, gs:[0x60] on x64
|
||||
@@ -376,6 +375,8 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
||||
TODO:
|
||||
IDA should be able to do this..
|
||||
"""
|
||||
insn: idaapi.insn_t = ih.inner
|
||||
|
||||
if insn.itype not in (idaapi.NN_push, idaapi.NN_mov):
|
||||
return
|
||||
|
||||
@@ -387,15 +388,19 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
||||
|
||||
if " fs:30h" in disasm or " gs:60h" in disasm:
|
||||
# TODO: replace above with proper IDA
|
||||
yield Characteristic("peb access"), insn.ea
|
||||
yield Characteristic("peb access"), ih.address
|
||||
|
||||
|
||||
def extract_insn_segment_access_features(f, bb, insn):
|
||||
def extract_insn_segment_access_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse instruction fs or gs access
|
||||
|
||||
TODO:
|
||||
IDA should be able to do this...
|
||||
"""
|
||||
insn: idaapi.insn_t = ih.inner
|
||||
|
||||
if all(map(lambda op: op.type != idaapi.o_mem, insn.ops)):
|
||||
# try to optimize for only memory references
|
||||
return
|
||||
@@ -404,23 +409,21 @@ def extract_insn_segment_access_features(f, bb, insn):
|
||||
|
||||
if " fs:" in disasm:
|
||||
# TODO: replace above with proper IDA
|
||||
yield Characteristic("fs access"), insn.ea
|
||||
yield Characteristic("fs access"), ih.address
|
||||
|
||||
if " gs:" in disasm:
|
||||
# TODO: replace above with proper IDA
|
||||
yield Characteristic("gs access"), insn.ea
|
||||
yield Characteristic("gs access"), ih.address
|
||||
|
||||
|
||||
def extract_insn_cross_section_cflow(f, bb, insn):
|
||||
"""inspect the instruction for a CALL or JMP that crosses section boundaries
|
||||
def extract_insn_cross_section_cflow(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""inspect the instruction for a CALL or JMP that crosses section boundaries"""
|
||||
insn: idaapi.insn_t = ih.inner
|
||||
|
||||
args:
|
||||
f (IDA func_t)
|
||||
bb (IDA BasicBlock)
|
||||
insn (IDA insn_t)
|
||||
"""
|
||||
for ref in idautils.CodeRefsFrom(insn.ea, False):
|
||||
if ref in get_imports(f.ctx).keys():
|
||||
if ref in get_imports(fh.ctx).keys():
|
||||
# ignore API calls
|
||||
continue
|
||||
if not idaapi.getseg(ref):
|
||||
@@ -428,50 +431,40 @@ def extract_insn_cross_section_cflow(f, bb, insn):
|
||||
continue
|
||||
if idaapi.getseg(ref) == idaapi.getseg(insn.ea):
|
||||
continue
|
||||
yield Characteristic("cross section flow"), insn.ea
|
||||
yield Characteristic("cross section flow"), ih.address
|
||||
|
||||
|
||||
def extract_function_calls_from(f, bb, insn):
|
||||
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""extract functions calls from features
|
||||
|
||||
most relevant at the function scope; however, it's most efficient to extract at the instruction scope
|
||||
|
||||
args:
|
||||
f (IDA func_t)
|
||||
bb (IDA BasicBlock)
|
||||
insn (IDA insn_t)
|
||||
"""
|
||||
insn: idaapi.insn_t = ih.inner
|
||||
|
||||
if idaapi.is_call_insn(insn):
|
||||
for ref in idautils.CodeRefsFrom(insn.ea, False):
|
||||
yield Characteristic("calls from"), ref
|
||||
yield Characteristic("calls from"), AbsoluteVirtualAddress(ref)
|
||||
|
||||
|
||||
def extract_function_indirect_call_characteristic_features(f, bb, insn):
|
||||
def extract_function_indirect_call_characteristic_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
|
||||
does not include calls like => call ds:dword_ABD4974
|
||||
|
||||
most relevant at the function or basic block scope;
|
||||
however, it's most efficient to extract at the instruction scope
|
||||
|
||||
args:
|
||||
f (IDA func_t)
|
||||
bb (IDA BasicBlock)
|
||||
insn (IDA insn_t)
|
||||
"""
|
||||
insn: idaapi.insn_t = ih.inner
|
||||
|
||||
if idaapi.is_call_insn(insn) and idc.get_operand_type(insn.ea, 0) in (idc.o_reg, idc.o_phrase, idc.o_displ):
|
||||
yield Characteristic("indirect call"), insn.ea
|
||||
yield Characteristic("indirect call"), ih.address
|
||||
|
||||
|
||||
def extract_features(f, bb, insn):
|
||||
"""extract instruction features
|
||||
|
||||
args:
|
||||
f (IDA func_t)
|
||||
bb (IDA BasicBlock)
|
||||
insn (IDA insn_t)
|
||||
"""
|
||||
def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""extract instruction features"""
|
||||
for inst_handler in INSTRUCTION_HANDLERS:
|
||||
for (feature, ea) in inst_handler(f, bb, insn):
|
||||
for (feature, ea) in inst_handler(f, bbh, insn):
|
||||
yield feature, ea
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,77 @@
|
||||
from typing import Dict, List, Tuple
|
||||
from dataclasses import dataclass
|
||||
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import Address
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
|
||||
|
||||
|
||||
@dataclass
class InstructionFeatures:
    """
    The features provided for a single instruction.
    """

    # (address, feature) pairs; NullFeatureExtractor.extract_insn_features
    # yields them back in (feature, address) order.
    features: List[Tuple[Address, Feature]]
|
||||
|
||||
|
||||
@dataclass
class BasicBlockFeatures:
    """
    The features provided for a single basic block, plus those of its instructions.
    """

    # (address, feature) pairs that belong to the basic block itself.
    features: List[Tuple[Address, Feature]]
    # per-instruction features, keyed by the address of each instruction.
    instructions: Dict[Address, InstructionFeatures]
|
||||
|
||||
|
||||
@dataclass
class FunctionFeatures:
    """
    The features provided for a single function, plus those of its basic blocks.
    """

    # (address, feature) pairs that belong to the function itself.
    features: List[Tuple[Address, Feature]]
    # per-basic-block features, keyed by the address of each basic block.
    basic_blocks: Dict[Address, BasicBlockFeatures]
|
||||
|
||||
|
||||
@dataclass
class NullFeatureExtractor(FeatureExtractor):
    """
    An extractor that extracts some user-provided features.

    This is useful for testing, as we can provide expected values and see if matching works.

    File, function, basic block and instruction features are stored as
    (address, feature) pairs, while every extract_* method yields
    (feature, address) pairs. Global features are stored as bare features
    and are yielded at NO_ADDRESS.

    The scope-specific methods index directly into the provided dictionaries,
    so passing a handle whose address was not provided raises KeyError.
    """

    base_address: Address
    global_features: List[Feature]
    file_features: List[Tuple[Address, Feature]]
    functions: Dict[Address, FunctionFeatures]

    def get_base_address(self):
        return self.base_address

    def extract_global_features(self):
        # imported here because the module-level imports only bring in `Address`.
        from capa.features.address import NO_ADDRESS

        # global features are declared as bare Feature instances (no address),
        # so they must not be unpacked as pairs.
        for feature in self.global_features:
            yield feature, NO_ADDRESS

    def extract_file_features(self):
        for address, feature in self.file_features:
            yield feature, address

    def get_functions(self):
        # the handles carry no inner object: the address alone identifies the function.
        for address in sorted(self.functions):
            yield FunctionHandle(address, None)

    def extract_function_features(self, f):
        for address, feature in self.functions[f.address].features:
            yield feature, address

    def get_basic_blocks(self, f):
        for address in sorted(self.functions[f.address].basic_blocks):
            yield BBHandle(address, None)

    def extract_basic_block_features(self, f, bb):
        block = self.functions[f.address].basic_blocks[bb.address]
        for address, feature in block.features:
            yield feature, address

    def get_instructions(self, f, bb):
        block = self.functions[f.address].basic_blocks[bb.address]
        for address in sorted(block.instructions):
            yield InsnHandle(address, None)

    def extract_insn_features(self, f, bb, insn):
        block = self.functions[f.address].basic_blocks[bb.address]
        for address, feature in block.instructions[insn.address].features:
            yield feature, address
|
||||
@@ -17,6 +17,7 @@ import capa.features.extractors.helpers
|
||||
import capa.features.extractors.strings
|
||||
from capa.features.file import Export, Import, Section
|
||||
from capa.features.common import OS, ARCH_I386, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Characteristic
|
||||
from capa.features.address import NO_ADDRESS, FileOffsetAddress, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -24,7 +25,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
def extract_file_embedded_pe(buf, **kwargs):
|
||||
for offset, _ in capa.features.extractors.helpers.carve_pe(buf, 1):
|
||||
yield Characteristic("embedded pe"), offset
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(offset)
|
||||
|
||||
|
||||
def extract_file_export_names(pe, **kwargs):
|
||||
@@ -39,7 +40,7 @@ def extract_file_export_names(pe, **kwargs):
|
||||
except UnicodeDecodeError:
|
||||
continue
|
||||
va = base_address + export.address
|
||||
yield Export(name), va
|
||||
yield Export(name), AbsoluteVirtualAddress(va)
|
||||
|
||||
|
||||
def extract_file_import_names(pe, **kwargs):
|
||||
@@ -71,7 +72,7 @@ def extract_file_import_names(pe, **kwargs):
|
||||
continue
|
||||
|
||||
for name in capa.features.extractors.helpers.generate_symbols(modname, impname):
|
||||
yield Import(name), imp.address
|
||||
yield Import(name), AbsoluteVirtualAddress(imp.address)
|
||||
|
||||
|
||||
def extract_file_section_names(pe, **kwargs):
|
||||
@@ -83,7 +84,7 @@ def extract_file_section_names(pe, **kwargs):
|
||||
except UnicodeDecodeError:
|
||||
continue
|
||||
|
||||
yield Section(name), base_address + section.VirtualAddress
|
||||
yield Section(name), AbsoluteVirtualAddress(base_address + section.VirtualAddress)
|
||||
|
||||
|
||||
def extract_file_strings(buf, **kwargs):
|
||||
@@ -103,18 +104,18 @@ def extract_file_function_names(**kwargs):
|
||||
def extract_file_os(**kwargs):
|
||||
# assuming PE -> Windows
|
||||
# though i suppose they're also used by UEFI
|
||||
yield OS(OS_WINDOWS), 0x0
|
||||
yield OS(OS_WINDOWS), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_format(**kwargs):
|
||||
yield Format(FORMAT_PE), 0x0
|
||||
yield Format(FORMAT_PE), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_arch(pe, **kwargs):
|
||||
if pe.FILE_HEADER.Machine == pefile.MACHINE_TYPE["IMAGE_FILE_MACHINE_I386"]:
|
||||
yield Arch(ARCH_I386), 0x0
|
||||
yield Arch(ARCH_I386), NO_ADDRESS
|
||||
elif pe.FILE_HEADER.Machine == pefile.MACHINE_TYPE["IMAGE_FILE_MACHINE_AMD64"]:
|
||||
yield Arch(ARCH_AMD64), 0x0
|
||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||
else:
|
||||
logger.warning("unsupported architecture: %s", pefile.MACHINE_TYPE[pe.FILE_HEADER.Machine])
|
||||
|
||||
@@ -176,7 +177,7 @@ class PefileFeatureExtractor(FeatureExtractor):
|
||||
self.pe = pefile.PE(path)
|
||||
|
||||
def get_base_address(self):
|
||||
return self.pe.OPTIONAL_HEADER.ImageBase
|
||||
return AbsoluteVirtualAddress(self.pe.OPTIONAL_HEADER.ImageBase)
|
||||
|
||||
def extract_global_features(self):
|
||||
with open(self.path, "rb") as f:
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
import string
|
||||
import struct
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
from capa.features.common import Characteristic
|
||||
from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import Address
|
||||
from capa.features.basicblock import BasicBlock
|
||||
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
|
||||
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
||||
|
||||
|
||||
def _bb_has_tight_loop(f, bb):
|
||||
@@ -13,10 +16,10 @@ def _bb_has_tight_loop(f, bb):
|
||||
return bb.offset in f.blockrefs[bb.offset] if bb.offset in f.blockrefs else False
|
||||
|
||||
|
||||
def extract_bb_tight_loop(f, bb):
|
||||
def extract_bb_tight_loop(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""check basic block for tight loop indicators"""
|
||||
if _bb_has_tight_loop(f, bb):
|
||||
yield Characteristic("tight loop"), bb.offset
|
||||
if _bb_has_tight_loop(f.inner, bb.inner):
|
||||
yield Characteristic("tight loop"), bb.address
|
||||
|
||||
|
||||
def _bb_has_stackstring(f, bb):
|
||||
@@ -37,10 +40,10 @@ def get_operands(smda_ins):
|
||||
return [o.strip() for o in smda_ins.operands.split(",")]
|
||||
|
||||
|
||||
def extract_stackstring(f, bb):
|
||||
def extract_stackstring(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""check basic block for stackstring indicators"""
|
||||
if _bb_has_stackstring(f, bb):
|
||||
yield Characteristic("stack string"), bb.offset
|
||||
if _bb_has_stackstring(f.inner, bb.inner):
|
||||
yield Characteristic("stack string"), bb.address
|
||||
|
||||
|
||||
def is_mov_imm_to_stack(smda_ins):
|
||||
@@ -107,21 +110,21 @@ def get_printable_len(instr):
|
||||
return 0
|
||||
|
||||
|
||||
def extract_features(f, bb):
|
||||
def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract features from the given basic block.
|
||||
|
||||
args:
|
||||
f (smda.common.SmdaFunction): the function from which to extract features
|
||||
bb (smda.common.SmdaBasicBlock): the basic block to process.
|
||||
f: the function from which to extract features
|
||||
bb: the basic block to process.
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: the features and their location found in this basic block.
|
||||
Tuple[Feature, Address]: the features and their location found in this basic block.
|
||||
"""
|
||||
yield BasicBlock(), bb.offset
|
||||
yield BasicBlock(), bb.address
|
||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||
for feature, va in bb_handler(f, bb):
|
||||
yield feature, va
|
||||
for feature, addr in bb_handler(f, bb):
|
||||
yield feature, addr
|
||||
|
||||
|
||||
BASIC_BLOCK_HANDLERS = (
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from typing import List, Tuple
|
||||
|
||||
from smda.common.SmdaReport import SmdaReport
|
||||
|
||||
import capa.features.extractors.common
|
||||
@@ -6,7 +8,9 @@ import capa.features.extractors.smda.insn
|
||||
import capa.features.extractors.smda.global_
|
||||
import capa.features.extractors.smda.function
|
||||
import capa.features.extractors.smda.basicblock
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
|
||||
|
||||
|
||||
class SmdaFeatureExtractor(FeatureExtractor):
|
||||
@@ -18,12 +22,12 @@ class SmdaFeatureExtractor(FeatureExtractor):
|
||||
self.buf = f.read()
|
||||
|
||||
# pre-compute these because we'll yield them at *every* scope.
|
||||
self.global_features = []
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf))
|
||||
self.global_features.extend(capa.features.extractors.smda.global_.extract_arch(self.smda_report))
|
||||
|
||||
def get_base_address(self):
|
||||
return self.smda_report.base_addr
|
||||
return AbsoluteVirtualAddress(self.smda_report.base_addr)
|
||||
|
||||
def extract_global_features(self):
|
||||
yield from self.global_features
|
||||
@@ -33,21 +37,21 @@ class SmdaFeatureExtractor(FeatureExtractor):
|
||||
|
||||
def get_functions(self):
|
||||
for function in self.smda_report.getFunctions():
|
||||
yield function
|
||||
yield FunctionHandle(address=AbsoluteVirtualAddress(function.offset), inner=function)
|
||||
|
||||
def extract_function_features(self, f):
|
||||
yield from capa.features.extractors.smda.function.extract_features(f)
|
||||
def extract_function_features(self, fh):
|
||||
yield from capa.features.extractors.smda.function.extract_features(fh)
|
||||
|
||||
def get_basic_blocks(self, f):
|
||||
for bb in f.getBlocks():
|
||||
yield bb
|
||||
def get_basic_blocks(self, fh):
|
||||
for bb in fh.inner.getBlocks():
|
||||
yield BBHandle(address=AbsoluteVirtualAddress(bb.offset), inner=bb)
|
||||
|
||||
def extract_basic_block_features(self, f, bb):
|
||||
yield from capa.features.extractors.smda.basicblock.extract_features(f, bb)
|
||||
def extract_basic_block_features(self, fh, bbh):
|
||||
yield from capa.features.extractors.smda.basicblock.extract_features(fh, bbh)
|
||||
|
||||
def get_instructions(self, f, bb):
|
||||
for smda_ins in bb.getInstructions():
|
||||
yield smda_ins
|
||||
def get_instructions(self, fh, bbh):
|
||||
for smda_ins in bbh.inner.getInstructions():
|
||||
yield InsnHandle(address=AbsoluteVirtualAddress(smda_ins.offset), inner=smda_ins)
|
||||
|
||||
def extract_insn_features(self, f, bb, insn):
|
||||
yield from capa.features.extractors.smda.insn.extract_features(f, bb, insn)
|
||||
def extract_insn_features(self, fh, bbh, ih):
|
||||
yield from capa.features.extractors.smda.insn.extract_features(fh, bbh, ih)
|
||||
|
||||
@@ -6,11 +6,12 @@ import capa.features.extractors.helpers
|
||||
import capa.features.extractors.strings
|
||||
from capa.features.file import Export, Import, Section
|
||||
from capa.features.common import String, Characteristic
|
||||
from capa.features.address import FileOffsetAddress, AbsoluteVirtualAddress
|
||||
|
||||
|
||||
def extract_file_embedded_pe(buf, **kwargs):
|
||||
for offset, _ in capa.features.extractors.helpers.carve_pe(buf, 1):
|
||||
yield Characteristic("embedded pe"), offset
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(offset)
|
||||
|
||||
|
||||
def extract_file_export_names(buf, **kwargs):
|
||||
@@ -18,7 +19,7 @@ def extract_file_export_names(buf, **kwargs):
|
||||
|
||||
if lief_binary is not None:
|
||||
for function in lief_binary.exported_functions:
|
||||
yield Export(function.name), function.address
|
||||
yield Export(function.name), AbsoluteVirtualAddress(function.address)
|
||||
|
||||
|
||||
def extract_file_import_names(smda_report, buf):
|
||||
@@ -33,10 +34,10 @@ def extract_file_import_names(smda_report, buf):
|
||||
va = func.iat_address + smda_report.base_addr
|
||||
if func.name:
|
||||
for name in capa.features.extractors.helpers.generate_symbols(library_name, func.name):
|
||||
yield Import(name), va
|
||||
yield Import(name), AbsoluteVirtualAddress(va)
|
||||
elif func.is_ordinal:
|
||||
for name in capa.features.extractors.helpers.generate_symbols(library_name, "#%s" % func.ordinal):
|
||||
yield Import(name), va
|
||||
yield Import(name), AbsoluteVirtualAddress(va)
|
||||
|
||||
|
||||
def extract_file_section_names(buf, **kwargs):
|
||||
@@ -46,7 +47,7 @@ def extract_file_section_names(buf, **kwargs):
|
||||
if lief_binary and lief_binary.sections:
|
||||
base_address = lief_binary.optional_header.imagebase
|
||||
for section in lief_binary.sections:
|
||||
yield Section(section.name), base_address + section.virtual_address
|
||||
yield Section(section.name), AbsoluteVirtualAddress(base_address + section.virtual_address)
|
||||
|
||||
|
||||
def extract_file_strings(buf, **kwargs):
|
||||
@@ -54,10 +55,10 @@ def extract_file_strings(buf, **kwargs):
|
||||
extract ASCII and UTF-16 LE strings from file
|
||||
"""
|
||||
for s in capa.features.extractors.strings.extract_ascii_strings(buf):
|
||||
yield String(s.s), s.offset
|
||||
yield String(s.s), FileOffsetAddress(s.offset)
|
||||
|
||||
for s in capa.features.extractors.strings.extract_unicode_strings(buf):
|
||||
yield String(s.s), s.offset
|
||||
yield String(s.s), FileOffsetAddress(s.offset)
|
||||
|
||||
|
||||
def extract_file_function_names(smda_report, **kwargs):
|
||||
@@ -87,8 +88,8 @@ def extract_features(smda_report, buf):
|
||||
"""
|
||||
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, va in file_handler(smda_report=smda_report, buf=buf):
|
||||
yield feature, va
|
||||
for feature, addr in file_handler(smda_report=smda_report, buf=buf):
|
||||
yield feature, addr
|
||||
|
||||
|
||||
FILE_HANDLERS = (
|
||||
|
||||
@@ -1,38 +1,42 @@
|
||||
from capa.features.common import Characteristic
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors import loops
|
||||
from capa.features.extractors.base_extractor import FunctionHandle
|
||||
|
||||
|
||||
def extract_function_calls_to(f):
|
||||
for inref in f.inrefs:
|
||||
yield Characteristic("calls to"), inref
|
||||
def extract_function_calls_to(f: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
for inref in f.inner.inrefs:
|
||||
yield Characteristic("calls to"), AbsoluteVirtualAddress(inref)
|
||||
|
||||
|
||||
def extract_function_loop(f):
|
||||
def extract_function_loop(f: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse if a function has a loop
|
||||
"""
|
||||
edges = []
|
||||
for bb_from, bb_tos in f.blockrefs.items():
|
||||
for bb_from, bb_tos in f.inner.blockrefs.items():
|
||||
for bb_to in bb_tos:
|
||||
edges.append((bb_from, bb_to))
|
||||
|
||||
if edges and loops.has_loop(edges):
|
||||
yield Characteristic("loop"), f.offset
|
||||
yield Characteristic("loop"), f.address
|
||||
|
||||
|
||||
def extract_features(f):
|
||||
def extract_features(f: FunctionHandle):
|
||||
"""
|
||||
extract features from the given function.
|
||||
|
||||
args:
|
||||
f (smda.common.SmdaFunction): the function from which to extract features
|
||||
f: the function from which to extract features
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: the features and their location found in this function.
|
||||
Tuple[Feature, Address]: the features and their location found in this function.
|
||||
"""
|
||||
for func_handler in FUNCTION_HANDLERS:
|
||||
for feature, va in func_handler(f):
|
||||
yield feature, va
|
||||
for feature, addr in func_handler(f):
|
||||
yield feature, addr
|
||||
|
||||
|
||||
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import logging
|
||||
|
||||
from capa.features.common import ARCH_I386, ARCH_AMD64, Arch
|
||||
from capa.features.address import NO_ADDRESS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -8,9 +9,9 @@ logger = logging.getLogger(__name__)
|
||||
def extract_arch(smda_report):
|
||||
if smda_report.architecture == "intel":
|
||||
if smda_report.bitness == 32:
|
||||
yield Arch(ARCH_I386), 0x0
|
||||
yield Arch(ARCH_I386), NO_ADDRESS
|
||||
elif smda_report.bitness == 64:
|
||||
yield Arch(ARCH_AMD64), 0x0
|
||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||
else:
|
||||
# we likely end up here:
|
||||
# 1. handling a new architecture (e.g. aarch64)
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
import re
|
||||
import string
|
||||
import struct
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
from smda.common.SmdaReport import SmdaReport
|
||||
import smda
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||
|
||||
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
||||
# byte range within the first and returning basic blocks, this helps to reduce FP features
|
||||
@@ -15,17 +18,20 @@ PATTERN_HEXNUM = re.compile(r"[+\-] (?P<num>0x[a-fA-F0-9]+)")
|
||||
PATTERN_SINGLENUM = re.compile(r"[+\-] (?P<num>[0-9])")
|
||||
|
||||
|
||||
def extract_insn_api_features(f, bb, insn):
|
||||
def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse API features from the given instruction."""
|
||||
if insn.offset in f.apirefs:
|
||||
api_entry = f.apirefs[insn.offset]
|
||||
f: smda.Function = fh.inner
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
if ih.address in f.apirefs:
|
||||
api_entry = f.apirefs[ih.address]
|
||||
# reformat
|
||||
dll_name, api_name = api_entry.split("!")
|
||||
dll_name = dll_name.split(".")[0]
|
||||
dll_name = dll_name.lower()
|
||||
for name in capa.features.extractors.helpers.generate_symbols(dll_name, api_name):
|
||||
yield API(name), insn.offset
|
||||
elif insn.offset in f.outrefs:
|
||||
yield API(name), ih.address
|
||||
elif ih.address in f.outrefs:
|
||||
current_function = f
|
||||
current_instruction = insn
|
||||
for index in range(THUNK_CHAIN_DEPTH_DELTA):
|
||||
@@ -44,7 +50,7 @@ def extract_insn_api_features(f, bb, insn):
|
||||
dll_name = dll_name.split(".")[0]
|
||||
dll_name = dll_name.lower()
|
||||
for name in capa.features.extractors.helpers.generate_symbols(dll_name, api_name):
|
||||
yield API(name), insn.offset
|
||||
yield API(name), ih.address
|
||||
elif referenced_function.num_instructions == 1 and referenced_function.num_outrefs == 1:
|
||||
current_function = referenced_function
|
||||
current_instruction = [i for i in referenced_function.getInstructions()][0]
|
||||
@@ -52,11 +58,14 @@ def extract_insn_api_features(f, bb, insn):
|
||||
return
|
||||
|
||||
|
||||
def extract_insn_number_features(f, bb, insn):
|
||||
def extract_insn_number_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse number features from the given instruction."""
|
||||
# example:
|
||||
#
|
||||
# push 3136B0h ; dwControlCode
|
||||
f: smda.Function = fh.inner
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
operands = [o.strip() for o in insn.operands.split(",")]
|
||||
if insn.mnemonic == "add" and operands[0] in ["esp", "rsp"]:
|
||||
# skip things like:
|
||||
@@ -72,8 +81,8 @@ def extract_insn_number_features(f, bb, insn):
|
||||
except ValueError:
|
||||
continue
|
||||
else:
|
||||
yield Number(value), insn.offset
|
||||
yield OperandNumber(i, value), insn.offset
|
||||
yield Number(value), ih.address
|
||||
yield OperandNumber(i, value), ih.address
|
||||
|
||||
if insn.mnemonic == "add" and 0 < value < MAX_STRUCTURE_SIZE:
|
||||
# for pattern like:
|
||||
@@ -81,8 +90,8 @@ def extract_insn_number_features(f, bb, insn):
|
||||
# add eax, 0x10
|
||||
#
|
||||
# assume 0x10 is also an offset (imagine eax is a pointer).
|
||||
yield Offset(value), insn.offset
|
||||
yield OperandOffset(i, value), insn.offset
|
||||
yield Offset(value), ih.address
|
||||
yield OperandOffset(i, value), ih.address
|
||||
|
||||
|
||||
def read_bytes(smda_report, va, num_bytes=None):
|
||||
@@ -131,12 +140,15 @@ def derefs(smda_report, p):
|
||||
p = val
|
||||
|
||||
|
||||
def extract_insn_bytes_features(f, bb, insn):
|
||||
def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse byte sequence features from the given instruction.
|
||||
example:
|
||||
# push offset iid_004118d4_IShellLinkA ; riid
|
||||
"""
|
||||
f: smda.Function = fh.inner
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
for data_ref in insn.getDataRefs():
|
||||
for v in derefs(f.smda_report, data_ref):
|
||||
bytes_read = read_bytes(f.smda_report, v)
|
||||
@@ -145,7 +157,7 @@ def extract_insn_bytes_features(f, bb, insn):
|
||||
if capa.features.extractors.helpers.all_zeros(bytes_read):
|
||||
continue
|
||||
|
||||
yield Bytes(bytes_read), insn.offset
|
||||
yield Bytes(bytes_read), ih.address
|
||||
|
||||
|
||||
def detect_ascii_len(smda_report, offset):
|
||||
@@ -189,24 +201,29 @@ def read_string(smda_report, offset):
|
||||
return read_bytes(smda_report, offset, ulen).decode("utf-16")
|
||||
|
||||
|
||||
def extract_insn_string_features(f, bb, insn):
|
||||
def extract_insn_string_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse string features from the given instruction."""
|
||||
# example:
|
||||
#
|
||||
# push offset aAcr ; "ACR > "
|
||||
f: smda.Function = fh.inner
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
for data_ref in insn.getDataRefs():
|
||||
for v in derefs(f.smda_report, data_ref):
|
||||
string_read = read_string(f.smda_report, v)
|
||||
if string_read:
|
||||
yield String(string_read.rstrip("\x00")), insn.offset
|
||||
yield String(string_read.rstrip("\x00")), ih.address
|
||||
|
||||
|
||||
def extract_insn_offset_features(f, bb, insn):
|
||||
def extract_insn_offset_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse structure offset features from the given instruction."""
|
||||
# examples:
|
||||
#
|
||||
# mov eax, [esi + 4]
|
||||
# mov eax, [esi + ecx + 16384]
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
operands = [o.strip() for o in insn.operands.split(",")]
|
||||
for i, operand in enumerate(operands):
|
||||
if "esp" in operand or "ebp" in operand or "rbp" in operand:
|
||||
@@ -234,13 +251,13 @@ def extract_insn_offset_features(f, bb, insn):
|
||||
# lea eax, [ebx + 1]
|
||||
#
|
||||
# assume 1 is also an offset (imagine ebx is a zero register).
|
||||
yield Number(number), insn.offset
|
||||
yield OperandNumber(i, number), insn.offset
|
||||
yield Number(number), ih.address
|
||||
yield OperandNumber(i, number), ih.address
|
||||
|
||||
continue
|
||||
|
||||
yield Offset(number), insn.offset
|
||||
yield OperandOffset(i, number), insn.offset
|
||||
yield Offset(number), ih.address
|
||||
yield OperandOffset(i, number), ih.address
|
||||
|
||||
|
||||
def is_security_cookie(f, bb, insn):
|
||||
@@ -264,11 +281,16 @@ def is_security_cookie(f, bb, insn):
|
||||
return False
|
||||
|
||||
|
||||
def extract_insn_nzxor_characteristic_features(f, bb, insn):
|
||||
def extract_insn_nzxor_characteristic_features(
|
||||
fh: FunctionHandle, bh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse non-zeroing XOR instruction from the given instruction.
|
||||
ignore expected non-zeroing XORs, e.g. security cookies.
|
||||
"""
|
||||
f: smda.Function = fh.inner
|
||||
bb: smda.BasicBlock = bh.inner
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
if insn.mnemonic not in ("xor", "xorpd", "xorps", "pxor"):
|
||||
return
|
||||
@@ -280,18 +302,20 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
|
||||
if is_security_cookie(f, bb, insn):
|
||||
return
|
||||
|
||||
yield Characteristic("nzxor"), insn.offset
|
||||
yield Characteristic("nzxor"), ih.address
|
||||
|
||||
|
||||
def extract_insn_mnemonic_features(f, bb, insn):
|
||||
def extract_insn_mnemonic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse mnemonic features from the given instruction."""
|
||||
yield Mnemonic(insn.mnemonic), insn.offset
|
||||
yield Mnemonic(ih.inner.mnemonic), ih.address
|
||||
|
||||
|
||||
def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, insn):
|
||||
def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse call $+5 instruction from the given instruction.
|
||||
"""
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
if insn.mnemonic != "call":
|
||||
return
|
||||
|
||||
@@ -299,13 +323,14 @@ def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, insn):
|
||||
return
|
||||
|
||||
if int(insn.operands, 16) == insn.offset + 5:
|
||||
yield Characteristic("call $+5"), insn.offset
|
||||
yield Characteristic("call $+5"), ih.address
|
||||
|
||||
|
||||
def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
||||
def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
|
||||
"""
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
if insn.mnemonic not in ["push", "mov"]:
|
||||
return
|
||||
@@ -313,65 +338,75 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
||||
operands = [o.strip() for o in insn.operands.split(",")]
|
||||
for operand in operands:
|
||||
if "fs:" in operand and "0x30" in operand:
|
||||
yield Characteristic("peb access"), insn.offset
|
||||
yield Characteristic("peb access"), ih.address
|
||||
elif "gs:" in operand and "0x60" in operand:
|
||||
yield Characteristic("peb access"), insn.offset
|
||||
yield Characteristic("peb access"), ih.address
|
||||
|
||||
|
||||
def extract_insn_segment_access_features(f, bb, insn):
|
||||
def extract_insn_segment_access_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse the instruction for access to fs or gs"""
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
operands = [o.strip() for o in insn.operands.split(",")]
|
||||
for operand in operands:
|
||||
if "fs:" in operand:
|
||||
yield Characteristic("fs access"), insn.offset
|
||||
yield Characteristic("fs access"), ih.address
|
||||
elif "gs:" in operand:
|
||||
yield Characteristic("gs access"), insn.offset
|
||||
yield Characteristic("gs access"), ih.address
|
||||
|
||||
|
||||
def extract_insn_cross_section_cflow(f, bb, insn):
|
||||
def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
inspect the instruction for a CALL or JMP that crosses section boundaries.
|
||||
"""
|
||||
f: smda.Function = fh.inner
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
if insn.mnemonic in ["call", "jmp"]:
|
||||
if insn.offset in f.apirefs:
|
||||
if ih.address in f.apirefs:
|
||||
return
|
||||
|
||||
smda_report = insn.smda_function.smda_report
|
||||
if insn.offset in f.outrefs:
|
||||
for target in f.outrefs[insn.offset]:
|
||||
if smda_report.getSection(insn.offset) != smda_report.getSection(target):
|
||||
yield Characteristic("cross section flow"), insn.offset
|
||||
if ih.address in f.outrefs:
|
||||
for target in f.outrefs[ih.address]:
|
||||
if smda_report.getSection(ih.address) != smda_report.getSection(target):
|
||||
yield Characteristic("cross section flow"), ih.address
|
||||
elif insn.operands.startswith("0x"):
|
||||
target = int(insn.operands, 16)
|
||||
if smda_report.getSection(insn.offset) != smda_report.getSection(target):
|
||||
yield Characteristic("cross section flow"), insn.offset
|
||||
if smda_report.getSection(ih.address) != smda_report.getSection(target):
|
||||
yield Characteristic("cross section flow"), ih.address
|
||||
|
||||
|
||||
# this is a feature that's most relevant at the function scope,
|
||||
# however, its most efficient to extract at the instruction scope.
|
||||
def extract_function_calls_from(f, bb, insn):
|
||||
def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
f: smda.Function = fh.inner
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
if insn.mnemonic != "call":
|
||||
return
|
||||
|
||||
if insn.offset in f.outrefs:
|
||||
for outref in f.outrefs[insn.offset]:
|
||||
yield Characteristic("calls from"), outref
|
||||
if ih.address in f.outrefs:
|
||||
for outref in f.outrefs[ih.address]:
|
||||
yield Characteristic("calls from"), AbsoluteVirtualAddress(outref)
|
||||
|
||||
if outref == f.offset:
|
||||
# if we found a jump target and it's the function address
|
||||
# mark as recursive
|
||||
yield Characteristic("recursive call"), outref
|
||||
if insn.offset in f.apirefs:
|
||||
yield Characteristic("calls from"), insn.offset
|
||||
yield Characteristic("recursive call"), AbsoluteVirtualAddress(outref)
|
||||
if ih.address in f.apirefs:
|
||||
yield Characteristic("calls from"), ih.address
|
||||
|
||||
|
||||
# this is a feature that's most relevant at the function or basic block scope,
|
||||
# however, its most efficient to extract at the instruction scope.
|
||||
def extract_function_indirect_call_characteristic_features(f, bb, insn):
|
||||
def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
|
||||
does not include calls like => call ds:dword_ABD4974
|
||||
"""
|
||||
insn: smda.Insn = ih.inner
|
||||
|
||||
if insn.mnemonic != "call":
|
||||
return
|
||||
if insn.operands.startswith("0x"):
|
||||
@@ -383,7 +418,7 @@ def extract_function_indirect_call_characteristic_features(f, bb, insn):
|
||||
# call edx
|
||||
# call dword ptr [eax+50h]
|
||||
# call qword ptr [rsp+78h]
|
||||
yield Characteristic("indirect call"), insn.offset
|
||||
yield Characteristic("indirect call"), ih.address
|
||||
|
||||
|
||||
def extract_features(f, bb, insn):
|
||||
@@ -391,16 +426,16 @@ def extract_features(f, bb, insn):
|
||||
extract features from the given insn.
|
||||
|
||||
args:
|
||||
f (smda.common.SmdaFunction): the function to process.
|
||||
bb (smda.common.SmdaBasicBlock): the basic block to process.
|
||||
insn (smda.common.SmdaInstruction): the instruction to process.
|
||||
f: the function to process.
|
||||
bb: the basic block to process.
|
||||
insn: the instruction to process.
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: the features and their location found in this insn.
|
||||
Tuple[Feature, Address]: the features and their location found in this insn.
|
||||
"""
|
||||
for insn_handler in INSTRUCTION_HANDLERS:
|
||||
for feature, va in insn_handler(f, bb, insn):
|
||||
yield feature, va
|
||||
for feature, addr in insn_handler(f, bb, insn):
|
||||
yield feature, addr
|
||||
|
||||
|
||||
INSTRUCTION_HANDLERS = (
|
||||
|
||||
@@ -8,27 +8,30 @@
|
||||
|
||||
import string
|
||||
import struct
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
import envi
|
||||
import envi.archs.i386.disasm
|
||||
|
||||
from capa.features.common import Characteristic
|
||||
from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.basicblock import BasicBlock
|
||||
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
|
||||
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
|
||||
|
||||
|
||||
def interface_extract_basic_block_XXX(f, bb):
|
||||
def interface_extract_basic_block_XXX(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse features from the given basic block.
|
||||
|
||||
args:
|
||||
f (viv_utils.Function): the function to process.
|
||||
bb (viv_utils.BasicBlock): the basic block to process.
|
||||
f: the function to process.
|
||||
bb: the basic block to process.
|
||||
|
||||
yields:
|
||||
(Feature, int): the feature and the address at which its found.
|
||||
(Feature, Address): the feature and the address at which its found.
|
||||
"""
|
||||
yield NotImplementedError("feature"), NotImplementedError("virtual address")
|
||||
...
|
||||
|
||||
|
||||
def _bb_has_tight_loop(f, bb):
|
||||
@@ -44,10 +47,10 @@ def _bb_has_tight_loop(f, bb):
|
||||
return False
|
||||
|
||||
|
||||
def extract_bb_tight_loop(f, bb):
|
||||
def extract_bb_tight_loop(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""check basic block for tight loop indicators"""
|
||||
if _bb_has_tight_loop(f, bb):
|
||||
yield Characteristic("tight loop"), bb.va
|
||||
if _bb_has_tight_loop(f, bb.inner):
|
||||
yield Characteristic("tight loop"), bb.address
|
||||
|
||||
|
||||
def _bb_has_stackstring(f, bb):
|
||||
@@ -67,10 +70,10 @@ def _bb_has_stackstring(f, bb):
|
||||
return False
|
||||
|
||||
|
||||
def extract_stackstring(f, bb):
|
||||
def extract_stackstring(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""check basic block for stackstring indicators"""
|
||||
if _bb_has_stackstring(f, bb):
|
||||
yield Characteristic("stack string"), bb.va
|
||||
if _bb_has_stackstring(f, bb.inner):
|
||||
yield Characteristic("stack string"), bb.address
|
||||
|
||||
|
||||
def is_mov_imm_to_stack(instr: envi.archs.i386.disasm.i386Opcode) -> bool:
|
||||
@@ -143,7 +146,7 @@ def is_printable_utf16le(chars: bytes) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def extract_features(f, bb):
|
||||
def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract features from the given basic block.
|
||||
|
||||
@@ -154,10 +157,10 @@ def extract_features(f, bb):
|
||||
yields:
|
||||
Tuple[Feature, int]: the features and their location found in this basic block.
|
||||
"""
|
||||
yield BasicBlock(), bb.va
|
||||
yield BasicBlock(), AbsoluteVirtualAddress(bb.inner.va)
|
||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||
for feature, va in bb_handler(f, bb):
|
||||
yield feature, va
|
||||
for feature, addr in bb_handler(f, bb):
|
||||
yield feature, addr
|
||||
|
||||
|
||||
BASIC_BLOCK_HANDLERS = (
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
from typing import List, Tuple, Iterator
|
||||
|
||||
import viv_utils
|
||||
import viv_utils.flirt
|
||||
@@ -16,24 +17,13 @@ import capa.features.extractors.viv.insn
|
||||
import capa.features.extractors.viv.global_
|
||||
import capa.features.extractors.viv.function
|
||||
import capa.features.extractors.viv.basicblock
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class InstructionHandle:
|
||||
"""this acts like a vivisect.Opcode but with an __int__() method"""
|
||||
|
||||
def __init__(self, inner):
|
||||
self._inner = inner
|
||||
|
||||
def __int__(self):
|
||||
return self.va
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self._inner, name)
|
||||
|
||||
|
||||
class VivisectFeatureExtractor(FeatureExtractor):
|
||||
def __init__(self, vw, path):
|
||||
super(VivisectFeatureExtractor, self).__init__()
|
||||
@@ -43,13 +33,13 @@ class VivisectFeatureExtractor(FeatureExtractor):
|
||||
self.buf = f.read()
|
||||
|
||||
# pre-compute these because we'll yield them at *every* scope.
|
||||
self.global_features = []
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf))
|
||||
self.global_features.extend(capa.features.extractors.viv.global_.extract_arch(self.vw))
|
||||
|
||||
def get_base_address(self):
|
||||
# assume there is only one file loaded into the vw
|
||||
return list(self.vw.filemeta.values())[0]["imagebase"]
|
||||
return AbsoluteVirtualAddress(list(self.vw.filemeta.values())[0]["imagebase"])
|
||||
|
||||
def extract_global_features(self):
|
||||
yield from self.global_features
|
||||
@@ -57,28 +47,33 @@ class VivisectFeatureExtractor(FeatureExtractor):
|
||||
def extract_file_features(self):
|
||||
yield from capa.features.extractors.viv.file.extract_features(self.vw, self.buf)
|
||||
|
||||
def get_functions(self):
|
||||
def get_functions(self) -> Iterator[FunctionHandle]:
|
||||
for va in sorted(self.vw.getFunctions()):
|
||||
yield viv_utils.Function(self.vw, va)
|
||||
yield FunctionHandle(address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va))
|
||||
|
||||
def extract_function_features(self, f):
|
||||
yield from capa.features.extractors.viv.function.extract_features(f)
|
||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.viv.function.extract_features(fh)
|
||||
|
||||
def get_basic_blocks(self, f):
|
||||
return f.basic_blocks
|
||||
def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
|
||||
f: viv_utils.Function = fh.inner
|
||||
for bb in f.basic_blocks:
|
||||
yield BBHandle(address=AbsoluteVirtualAddress(bb.va), inner=bb)
|
||||
|
||||
def extract_basic_block_features(self, f, bb):
|
||||
yield from capa.features.extractors.viv.basicblock.extract_features(f, bb)
|
||||
def extract_basic_block_features(self, fh: FunctionHandle, bbh) -> Iterator[Tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.viv.basicblock.extract_features(fh, bbh)
|
||||
|
||||
def get_instructions(self, f, bb):
|
||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||
bb: viv_utils.BasicBlock = bbh.inner
|
||||
for insn in bb.instructions:
|
||||
yield InstructionHandle(insn)
|
||||
yield InsnHandle(address=AbsoluteVirtualAddress(insn.va), inner=insn)
|
||||
|
||||
def extract_insn_features(self, f, bb, insn):
|
||||
yield from capa.features.extractors.viv.insn.extract_features(f, bb, insn)
|
||||
def extract_insn_features(
|
||||
self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.viv.insn.extract_features(fh, bbh, ih)
|
||||
|
||||
def is_library_function(self, va):
|
||||
return viv_utils.flirt.is_library_function(self.vw, va)
|
||||
def is_library_function(self, addr):
|
||||
return viv_utils.flirt.is_library_function(self.vw, addr)
|
||||
|
||||
def get_function_name(self, va):
|
||||
return viv_utils.get_function_name(self.vw, va)
|
||||
def get_function_name(self, addr):
|
||||
return viv_utils.get_function_name(self.vw, addr)
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
import PE.carve as pe_carve # vivisect PE
|
||||
import viv_utils
|
||||
@@ -15,20 +16,21 @@ import capa.features.extractors.common
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.strings
|
||||
from capa.features.file import Export, Import, Section, FunctionName
|
||||
from capa.features.common import String, Characteristic
|
||||
from capa.features.common import String, Feature, Characteristic
|
||||
from capa.features.address import Address, FileOffsetAddress, AbsoluteVirtualAddress
|
||||
|
||||
|
||||
def extract_file_embedded_pe(buf, **kwargs):
|
||||
def extract_file_embedded_pe(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||
for offset, _ in pe_carve.carve(buf, 1):
|
||||
yield Characteristic("embedded pe"), offset
|
||||
yield Characteristic("embedded pe"), FileOffsetAddress(offset)
|
||||
|
||||
|
||||
def extract_file_export_names(vw, **kwargs):
|
||||
def extract_file_export_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||
for va, _, name, _ in vw.getExports():
|
||||
yield Export(name), va
|
||||
yield Export(name), AbsoluteVirtualAddress(va)
|
||||
|
||||
|
||||
def extract_file_import_names(vw, **kwargs):
|
||||
def extract_file_import_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract imported function names
|
||||
1. imports by ordinal:
|
||||
@@ -44,8 +46,9 @@ def extract_file_import_names(vw, **kwargs):
|
||||
# replace ord prefix with #
|
||||
impname = "#%s" % impname[len("ord") :]
|
||||
|
||||
addr = AbsoluteVirtualAddress(va)
|
||||
for name in capa.features.extractors.helpers.generate_symbols(modname, impname):
|
||||
yield Import(name), va
|
||||
yield Import(name), addr
|
||||
|
||||
|
||||
def is_viv_ord_impname(impname: str) -> bool:
|
||||
@@ -62,36 +65,37 @@ def is_viv_ord_impname(impname: str) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def extract_file_section_names(vw, **kwargs):
|
||||
def extract_file_section_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||
for va, _, segname, _ in vw.getSegments():
|
||||
yield Section(segname), va
|
||||
yield Section(segname), AbsoluteVirtualAddress(va)
|
||||
|
||||
|
||||
def extract_file_strings(buf, **kwargs):
|
||||
def extract_file_strings(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.common.extract_file_strings(buf)
|
||||
|
||||
|
||||
def extract_file_function_names(vw, **kwargs):
|
||||
def extract_file_function_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract the names of statically-linked library functions.
|
||||
"""
|
||||
for va in sorted(vw.getFunctions()):
|
||||
addr = AbsoluteVirtualAddress(va)
|
||||
if viv_utils.flirt.is_library_function(vw, va):
|
||||
name = viv_utils.get_function_name(vw, va)
|
||||
yield FunctionName(name), va
|
||||
yield FunctionName(name), addr
|
||||
if name.startswith("_"):
|
||||
# some linkers may prefix linked routines with a `_` to avoid name collisions.
|
||||
# extract features for both the mangled and un-mangled representations.
|
||||
# e.g. `_fwrite` -> `fwrite`
|
||||
# see: https://stackoverflow.com/a/2628384/87207
|
||||
yield FunctionName(name[1:]), va
|
||||
yield FunctionName(name[1:]), addr
|
||||
|
||||
|
||||
def extract_file_format(buf, **kwargs):
|
||||
def extract_file_format(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]:
|
||||
yield from capa.features.extractors.common.extract_format(buf)
|
||||
|
||||
|
||||
def extract_features(vw, buf: bytes):
|
||||
def extract_features(vw, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract file features from given workspace
|
||||
|
||||
@@ -100,12 +104,12 @@ def extract_features(vw, buf: bytes):
|
||||
buf: the raw input file bytes
|
||||
|
||||
yields:
|
||||
Tuple[Feature, VA]: a feature and its location.
|
||||
Tuple[Feature, Address]: a feature and its location.
|
||||
"""
|
||||
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, va in file_handler(vw=vw, buf=buf): # type: ignore
|
||||
yield feature, va
|
||||
for feature, addr in file_handler(vw=vw, buf=buf): # type: ignore
|
||||
yield feature, addr
|
||||
|
||||
|
||||
FILE_HANDLERS = (
|
||||
|
||||
@@ -5,36 +5,43 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
import envi
|
||||
import viv_utils
|
||||
import vivisect.const
|
||||
|
||||
from capa.features.common import Characteristic
|
||||
from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors import loops
|
||||
from capa.features.extractors.base_extractor import FunctionHandle
|
||||
|
||||
|
||||
def interface_extract_function_XXX(f):
|
||||
def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse features from the given function.
|
||||
|
||||
args:
|
||||
f (viv_utils.Function): the function to process.
|
||||
f: the function to process.
|
||||
|
||||
yields:
|
||||
(Feature, int): the feature and the address at which its found.
|
||||
(Feature, Address): the feature and the address at which its found.
|
||||
"""
|
||||
yield NotImplementedError("feature"), NotImplementedError("virtual address")
|
||||
...
|
||||
|
||||
|
||||
def extract_function_calls_to(f):
|
||||
def extract_function_calls_to(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
f: viv_utils.Function = fhandle.inner
|
||||
for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
|
||||
yield Characteristic("calls to"), src
|
||||
yield Characteristic("calls to"), AbsoluteVirtualAddress(src)
|
||||
|
||||
|
||||
def extract_function_loop(f):
|
||||
def extract_function_loop(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse if a function has a loop
|
||||
"""
|
||||
f: viv_utils.Function = fhandle.inner
|
||||
|
||||
edges = []
|
||||
|
||||
for bb in f.basic_blocks:
|
||||
@@ -50,22 +57,22 @@ def extract_function_loop(f):
|
||||
edges.append((bb.va, bva))
|
||||
|
||||
if edges and loops.has_loop(edges):
|
||||
yield Characteristic("loop"), f.va
|
||||
yield Characteristic("loop"), fhandle.address
|
||||
|
||||
|
||||
def extract_features(f):
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract features from the given function.
|
||||
|
||||
args:
|
||||
f (viv_utils.Function): the function from which to extract features
|
||||
fh: the function handle from which to extract features
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: the features and their location found in this function.
|
||||
"""
|
||||
for func_handler in FUNCTION_HANDLERS:
|
||||
for feature, va in func_handler(f):
|
||||
yield feature, va
|
||||
for feature, addr in func_handler(fh):
|
||||
yield feature, addr
|
||||
|
||||
|
||||
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop)
|
||||
|
||||
@@ -1,19 +1,21 @@
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
import envi.archs.i386
|
||||
import envi.archs.amd64
|
||||
|
||||
from capa.features.common import ARCH_I386, ARCH_AMD64, Arch
|
||||
from capa.features.common import ARCH_I386, ARCH_AMD64, Arch, Feature
|
||||
from capa.features.address import NO_ADDRESS, Address
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_arch(vw):
|
||||
def extract_arch(vw) -> Iterator[Tuple[Feature, Address]]:
|
||||
if isinstance(vw.arch, envi.archs.amd64.Amd64Module):
|
||||
yield Arch(ARCH_AMD64), 0x0
|
||||
yield Arch(ARCH_AMD64), NO_ADDRESS
|
||||
|
||||
elif isinstance(vw.arch, envi.archs.i386.i386Module):
|
||||
yield Arch(ARCH_I386), 0x0
|
||||
yield Arch(ARCH_I386), NO_ADDRESS
|
||||
|
||||
else:
|
||||
# we likely end up here:
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import collections
|
||||
from typing import TYPE_CHECKING, Set, List, Deque, Tuple, Union, Optional
|
||||
from typing import Set, List, Deque, Tuple, Union, Optional
|
||||
|
||||
import envi
|
||||
import vivisect.const
|
||||
@@ -15,9 +15,6 @@ import envi.archs.i386.disasm
|
||||
import envi.archs.amd64.disasm
|
||||
from vivisect import VivWorkspace
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from capa.features.extractors.viv.extractor import InstructionHandle
|
||||
|
||||
# pull out consts for lookup performance
|
||||
i386RegOper = envi.archs.i386.disasm.i386RegOper
|
||||
i386ImmOper = envi.archs.i386.disasm.i386ImmOper
|
||||
@@ -135,16 +132,14 @@ def find_definition(vw: VivWorkspace, va: int, reg: int) -> Tuple[int, Union[int
|
||||
raise NotFoundError()
|
||||
|
||||
|
||||
def is_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> bool:
    """
    report whether the instruction is a `call` or `jmp` whose first operand is a register.

    args:
      vw: the vivisect workspace, used to parse the instruction when `insn` is None.
      va: the virtual address of the instruction.
      insn: the already-parsed instruction at `va`.

    returns:
      bool: True when the mnemonic is "call" or "jmp" and operand 0 is an i386RegOper.
    """
    # defensive: callers are expected to pass the opcode, but fall back to parsing it from the workspace.
    if insn is None:
        insn = vw.parseOpcode(va)

    return insn.mnem in ("call", "jmp") and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper)
|
||||
|
||||
|
||||
def resolve_indirect_call(
|
||||
vw: VivWorkspace, va: int, insn: Optional["InstructionHandle"] = None
|
||||
) -> Tuple[int, Optional[int]]:
|
||||
def resolve_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> Tuple[int, Optional[int]]:
|
||||
"""
|
||||
inspect the given indirect call instruction and attempt to resolve the target address.
|
||||
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import List, Tuple, Callable, Iterator
|
||||
|
||||
import envi
|
||||
import envi.exc
|
||||
import viv_utils
|
||||
@@ -18,7 +20,9 @@ import envi.archs.amd64.disasm
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.viv.helpers
|
||||
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||
from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call
|
||||
|
||||
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
||||
@@ -26,19 +30,21 @@ from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_i
|
||||
SECURITY_COOKIE_BYTES_DELTA = 0x40
|
||||
|
||||
|
||||
def interface_extract_instruction_XXX(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[Tuple[Feature, Address]]:
    """
    parse features from the given instruction.

    this is a signature template for instruction-scope handlers; it has no implementation.

    args:
      fh: the function handle to process.
      bbh: the basic block handle to process.
      ih: the instruction handle to process.

    yields:
      (Feature, Address): the feature and the address at which it's found.
    """
    ...
|
||||
|
||||
|
||||
def get_imports(vw):
|
||||
@@ -58,12 +64,15 @@ def get_imports(vw):
|
||||
return imports
|
||||
|
||||
|
||||
def extract_insn_api_features(f, bb, insn):
|
||||
"""parse API features from the given instruction."""
|
||||
def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse API features from the given instruction.
|
||||
|
||||
# example:
|
||||
#
|
||||
# call dword [0x00473038]
|
||||
example:
|
||||
call dword [0x00473038]
|
||||
"""
|
||||
insn: envi.Opcode = ih.inner
|
||||
f: viv_utils.Function = fh.inner
|
||||
if insn.mnem not in ("call", "jmp"):
|
||||
return
|
||||
|
||||
@@ -80,7 +89,7 @@ def extract_insn_api_features(f, bb, insn):
|
||||
if target in imports:
|
||||
dll, symbol = imports[target]
|
||||
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
|
||||
yield API(name), insn.va
|
||||
yield API(name), ih.address
|
||||
|
||||
# call via thunk on x86,
|
||||
# see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
|
||||
@@ -102,20 +111,20 @@ def extract_insn_api_features(f, bb, insn):
|
||||
|
||||
if viv_utils.flirt.is_library_function(f.vw, target):
|
||||
name = viv_utils.get_function_name(f.vw, target)
|
||||
yield API(name), insn.va
|
||||
yield API(name), ih.address
|
||||
if name.startswith("_"):
|
||||
# some linkers may prefix linked routines with a `_` to avoid name collisions.
|
||||
# extract features for both the mangled and un-mangled representations.
|
||||
# e.g. `_fwrite` -> `fwrite`
|
||||
# see: https://stackoverflow.com/a/2628384/87207
|
||||
yield API(name[1:]), insn.va
|
||||
yield API(name[1:]), ih.address
|
||||
return
|
||||
|
||||
for _ in range(THUNK_CHAIN_DEPTH_DELTA):
|
||||
if target in imports:
|
||||
dll, symbol = imports[target]
|
||||
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
|
||||
yield API(name), insn.va
|
||||
yield API(name), ih.address
|
||||
|
||||
# if jump leads to an ENDBRANCH instruction, skip it
|
||||
if f.vw.getByteDef(target)[1].startswith(b"\xf3\x0f\x1e"):
|
||||
@@ -135,7 +144,7 @@ def extract_insn_api_features(f, bb, insn):
|
||||
if target in imports:
|
||||
dll, symbol = imports[target]
|
||||
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
|
||||
yield API(name), insn.va
|
||||
yield API(name), ih.address
|
||||
|
||||
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper):
|
||||
try:
|
||||
@@ -152,7 +161,7 @@ def extract_insn_api_features(f, bb, insn):
|
||||
if target in imports:
|
||||
dll, symbol = imports[target]
|
||||
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
|
||||
yield API(name), insn.va
|
||||
yield API(name), ih.address
|
||||
|
||||
|
||||
def derefs(vw, p):
|
||||
@@ -225,12 +234,15 @@ def read_bytes(vw, va: int) -> bytes:
|
||||
raise
|
||||
|
||||
|
||||
def extract_insn_bytes_features(f, bb, insn):
|
||||
def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse byte sequence features from the given instruction.
|
||||
example:
|
||||
# push offset iid_004118d4_IShellLinkA ; riid
|
||||
"""
|
||||
insn: envi.Opcode = ih.inner
|
||||
f: viv_utils.Function = fh.inner
|
||||
|
||||
if insn.mnem == "call":
|
||||
return
|
||||
|
||||
@@ -259,7 +271,7 @@ def extract_insn_bytes_features(f, bb, insn):
|
||||
if capa.features.extractors.helpers.all_zeros(buf):
|
||||
continue
|
||||
|
||||
yield Bytes(buf), insn.va
|
||||
yield Bytes(buf), ih.address
|
||||
|
||||
|
||||
def read_string(vw, offset: int) -> str:
|
||||
@@ -321,11 +333,17 @@ def is_security_cookie(f, bb, insn) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def extract_insn_nzxor_characteristic_features(f, bb, insn):
|
||||
def extract_insn_nzxor_characteristic_features(
|
||||
fh: FunctionHandle, bbhandle: BBHandle, ih: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse non-zeroing XOR instruction from the given instruction.
|
||||
ignore expected non-zeroing XORs, e.g. security cookies.
|
||||
"""
|
||||
insn: envi.Opcode = ih.inner
|
||||
bb: viv_utils.BasicBlock = bbhandle.inner
|
||||
f: viv_utils.Function = fh.inner
|
||||
|
||||
if insn.mnem not in ("xor", "xorpd", "xorps", "pxor"):
|
||||
return
|
||||
|
||||
@@ -335,37 +353,40 @@ def extract_insn_nzxor_characteristic_features(f, bb, insn):
|
||||
if is_security_cookie(f, bb, insn):
|
||||
return
|
||||
|
||||
yield Characteristic("nzxor"), insn.va
|
||||
yield Characteristic("nzxor"), ih.address
|
||||
|
||||
|
||||
def extract_insn_mnemonic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
    """
    parse mnemonic features from the given instruction.

    args:
      f: unused; present so all instruction handlers share one signature.
      bb: unused; present so all instruction handlers share one signature.
      ih: the instruction handle; its `inner` attribute carries the decoded instruction.

    yields:
      Tuple[Feature, Address]: Mnemonic(<mnemonic>) located at the instruction's address.
    """
    yield Mnemonic(ih.inner.mnem), ih.address
|
||||
|
||||
|
||||
def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
    """
    parse call $+5 instruction from the given instruction.

    args:
      f: unused; present so all instruction handlers share one signature.
      bb: unused; present so all instruction handlers share one signature.
      ih: the instruction handle; its `inner` attribute is the envi.Opcode to inspect.

    yields:
      Tuple[Feature, Address]: Characteristic("call $+5") located at the instruction's address.
    """
    insn: envi.Opcode = ih.inner

    if insn.mnem != "call":
        return

    target_oper = insn.opers[0]

    # pc-relative call: compare the resolved operand value against the address five bytes past the call.
    if isinstance(target_oper, envi.archs.i386.disasm.i386PcRelOper):
        if insn.va + 5 == target_oper.getOperValue(insn):
            yield Characteristic("call $+5"), ih.address

    # memory-operand call: compare the operand's address against the same location.
    if isinstance(target_oper, envi.archs.i386.disasm.i386ImmMemOper) or isinstance(
        target_oper, envi.archs.amd64.disasm.Amd64RipRelOper
    ):
        if insn.va + 5 == target_oper.getOperAddr(insn):
            yield Characteristic("call $+5"), ih.address
|
||||
|
||||
|
||||
def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
||||
def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
|
||||
"""
|
||||
# TODO handle where fs/gs are loaded into a register or onto the stack and used later
|
||||
insn: envi.Opcode = ih.inner
|
||||
|
||||
if insn.mnem not in ["push", "mov"]:
|
||||
return
|
||||
@@ -384,7 +405,7 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
||||
if (isinstance(oper, envi.archs.i386.disasm.i386RegMemOper) and oper.disp == 0x30) or (
|
||||
isinstance(oper, envi.archs.i386.disasm.i386ImmMemOper) and oper.imm == 0x30
|
||||
):
|
||||
yield Characteristic("peb access"), insn.va
|
||||
yield Characteristic("peb access"), ih.address
|
||||
elif "gs" in prefix:
|
||||
for oper in insn.opers:
|
||||
if (
|
||||
@@ -392,20 +413,22 @@ def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
||||
or (isinstance(oper, envi.archs.amd64.disasm.i386SibOper) and oper.imm == 0x60)
|
||||
or (isinstance(oper, envi.archs.amd64.disasm.i386ImmMemOper) and oper.imm == 0x60)
|
||||
):
|
||||
yield Characteristic("peb access"), insn.va
|
||||
yield Characteristic("peb access"), ih.address
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
def extract_insn_segment_access_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
    """
    parse the instruction for access to fs or gs

    args:
      f: unused; present so all instruction handlers share one signature.
      bb: unused; present so all instruction handlers share one signature.
      ih: the instruction handle; its `inner` attribute is the envi.Opcode to inspect.

    yields:
      Tuple[Feature, Address]: Characteristic("fs access") or Characteristic("gs access"),
        located at the instruction's address, when the prefix name is exactly "fs" or "gs".
    """
    insn: envi.Opcode = ih.inner

    prefix = insn.getPrefixName()

    if prefix == "fs":
        yield Characteristic("fs access"), ih.address

    if prefix == "gs":
        yield Characteristic("gs access"), ih.address
|
||||
|
||||
|
||||
def get_section(vw, va: int):
|
||||
@@ -416,10 +439,13 @@ def get_section(vw, va: int):
|
||||
raise KeyError(va)
|
||||
|
||||
|
||||
def extract_insn_cross_section_cflow(f, bb, insn):
|
||||
def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
inspect the instruction for a CALL or JMP that crosses section boundaries.
|
||||
"""
|
||||
insn: envi.Opcode = ih.inner
|
||||
f: viv_utils.Function = fh.inner
|
||||
|
||||
for va, flags in insn.getBranches():
|
||||
if va is None:
|
||||
# va may be none for dynamic branches that haven't been resolved, such as `jmp eax`.
|
||||
@@ -446,7 +472,7 @@ def extract_insn_cross_section_cflow(f, bb, insn):
|
||||
continue
|
||||
|
||||
if get_section(f.vw, insn.va) != get_section(f.vw, va):
|
||||
yield Characteristic("cross section flow"), insn.va
|
||||
yield Characteristic("cross section flow"), ih.address
|
||||
|
||||
except KeyError:
|
||||
continue
|
||||
@@ -454,7 +480,10 @@ def extract_insn_cross_section_cflow(f, bb, insn):
|
||||
|
||||
# this is a feature that's most relevant at the function scope,
|
||||
# however, it's most efficient to extract at the instruction scope.
|
||||
def extract_function_calls_from(f, bb, insn):
|
||||
def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
insn: envi.Opcode = ih.inner
|
||||
f: viv_utils.Function = fh.inner
|
||||
|
||||
if insn.mnem != "call":
|
||||
return
|
||||
|
||||
@@ -464,7 +493,7 @@ def extract_function_calls_from(f, bb, insn):
|
||||
if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper):
|
||||
oper = insn.opers[0]
|
||||
target = oper.getOperAddr(insn)
|
||||
yield Characteristic("calls from"), target
|
||||
yield Characteristic("calls from"), AbsoluteVirtualAddress(target)
|
||||
|
||||
# call via thunk on x86,
|
||||
# see 9324d1a8ae37a36ae560c37448c9705a at 0x407985
|
||||
@@ -473,47 +502,54 @@ def extract_function_calls_from(f, bb, insn):
|
||||
# see Lab21-01.exe_:0x140001178
|
||||
elif isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):
|
||||
target = insn.opers[0].getOperValue(insn)
|
||||
yield Characteristic("calls from"), target
|
||||
yield Characteristic("calls from"), AbsoluteVirtualAddress(target)
|
||||
|
||||
# call via IAT, x64
|
||||
elif isinstance(insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper):
|
||||
op = insn.opers[0]
|
||||
target = op.getOperAddr(insn)
|
||||
yield Characteristic("calls from"), target
|
||||
yield Characteristic("calls from"), AbsoluteVirtualAddress(target)
|
||||
|
||||
if target and target == f.va:
|
||||
# if we found a jump target and it's the function address
|
||||
# mark as recursive
|
||||
yield Characteristic("recursive call"), target
|
||||
yield Characteristic("recursive call"), AbsoluteVirtualAddress(target)
|
||||
|
||||
|
||||
# this is a feature that's most relevant at the function or basic block scope,
|
||||
# however, it's most efficient to extract at the instruction scope.
|
||||
def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
    """
    extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4])
    does not include calls like => call ds:dword_ABD4974

    args:
      f: unused; present so all instruction handlers share one signature.
      bb: unused; present so all instruction handlers share one signature.
      ih: the instruction handle; its `inner` attribute is the envi.Opcode to inspect.

    yields:
      Tuple[Feature, Address]: Characteristic("indirect call") located at the instruction's address.
    """
    insn: envi.Opcode = ih.inner

    if insn.mnem != "call":
        return

    # Checks below work for x86 and x64
    indirect_operand_types = (
        # call edx
        envi.archs.i386.disasm.i386RegOper,
        # call dword ptr [eax+50h]
        envi.archs.i386.disasm.i386RegMemOper,
        # call qword ptr [rsp+78h]
        envi.archs.i386.disasm.i386SibOper,
    )
    if isinstance(insn.opers[0], indirect_operand_types):
        yield Characteristic("indirect call"), ih.address
|
||||
|
||||
|
||||
def extract_op_number_features(f, bb, insn, i, oper):
|
||||
"""parse number features from the given operand."""
|
||||
# example:
|
||||
#
|
||||
# push 3136B0h ; dwControlCode
|
||||
def extract_op_number_features(
|
||||
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse number features from the given operand.
|
||||
|
||||
example:
|
||||
push 3136B0h ; dwControlCode
|
||||
"""
|
||||
insn: envi.Opcode = ih.inner
|
||||
f: viv_utils.Function = fh.inner
|
||||
|
||||
# this is for both x32 and x64
|
||||
if not isinstance(oper, (envi.archs.i386.disasm.i386ImmOper, envi.archs.i386.disasm.i386ImmMemOper)):
|
||||
@@ -536,8 +572,8 @@ def extract_op_number_features(f, bb, insn, i, oper):
|
||||
# .text:00401145 add esp, 0Ch
|
||||
return
|
||||
|
||||
yield Number(v), insn.va
|
||||
yield OperandNumber(i, v), insn.va
|
||||
yield Number(v), ih.address
|
||||
yield OperandNumber(i, v), ih.address
|
||||
|
||||
if insn.mnem == "add" and 0 < v < MAX_STRUCTURE_SIZE and isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
|
||||
# for pattern like:
|
||||
@@ -545,15 +581,19 @@ def extract_op_number_features(f, bb, insn, i, oper):
|
||||
# add eax, 0x10
|
||||
#
|
||||
# assume 0x10 is also an offset (imagine eax is a pointer).
|
||||
yield Offset(v), insn.va
|
||||
yield OperandOffset(i, v), insn.va
|
||||
yield Offset(v), ih.address
|
||||
yield OperandOffset(i, v), ih.address
|
||||
|
||||
|
||||
def extract_op_offset_features(f, bb, insn, i, oper):
|
||||
def extract_op_offset_features(
|
||||
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse structure offset features from the given operand."""
|
||||
# example:
|
||||
#
|
||||
# .text:0040112F cmp [esi+4], ebx
|
||||
insn: envi.Opcode = ih.inner
|
||||
f: viv_utils.Function = fh.inner
|
||||
|
||||
# this is for both x32 and x64
|
||||
# like [esi + 4]
|
||||
@@ -573,8 +613,8 @@ def extract_op_offset_features(f, bb, insn, i, oper):
|
||||
# viv already decodes offsets as signed
|
||||
v = oper.disp
|
||||
|
||||
yield Offset(v), insn.va
|
||||
yield OperandOffset(i, v), insn.va
|
||||
yield Offset(v), ih.address
|
||||
yield OperandOffset(i, v), ih.address
|
||||
|
||||
if insn.mnem == "lea" and i == 1 and not f.vw.probeMemory(v, 1, envi.memory.MM_READ):
|
||||
# for pattern like:
|
||||
@@ -582,8 +622,8 @@ def extract_op_offset_features(f, bb, insn, i, oper):
|
||||
# lea eax, [ebx + 1]
|
||||
#
|
||||
# assume 1 is also an offset (imagine ebx is a zero register).
|
||||
yield Number(v), insn.va
|
||||
yield OperandNumber(i, v), insn.va
|
||||
yield Number(v), ih.address
|
||||
yield OperandNumber(i, v), ih.address
|
||||
|
||||
# like: [esi + ecx + 16384]
|
||||
# reg ^ ^
|
||||
@@ -593,15 +633,19 @@ def extract_op_offset_features(f, bb, insn, i, oper):
|
||||
# viv already decodes offsets as signed
|
||||
v = oper.disp
|
||||
|
||||
yield Offset(v), insn.va
|
||||
yield OperandOffset(i, v), insn.va
|
||||
yield Offset(v), ih.address
|
||||
yield OperandOffset(i, v), ih.address
|
||||
|
||||
|
||||
def extract_op_string_features(f, bb, insn, i, oper):
|
||||
def extract_op_string_features(
|
||||
fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""parse string features from the given operand."""
|
||||
# example:
|
||||
#
|
||||
# push offset aAcr ; "ACR > "
|
||||
insn: envi.Opcode = ih.inner
|
||||
f: viv_utils.Function = fh.inner
|
||||
|
||||
if isinstance(oper, envi.archs.i386.disasm.i386ImmOper):
|
||||
v = oper.getOperValue(oper)
|
||||
@@ -622,24 +666,26 @@ def extract_op_string_features(f, bb, insn, i, oper):
|
||||
except ValueError:
|
||||
continue
|
||||
else:
|
||||
yield String(s.rstrip("\x00")), insn.va
|
||||
yield String(s.rstrip("\x00")), ih.address
|
||||
|
||||
|
||||
def extract_operand_features(f: FunctionHandle, bb, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
    """
    run every operand handler against each operand of the given instruction.

    args:
      f: the function handle, passed through to the operand handlers.
      bb: the basic block handle, passed through to the operand handlers.
      insn: the instruction handle; the operands are read from `insn.inner.opers`.

    yields:
      Tuple[Feature, Address]: the features and their location as emitted by the operand handlers.
    """
    for i, oper in enumerate(insn.inner.opers):
        for op_handler in OPERAND_HANDLERS:
            # handlers receive the handles (not the unwrapped objects) plus the operand index and operand.
            for feature, addr in op_handler(f, bb, insn, i, oper):
                yield feature, addr
|
||||
|
||||
|
||||
# operand-scope handlers: each is called with the function, basic block and instruction handles,
# the operand index, and the operand itself.
OPERAND_HANDLERS: List[
    Callable[[FunctionHandle, BBHandle, InsnHandle, int, envi.Operand], Iterator[Tuple[Feature, Address]]]
] = [
    extract_op_number_features,
    extract_op_offset_features,
    extract_op_string_features,
]
|
||||
|
||||
|
||||
def extract_features(f, bb, insn):
|
||||
def extract_features(f, bb, insn) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract features from the given insn.
|
||||
|
||||
@@ -649,14 +695,14 @@ def extract_features(f, bb, insn):
|
||||
insn (vivisect...Instruction): the instruction to process.
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: the features and their location found in this insn.
|
||||
Tuple[Feature, Address]: the features and their location found in this insn.
|
||||
"""
|
||||
for insn_handler in INSTRUCTION_HANDLERS:
|
||||
for feature, va in insn_handler(f, bb, insn):
|
||||
yield feature, va
|
||||
for feature, addr in insn_handler(f, bb, insn):
|
||||
yield feature, addr
|
||||
|
||||
|
||||
INSTRUCTION_HANDLERS = (
|
||||
INSTRUCTION_HANDLERS: List[Callable[[FunctionHandle, BBHandle, InsnHandle], Iterator[Tuple[Feature, Address]]]] = [
|
||||
extract_insn_api_features,
|
||||
extract_insn_bytes_features,
|
||||
extract_insn_nzxor_characteristic_features,
|
||||
@@ -668,4 +714,4 @@ INSTRUCTION_HANDLERS = (
|
||||
extract_function_calls_from,
|
||||
extract_function_indirect_call_characteristic_features,
|
||||
extract_operand_features,
|
||||
)
|
||||
]
|
||||
|
||||
@@ -1,283 +0,0 @@
|
||||
"""
|
||||
capa freeze file format: `| capa0000 | + zlib(utf-8(json(...)))`
|
||||
|
||||
json format:
|
||||
|
||||
{
|
||||
'version': 1,
|
||||
'base address': int(base address),
|
||||
'functions': {
|
||||
int(function va): {
|
||||
int(basic block va): [int(instruction va), ...]
|
||||
...
|
||||
},
|
||||
...
|
||||
},
|
||||
'scopes': {
|
||||
'global': [
|
||||
(str(name), [any(arg), ...], int(va), ()),
|
||||
...
|
||||
],
|
||||
'file': [
|
||||
(str(name), [any(arg), ...], int(va), ()),
|
||||
...
|
||||
],
|
||||
'function': [
|
||||
(str(name), [any(arg), ...], int(va), (int(function va), )),
|
||||
...
|
||||
],
|
||||
'basic block': [
|
||||
(str(name), [any(arg), ...], int(va), (int(function va),
|
||||
int(basic block va))),
|
||||
...
|
||||
],
|
||||
'instruction': [
|
||||
(str(name), [any(arg), ...], int(va), (int(function va),
|
||||
int(basic block va),
|
||||
int(instruction va))),
|
||||
...
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and limitations under the License.
|
||||
"""
|
||||
import json
|
||||
import zlib
|
||||
import logging
|
||||
from typing import Dict, Type
|
||||
|
||||
import capa.helpers
|
||||
import capa.features.file
|
||||
import capa.features.insn
|
||||
import capa.features.common
|
||||
import capa.features.basicblock
|
||||
import capa.features.extractors.base_extractor
|
||||
from capa.features.common import Feature
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def serialize_feature(feature):
    """
    convert a feature into its freeze representation.

    this simply delegates to the feature's own `freeze_serialize()` method.
    """
    return feature.freeze_serialize()
|
||||
|
||||
|
||||
KNOWN_FEATURES: Dict[str, Type[Feature]] = {F.__name__: F for F in capa.features.common.Feature.__subclasses__()}
|
||||
KNOWN_FEATURES.update({F.__name__: F for F in capa.features.insn._Operand.__subclasses__()}) # type: ignore
|
||||
|
||||
|
||||
def deserialize_feature(doc):
    """
    rebuild a feature from its freeze representation.

    args:
      doc: a pair whose first item is the feature class name and whose second item
        is handed to that class's `freeze_deserialize()`.

    raises:
      KeyError: when the class name is not present in KNOWN_FEATURES.
    """
    F = KNOWN_FEATURES[doc[0]]
    return F.freeze_deserialize(doc[1])
|
||||
|
||||
|
||||
def dumps(extractor):
    """
    serialize the given extractor to a string

    walks the global, file, function, basic block and instruction scopes of the extractor
    and records each feature as `serialized feature + (address, location)`, where location
    is the tuple of function / basic block / instruction addresses that contain it.

    args:
      extractor: capa.features.extractors.base_extractor.FeatureExtractor:

    returns:
      str: the serialized features.
    """
    # local alias, named so that it does not shadow the builtin `hex`.
    to_hex = capa.helpers.hex
    ret = {
        "version": 1,
        "base address": extractor.get_base_address(),
        "functions": {},
        "scopes": {
            "global": [],
            "file": [],
            "function": [],
            "basic block": [],
            "instruction": [],
        },
    }
    scopes = ret["scopes"]

    for feature, va in extractor.extract_global_features():
        scopes["global"].append(serialize_feature(feature) + (to_hex(va), ()))

    for feature, va in extractor.extract_file_features():
        scopes["file"].append(serialize_feature(feature) + (to_hex(va), ()))

    for f in extractor.get_functions():
        fkey = to_hex(f)
        layout = ret["functions"][fkey] = {}

        for feature, va in extractor.extract_function_features(f):
            scopes["function"].append(serialize_feature(feature) + (to_hex(va), (fkey,)))

        for bb in extractor.get_basic_blocks(f):
            bbkey = to_hex(bb)
            layout[bbkey] = []

            for feature, va in extractor.extract_basic_block_features(f, bb):
                scopes["basic block"].append(serialize_feature(feature) + (to_hex(va), (fkey, bbkey)))

            # emit instructions in ascending address order so the layout is stable.
            for insnva, insn in sorted(
                [(int(insn), insn) for insn in extractor.get_instructions(f, bb)], key=lambda p: p[0]
            ):
                insnkey = to_hex(insnva)
                layout[bbkey].append(insnkey)

                for feature, va in extractor.extract_insn_features(f, bb, insn):
                    scopes["instruction"].append(
                        serialize_feature(feature) + (to_hex(va), (fkey, bbkey, insnkey))
                    )
    return json.dumps(ret)
|
||||
|
||||
|
||||
def loads(s):
    """
    deserialize a set of features (as a NullFeatureExtractor) from a string.

    args:
      s: the JSON document produced by `dumps`.

    returns:
      capa.features.extractors.base_extractor.NullFeatureExtractor: extractor over the decoded features.

    raises:
      ValueError: when the document's version is missing or is not 1.
    """
    doc = json.loads(s)

    version = doc.get("version")
    if version != 1:
        # format with %s: a missing version is None, and %d would raise TypeError instead of ValueError.
        raise ValueError("unsupported freeze format version: %s" % (version,))

    features = {
        "base address": doc.get("base address"),
        "global features": [],
        "file features": [],
        "functions": {},
    }

    # first rebuild the function / basic block / instruction layout,
    # so that the per-scope features below have somewhere to land.
    for fva, function in doc.get("functions", {}).items():
        fva = int(fva, 0x10)
        features["functions"][fva] = {
            "features": [],
            "basic blocks": {},
        }

        for bbva, bb in function.items():
            bbva = int(bbva, 0x10)
            features["functions"][fva]["basic blocks"][bbva] = {
                "features": [],
                "instructions": {},
            }

            for insnva in bb:
                insnva = int(insnva, 0x10)
                features["functions"][fva]["basic blocks"][bbva]["instructions"][insnva] = {
                    "features": [],
                }

    # in the following blocks, each entry looks like:
    #
    #     ('MatchedRule', ('foo', ), '0x401000', ('0x401000', ))
    #      ^^^^^^^^^^^^^  ^^^^^^^^^  ^^^^^^^^^^  ^^^^^^^^^^^^^^
    #      feature name   args       addr         func/bb/insn
    for feature in doc.get("scopes", {}).get("global", []):
        # the location tuple is empty at this scope; unpack it only to validate the entry's shape.
        va, _ = feature[2:]
        va = int(va, 0x10)
        feature = deserialize_feature(feature[:2])
        features["global features"].append((va, feature))

    for feature in doc.get("scopes", {}).get("file", []):
        va, _ = feature[2:]
        va = int(va, 0x10)
        feature = deserialize_feature(feature[:2])
        features["file features"].append((va, feature))

    for feature in doc.get("scopes", {}).get("function", []):
        # fetch the pair like:
        #
        #     ('0x401000', ('0x401000', ))
        #      ^^^^^^^^^^  ^^^^^^^^^^^^^^
        #      addr         func/bb/insn
        va, loc = feature[2:]
        va = int(va, 0x10)
        loc = [int(lo, 0x10) for lo in loc]

        # decode the feature from the pair like:
        #
        #     ('MatchedRule', ('foo', ))
        #      ^^^^^^^^^^^^^  ^^^^^^^^^
        #      feature name   args
        feature = deserialize_feature(feature[:2])
        features["functions"][loc[0]]["features"].append((va, feature))

    for feature in doc.get("scopes", {}).get("basic block", []):
        va, loc = feature[2:]
        va = int(va, 0x10)
        loc = [int(lo, 0x10) for lo in loc]
        feature = deserialize_feature(feature[:2])
        features["functions"][loc[0]]["basic blocks"][loc[1]]["features"].append((va, feature))

    for feature in doc.get("scopes", {}).get("instruction", []):
        va, loc = feature[2:]
        va = int(va, 0x10)
        loc = [int(lo, 0x10) for lo in loc]
        feature = deserialize_feature(feature[:2])
        features["functions"][loc[0]]["basic blocks"][loc[1]]["instructions"][loc[2]]["features"].append((va, feature))

    return capa.features.extractors.base_extractor.NullFeatureExtractor(features)
|
||||
|
||||
|
||||
# header that prefixes every freeze file, ahead of the zlib-compressed JSON payload.
MAGIC = "capa0000".encode("ascii")


def dump(extractor):
    """
    serialize the given extractor to a byte array.

    the result is MAGIC followed by the zlib-compressed, utf-8 encoded output of `dumps`.
    """
    return MAGIC + zlib.compress(dumps(extractor).encode("utf-8"))


def is_freeze(buf: bytes) -> bool:
    """return True when the buffer begins with the freeze MAGIC header."""
    return buf[: len(MAGIC)] == MAGIC


def load(buf):
    """
    deserialize a set of features (as a NullFeatureExtractor) from a byte array.

    raises:
      ValueError: when the buffer does not begin with the freeze MAGIC header.
    """
    if not is_freeze(buf):
        raise ValueError("missing magic header")
    # strip the header, then undo the compression and encoding applied by `dump`.
    return loads(zlib.decompress(buf[len(MAGIC) :]).decode("utf-8"))
|
||||
|
||||
|
||||
def main(argv=None):
    """
    command line entry point: extract features from a sample and write them to a freeze file.

    args:
      argv: the command line arguments, excluding the program name; defaults to `sys.argv[1:]`.

    returns:
      int: 0 on success.
    """
    import sys
    import argparse

    import capa.main

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="save capa features to a file")
    capa.main.install_common_args(parser, {"sample", "format", "backend", "signatures"})
    parser.add_argument("output", type=str, help="Path to output file")
    args = parser.parse_args(args=argv)
    capa.main.handle_common_args(args)

    sigpaths = capa.main.get_signatures(args.signatures)

    extractor = capa.main.get_extractor(args.sample, args.format, args.backend, sigpaths, False)

    with open(args.output, "wb") as f:
        f.write(dump(extractor))

    return 0


if __name__ == "__main__":
    import sys

    sys.exit(main())
|
||||
@@ -0,0 +1,382 @@
|
||||
"""
|
||||
capa freeze file format: `| capa0000 | + zlib(utf-8(json(...)))`
|
||||
|
||||
Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and limitations under the License.
|
||||
"""
|
||||
import zlib
|
||||
import logging
|
||||
from enum import Enum
|
||||
from typing import Any, List, Tuple
|
||||
|
||||
import dncil.clr.token
|
||||
from pydantic import Field, BaseModel
|
||||
|
||||
import capa.helpers
|
||||
import capa.features.file
|
||||
import capa.features.insn
|
||||
import capa.features.common
|
||||
import capa.features.address
|
||||
import capa.features.basicblock
|
||||
import capa.features.extractors.base_extractor
|
||||
from capa.helpers import assert_never
|
||||
from capa.features.freeze.features import Feature, feature_from_capa
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class HashableModel(BaseModel):
    """base pydantic model for the frozen records used in this module."""

    class Config:
        # NOTE(review): presumably `frozen` makes instances immutable and hashable
        # (as the class name suggests) - confirm against the pydantic documentation.
        frozen = True
|
||||
|
||||
|
||||
class AddressType(str, Enum):
    """the kinds of address that can be stored in a freeze document.

    members are also `str` instances, so they compare and serialize as their string values.
    """

    ABSOLUTE = "absolute"
    RELATIVE = "relative"
    FILE = "file"
    DN_TOKEN = "dn token"
    DN_TOKEN_OFFSET = "dn token offset"
    NO_ADDRESS = "no address"
|
||||
|
||||
|
||||
class Address(HashableModel):
    """
    serializable representation of a capa address.

    args:
      type: the kind of address.
      value: the payload for that kind, as produced by `from_capa`:
        an int for absolute/relative/file addresses and .NET tokens,
        a (token, offset) pair for .NET token offsets,
        and None for "no address".
    """

    type: AddressType
    value: Any

    @classmethod
    def from_capa(cls, a: capa.features.address.Address) -> "Address":
        """
        convert a capa address into its serializable form.

        raises:
          ValueError: if `a` is an Address that is not one of the concrete types handled here.
        """
        if isinstance(a, capa.features.address.AbsoluteVirtualAddress):
            return cls(type=AddressType.ABSOLUTE, value=int(a))

        elif isinstance(a, capa.features.address.RelativeVirtualAddress):
            return cls(type=AddressType.RELATIVE, value=int(a))

        elif isinstance(a, capa.features.address.FileOffsetAddress):
            return cls(type=AddressType.FILE, value=int(a))

        elif isinstance(a, capa.features.address.DNTokenAddress):
            return cls(type=AddressType.DN_TOKEN, value=a.token.value)

        elif isinstance(a, capa.features.address.DNTokenOffsetAddress):
            return cls(type=AddressType.DN_TOKEN_OFFSET, value=(a.token.value, a.offset))

        elif a == capa.features.address.NO_ADDRESS or isinstance(a, capa.features.address._NoAddress):
            return cls(type=AddressType.NO_ADDRESS, value=None)

        elif isinstance(a, capa.features.address.Address):
            # an Address that matched none of the concrete types above:
            # there is no payload we know how to serialize.
            raise ValueError("don't use an Address instance directly")

        else:
            assert_never(a)

    def to_capa(self) -> capa.features.address.Address:
        """convert this serializable address back into the corresponding capa address."""
        if self.type is AddressType.ABSOLUTE:
            return capa.features.address.AbsoluteVirtualAddress(self.value)

        elif self.type is AddressType.RELATIVE:
            return capa.features.address.RelativeVirtualAddress(self.value)

        elif self.type is AddressType.FILE:
            return capa.features.address.FileOffsetAddress(self.value)

        elif self.type is AddressType.DN_TOKEN:
            return capa.features.address.DNTokenAddress(dncil.clr.token.Token(self.value))

        elif self.type is AddressType.DN_TOKEN_OFFSET:
            token, offset = self.value
            return capa.features.address.DNTokenOffsetAddress(dncil.clr.token.Token(token), offset)

        elif self.type is AddressType.NO_ADDRESS:
            return capa.features.address.NO_ADDRESS

        else:
            assert_never(self.type)

    def __lt__(self, other: "Address") -> bool:
        """
        order addresses first by type (compared as strings), then by value.
        """
        if self.type != other.type:
            return self.type < other.type

        if self.type is AddressType.NO_ADDRESS:
            # both sides are "no address" and carry no value to compare;
            # equal items must not be strictly less than each other.
            return False

        else:
            return self.value < other.value
|
||||
|
||||
|
||||
class GlobalFeature(HashableModel):
    """
    args:
      feature: a feature from the global scope; no address is recorded for it.
    """

    feature: Feature
|
||||
|
||||
|
||||
class FileFeature(HashableModel):
    """
    args:
      address: the address at which this feature is found.
      feature: a feature from the file scope.
    """

    address: Address
    feature: Feature
|
||||
|
||||
|
||||
class FunctionFeature(HashableModel):
    """
    a feature found in the scope of a function.

    args:
      function: the address of the function to which this feature belongs.
      address: the address at which this feature is found.

    the two may differ: the feature can be found anywhere *within* the function,
    not only right at its starting address.
    """

    function: Address
    address: Address
    feature: Feature
|
||||
|
||||
|
||||
class BasicBlockFeature(HashableModel):
    """
    a feature found in the scope of a basic block.

    args:
      basic_block: the address of the basic block to which this feature belongs.
      address: the address at which this feature is found.

    the two may differ: the feature can be found anywhere *within* the basic block,
    not only right at its starting address.
    """

    basic_block: Address
    address: Address
    feature: Feature
|
||||
|
||||
|
||||
class InstructionFeature(HashableModel):
    """
    a feature found in the scope of an instruction.

    args:
      instruction: the address of the instruction to which this feature belongs.
      address: the address at which this feature is found.

    the two may differ: the feature can be found *within* the instruction,
    not only right at its starting address.
    """

    instruction: Address
    address: Address
    feature: Feature
|
||||
|
||||
|
||||
class InstructionFeatures(BaseModel):
    """
    args:
      address: the address of the instruction.
      features: the features extracted for that instruction.
    """

    address: Address
    features: Tuple[InstructionFeature, ...]
|
||||
|
||||
|
||||
class BasicBlockFeatures(BaseModel):
    """
    args:
      address: the address of the basic block.
      features: the features extracted for that basic block.
      instructions: the per-instruction records of the basic block.
    """

    address: Address
    features: Tuple[BasicBlockFeature, ...]
    instructions: Tuple[InstructionFeatures, ...]
|
||||
|
||||
|
||||
class FunctionFeatures(BaseModel):
    """
    args:
      address: the address of the function.
      features: the features extracted for that function.
      basic_blocks: the per-basic-block records of the function (aliased as "basic block").
    """

    address: Address
    features: Tuple[FunctionFeature, ...]
    basic_blocks: Tuple[BasicBlockFeatures, ...] = Field(alias="basic block")

    class Config:
        # allow constructing the model with `basic_blocks=...`, not only via the alias.
        allow_population_by_field_name = True
|
||||
|
||||
|
||||
class Features(BaseModel):
    """
    all features of a frozen document, grouped by scope.

    args:
      global_: the global features (aliased as "global").
      file: the file features.
      functions: the per-function records.
    """

    global_: Tuple[GlobalFeature, ...] = Field(alias="global")
    file: Tuple[FileFeature, ...]
    functions: Tuple[FunctionFeatures, ...]

    class Config:
        # allow constructing the model with `global_=...`, not only via the alias.
        allow_population_by_field_name = True
|
||||
|
||||
|
||||
class Freeze(BaseModel):
    """
    the top-level freeze document.

    args:
      version: the freeze format version; defaults to 2.
      base_address: the base address reported by the extractor (aliased as "base address").
      features: the frozen features.
    """

    version: int = 2
    base_address: Address = Field(alias="base address")
    features: Features

    class Config:
        # allow constructing the model with `base_address=...`, not only via the alias.
        allow_population_by_field_name = True
|
||||
|
||||
|
||||
def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -> str:
    """
    serialize the given extractor to a string

    collects the global and file features, then walks every function,
    basic block, and instruction that the extractor reports,
    and returns the JSON rendering of the resulting `Freeze` document.
    """
    global_features: List[GlobalFeature] = [
        GlobalFeature(
            feature=feature_from_capa(feature),
        )
        for feature, _ in extractor.extract_global_features()
    ]

    file_features: List[FileFeature] = [
        FileFeature(
            feature=feature_from_capa(feature),
            address=Address.from_capa(address),
        )
        for feature, address in extractor.extract_file_features()
    ]

    function_features: List[FunctionFeatures] = []
    for fh in extractor.get_functions():
        function_address = Address.from_capa(fh.address)
        per_function = [
            FunctionFeature(
                function=function_address,
                address=Address.from_capa(addr),
                feature=feature_from_capa(feature),
            )
            for feature, addr in extractor.extract_function_features(fh)
        ]

        frozen_blocks = []
        for bbh in extractor.get_basic_blocks(fh):
            block_address = Address.from_capa(bbh.address)
            per_block = [
                BasicBlockFeature(
                    basic_block=block_address,
                    address=Address.from_capa(addr),
                    feature=feature_from_capa(feature),
                )
                for feature, addr in extractor.extract_basic_block_features(fh, bbh)
            ]

            frozen_insns = []
            for ih in extractor.get_instructions(fh, bbh):
                insn_address = Address.from_capa(ih.address)
                per_insn = [
                    InstructionFeature(
                        instruction=insn_address,
                        address=Address.from_capa(addr),
                        feature=feature_from_capa(feature),
                    )
                    for feature, addr in extractor.extract_insn_features(fh, bbh, ih)
                ]

                frozen_insns.append(InstructionFeatures(address=insn_address, features=per_insn))

            frozen_blocks.append(
                BasicBlockFeatures(
                    address=block_address,
                    features=per_block,
                    instructions=frozen_insns,
                )
            )

        function_features.append(
            FunctionFeatures(
                address=function_address,
                features=per_function,
                basic_blocks=frozen_blocks,
            )
        )

    frozen_features = Features(
        global_=global_features,
        file=file_features,
        functions=function_features,
    )

    document = Freeze(
        version=2,
        base_address=Address.from_capa(extractor.get_base_address()),
        features=frozen_features,
    )

    return document.json()
|
||||
|
||||
|
||||
def loads(s: str) -> capa.features.extractors.base_extractor.FeatureExtractor:
    """
    deserialize a set of features (as a NullFeatureExtractor) from a string.

    args:
      s: the JSON document, as produced by `dumps`.

    raises:
      ValueError: if the document declares a freeze format version other than 2.
    """
    import capa.features.extractors.null as null

    freeze = Freeze.parse_raw(s)
    if freeze.version != 2:
        # interpolate the version here: passing it as a second argument to ValueError
        # would leave the "%d" placeholder unexpanded in the message.
        raise ValueError("unsupported freeze format version: %d" % (freeze.version))

    return null.NullFeatureExtractor(
        base_address=freeze.base_address.to_capa(),
        global_features=[f.feature.to_capa() for f in freeze.features.global_],
        file_features=[(f.address.to_capa(), f.feature.to_capa()) for f in freeze.features.file],
        # functions, basic blocks, and instructions are each keyed by their capa address.
        functions={
            f.address.to_capa(): null.FunctionFeatures(
                features=[(fe.address.to_capa(), fe.feature.to_capa()) for fe in f.features],
                basic_blocks={
                    bb.address.to_capa(): null.BasicBlockFeatures(
                        features=[(fe.address.to_capa(), fe.feature.to_capa()) for fe in bb.features],
                        instructions={
                            i.address.to_capa(): null.InstructionFeatures(
                                features=[(fe.address.to_capa(), fe.feature.to_capa()) for fe in i.features]
                            )
                            for i in bb.instructions
                        },
                    )
                    for bb in f.basic_blocks
                },
            )
            for f in freeze.features.functions
        },
    )
|
||||
|
||||
|
||||
MAGIC = "capa0000".encode("ascii")
|
||||
|
||||
|
||||
def dump(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -> bytes:
    """serialize the given extractor to a byte array.

    the result is the MAGIC header followed by the zlib-compressed,
    utf-8 encoded string produced by `dumps`.
    """
    document = dumps(extractor)
    compressed = zlib.compress(document.encode("utf-8"))
    return MAGIC + compressed
|
||||
|
||||
|
||||
def is_freeze(buf: bytes) -> bool:
    """return True when the buffer begins with the freeze MAGIC header."""
    header = buf[: len(MAGIC)]
    return header == MAGIC
|
||||
|
||||
|
||||
def load(buf: bytes) -> capa.features.extractors.base_extractor.FeatureExtractor:
    """deserialize a set of features (as a NullFeatureExtractor) from a byte array.

    raises:
      ValueError: if the buffer does not start with the MAGIC header.
    """
    if is_freeze(buf):
        # everything after the header is the zlib-compressed, utf-8 encoded document.
        payload = buf[len(MAGIC) :]
        return loads(zlib.decompress(payload).decode("utf-8"))

    raise ValueError("missing magic header")
|
||||
|
||||
|
||||
def main(argv=None):
    """command line entry point: extract the features of a sample and save them to a freeze file.

    args:
      argv: command line arguments; defaults to `sys.argv[1:]` when None.

    returns: 0 after the output file has been written.
    """
    import sys
    import argparse

    import capa.main

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="save capa features to a file")
    capa.main.install_common_args(parser, {"sample", "format", "backend", "signatures"})
    parser.add_argument("output", type=str, help="Path to output file")
    args = parser.parse_args(args=argv)
    capa.main.handle_common_args(args)

    signature_paths = capa.main.get_signatures(args.signatures)
    extractor = capa.main.get_extractor(args.sample, args.format, args.backend, signature_paths, False)

    # the file is opened in binary mode because `dump` produces bytes.
    with open(args.output, "wb") as out:
        out.write(dump(extractor))

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
|
||||
sys.exit(main())
|
||||
@@ -0,0 +1,332 @@
|
||||
import binascii
|
||||
from typing import Union, Optional
|
||||
|
||||
from pydantic import Field, BaseModel
|
||||
|
||||
import capa.features.file
|
||||
import capa.features.insn
|
||||
import capa.features.common
|
||||
import capa.features.basicblock
|
||||
|
||||
|
||||
class FeatureModel(BaseModel):
    """base class for the serializable feature models defined in this module."""

    class Config:
        frozen = True
        allow_population_by_field_name = True

    def to_capa(self) -> capa.features.common.Feature:
        """
        build the capa feature that this model describes.

        the concrete model class selects which capa feature class is constructed.

        raises:
          NotImplementedError: if the model class has no conversion implemented here.
        """
        if isinstance(self, OSFeature):
            return capa.features.common.OS(self.os, description=self.description)

        if isinstance(self, ArchFeature):
            return capa.features.common.Arch(self.arch, description=self.description)

        if isinstance(self, FormatFeature):
            return capa.features.common.Format(self.format, description=self.description)

        if isinstance(self, MatchFeature):
            return capa.features.common.MatchedRule(self.match, description=self.description)

        if isinstance(self, CharacteristicFeature):
            return capa.features.common.Characteristic(self.characteristic, description=self.description)

        if isinstance(self, ExportFeature):
            return capa.features.file.Export(self.export, description=self.description)

        if isinstance(self, ImportFeature):
            return capa.features.file.Import(self.import_, description=self.description)

        if isinstance(self, SectionFeature):
            return capa.features.file.Section(self.section, description=self.description)

        if isinstance(self, FunctionNameFeature):
            return capa.features.file.FunctionName(self.function_name, description=self.description)

        if isinstance(self, SubstringFeature):
            return capa.features.common.Substring(self.substring, description=self.description)

        if isinstance(self, RegexFeature):
            return capa.features.common.Regex(self.regex, description=self.description)

        if isinstance(self, StringFeature):
            return capa.features.common.String(self.string, description=self.description)

        if isinstance(self, ClassFeature):
            return capa.features.common.Class(self.class_, description=self.description)

        if isinstance(self, NamespaceFeature):
            return capa.features.common.Namespace(self.namespace, description=self.description)

        if isinstance(self, BasicBlockFeature):
            return capa.features.basicblock.BasicBlock(description=self.description)

        if isinstance(self, APIFeature):
            return capa.features.insn.API(self.api, description=self.description)

        if isinstance(self, NumberFeature):
            return capa.features.insn.Number(self.number, description=self.description)

        if isinstance(self, BytesFeature):
            # the model stores the bytes as a hex string; decode it back to raw bytes.
            raw = binascii.unhexlify(self.bytes)
            return capa.features.common.Bytes(raw, description=self.description)

        if isinstance(self, OffsetFeature):
            return capa.features.insn.Offset(self.offset, description=self.description)

        if isinstance(self, MnemonicFeature):
            return capa.features.insn.Mnemonic(self.mnemonic, description=self.description)

        if isinstance(self, OperandNumberFeature):
            return capa.features.insn.OperandNumber(
                self.index,
                self.operand_number,
                description=self.description,
            )

        if isinstance(self, OperandOffsetFeature):
            return capa.features.insn.OperandOffset(
                self.index,
                self.operand_offset,
                description=self.description,
            )

        raise NotImplementedError(f"Feature.to_capa({type(self)}) not implemented")
|
||||
|
||||
|
||||
def feature_from_capa(f: capa.features.common.Feature) -> "Feature":
    """
    convert a capa feature into its serializable model.

    the checks run in a fixed order; Substring and Regex are tested
    before String due to inheritance.

    raises:
      NotImplementedError: if the feature class has no conversion implemented here.
    """
    if isinstance(f, capa.features.common.OS):
        return OSFeature(os=f.value, description=f.description)

    if isinstance(f, capa.features.common.Arch):
        return ArchFeature(arch=f.value, description=f.description)

    if isinstance(f, capa.features.common.Format):
        return FormatFeature(format=f.value, description=f.description)

    if isinstance(f, capa.features.common.MatchedRule):
        return MatchFeature(match=f.value, description=f.description)

    if isinstance(f, capa.features.common.Characteristic):
        return CharacteristicFeature(characteristic=f.value, description=f.description)

    if isinstance(f, capa.features.file.Export):
        return ExportFeature(export=f.value, description=f.description)

    if isinstance(f, capa.features.file.Import):
        return ImportFeature(import_=f.value, description=f.description)

    if isinstance(f, capa.features.file.Section):
        return SectionFeature(section=f.value, description=f.description)

    if isinstance(f, capa.features.file.FunctionName):
        return FunctionNameFeature(function_name=f.value, description=f.description)

    # must come before check for String due to inheritance
    if isinstance(f, capa.features.common.Substring):
        return SubstringFeature(substring=f.value, description=f.description)

    # must come before check for String due to inheritance
    if isinstance(f, capa.features.common.Regex):
        return RegexFeature(regex=f.value, description=f.description)

    if isinstance(f, capa.features.common.String):
        return StringFeature(string=f.value, description=f.description)

    if isinstance(f, capa.features.common.Class):
        return ClassFeature(class_=f.value, description=f.description)

    if isinstance(f, capa.features.common.Namespace):
        return NamespaceFeature(namespace=f.value, description=f.description)

    if isinstance(f, capa.features.basicblock.BasicBlock):
        return BasicBlockFeature(description=f.description)

    if isinstance(f, capa.features.insn.API):
        return APIFeature(api=f.value, description=f.description)

    if isinstance(f, capa.features.insn.Number):
        return NumberFeature(number=f.value, description=f.description)

    if isinstance(f, capa.features.common.Bytes):
        raw = f.value
        assert isinstance(raw, bytes)
        # raw bytes are stored in the model as an ASCII hex string.
        encoded = binascii.hexlify(raw).decode("ascii")
        return BytesFeature(bytes=encoded, description=f.description)

    if isinstance(f, capa.features.insn.Offset):
        return OffsetFeature(offset=f.value, description=f.description)

    if isinstance(f, capa.features.insn.Mnemonic):
        return MnemonicFeature(mnemonic=f.value, description=f.description)

    if isinstance(f, capa.features.insn.OperandNumber):
        return OperandNumberFeature(index=f.index, operand_number=f.value, description=f.description)

    if isinstance(f, capa.features.insn.OperandOffset):
        return OperandOffsetFeature(index=f.index, operand_offset=f.value, description=f.description)

    raise NotImplementedError(f"feature_from_capa({type(f)}) not implemented")
|
||||
|
||||
|
||||
class OSFeature(FeatureModel):
    """serializable form of a capa `OS` feature."""

    type: str = "os"
    os: str
    description: Optional[str]
|
||||
|
||||
|
||||
class ArchFeature(FeatureModel):
    """serializable form of a capa `Arch` feature."""

    type: str = "arch"
    arch: str
    description: Optional[str]
|
||||
|
||||
|
||||
class FormatFeature(FeatureModel):
    """serializable form of a capa `Format` feature."""

    type: str = "format"
    format: str
    description: Optional[str]
|
||||
|
||||
|
||||
class MatchFeature(FeatureModel):
    """serializable form of a capa `MatchedRule` feature."""

    type: str = "match"
    match: str
    description: Optional[str]
|
||||
|
||||
|
||||
class CharacteristicFeature(FeatureModel):
    """serializable form of a capa `Characteristic` feature."""

    type: str = "characteristic"
    characteristic: str
    description: Optional[str]
|
||||
|
||||
|
||||
class ExportFeature(FeatureModel):
    """serializable form of a capa `Export` feature."""

    type: str = "export"
    export: str
    description: Optional[str]
|
||||
|
||||
|
||||
class ImportFeature(FeatureModel):
    """serializable form of a capa `Import` feature."""

    type: str = "import"
    # `import` is a Python keyword, so the field is named `import_` and aliased.
    import_: str = Field(alias="import")
    description: Optional[str]
|
||||
|
||||
|
||||
class SectionFeature(FeatureModel):
    """serializable form of a capa `Section` feature."""

    type: str = "section"
    section: str
    description: Optional[str]
|
||||
|
||||
|
||||
class FunctionNameFeature(FeatureModel):
    """serializable form of a capa `FunctionName` feature."""

    type: str = "function name"
    # the alias contains a space, so the field itself is named `function_name`.
    function_name: str = Field(alias="function name")
    description: Optional[str]
|
||||
|
||||
|
||||
class SubstringFeature(FeatureModel):
    """serializable form of a capa `Substring` feature."""

    type: str = "substring"
    substring: str
    description: Optional[str]
|
||||
|
||||
|
||||
class RegexFeature(FeatureModel):
    """serializable form of a capa `Regex` feature."""

    type: str = "regex"
    regex: str
    description: Optional[str]
|
||||
|
||||
|
||||
class StringFeature(FeatureModel):
    """serializable form of a capa `String` feature."""

    type: str = "string"
    string: str
    description: Optional[str]
|
||||
|
||||
|
||||
class ClassFeature(FeatureModel):
    """serializable form of a capa `Class` feature."""

    type: str = "class"
    # `class` is a Python keyword, so the field is named `class_` and aliased.
    class_: str = Field(alias="class")
    description: Optional[str]
|
||||
|
||||
|
||||
class NamespaceFeature(FeatureModel):
    """serializable form of a capa `Namespace` feature."""

    type: str = "namespace"
    namespace: str
    description: Optional[str]
|
||||
|
||||
|
||||
class BasicBlockFeature(FeatureModel):
    """serializable form of a capa `BasicBlock` feature; it carries no value field."""

    type: str = "basic block"
    description: Optional[str]
|
||||
|
||||
|
||||
class APIFeature(FeatureModel):
    """serializable form of a capa `API` feature."""

    type: str = "api"
    api: str
    description: Optional[str]
|
||||
|
||||
|
||||
class NumberFeature(FeatureModel):
    """serializable form of a capa `Number` feature; the value may be an int or a float."""

    type: str = "number"
    number: Union[int, float]
    description: Optional[str]
|
||||
|
||||
|
||||
class BytesFeature(FeatureModel):
    """serializable form of a capa `Bytes` feature."""

    type: str = "bytes"
    # hex-encoded string: `feature_from_capa` hexlifies the raw bytes and `to_capa` unhexlifies them.
    bytes: str
    description: Optional[str]
|
||||
|
||||
|
||||
class OffsetFeature(FeatureModel):
    """serializable form of a capa `Offset` feature."""

    type: str = "offset"
    offset: int
    description: Optional[str]
|
||||
|
||||
|
||||
class MnemonicFeature(FeatureModel):
    """serializable form of a capa `Mnemonic` feature."""

    type: str = "mnemonic"
    mnemonic: str
    description: Optional[str]
|
||||
|
||||
|
||||
class OperandNumberFeature(FeatureModel):
    """serializable form of a capa `OperandNumber` feature: an operand index plus a number."""

    type: str = "operand number"
    index: int
    # the alias contains a space, so the field itself is named `operand_number`.
    operand_number: int = Field(alias="operand number")
    description: Optional[str]
|
||||
|
||||
|
||||
class OperandOffsetFeature(FeatureModel):
    """serializable form of a capa `OperandOffset` feature: an operand index plus an offset."""

    type: str = "operand offset"
    index: int
    # the alias contains a space, so the field itself is named `operand_offset`.
    operand_offset: int = Field(alias="operand offset")
    description: Optional[str]
|
||||
|
||||
|
||||
# union of every serializable feature model; the order of the members is significant.
Feature = Union[
    OSFeature,
    ArchFeature,
    FormatFeature,
    MatchFeature,
    CharacteristicFeature,
    ExportFeature,
    ImportFeature,
    SectionFeature,
    FunctionNameFeature,
    SubstringFeature,
    RegexFeature,
    StringFeature,
    ClassFeature,
    NamespaceFeature,
    APIFeature,
    NumberFeature,
    BytesFeature,
    OffsetFeature,
    MnemonicFeature,
    OperandNumberFeature,
    OperandOffsetFeature,
    # keep this one last: pydantic fails to serialize correctly otherwise.
    # NOTE(review): presumably because this model has no required value field,
    # so it could match data meant for another member if tried earlier - confirm.
    BasicBlockFeature,
]
|
||||
+19
-9
@@ -6,22 +6,35 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import abc
|
||||
from typing import Union
|
||||
|
||||
import capa.render.utils
|
||||
from capa.features.common import Feature
|
||||
|
||||
|
||||
def hex(n: int) -> str:
    """render the given number using upper case hex, like: 0x123ABC

    negative numbers keep their sign in front of the prefix, like: -0x1F
    """
    sign = "-" if n < 0 else ""
    return "%s0x%X" % (sign, abs(n))
|
||||
|
||||
|
||||
class API(Feature):
    """feature whose value is an API name."""

    def __init__(self, name: str, description=None):
        super().__init__(name, description=description)
|
||||
|
||||
|
||||
class Number(Feature):
|
||||
def __init__(self, value: int, description=None):
|
||||
def __init__(self, value: Union[int, float], description=None):
|
||||
super(Number, self).__init__(value, description=description)
|
||||
|
||||
def get_value_str(self):
|
||||
return capa.render.utils.hex(self.value)
|
||||
if isinstance(self.value, int):
|
||||
return hex(self.value)
|
||||
elif isinstance(self.value, float):
|
||||
return str(self.value)
|
||||
else:
|
||||
raise ValueError("invalid value type")
|
||||
|
||||
|
||||
# max recognized structure size (and therefore, offset size)
|
||||
@@ -33,7 +46,7 @@ class Offset(Feature):
|
||||
super(Offset, self).__init__(value, description=description)
|
||||
|
||||
def get_value_str(self):
|
||||
return capa.render.utils.hex(self.value)
|
||||
return hex(self.value)
|
||||
|
||||
|
||||
class Mnemonic(Feature):
|
||||
@@ -61,9 +74,6 @@ class _Operand(Feature, abc.ABC):
|
||||
def __eq__(self, other):
|
||||
return super().__eq__(other) and self.index == other.index
|
||||
|
||||
def freeze_serialize(self):
|
||||
return (self.__class__.__name__, [self.index, self.value])
|
||||
|
||||
|
||||
class OperandNumber(_Operand):
|
||||
# cached names so we don't do extra string formatting every ctor
|
||||
@@ -76,7 +86,7 @@ class OperandNumber(_Operand):
|
||||
|
||||
def get_value_str(self) -> str:
|
||||
assert isinstance(self.value, int)
|
||||
return capa.render.utils.hex(self.value)
|
||||
return hex(self.value)
|
||||
|
||||
|
||||
class OperandOffset(_Operand):
|
||||
@@ -90,4 +100,4 @@ class OperandOffset(_Operand):
|
||||
|
||||
def get_value_str(self) -> str:
|
||||
assert isinstance(self.value, int)
|
||||
return capa.render.utils.hex(self.value)
|
||||
return hex(self.value)
|
||||
|
||||
+3
-1
@@ -11,7 +11,6 @@ from typing import NoReturn
|
||||
|
||||
from capa.exceptions import UnsupportedFormatError
|
||||
from capa.features.common import FORMAT_SC32, FORMAT_SC64, FORMAT_UNKNOWN
|
||||
from capa.features.extractors.common import extract_format
|
||||
|
||||
EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
|
||||
EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
|
||||
@@ -65,6 +64,9 @@ def get_auto_format(path: str) -> str:
|
||||
|
||||
|
||||
def get_format(sample: str) -> str:
|
||||
# imported locally to avoid import cycle
|
||||
from capa.features.extractors.common import extract_format
|
||||
|
||||
with open(sample, "rb") as f:
|
||||
buf = f.read()
|
||||
|
||||
|
||||
+29
-2
@@ -8,6 +8,7 @@
|
||||
|
||||
import logging
|
||||
import datetime
|
||||
import contextlib
|
||||
|
||||
import idc
|
||||
import idaapi
|
||||
@@ -107,14 +108,31 @@ def get_file_sha256():
|
||||
return sha256
|
||||
|
||||
|
||||
def collect_metadata():
|
||||
def collect_metadata(rules):
|
||||
""" """
|
||||
md5 = get_file_md5()
|
||||
sha256 = get_file_sha256()
|
||||
|
||||
info: idaapi.idainfo = idaapi.get_inf_structure()
|
||||
if info.procname == "metapc" and info.is_64bit():
|
||||
arch = "x86_64"
|
||||
elif info.procname == "metapc" and info.is_32bit():
|
||||
arch = "x86"
|
||||
else:
|
||||
arch = "unknown arch"
|
||||
|
||||
format_name: str = ida_loader.get_file_type_name()
|
||||
if "PE" in format_name:
|
||||
os = "windows"
|
||||
elif "ELF" in format_name:
|
||||
with contextlib.closing(capa.ida.helpers.IDAIO()) as f:
|
||||
os = capa.features.extractors.elf.detect_elf_os(f)
|
||||
else:
|
||||
os = "unknown os"
|
||||
|
||||
return {
|
||||
"timestamp": datetime.datetime.now().isoformat(),
|
||||
# "argv" is not relevant here
|
||||
"argv": [],
|
||||
"sample": {
|
||||
"md5": md5,
|
||||
"sha1": "", # not easily accessible
|
||||
@@ -123,7 +141,10 @@ def collect_metadata():
|
||||
},
|
||||
"analysis": {
|
||||
"format": idaapi.get_file_type_name(),
|
||||
"arch": arch,
|
||||
"os": os,
|
||||
"extractor": "ida",
|
||||
"rules": rules,
|
||||
"base_address": idaapi.get_imagebase(),
|
||||
"layout": {
|
||||
# this is updated after capabilities have been collected.
|
||||
@@ -131,6 +152,12 @@ def collect_metadata():
|
||||
#
|
||||
# "functions": { 0x401000: { "matched_basic_blocks": [ 0x401000, 0x401005, ... ] }, ... }
|
||||
},
|
||||
# ignore these for now - not used by IDA plugin.
|
||||
"feature_counts": {
|
||||
"file": {},
|
||||
"functions": {},
|
||||
},
|
||||
"library_functions": {},
|
||||
},
|
||||
"version": capa.version.__version__,
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import logging
|
||||
|
||||
import idaapi
|
||||
|
||||
+62
-57
@@ -8,10 +8,10 @@
|
||||
|
||||
import os
|
||||
import copy
|
||||
import json
|
||||
import logging
|
||||
import itertools
|
||||
import collections
|
||||
from typing import Set, Dict, Optional
|
||||
|
||||
import idaapi
|
||||
import ida_kernwin
|
||||
@@ -26,16 +26,20 @@ import capa.render.json
|
||||
import capa.features.common
|
||||
import capa.render.result_document
|
||||
import capa.features.extractors.ida.extractor
|
||||
from capa.engine import FeatureSet
|
||||
from capa.features.common import Feature
|
||||
from capa.ida.plugin.icon import QICON
|
||||
from capa.ida.plugin.view import (
|
||||
CapaExplorerQtreeView,
|
||||
CapaExplorerRulgenEditor,
|
||||
CapaExplorerRulgenPreview,
|
||||
CapaExplorerRulegenEditor,
|
||||
CapaExplorerRulegenPreview,
|
||||
CapaExplorerRulegenFeatures,
|
||||
)
|
||||
from capa.features.address import NO_ADDRESS, Address
|
||||
from capa.ida.plugin.hooks import CapaExplorerIdaHooks
|
||||
from capa.ida.plugin.model import CapaExplorerDataModel
|
||||
from capa.ida.plugin.proxy import CapaExplorerRangeProxyModel, CapaExplorerSearchProxyModel
|
||||
from capa.features.extractors.base_extractor import FunctionHandle
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
settings = ida_settings.IDASettings("capa")
|
||||
@@ -66,32 +70,32 @@ def trim_function_name(f, max_length=25):
|
||||
return n
|
||||
|
||||
|
||||
def find_func_features(f, extractor):
|
||||
def find_func_features(fh: FunctionHandle, extractor):
|
||||
""" """
|
||||
func_features = collections.defaultdict(set)
|
||||
bb_features = collections.defaultdict(dict)
|
||||
func_features: Dict[Feature, Set] = collections.defaultdict(set)
|
||||
bb_features: Dict[Address, Dict] = collections.defaultdict(dict)
|
||||
|
||||
for (feature, ea) in extractor.extract_function_features(f):
|
||||
func_features[feature].add(ea)
|
||||
for (feature, addr) in extractor.extract_function_features(fh):
|
||||
func_features[feature].add(addr)
|
||||
|
||||
for bb in extractor.get_basic_blocks(f):
|
||||
for bbh in extractor.get_basic_blocks(fh):
|
||||
_bb_features = collections.defaultdict(set)
|
||||
|
||||
for (feature, ea) in extractor.extract_basic_block_features(f, bb):
|
||||
_bb_features[feature].add(ea)
|
||||
func_features[feature].add(ea)
|
||||
for (feature, addr) in extractor.extract_basic_block_features(fh, bbh):
|
||||
_bb_features[feature].add(addr)
|
||||
func_features[feature].add(addr)
|
||||
|
||||
for insn in extractor.get_instructions(f, bb):
|
||||
for (feature, ea) in extractor.extract_insn_features(f, bb, insn):
|
||||
_bb_features[feature].add(ea)
|
||||
func_features[feature].add(ea)
|
||||
for insn in extractor.get_instructions(fh, bbh):
|
||||
for (feature, addr) in extractor.extract_insn_features(fh, bbh, insn):
|
||||
_bb_features[feature].add(addr)
|
||||
func_features[feature].add(addr)
|
||||
|
||||
bb_features[int(bb)] = _bb_features
|
||||
bb_features[bbh.address] = _bb_features
|
||||
|
||||
return func_features, bb_features
|
||||
|
||||
|
||||
def find_func_matches(f, ruleset, func_features, bb_features):
|
||||
def find_func_matches(f: FunctionHandle, ruleset, func_features, bb_features):
|
||||
""" """
|
||||
func_matches = collections.defaultdict(list)
|
||||
bb_matches = collections.defaultdict(list)
|
||||
@@ -108,7 +112,7 @@ def find_func_matches(f, ruleset, func_features, bb_features):
|
||||
func_features[capa.features.common.MatchedRule(name)].add(ea)
|
||||
|
||||
# find rule matches for function, function features include rule matches for basic blocks
|
||||
_, matches = capa.engine.match(ruleset.function_rules, func_features, int(f))
|
||||
_, matches = capa.engine.match(ruleset.function_rules, func_features, f.address)
|
||||
for (name, res) in matches.items():
|
||||
func_matches[name].extend(res)
|
||||
|
||||
@@ -117,19 +121,19 @@ def find_func_matches(f, ruleset, func_features, bb_features):
|
||||
|
||||
def find_file_features(extractor):
|
||||
""" """
|
||||
file_features = collections.defaultdict(set)
|
||||
for (feature, ea) in extractor.extract_file_features():
|
||||
if ea:
|
||||
file_features[feature].add(ea)
|
||||
file_features = collections.defaultdict(set) # type: FeatureSet
|
||||
for (feature, addr) in extractor.extract_file_features():
|
||||
if addr:
|
||||
file_features[feature].add(addr)
|
||||
else:
|
||||
if feature not in file_features:
|
||||
file_features[feature] = set()
|
||||
return file_features
|
||||
|
||||
|
||||
def find_file_matches(ruleset, file_features):
|
||||
def find_file_matches(ruleset, file_features: FeatureSet):
|
||||
""" """
|
||||
_, matches = capa.engine.match(ruleset.file_rules, file_features, 0x0)
|
||||
_, matches = capa.engine.match(ruleset.file_rules, file_features, NO_ADDRESS)
|
||||
return matches
|
||||
|
||||
|
||||
@@ -173,9 +177,9 @@ class CapaExplorerFeatureExtractor(capa.features.extractors.ida.extractor.IdaFea
|
||||
super(CapaExplorerFeatureExtractor, self).__init__()
|
||||
self.indicator = CapaExplorerProgressIndicator()
|
||||
|
||||
def extract_function_features(self, f):
|
||||
self.indicator.update("function at 0x%X" % f.start_ea)
|
||||
return super(CapaExplorerFeatureExtractor, self).extract_function_features(f)
|
||||
def extract_function_features(self, fh: FunctionHandle):
|
||||
self.indicator.update("function at 0x%X" % fh.inner.start_ea)
|
||||
return super(CapaExplorerFeatureExtractor, self).extract_function_features(fh)
|
||||
|
||||
|
||||
class QLineEditClicked(QtWidgets.QLineEdit):
|
||||
@@ -245,8 +249,9 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
|
||||
self.parent = None
|
||||
self.ida_hooks = None
|
||||
self.doc = None
|
||||
self.doc: Optional[capa.render.result_document.ResultDocument] = None
|
||||
|
||||
self.rule_paths = None
|
||||
self.rules_cache = None
|
||||
self.ruleset_cache = None
|
||||
|
||||
@@ -484,8 +489,8 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
self.view_rulegen_header_label.setText("Features")
|
||||
self.view_rulegen_header_label.setFont(font)
|
||||
|
||||
self.view_rulegen_preview = CapaExplorerRulgenPreview(parent=self.parent)
|
||||
self.view_rulegen_editor = CapaExplorerRulgenEditor(self.view_rulegen_preview, parent=self.parent)
|
||||
self.view_rulegen_preview = CapaExplorerRulegenPreview(parent=self.parent)
|
||||
self.view_rulegen_editor = CapaExplorerRulegenEditor(self.view_rulegen_preview, parent=self.parent)
|
||||
self.view_rulegen_features = CapaExplorerRulegenFeatures(self.view_rulegen_editor, parent=self.parent)
|
||||
|
||||
self.view_rulegen_preview.textChanged.connect(self.slot_rulegen_preview_update)
|
||||
@@ -617,6 +622,7 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
|
||||
def load_capa_rules(self):
|
||||
""" """
|
||||
self.rule_paths = None
|
||||
self.ruleset_cache = None
|
||||
self.rules_cache = None
|
||||
|
||||
@@ -701,6 +707,7 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
settings.user[CAPA_SETTINGS_RULE_PATH] = ""
|
||||
return False
|
||||
|
||||
self.rule_paths = rule_paths
|
||||
self.ruleset_cache = ruleset
|
||||
self.rules_cache = rules
|
||||
|
||||
@@ -750,7 +757,7 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
update_wait_box("extracting features")
|
||||
|
||||
try:
|
||||
meta = capa.ida.helpers.collect_metadata()
|
||||
meta = capa.ida.helpers.collect_metadata(self.rule_paths)
|
||||
capabilities, counts = capa.main.find_capabilities(self.ruleset_cache, extractor, disable_progress=True)
|
||||
meta["analysis"].update(counts)
|
||||
meta["analysis"]["layout"] = capa.main.compute_layout(self.ruleset_cache, extractor, capabilities)
|
||||
@@ -797,11 +804,9 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
update_wait_box("rendering results")
|
||||
|
||||
try:
|
||||
self.doc = capa.render.result_document.convert_capabilities_to_result_document(
|
||||
meta, self.ruleset_cache, capabilities
|
||||
)
|
||||
self.doc = capa.render.result_document.ResultDocument.from_capa(meta, self.ruleset_cache, capabilities)
|
||||
except Exception as e:
|
||||
logger.error("Failed to render results (error: %s)", e)
|
||||
logger.error("Failed to collect results (error: %s)", e)
|
||||
return False
|
||||
|
||||
try:
|
||||
@@ -863,7 +868,7 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
# must use extractor to get function, as capa analysis requires casted object
|
||||
extractor = CapaExplorerFeatureExtractor()
|
||||
except Exception as e:
|
||||
logger.error("Failed to load IDA feature extractor (error: %s)" % e)
|
||||
logger.error("Failed to load IDA feature extractor (error: %s)", e)
|
||||
return False
|
||||
|
||||
if ida_kernwin.user_cancelled():
|
||||
@@ -874,10 +879,10 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
try:
|
||||
f = idaapi.get_func(idaapi.get_screen_ea())
|
||||
if f:
|
||||
f = extractor.get_function(f.start_ea)
|
||||
self.rulegen_current_function = f
|
||||
fh: FunctionHandle = extractor.get_function(f.start_ea)
|
||||
self.rulegen_current_function = fh
|
||||
|
||||
func_features, bb_features = find_func_features(f, extractor)
|
||||
func_features, bb_features = find_func_features(fh, extractor)
|
||||
self.rulegen_func_features_cache = collections.defaultdict(set, copy.copy(func_features))
|
||||
self.rulegen_bb_features_cache = collections.defaultdict(dict, copy.copy(bb_features))
|
||||
|
||||
@@ -888,15 +893,15 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
|
||||
try:
|
||||
# add function and bb rule matches to function features, for display purposes
|
||||
func_matches, bb_matches = find_func_matches(f, self.ruleset_cache, func_features, bb_features)
|
||||
for (name, res) in itertools.chain(func_matches.items(), bb_matches.items()):
|
||||
func_matches, bb_matches = find_func_matches(fh, self.ruleset_cache, func_features, bb_features)
|
||||
for (name, addrs) in itertools.chain(func_matches.items(), bb_matches.items()):
|
||||
rule = self.ruleset_cache[name]
|
||||
if rule.meta.get("capa/subscope-rule"):
|
||||
if rule.is_subscope_rule():
|
||||
continue
|
||||
for (ea, _) in res:
|
||||
func_features[capa.features.common.MatchedRule(name)].add(ea)
|
||||
for (addr, _) in addrs:
|
||||
func_features[capa.features.common.MatchedRule(name)].add(addr)
|
||||
except Exception as e:
|
||||
logger.error("Failed to match function/basic block rule scope (error: %s)" % e)
|
||||
logger.error("Failed to match function/basic block rule scope (error: %s)", e)
|
||||
return False
|
||||
else:
|
||||
func_features = {}
|
||||
@@ -904,7 +909,7 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
logger.info("User cancelled analysis.")
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error("Failed to extract function features (error: %s)" % e)
|
||||
logger.error("Failed to extract function features (error: %s)", e)
|
||||
return False
|
||||
|
||||
if ida_kernwin.user_cancelled():
|
||||
@@ -914,7 +919,7 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
|
||||
try:
|
||||
file_features = find_file_features(extractor)
|
||||
self.rulegen_file_features_cache = collections.defaultdict(dict, copy.copy(file_features))
|
||||
self.rulegen_file_features_cache = copy.copy(file_features)
|
||||
|
||||
if ida_kernwin.user_cancelled():
|
||||
logger.info("User cancelled analysis.")
|
||||
@@ -923,17 +928,17 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
|
||||
try:
|
||||
# add file matches to file features, for display purposes
|
||||
for (name, res) in find_file_matches(self.ruleset_cache, file_features).items():
|
||||
for (name, addrs) in find_file_matches(self.ruleset_cache, file_features).items():
|
||||
rule = self.ruleset_cache[name]
|
||||
if rule.meta.get("capa/subscope-rule"):
|
||||
if rule.is_subscope_rule():
|
||||
continue
|
||||
for (ea, _) in res:
|
||||
file_features[capa.features.common.MatchedRule(name)].add(ea)
|
||||
for (addr, _) in addrs:
|
||||
file_features[capa.features.common.MatchedRule(name)].add(addr)
|
||||
except Exception as e:
|
||||
logger.error("Failed to match file scope rules (error: %s)" % e)
|
||||
logger.error("Failed to match file scope rules (error: %s)", e)
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error("Failed to extract file features (error: %s)" % e)
|
||||
logger.error("Failed to extract file features (error: %s)", e)
|
||||
return False
|
||||
|
||||
if ida_kernwin.user_cancelled():
|
||||
@@ -944,7 +949,7 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
try:
|
||||
# load preview and feature tree
|
||||
self.view_rulegen_preview.load_preview_meta(
|
||||
f.start_ea if f else None,
|
||||
fh.address if fh else None,
|
||||
settings.user.get(CAPA_SETTINGS_RULEGEN_AUTHOR, "<insert_author>"),
|
||||
settings.user.get(CAPA_SETTINGS_RULEGEN_SCOPE, "function"),
|
||||
)
|
||||
@@ -955,7 +960,7 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
"capa rules directory: %s (%d rules)" % (settings.user[CAPA_SETTINGS_RULE_PATH], len(self.rules_cache))
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("Failed to render views (error: %s)" % e)
|
||||
logger.error("Failed to render views (error: %s)", e, exc_info=True)
|
||||
return False
|
||||
|
||||
return True
|
||||
@@ -1153,7 +1158,7 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
idaapi.info("No program analysis to save.")
|
||||
return
|
||||
|
||||
s = json.dumps(self.doc, sort_keys=True, cls=capa.render.json.CapaJsonObjectEncoder).encode("utf-8")
|
||||
s = self.doc.json().encode("utf-8")
|
||||
|
||||
path = self.ask_user_capa_json_file()
|
||||
if not path:
|
||||
|
||||
+72
-42
@@ -7,12 +7,14 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import codecs
|
||||
from typing import List, Iterator, Optional
|
||||
|
||||
import idc
|
||||
import idaapi
|
||||
from PyQt5 import QtCore
|
||||
|
||||
import capa.ida.helpers
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
|
||||
|
||||
def info_to_name(display):
|
||||
@@ -26,19 +28,19 @@ def info_to_name(display):
|
||||
return ""
|
||||
|
||||
|
||||
def location_to_hex(location):
|
||||
"""convert location to hex for display"""
|
||||
return "%08X" % location
|
||||
def ea_to_hex(ea):
|
||||
"""convert effective address (ea) to hex for display"""
|
||||
return "%08X" % ea
|
||||
|
||||
|
||||
class CapaExplorerDataItem:
|
||||
"""store data for CapaExplorerDataModel"""
|
||||
|
||||
def __init__(self, parent, data, can_check=True):
|
||||
def __init__(self, parent: "CapaExplorerDataItem", data: List[str], can_check=True):
|
||||
"""initialize item"""
|
||||
self.pred = parent
|
||||
self._data = data
|
||||
self.children = []
|
||||
self._children: List["CapaExplorerDataItem"] = []
|
||||
self._checked = False
|
||||
self._can_check = can_check
|
||||
|
||||
@@ -76,29 +78,29 @@ class CapaExplorerDataItem:
|
||||
"""get item is checked"""
|
||||
return self._checked
|
||||
|
||||
def appendChild(self, item):
|
||||
def appendChild(self, item: "CapaExplorerDataItem"):
|
||||
"""add a new child to specified item
|
||||
|
||||
@param item: CapaExplorerDataItem
|
||||
"""
|
||||
self.children.append(item)
|
||||
self._children.append(item)
|
||||
|
||||
def child(self, row):
|
||||
def child(self, row: int) -> "CapaExplorerDataItem":
|
||||
"""get child row
|
||||
|
||||
@param row: row number
|
||||
"""
|
||||
return self.children[row]
|
||||
return self._children[row]
|
||||
|
||||
def childCount(self):
|
||||
def childCount(self) -> int:
|
||||
"""get child count"""
|
||||
return len(self.children)
|
||||
return len(self._children)
|
||||
|
||||
def columnCount(self):
|
||||
def columnCount(self) -> int:
|
||||
"""get column count"""
|
||||
return len(self._data)
|
||||
|
||||
def data(self, column):
|
||||
def data(self, column: int) -> Optional[str]:
|
||||
"""get data at column
|
||||
|
||||
@param: column number
|
||||
@@ -108,17 +110,17 @@ class CapaExplorerDataItem:
|
||||
except IndexError:
|
||||
return None
|
||||
|
||||
def parent(self):
|
||||
def parent(self) -> "CapaExplorerDataItem":
|
||||
"""get parent"""
|
||||
return self.pred
|
||||
|
||||
def row(self):
|
||||
def row(self) -> int:
|
||||
"""get row location"""
|
||||
if self.pred:
|
||||
return self.pred.children.index(self)
|
||||
return self.pred._children.index(self)
|
||||
return 0
|
||||
|
||||
def setData(self, column, value):
|
||||
def setData(self, column: int, value: str):
|
||||
"""set data in column
|
||||
|
||||
@param column: column number
|
||||
@@ -126,14 +128,14 @@ class CapaExplorerDataItem:
|
||||
"""
|
||||
self._data[column] = value
|
||||
|
||||
def children(self):
|
||||
def children(self) -> Iterator["CapaExplorerDataItem"]:
|
||||
"""yield children"""
|
||||
for child in self.children:
|
||||
for child in self._children:
|
||||
yield child
|
||||
|
||||
def removeChildren(self):
|
||||
"""remove children"""
|
||||
del self.children[:]
|
||||
del self._children[:]
|
||||
|
||||
def __str__(self):
|
||||
"""get string representation of columns
|
||||
@@ -148,7 +150,7 @@ class CapaExplorerDataItem:
|
||||
return self._data[0]
|
||||
|
||||
@property
|
||||
def location(self):
|
||||
def location(self) -> Optional[int]:
|
||||
"""return data stored in location column"""
|
||||
try:
|
||||
# address stored as str, convert to int before return
|
||||
@@ -167,7 +169,9 @@ class CapaExplorerRuleItem(CapaExplorerDataItem):
|
||||
|
||||
fmt = "%s (%d matches)"
|
||||
|
||||
def __init__(self, parent, name, namespace, count, source, can_check=True):
|
||||
def __init__(
|
||||
self, parent: CapaExplorerDataItem, name: str, namespace: str, count: int, source: str, can_check=True
|
||||
):
|
||||
"""initialize item
|
||||
|
||||
@param parent: parent node
|
||||
@@ -189,7 +193,7 @@ class CapaExplorerRuleItem(CapaExplorerDataItem):
|
||||
class CapaExplorerRuleMatchItem(CapaExplorerDataItem):
|
||||
"""store data for rule match"""
|
||||
|
||||
def __init__(self, parent, display, source=""):
|
||||
def __init__(self, parent: CapaExplorerDataItem, display: str, source=""):
|
||||
"""initialize item
|
||||
|
||||
@param parent: parent node
|
||||
@@ -210,14 +214,16 @@ class CapaExplorerFunctionItem(CapaExplorerDataItem):
|
||||
|
||||
fmt = "function(%s)"
|
||||
|
||||
def __init__(self, parent, location, can_check=True):
|
||||
def __init__(self, parent: CapaExplorerDataItem, location: Address, can_check=True):
|
||||
"""initialize item
|
||||
|
||||
@param parent: parent node
|
||||
@param location: virtual address of function as seen by IDA
|
||||
"""
|
||||
assert isinstance(location, AbsoluteVirtualAddress)
|
||||
ea = int(location)
|
||||
super(CapaExplorerFunctionItem, self).__init__(
|
||||
parent, [self.fmt % idaapi.get_name(location), location_to_hex(location), ""], can_check
|
||||
parent, [self.fmt % idaapi.get_name(ea), ea_to_hex(ea), ""], can_check
|
||||
)
|
||||
|
||||
@property
|
||||
@@ -243,7 +249,7 @@ class CapaExplorerSubscopeItem(CapaExplorerDataItem):
|
||||
|
||||
fmt = "subscope(%s)"
|
||||
|
||||
def __init__(self, parent, scope):
|
||||
def __init__(self, parent: CapaExplorerDataItem, scope):
|
||||
"""initialize item
|
||||
|
||||
@param parent: parent node
|
||||
@@ -257,19 +263,23 @@ class CapaExplorerBlockItem(CapaExplorerDataItem):
|
||||
|
||||
fmt = "basic block(loc_%08X)"
|
||||
|
||||
def __init__(self, parent, location):
|
||||
def __init__(self, parent: CapaExplorerDataItem, location: Address):
|
||||
"""initialize item
|
||||
|
||||
@param parent: parent node
|
||||
@param location: virtual address of basic block as seen by IDA
|
||||
"""
|
||||
super(CapaExplorerBlockItem, self).__init__(parent, [self.fmt % location, location_to_hex(location), ""])
|
||||
assert isinstance(location, AbsoluteVirtualAddress)
|
||||
ea = int(location)
|
||||
super(CapaExplorerBlockItem, self).__init__(parent, [self.fmt % ea, ea_to_hex(ea), ""])
|
||||
|
||||
|
||||
class CapaExplorerDefaultItem(CapaExplorerDataItem):
|
||||
"""store data for default match e.g. statement (and, or)"""
|
||||
|
||||
def __init__(self, parent, display, details="", location=None):
|
||||
def __init__(
|
||||
self, parent: CapaExplorerDataItem, display: str, details: str = "", location: Optional[Address] = None
|
||||
):
|
||||
"""initialize item
|
||||
|
||||
@param parent: parent node
|
||||
@@ -277,14 +287,22 @@ class CapaExplorerDefaultItem(CapaExplorerDataItem):
|
||||
@param details: text to display in details section of UI
|
||||
@param location: virtual address as seen by IDA
|
||||
"""
|
||||
location = location_to_hex(location) if location else ""
|
||||
super(CapaExplorerDefaultItem, self).__init__(parent, [display, location, details])
|
||||
ea = None
|
||||
if location:
|
||||
assert isinstance(location, AbsoluteVirtualAddress)
|
||||
ea = int(location)
|
||||
|
||||
super(CapaExplorerDefaultItem, self).__init__(
|
||||
parent, [display, ea_to_hex(ea) if ea is not None else "", details]
|
||||
)
|
||||
|
||||
|
||||
class CapaExplorerFeatureItem(CapaExplorerDataItem):
|
||||
"""store data for feature match"""
|
||||
|
||||
def __init__(self, parent, display, location="", details=""):
|
||||
def __init__(
|
||||
self, parent: CapaExplorerDataItem, display: str, location: Optional[Address] = None, details: str = ""
|
||||
):
|
||||
"""initialize item
|
||||
|
||||
@param parent: parent node
|
||||
@@ -292,14 +310,18 @@ class CapaExplorerFeatureItem(CapaExplorerDataItem):
|
||||
@param details: text to display in details section of UI
|
||||
@param location: virtual address as seen by IDA
|
||||
"""
|
||||
location = location_to_hex(location) if location else ""
|
||||
super(CapaExplorerFeatureItem, self).__init__(parent, [display, location, details])
|
||||
if location:
|
||||
assert isinstance(location, AbsoluteVirtualAddress)
|
||||
ea = int(location)
|
||||
super(CapaExplorerFeatureItem, self).__init__(parent, [display, ea_to_hex(ea), details])
|
||||
else:
|
||||
super(CapaExplorerFeatureItem, self).__init__(parent, [display, "global", details])
|
||||
|
||||
|
||||
class CapaExplorerInstructionViewItem(CapaExplorerFeatureItem):
|
||||
"""store data for instruction match"""
|
||||
|
||||
def __init__(self, parent, display, location):
|
||||
def __init__(self, parent: CapaExplorerDataItem, display: str, location: Address):
|
||||
"""initialize item
|
||||
|
||||
details section shows disassembly view for match
|
||||
@@ -308,15 +330,17 @@ class CapaExplorerInstructionViewItem(CapaExplorerFeatureItem):
|
||||
@param display: text to display in UI
|
||||
@param location: virtual address as seen by IDA
|
||||
"""
|
||||
details = capa.ida.helpers.get_disasm_line(location)
|
||||
assert isinstance(location, AbsoluteVirtualAddress)
|
||||
ea = int(location)
|
||||
details = capa.ida.helpers.get_disasm_line(ea)
|
||||
super(CapaExplorerInstructionViewItem, self).__init__(parent, display, location=location, details=details)
|
||||
self.ida_highlight = idc.get_color(location, idc.CIC_ITEM)
|
||||
self.ida_highlight = idc.get_color(ea, idc.CIC_ITEM)
|
||||
|
||||
|
||||
class CapaExplorerByteViewItem(CapaExplorerFeatureItem):
|
||||
"""store data for byte match"""
|
||||
|
||||
def __init__(self, parent, display, location):
|
||||
def __init__(self, parent: CapaExplorerDataItem, display: str, location: Address):
|
||||
"""initialize item
|
||||
|
||||
details section shows byte preview for match
|
||||
@@ -325,7 +349,10 @@ class CapaExplorerByteViewItem(CapaExplorerFeatureItem):
|
||||
@param display: text to display in UI
|
||||
@param location: virtual address as seen by IDA
|
||||
"""
|
||||
byte_snap = idaapi.get_bytes(location, 32)
|
||||
assert isinstance(location, AbsoluteVirtualAddress)
|
||||
ea = int(location)
|
||||
|
||||
byte_snap = idaapi.get_bytes(ea, 32)
|
||||
|
||||
details = ""
|
||||
if byte_snap:
|
||||
@@ -333,18 +360,21 @@ class CapaExplorerByteViewItem(CapaExplorerFeatureItem):
|
||||
details = " ".join([byte_snap[i : i + 2].decode() for i in range(0, len(byte_snap), 2)])
|
||||
|
||||
super(CapaExplorerByteViewItem, self).__init__(parent, display, location=location, details=details)
|
||||
self.ida_highlight = idc.get_color(location, idc.CIC_ITEM)
|
||||
self.ida_highlight = idc.get_color(ea, idc.CIC_ITEM)
|
||||
|
||||
|
||||
class CapaExplorerStringViewItem(CapaExplorerFeatureItem):
|
||||
"""store data for string match"""
|
||||
|
||||
def __init__(self, parent, display, location, value):
|
||||
def __init__(self, parent: CapaExplorerDataItem, display: str, location: Address, value: str):
|
||||
"""initialize item
|
||||
|
||||
@param parent: parent node
|
||||
@param display: text to display in UI
|
||||
@param location: virtual address as seen by IDA
|
||||
"""
|
||||
assert isinstance(location, AbsoluteVirtualAddress)
|
||||
ea = int(location)
|
||||
|
||||
super(CapaExplorerStringViewItem, self).__init__(parent, display, location=location, details=value)
|
||||
self.ida_highlight = idc.get_color(location, idc.CIC_ITEM)
|
||||
self.ida_highlight = idc.get_color(ea, idc.CIC_ITEM)
|
||||
|
||||
+149
-93
@@ -6,7 +6,8 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
from collections import deque, defaultdict
|
||||
from typing import Set, Dict, List, Tuple
|
||||
from collections import deque
|
||||
|
||||
import idc
|
||||
import idaapi
|
||||
@@ -16,6 +17,8 @@ import capa.rules
|
||||
import capa.ida.helpers
|
||||
import capa.render.utils as rutils
|
||||
import capa.features.common
|
||||
import capa.render.result_document as rd
|
||||
import capa.features.freeze.features as frzf
|
||||
from capa.ida.plugin.item import (
|
||||
CapaExplorerDataItem,
|
||||
CapaExplorerRuleItem,
|
||||
@@ -29,6 +32,7 @@ from capa.ida.plugin.item import (
|
||||
CapaExplorerStringViewItem,
|
||||
CapaExplorerInstructionViewItem,
|
||||
)
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
|
||||
# default highlight color used in IDA window
|
||||
DEFAULT_HIGHLIGHT = 0xE6C700
|
||||
@@ -342,7 +346,14 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
|
||||
return item.childCount()
|
||||
|
||||
def render_capa_doc_statement_node(self, parent, statement, locations, doc):
|
||||
def render_capa_doc_statement_node(
|
||||
self,
|
||||
parent: CapaExplorerDataItem,
|
||||
match: rd.Match,
|
||||
statement: rd.Statement,
|
||||
locations: List[Address],
|
||||
doc: rd.ResultDocument,
|
||||
):
|
||||
"""render capa statement read from doc
|
||||
|
||||
@param parent: parent to which new child is assigned
|
||||
@@ -350,132 +361,141 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
@param locations: locations of children (applies to range only?)
|
||||
@param doc: result doc
|
||||
"""
|
||||
if statement["type"] in ("and", "or", "optional"):
|
||||
display = statement["type"]
|
||||
if statement.get("description"):
|
||||
display += " (%s)" % statement["description"]
|
||||
|
||||
if isinstance(statement, (rd.AndStatement, rd.OrStatement, rd.OptionalStatement)):
|
||||
display = statement.type
|
||||
if statement.description:
|
||||
display += " (%s)" % statement.description
|
||||
return CapaExplorerDefaultItem(parent, display)
|
||||
elif statement["type"] == "not":
|
||||
elif isinstance(statement, rd.NotStatement):
|
||||
# TODO: do we display 'not'
|
||||
pass
|
||||
elif statement["type"] == "some":
|
||||
display = "%d or more" % statement["count"]
|
||||
if statement.get("description"):
|
||||
display += " (%s)" % statement["description"]
|
||||
elif isinstance(statement, rd.SomeStatement):
|
||||
display = "%d or more" % statement.count
|
||||
if statement.description:
|
||||
display += " (%s)" % statement.description
|
||||
return CapaExplorerDefaultItem(parent, display)
|
||||
elif statement["type"] == "range":
|
||||
elif isinstance(statement, rd.RangeStatement):
|
||||
# `range` is a weird node, its almost a hybrid of statement + feature.
|
||||
# it is a specific feature repeated multiple times.
|
||||
# there's no additional logic in the feature part, just the existence of a feature.
|
||||
# so, we have to inline some of the feature rendering here.
|
||||
display = "count(%s): " % self.capa_doc_feature_to_display(statement["child"])
|
||||
display = "count(%s): " % self.capa_doc_feature_to_display(statement.child)
|
||||
|
||||
if statement["max"] == statement["min"]:
|
||||
display += "%d" % (statement["min"])
|
||||
elif statement["min"] == 0:
|
||||
display += "%d or fewer" % (statement["max"])
|
||||
elif statement["max"] == (1 << 64 - 1):
|
||||
display += "%d or more" % (statement["min"])
|
||||
if statement.max == statement.min:
|
||||
display += "%d" % (statement.min)
|
||||
elif statement.min == 0:
|
||||
display += "%d or fewer" % (statement.max)
|
||||
elif statement.max == (1 << 64 - 1):
|
||||
display += "%d or more" % (statement.min)
|
||||
else:
|
||||
display += "between %d and %d" % (statement["min"], statement["max"])
|
||||
display += "between %d and %d" % (statement.min, statement.max)
|
||||
|
||||
if statement.get("description"):
|
||||
display += " (%s)" % statement["description"]
|
||||
if statement.description:
|
||||
display += " (%s)" % statement.description
|
||||
|
||||
parent2 = CapaExplorerFeatureItem(parent, display=display)
|
||||
|
||||
for location in locations:
|
||||
# for each location render child node for range statement
|
||||
self.render_capa_doc_feature(parent2, statement["child"], location, doc)
|
||||
self.render_capa_doc_feature(parent2, match, statement.child, location, doc)
|
||||
|
||||
return parent2
|
||||
elif statement["type"] == "subscope":
|
||||
display = statement[statement["type"]]
|
||||
if statement.get("description"):
|
||||
display += " (%s)" % statement["description"]
|
||||
elif isinstance(statement, rd.SubscopeStatement):
|
||||
display = str(statement.scope)
|
||||
if statement.description:
|
||||
display += " (%s)" % statement.description
|
||||
return CapaExplorerSubscopeItem(parent, display)
|
||||
else:
|
||||
raise RuntimeError("unexpected match statement type: " + str(statement))
|
||||
|
||||
def render_capa_doc_match(self, parent, match, doc):
|
||||
def render_capa_doc_match(self, parent: CapaExplorerDataItem, match: rd.Match, doc: rd.ResultDocument):
|
||||
"""render capa match read from doc
|
||||
|
||||
@param parent: parent node to which new child is assigned
|
||||
@param match: match read from doc
|
||||
@param doc: result doc
|
||||
"""
|
||||
if not match["success"]:
|
||||
if not match.success:
|
||||
# TODO: display failed branches at some point? Help with debugging rules?
|
||||
return
|
||||
|
||||
# optional statement with no successful children is empty
|
||||
if match["node"].get("statement", {}).get("type") == "optional" and not any(
|
||||
map(lambda m: m["success"], match["children"])
|
||||
):
|
||||
return
|
||||
if isinstance(match.node, rd.StatementNode) and isinstance(match.node.statement, rd.OptionalStatement):
|
||||
if not any(map(lambda m: m.success, match.children)):
|
||||
return
|
||||
|
||||
if match["node"]["type"] == "statement":
|
||||
if isinstance(match.node, rd.StatementNode):
|
||||
parent2 = self.render_capa_doc_statement_node(
|
||||
parent, match["node"]["statement"], match.get("locations", []), doc
|
||||
parent, match, match.node.statement, [addr.to_capa() for addr in match.locations], doc
|
||||
)
|
||||
elif match["node"]["type"] == "feature":
|
||||
elif isinstance(match.node, rd.FeatureNode):
|
||||
parent2 = self.render_capa_doc_feature_node(
|
||||
parent, match["node"]["feature"], match.get("locations", []), doc
|
||||
parent, match, match.node.feature, [addr.to_capa() for addr in match.locations], doc
|
||||
)
|
||||
else:
|
||||
raise RuntimeError("unexpected node type: " + str(match["node"]["type"]))
|
||||
raise RuntimeError("unexpected node type: " + str(match.node.type))
|
||||
|
||||
for child in match.get("children", []):
|
||||
for child in match.children:
|
||||
self.render_capa_doc_match(parent2, child, doc)
|
||||
|
||||
def render_capa_doc_by_function(self, doc):
|
||||
def render_capa_doc_by_function(self, doc: rd.ResultDocument):
|
||||
""" """
|
||||
matches_by_function = {}
|
||||
matches_by_function: Dict[int, Tuple[CapaExplorerFunctionItem, Set[str]]] = {}
|
||||
for rule in rutils.capability_rules(doc):
|
||||
for ea in rule["matches"].keys():
|
||||
for location_, _ in rule.matches:
|
||||
location = location_.to_capa()
|
||||
# within IDA, assume that all addresses are virtual addresses.
|
||||
assert isinstance(location, AbsoluteVirtualAddress)
|
||||
ea = int(location)
|
||||
|
||||
ea = capa.ida.helpers.get_func_start_ea(ea)
|
||||
if ea is None:
|
||||
# file scope, skip rendering in this mode
|
||||
continue
|
||||
if not matches_by_function.get(ea, ()):
|
||||
# new function root
|
||||
matches_by_function[ea] = (CapaExplorerFunctionItem(self.root_node, ea, can_check=False), [])
|
||||
matches_by_function[ea] = (
|
||||
CapaExplorerFunctionItem(self.root_node, location, can_check=False),
|
||||
set(),
|
||||
)
|
||||
function_root, match_cache = matches_by_function[ea]
|
||||
if rule["meta"]["name"] in match_cache:
|
||||
if rule.meta.name in match_cache:
|
||||
# rule match already rendered for this function root, skip it
|
||||
continue
|
||||
match_cache.append(rule["meta"]["name"])
|
||||
match_cache.add(rule.meta.name)
|
||||
CapaExplorerRuleItem(
|
||||
function_root,
|
||||
rule["meta"]["name"],
|
||||
rule["meta"].get("namespace"),
|
||||
len(rule["matches"]),
|
||||
rule["source"],
|
||||
rule.meta.name,
|
||||
rule.meta.namespace or "",
|
||||
len(rule.matches),
|
||||
rule.source,
|
||||
can_check=False,
|
||||
)
|
||||
|
||||
def render_capa_doc_by_program(self, doc):
|
||||
def render_capa_doc_by_program(self, doc: rd.ResultDocument):
|
||||
""" """
|
||||
for rule in rutils.capability_rules(doc):
|
||||
rule_name = rule["meta"]["name"]
|
||||
rule_namespace = rule["meta"].get("namespace")
|
||||
parent = CapaExplorerRuleItem(
|
||||
self.root_node, rule_name, rule_namespace, len(rule["matches"]), rule["source"]
|
||||
)
|
||||
rule_name = rule.meta.name
|
||||
rule_namespace = rule.meta.namespace or ""
|
||||
parent = CapaExplorerRuleItem(self.root_node, rule_name, rule_namespace, len(rule.matches), rule.source)
|
||||
|
||||
for (location, match) in doc["rules"][rule["meta"]["name"]]["matches"].items():
|
||||
if rule["meta"]["scope"] == capa.rules.FILE_SCOPE:
|
||||
for (location_, match) in rule.matches:
|
||||
location = location_.to_capa()
|
||||
|
||||
parent2: CapaExplorerDataItem
|
||||
if rule.meta.scope == capa.rules.FILE_SCOPE:
|
||||
parent2 = parent
|
||||
elif rule["meta"]["scope"] == capa.rules.FUNCTION_SCOPE:
|
||||
elif rule.meta.scope == capa.rules.FUNCTION_SCOPE:
|
||||
parent2 = CapaExplorerFunctionItem(parent, location)
|
||||
elif rule["meta"]["scope"] == capa.rules.BASIC_BLOCK_SCOPE:
|
||||
elif rule.meta.scope == capa.rules.BASIC_BLOCK_SCOPE:
|
||||
parent2 = CapaExplorerBlockItem(parent, location)
|
||||
else:
|
||||
raise RuntimeError("unexpected rule scope: " + str(rule["meta"]["scope"]))
|
||||
raise RuntimeError("unexpected rule scope: " + str(rule.meta.scope))
|
||||
|
||||
self.render_capa_doc_match(parent2, match, doc)
|
||||
|
||||
def render_capa_doc(self, doc, by_function):
|
||||
def render_capa_doc(self, doc: rd.ResultDocument, by_function: bool):
|
||||
"""render capa features specified in doc
|
||||
|
||||
@param doc: capa result doc
|
||||
@@ -491,24 +511,32 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
# inform model changes have ended
|
||||
self.endResetModel()
|
||||
|
||||
def capa_doc_feature_to_display(self, feature):
|
||||
def capa_doc_feature_to_display(self, feature: frzf.Feature):
|
||||
"""convert capa doc feature type string to display string for ui
|
||||
|
||||
@param feature: capa feature read from doc
|
||||
"""
|
||||
key = feature["type"]
|
||||
value = feature[feature["type"]]
|
||||
key = feature.type
|
||||
value = getattr(feature, feature.type)
|
||||
|
||||
if value:
|
||||
if key == "string":
|
||||
if isinstance(feature, frzf.StringFeature):
|
||||
value = '"%s"' % capa.features.common.escape_string(value)
|
||||
if feature.get("description", ""):
|
||||
return "%s(%s = %s)" % (key, value, feature["description"])
|
||||
if feature.description:
|
||||
return "%s(%s = %s)" % (key, value, feature.description)
|
||||
else:
|
||||
return "%s(%s)" % (key, value)
|
||||
else:
|
||||
return "%s" % key
|
||||
|
||||
def render_capa_doc_feature_node(self, parent, feature, locations, doc):
|
||||
def render_capa_doc_feature_node(
|
||||
self,
|
||||
parent: CapaExplorerDataItem,
|
||||
match: rd.Match,
|
||||
feature: frzf.Feature,
|
||||
locations: List[Address],
|
||||
doc: rd.ResultDocument,
|
||||
):
|
||||
"""process capa doc feature node
|
||||
|
||||
@param parent: parent node to which child is assigned
|
||||
@@ -522,6 +550,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
# only one location for feature so no need to nest children
|
||||
parent2 = self.render_capa_doc_feature(
|
||||
parent,
|
||||
match,
|
||||
feature,
|
||||
next(iter(locations)),
|
||||
doc,
|
||||
@@ -532,11 +561,19 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
parent2 = CapaExplorerFeatureItem(parent, display)
|
||||
|
||||
for location in sorted(locations):
|
||||
self.render_capa_doc_feature(parent2, feature, location, doc)
|
||||
self.render_capa_doc_feature(parent2, match, feature, location, doc)
|
||||
|
||||
return parent2
|
||||
|
||||
def render_capa_doc_feature(self, parent, feature, location, doc, display="-"):
|
||||
def render_capa_doc_feature(
|
||||
self,
|
||||
parent: CapaExplorerDataItem,
|
||||
match: rd.Match,
|
||||
feature: frzf.Feature,
|
||||
location: Address,
|
||||
doc: rd.ResultDocument,
|
||||
display="-",
|
||||
):
|
||||
"""render capa feature read from doc
|
||||
|
||||
@param parent: parent node to which new child is assigned
|
||||
@@ -545,64 +582,83 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
@param location: address of feature
|
||||
@param display: text to display in plugin UI
|
||||
"""
|
||||
|
||||
# special handling for characteristic pending type
|
||||
if feature["type"] == "characteristic":
|
||||
if feature[feature["type"]] in ("embedded pe",):
|
||||
if isinstance(feature, frzf.CharacteristicFeature):
|
||||
characteristic = feature.characteristic
|
||||
if characteristic in ("embedded pe",):
|
||||
return CapaExplorerByteViewItem(parent, display, location)
|
||||
|
||||
if feature[feature["type"]] in ("loop", "recursive call", "tight loop"):
|
||||
if characteristic in ("loop", "recursive call", "tight loop"):
|
||||
return CapaExplorerFeatureItem(parent, display=display)
|
||||
|
||||
# default to instruction view for all other characteristics
|
||||
return CapaExplorerInstructionViewItem(parent, display, location)
|
||||
|
||||
if feature["type"] == "match":
|
||||
elif isinstance(feature, frzf.MatchFeature):
|
||||
# display content of rule for all rule matches
|
||||
return CapaExplorerRuleMatchItem(
|
||||
parent, display, source=doc["rules"].get(feature[feature["type"]], {}).get("source", "")
|
||||
)
|
||||
matched_rule_name = feature.match
|
||||
return CapaExplorerRuleMatchItem(parent, display, source=doc.rules[matched_rule_name].source)
|
||||
|
||||
if feature["type"] in ("regex", "substring"):
|
||||
for s, locations in feature["matches"].items():
|
||||
# wb: 614: substring feature?
|
||||
elif isinstance(feature, (frzf.RegexFeature, frzf.SubstringFeature)):
|
||||
for capture, locations in sorted(match.captures.items()):
|
||||
if location in locations:
|
||||
return CapaExplorerStringViewItem(
|
||||
parent, display, location, '"' + capa.features.common.escape_string(s) + '"'
|
||||
parent, display, location, '"' + capa.features.common.escape_string(capture) + '"'
|
||||
)
|
||||
|
||||
# programming error: the given location should always be found in the regex matches
|
||||
raise ValueError("regex match at location not found")
|
||||
|
||||
if feature["type"] == "basicblock":
|
||||
elif isinstance(feature, frzf.BasicBlockFeature):
|
||||
return CapaExplorerBlockItem(parent, location)
|
||||
|
||||
if feature["type"] in (
|
||||
"bytes",
|
||||
"api",
|
||||
"mnemonic",
|
||||
"number",
|
||||
"offset",
|
||||
elif isinstance(
|
||||
feature,
|
||||
(
|
||||
frzf.BytesFeature,
|
||||
frzf.APIFeature,
|
||||
frzf.MnemonicFeature,
|
||||
frzf.NumberFeature,
|
||||
frzf.OffsetFeature,
|
||||
),
|
||||
):
|
||||
# display instruction preview
|
||||
return CapaExplorerInstructionViewItem(parent, display, location)
|
||||
|
||||
if feature["type"] in ("section",):
|
||||
elif isinstance(feature, frzf.SectionFeature):
|
||||
# display byte preview
|
||||
return CapaExplorerByteViewItem(parent, display, location)
|
||||
|
||||
if feature["type"] in ("string",):
|
||||
elif isinstance(feature, frzf.StringFeature):
|
||||
# display string preview
|
||||
return CapaExplorerStringViewItem(
|
||||
parent, display, location, '"%s"' % capa.features.common.escape_string(feature[feature["type"]])
|
||||
parent, display, location, '"%s"' % capa.features.common.escape_string(feature.string)
|
||||
)
|
||||
|
||||
if feature["type"] in ("import", "export", "function-name"):
|
||||
elif isinstance(
|
||||
feature,
|
||||
(
|
||||
frzf.ImportFeature,
|
||||
frzf.ExportFeature,
|
||||
frzf.FunctionNameFeature,
|
||||
),
|
||||
):
|
||||
# display no preview
|
||||
return CapaExplorerFeatureItem(parent, location=location, display=display)
|
||||
|
||||
if feature["type"] in ("arch", "os", "format"):
|
||||
elif isinstance(
|
||||
feature,
|
||||
(
|
||||
frzf.ArchFeature,
|
||||
frzf.OSFeature,
|
||||
frzf.FormatFeature,
|
||||
),
|
||||
):
|
||||
return CapaExplorerFeatureItem(parent, display=display)
|
||||
|
||||
raise RuntimeError("unexpected feature type: " + str(feature["type"]))
|
||||
raise RuntimeError("unexpected feature type: " + str(feature.type))
|
||||
|
||||
def update_function_name(self, old_name, new_name):
|
||||
"""update all instances of old function name with new function name
|
||||
|
||||
+37
-34
@@ -18,6 +18,7 @@ import capa.ida.helpers
|
||||
import capa.features.common
|
||||
import capa.features.basicblock
|
||||
from capa.ida.plugin.item import CapaExplorerFunctionItem
|
||||
from capa.features.address import NO_ADDRESS
|
||||
from capa.ida.plugin.model import CapaExplorerDataModel
|
||||
|
||||
MAX_SECTION_SIZE = 750
|
||||
@@ -172,13 +173,13 @@ def resize_columns_to_content(header):
|
||||
header.resizeSection(0, MAX_SECTION_SIZE)
|
||||
|
||||
|
||||
class CapaExplorerRulgenPreview(QtWidgets.QTextEdit):
|
||||
class CapaExplorerRulegenPreview(QtWidgets.QTextEdit):
|
||||
|
||||
INDENT = " " * 2
|
||||
|
||||
def __init__(self, parent=None):
|
||||
""" """
|
||||
super(CapaExplorerRulgenPreview, self).__init__(parent)
|
||||
super(CapaExplorerRulegenPreview, self).__init__(parent)
|
||||
|
||||
self.setFont(QtGui.QFont("Courier", weight=QtGui.QFont.Bold))
|
||||
self.setLineWrapMode(QtWidgets.QTextEdit.NoWrap)
|
||||
@@ -283,7 +284,7 @@ class CapaExplorerRulgenPreview(QtWidgets.QTextEdit):
|
||||
self.set_selection(select_start_ppos, select_end_ppos, len(self.toPlainText()))
|
||||
self.verticalScrollBar().setSliderPosition(scroll_ppos)
|
||||
else:
|
||||
super(CapaExplorerRulgenPreview, self).keyPressEvent(e)
|
||||
super(CapaExplorerRulegenPreview, self).keyPressEvent(e)
|
||||
|
||||
def count_previous_lines_from_block(self, block):
|
||||
"""calculate number of lines preceding block"""
|
||||
@@ -303,13 +304,13 @@ class CapaExplorerRulgenPreview(QtWidgets.QTextEdit):
|
||||
self.setTextCursor(cursor)
|
||||
|
||||
|
||||
class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
class CapaExplorerRulegenEditor(QtWidgets.QTreeWidget):
|
||||
|
||||
updated = QtCore.pyqtSignal()
|
||||
|
||||
def __init__(self, preview, parent=None):
|
||||
""" """
|
||||
super(CapaExplorerRulgenEditor, self).__init__(parent)
|
||||
super(CapaExplorerRulegenEditor, self).__init__(parent)
|
||||
|
||||
self.preview = preview
|
||||
|
||||
@@ -373,18 +374,18 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
|
||||
def dragMoveEvent(self, e):
|
||||
""" """
|
||||
super(CapaExplorerRulgenEditor, self).dragMoveEvent(e)
|
||||
super(CapaExplorerRulegenEditor, self).dragMoveEvent(e)
|
||||
|
||||
def dragEventEnter(self, e):
|
||||
""" """
|
||||
super(CapaExplorerRulgenEditor, self).dragEventEnter(e)
|
||||
super(CapaExplorerRulegenEditor, self).dragEventEnter(e)
|
||||
|
||||
def dropEvent(self, e):
|
||||
""" """
|
||||
if not self.indexAt(e.pos()).isValid():
|
||||
return
|
||||
|
||||
super(CapaExplorerRulgenEditor, self).dropEvent(e)
|
||||
super(CapaExplorerRulegenEditor, self).dropEvent(e)
|
||||
|
||||
self.update_preview()
|
||||
expand_tree(self.invisibleRootItem())
|
||||
@@ -438,7 +439,7 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
""" """
|
||||
expression, o = action.data()
|
||||
if "basic block" in expression and "basic block" not in o.text(
|
||||
CapaExplorerRulgenEditor.get_column_feature_index()
|
||||
CapaExplorerRulegenEditor.get_column_feature_index()
|
||||
):
|
||||
# current expression is "basic block", and not changing to "basic block" expression
|
||||
children = o.takeChildren()
|
||||
@@ -446,7 +447,7 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
for child in children:
|
||||
new_parent.addChild(child)
|
||||
new_parent.setExpanded(True)
|
||||
o.setText(CapaExplorerRulgenEditor.get_column_feature_index(), expression)
|
||||
o.setText(CapaExplorerRulegenEditor.get_column_feature_index(), expression)
|
||||
|
||||
def slot_clear_all(self, action):
|
||||
""" """
|
||||
@@ -457,7 +458,7 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
if not self.indexAt(pos).isValid():
|
||||
# user selected invalid index
|
||||
self.load_custom_context_menu_invalid_index(pos)
|
||||
elif self.itemAt(pos).capa_type == CapaExplorerRulgenEditor.get_node_type_expression():
|
||||
elif self.itemAt(pos).capa_type == CapaExplorerRulegenEditor.get_node_type_expression():
|
||||
# user selected expression node
|
||||
self.load_custom_context_menu_expression(pos)
|
||||
else:
|
||||
@@ -469,8 +470,8 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
def slot_item_double_clicked(self, o, column):
|
||||
""" """
|
||||
if column in (
|
||||
CapaExplorerRulgenEditor.get_column_comment_index(),
|
||||
CapaExplorerRulgenEditor.get_column_description_index(),
|
||||
CapaExplorerRulegenEditor.get_column_comment_index(),
|
||||
CapaExplorerRulegenEditor.get_column_description_index(),
|
||||
):
|
||||
o.setFlags(o.flags() | QtCore.Qt.ItemIsEditable)
|
||||
self.editItem(o, column)
|
||||
@@ -556,7 +557,7 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
font = QtGui.QFont()
|
||||
font.setBold(True)
|
||||
|
||||
o.setFont(CapaExplorerRulgenEditor.get_column_feature_index(), font)
|
||||
o.setFont(CapaExplorerRulegenEditor.get_column_feature_index(), font)
|
||||
|
||||
def style_feature_node(self, o):
|
||||
""" """
|
||||
@@ -567,8 +568,8 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
font.setWeight(QtGui.QFont.Medium)
|
||||
brush.setColor(QtGui.QColor(*COLOR_GREEN_RGB))
|
||||
|
||||
o.setFont(CapaExplorerRulgenEditor.get_column_feature_index(), font)
|
||||
o.setForeground(CapaExplorerRulgenEditor.get_column_feature_index(), brush)
|
||||
o.setFont(CapaExplorerRulegenEditor.get_column_feature_index(), font)
|
||||
o.setForeground(CapaExplorerRulegenEditor.get_column_feature_index(), brush)
|
||||
|
||||
def style_comment_node(self, o):
|
||||
""" """
|
||||
@@ -576,22 +577,22 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
font.setBold(True)
|
||||
font.setFamily("Courier")
|
||||
|
||||
o.setFont(CapaExplorerRulgenEditor.get_column_feature_index(), font)
|
||||
o.setFont(CapaExplorerRulegenEditor.get_column_feature_index(), font)
|
||||
|
||||
def set_expression_node(self, o):
|
||||
""" """
|
||||
setattr(o, "capa_type", CapaExplorerRulgenEditor.get_node_type_expression())
|
||||
setattr(o, "capa_type", CapaExplorerRulegenEditor.get_node_type_expression())
|
||||
self.style_expression_node(o)
|
||||
|
||||
def set_feature_node(self, o):
|
||||
""" """
|
||||
setattr(o, "capa_type", CapaExplorerRulgenEditor.get_node_type_feature())
|
||||
setattr(o, "capa_type", CapaExplorerRulegenEditor.get_node_type_feature())
|
||||
o.setFlags(o.flags() & ~QtCore.Qt.ItemIsDropEnabled)
|
||||
self.style_feature_node(o)
|
||||
|
||||
def set_comment_node(self, o):
|
||||
""" """
|
||||
setattr(o, "capa_type", CapaExplorerRulgenEditor.get_node_type_comment())
|
||||
setattr(o, "capa_type", CapaExplorerRulegenEditor.get_node_type_comment())
|
||||
o.setFlags(o.flags() & ~QtCore.Qt.ItemIsDropEnabled)
|
||||
|
||||
self.style_comment_node(o)
|
||||
@@ -693,11 +694,11 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
|
||||
# we need to set our own type so we can control the GUI accordingly
|
||||
if feature.startswith(("- and:", "- or:", "- not:", "- basic block:", "- optional:")):
|
||||
setattr(node, "capa_type", CapaExplorerRulgenEditor.get_node_type_expression())
|
||||
setattr(node, "capa_type", CapaExplorerRulegenEditor.get_node_type_expression())
|
||||
elif feature.startswith("#"):
|
||||
setattr(node, "capa_type", CapaExplorerRulgenEditor.get_node_type_comment())
|
||||
setattr(node, "capa_type", CapaExplorerRulegenEditor.get_node_type_comment())
|
||||
else:
|
||||
setattr(node, "capa_type", CapaExplorerRulgenEditor.get_node_type_feature())
|
||||
setattr(node, "capa_type", CapaExplorerRulegenEditor.get_node_type_feature())
|
||||
|
||||
# format the node based on its type
|
||||
(self.set_expression_node, self.set_feature_node, self.set_comment_node)[node.capa_type](node)
|
||||
@@ -759,7 +760,7 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
""" """
|
||||
for feature in filter(
|
||||
lambda o: o.capa_type
|
||||
in (CapaExplorerRulgenEditor.get_node_type_feature(), CapaExplorerRulgenEditor.get_node_type_comment()),
|
||||
in (CapaExplorerRulegenEditor.get_node_type_feature(), CapaExplorerRulegenEditor.get_node_type_comment()),
|
||||
tuple(iterate_tree(self)),
|
||||
):
|
||||
if feature in ignore:
|
||||
@@ -771,7 +772,7 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
def get_expressions(self, selected=False, ignore=()):
|
||||
""" """
|
||||
for expression in filter(
|
||||
lambda o: o.capa_type == CapaExplorerRulgenEditor.get_node_type_expression(), tuple(iterate_tree(self))
|
||||
lambda o: o.capa_type == CapaExplorerRulegenEditor.get_node_type_expression(), tuple(iterate_tree(self))
|
||||
):
|
||||
if expression in ignore:
|
||||
continue
|
||||
@@ -1011,6 +1012,8 @@ class CapaExplorerRulegenFeatures(QtWidgets.QTreeWidget):
|
||||
self.parent_items = {}
|
||||
|
||||
def format_address(e):
|
||||
if e == NO_ADDRESS:
|
||||
return ""
|
||||
return "%X" % e if e else ""
|
||||
|
||||
def format_feature(feature):
|
||||
@@ -1021,7 +1024,7 @@ class CapaExplorerRulegenFeatures(QtWidgets.QTreeWidget):
|
||||
value = '"%s"' % capa.features.common.escape_string(value)
|
||||
return "%s(%s)" % (name, value)
|
||||
|
||||
for (feature, eas) in sorted(features.items(), key=lambda k: sorted(k[1])):
|
||||
for (feature, addrs) in sorted(features.items(), key=lambda k: sorted(k[1])):
|
||||
if isinstance(feature, capa.features.basicblock.BasicBlock):
|
||||
# filter basic blocks for now, we may want to add these back in some time
|
||||
# in the future
|
||||
@@ -1033,7 +1036,7 @@ class CapaExplorerRulegenFeatures(QtWidgets.QTreeWidget):
|
||||
|
||||
# level 1
|
||||
if feature not in self.parent_items:
|
||||
if len(eas) > 1:
|
||||
if len(addrs) > 1:
|
||||
self.parent_items[feature] = self.new_parent_node(
|
||||
self.parent_items[type(feature)], (format_feature(feature),), feature=feature
|
||||
)
|
||||
@@ -1043,18 +1046,18 @@ class CapaExplorerRulegenFeatures(QtWidgets.QTreeWidget):
|
||||
)
|
||||
|
||||
# level n > 1
|
||||
if len(eas) > 1:
|
||||
for ea in sorted(eas):
|
||||
if len(addrs) > 1:
|
||||
for addr in sorted(addrs):
|
||||
self.new_leaf_node(
|
||||
self.parent_items[feature], (format_feature(feature), format_address(ea)), feature=feature
|
||||
self.parent_items[feature], (format_feature(feature), format_address(addr)), feature=feature
|
||||
)
|
||||
else:
|
||||
if eas:
|
||||
ea = eas.pop()
|
||||
if addrs:
|
||||
addr = addrs.pop()
|
||||
else:
|
||||
# some features may not have an address e.g. "format"
|
||||
ea = ""
|
||||
for (i, v) in enumerate((format_feature(feature), format_address(ea))):
|
||||
addr = ""
|
||||
for (i, v) in enumerate((format_feature(feature), format_address(addr))):
|
||||
self.parent_items[feature].setText(i, v)
|
||||
self.parent_items[feature].setData(0, 0x100, feature)
|
||||
|
||||
|
||||
+38
-32
@@ -41,8 +41,10 @@ import capa.render.vverbose
|
||||
import capa.features.extractors
|
||||
import capa.features.extractors.common
|
||||
import capa.features.extractors.pefile
|
||||
import capa.features.extractors.dnfile_
|
||||
import capa.features.extractors.elffile
|
||||
import capa.features.extractors.dotnetfile
|
||||
import capa.features.extractors.base_extractor
|
||||
from capa.rules import Rule, Scope, RuleSet
|
||||
from capa.engine import FeatureSet, MatchResults
|
||||
from capa.helpers import (
|
||||
@@ -63,6 +65,7 @@ from capa.features.common import (
|
||||
FORMAT_DOTNET,
|
||||
FORMAT_FREEZE,
|
||||
)
|
||||
from capa.features.address import NO_ADDRESS
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
|
||||
|
||||
RULES_PATH_DEFAULT_STRING = "(embedded rules)"
|
||||
@@ -113,18 +116,18 @@ def find_instruction_capabilities(
|
||||
# all features found for the instruction.
|
||||
features = collections.defaultdict(set) # type: FeatureSet
|
||||
|
||||
for feature, va in itertools.chain(
|
||||
for feature, addr in itertools.chain(
|
||||
extractor.extract_insn_features(f, bb, insn), extractor.extract_global_features()
|
||||
):
|
||||
features[feature].add(va)
|
||||
features[feature].add(addr)
|
||||
|
||||
# matches found at this instruction.
|
||||
_, matches = ruleset.match(Scope.INSTRUCTION, features, int(insn))
|
||||
_, matches = ruleset.match(Scope.INSTRUCTION, features, insn.address)
|
||||
|
||||
for rule_name, res in matches.items():
|
||||
rule = ruleset[rule_name]
|
||||
for va, _ in res:
|
||||
capa.engine.index_rule_matches(features, rule, [va])
|
||||
for addr, _ in res:
|
||||
capa.engine.index_rule_matches(features, rule, [addr])
|
||||
|
||||
return features, matches
|
||||
|
||||
@@ -159,7 +162,7 @@ def find_basic_block_capabilities(
|
||||
features[feature].add(va)
|
||||
|
||||
# matches found within this basic block.
|
||||
_, matches = ruleset.match(Scope.BASIC_BLOCK, features, int(bb))
|
||||
_, matches = ruleset.match(Scope.BASIC_BLOCK, features, bb.address)
|
||||
|
||||
for rule_name, res in matches.items():
|
||||
rule = ruleset[rule_name]
|
||||
@@ -170,7 +173,7 @@ def find_basic_block_capabilities(
|
||||
|
||||
|
||||
def find_code_capabilities(
|
||||
ruleset: RuleSet, extractor: FeatureExtractor, f: FunctionHandle
|
||||
ruleset: RuleSet, extractor: FeatureExtractor, fh: FunctionHandle
|
||||
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
|
||||
"""
|
||||
find matches for the given rules within the given function.
|
||||
@@ -189,8 +192,8 @@ def find_code_capabilities(
|
||||
# might be found at different instructions, thats ok.
|
||||
insn_matches = collections.defaultdict(list) # type: MatchResults
|
||||
|
||||
for bb in extractor.get_basic_blocks(f):
|
||||
features, bmatches, imatches = find_basic_block_capabilities(ruleset, extractor, f, bb)
|
||||
for bb in extractor.get_basic_blocks(fh):
|
||||
features, bmatches, imatches = find_basic_block_capabilities(ruleset, extractor, fh, bb)
|
||||
for feature, vas in features.items():
|
||||
function_features[feature].update(vas)
|
||||
|
||||
@@ -200,10 +203,10 @@ def find_code_capabilities(
|
||||
for rule_name, res in imatches.items():
|
||||
insn_matches[rule_name].extend(res)
|
||||
|
||||
for feature, va in itertools.chain(extractor.extract_function_features(f), extractor.extract_global_features()):
|
||||
for feature, va in itertools.chain(extractor.extract_function_features(fh), extractor.extract_global_features()):
|
||||
function_features[feature].add(va)
|
||||
|
||||
_, function_matches = ruleset.match(Scope.FUNCTION, function_features, int(f))
|
||||
_, function_matches = ruleset.match(Scope.FUNCTION, function_features, fh.address)
|
||||
return function_matches, bb_matches, insn_matches, len(function_features)
|
||||
|
||||
|
||||
@@ -224,7 +227,7 @@ def find_file_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, functi
|
||||
|
||||
file_features.update(function_features)
|
||||
|
||||
_, matches = ruleset.match(Scope.FILE, file_features, 0x0)
|
||||
_, matches = ruleset.match(Scope.FILE, file_features, NO_ADDRESS)
|
||||
return matches, len(file_features)
|
||||
|
||||
|
||||
@@ -252,12 +255,10 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
|
||||
|
||||
pb = pbar(functions, desc="matching", unit=" functions", postfix="skipped 0 library functions")
|
||||
for f in pb:
|
||||
function_address = int(f)
|
||||
|
||||
if extractor.is_library_function(function_address):
|
||||
function_name = extractor.get_function_name(function_address)
|
||||
logger.debug("skipping library function 0x%x (%s)", function_address, function_name)
|
||||
meta["library_functions"][function_address] = function_name
|
||||
if extractor.is_library_function(f.address):
|
||||
function_name = extractor.get_function_name(f.address)
|
||||
logger.debug("skipping library function 0x%x (%s)", f.address, function_name)
|
||||
meta["library_functions"][f.address] = function_name
|
||||
n_libs = len(meta["library_functions"])
|
||||
percentage = 100 * (n_libs / n_funcs)
|
||||
if isinstance(pb, tqdm.tqdm):
|
||||
@@ -265,8 +266,8 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
|
||||
continue
|
||||
|
||||
function_matches, bb_matches, insn_matches, feature_count = find_code_capabilities(ruleset, extractor, f)
|
||||
meta["feature_counts"]["functions"][function_address] = feature_count
|
||||
logger.debug("analyzed function 0x%x and extracted %d features", function_address, feature_count)
|
||||
meta["feature_counts"]["functions"][f.address] = feature_count
|
||||
logger.debug("analyzed function 0x%x and extracted %d features", f.address, feature_count)
|
||||
|
||||
for rule_name, res in function_matches.items():
|
||||
all_function_matches[rule_name].extend(res)
|
||||
@@ -549,9 +550,9 @@ def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]:
|
||||
if format_ == capa.features.extractors.common.FORMAT_PE:
|
||||
file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample))
|
||||
|
||||
dotnetfile_extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(sample)
|
||||
if dotnetfile_extractor.is_dotnet_file():
|
||||
file_extractors.append(dotnetfile_extractor)
|
||||
dnfile_extractor = capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample)
|
||||
if dnfile_extractor.is_dotnet_file():
|
||||
file_extractors.append(dnfile_extractor)
|
||||
|
||||
elif format_ == capa.features.extractors.common.FORMAT_ELF:
|
||||
file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample))
|
||||
@@ -652,7 +653,12 @@ def get_signatures(sigs_path):
|
||||
return paths
|
||||
|
||||
|
||||
def collect_metadata(argv, sample_path, rules_path, extractor):
|
||||
def collect_metadata(
|
||||
argv: List[str],
|
||||
sample_path: str,
|
||||
rules_path: List[str],
|
||||
extractor: capa.features.extractors.base_extractor.FeatureExtractor,
|
||||
):
|
||||
md5 = hashlib.md5()
|
||||
sha1 = hashlib.sha1()
|
||||
sha256 = hashlib.sha256()
|
||||
@@ -710,10 +716,10 @@ def compute_layout(rules, extractor, capabilities):
|
||||
functions_by_bb = {}
|
||||
bbs_by_function = {}
|
||||
for f in extractor.get_functions():
|
||||
bbs_by_function[int(f)] = []
|
||||
bbs_by_function[f.address] = []
|
||||
for bb in extractor.get_basic_blocks(f):
|
||||
functions_by_bb[int(bb)] = int(f)
|
||||
bbs_by_function[int(f)].append(int(bb))
|
||||
functions_by_bb[bb.address] = f.address
|
||||
bbs_by_function[f.address].append(bb.address)
|
||||
|
||||
matched_bbs = set()
|
||||
for rule_name, matches in capabilities.items():
|
||||
@@ -1013,7 +1019,7 @@ def main(argv=None):
|
||||
# during the load of the RuleSet, we extract subscope statements into their own rules
|
||||
# that are subsequently `match`ed upon. this inflates the total rule count.
|
||||
# so, filter out the subscope rules when reporting total number of loaded rules.
|
||||
len([i for i in filter(lambda r: "capa/subscope-rule" not in r.meta, rules.rules.values())]),
|
||||
len([i for i in filter(lambda r: not r.is_subscope_rule(), rules.rules.values())]),
|
||||
)
|
||||
if args.tag:
|
||||
rules = rules.filter_rules_by_meta(args.tag)
|
||||
@@ -1051,6 +1057,9 @@ def main(argv=None):
|
||||
logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e))
|
||||
return E_CORRUPT_FILE
|
||||
|
||||
if isinstance(file_extractor, capa.features.extractors.dnfile_.DnfileFeatureExtractor):
|
||||
format_ = FORMAT_DOTNET
|
||||
|
||||
# file limitations that rely on non-file scope won't be detected here.
|
||||
# nor on FunctionName features, because pefile doesn't support this.
|
||||
if has_file_limitation(rules, pure_file_capabilities):
|
||||
@@ -1060,9 +1069,6 @@ def main(argv=None):
|
||||
logger.debug("file limitation short circuit, won't analyze fully.")
|
||||
return E_FILE_LIMITATION
|
||||
|
||||
if isinstance(file_extractor, capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor):
|
||||
format_ = FORMAT_DOTNET
|
||||
|
||||
if format_ == FORMAT_FREEZE:
|
||||
with open(args.sample, "rb") as f:
|
||||
extractor = capa.features.freeze.load(f.read())
|
||||
@@ -1147,7 +1153,7 @@ def ida_main():
|
||||
rules = get_rules(rules_path)
|
||||
rules = capa.rules.RuleSet(rules)
|
||||
|
||||
meta = capa.ida.helpers.collect_metadata()
|
||||
meta = capa.ida.helpers.collect_metadata([rules_path])
|
||||
|
||||
capabilities, counts = find_capabilities(rules, capa.features.extractors.ida.extractor.IdaFeatureExtractor())
|
||||
meta["analysis"].update(counts)
|
||||
|
||||
+36
-41
@@ -11,7 +11,9 @@ import collections
|
||||
import tabulate
|
||||
|
||||
import capa.render.utils as rutils
|
||||
import capa.render.result_document
|
||||
import capa.features.freeze as frz
|
||||
import capa.render.result_document as rd
|
||||
import capa.features.freeze.features as frzf
|
||||
from capa.rules import RuleSet
|
||||
from capa.engine import MatchResults
|
||||
from capa.render.utils import StringIO
|
||||
@@ -27,50 +29,49 @@ def width(s: str, character_count: int) -> str:
|
||||
return s
|
||||
|
||||
|
||||
def render_meta(doc, ostream: StringIO):
|
||||
def render_meta(doc: rd.ResultDocument, ostream: StringIO):
|
||||
rows = [
|
||||
(width("md5", 22), width(doc["meta"]["sample"]["md5"], 82)),
|
||||
("sha1", doc["meta"]["sample"]["sha1"]),
|
||||
("sha256", doc["meta"]["sample"]["sha256"]),
|
||||
("os", doc["meta"]["analysis"]["os"]),
|
||||
("format", doc["meta"]["analysis"]["format"]),
|
||||
("arch", doc["meta"]["analysis"]["arch"]),
|
||||
("path", doc["meta"]["sample"]["path"]),
|
||||
(width("md5", 22), width(doc.meta.sample.md5, 82)),
|
||||
("sha1", doc.meta.sample.sha1),
|
||||
("sha256", doc.meta.sample.sha256),
|
||||
("os", doc.meta.analysis.os),
|
||||
("format", doc.meta.analysis.format),
|
||||
("arch", doc.meta.analysis.arch),
|
||||
("path", doc.meta.sample.path),
|
||||
]
|
||||
|
||||
ostream.write(tabulate.tabulate(rows, tablefmt="psql"))
|
||||
ostream.write("\n")
|
||||
|
||||
|
||||
def find_subrule_matches(doc):
|
||||
def find_subrule_matches(doc: rd.ResultDocument):
|
||||
"""
|
||||
collect the rule names that have been matched as a subrule match.
|
||||
this way we can avoid displaying entries for things that are too specific.
|
||||
"""
|
||||
matches = set([])
|
||||
|
||||
def rec(node):
|
||||
if not node["success"]:
|
||||
def rec(match: rd.Match):
|
||||
if not match.success:
|
||||
# there's probably a bug here for rules that do `not: match: ...`
|
||||
# but we don't have any examples of this yet
|
||||
return
|
||||
|
||||
elif node["node"]["type"] == "statement":
|
||||
for child in node["children"]:
|
||||
elif isinstance(match.node, rd.StatementNode):
|
||||
for child in match.children:
|
||||
rec(child)
|
||||
|
||||
elif node["node"]["type"] == "feature":
|
||||
if node["node"]["feature"]["type"] == "match":
|
||||
matches.add(node["node"]["feature"]["match"])
|
||||
elif isinstance(match.node, rd.FeatureNode) and isinstance(match.node.feature, frzf.MatchFeature):
|
||||
matches.add(match.node.feature.match)
|
||||
|
||||
for rule in rutils.capability_rules(doc):
|
||||
for node in rule["matches"].values():
|
||||
rec(node)
|
||||
for address, match in rule.matches:
|
||||
rec(match)
|
||||
|
||||
return matches
|
||||
|
||||
|
||||
def render_capabilities(doc, ostream: StringIO):
|
||||
def render_capabilities(doc: rd.ResultDocument, ostream: StringIO):
|
||||
"""
|
||||
example::
|
||||
|
||||
@@ -86,18 +87,18 @@ def render_capabilities(doc, ostream: StringIO):
|
||||
|
||||
rows = []
|
||||
for rule in rutils.capability_rules(doc):
|
||||
if rule["meta"]["name"] in subrule_matches:
|
||||
if rule.meta.name in subrule_matches:
|
||||
# rules that are also matched by other rules should not get rendered by default.
|
||||
# this cuts down on the amount of output while giving approx the same detail.
|
||||
# see #224
|
||||
continue
|
||||
|
||||
count = len(rule["matches"])
|
||||
count = len(rule.matches)
|
||||
if count == 1:
|
||||
capability = rutils.bold(rule["meta"]["name"])
|
||||
capability = rutils.bold(rule.meta.name)
|
||||
else:
|
||||
capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)
|
||||
rows.append((capability, rule["meta"]["namespace"]))
|
||||
capability = "%s (%d matches)" % (rutils.bold(rule.meta.name), count)
|
||||
rows.append((capability, rule.meta.namespace))
|
||||
|
||||
if rows:
|
||||
ostream.write(
|
||||
@@ -108,7 +109,7 @@ def render_capabilities(doc, ostream: StringIO):
|
||||
ostream.writeln(rutils.bold("no capabilities found"))
|
||||
|
||||
|
||||
def render_attack(doc, ostream: StringIO):
|
||||
def render_attack(doc: rd.ResultDocument, ostream: StringIO):
|
||||
"""
|
||||
example::
|
||||
|
||||
@@ -126,17 +127,14 @@ def render_attack(doc, ostream: StringIO):
|
||||
"""
|
||||
tactics = collections.defaultdict(set)
|
||||
for rule in rutils.capability_rules(doc):
|
||||
if not rule["meta"].get("att&ck"):
|
||||
continue
|
||||
|
||||
for attack in rule["meta"]["att&ck"]:
|
||||
tactics[attack["tactic"]].add((attack["technique"], attack.get("subtechnique"), attack["id"]))
|
||||
for attack in rule.meta.attack:
|
||||
tactics[attack.tactic].add((attack.technique, attack.subtechnique, attack.id))
|
||||
|
||||
rows = []
|
||||
for tactic, techniques in sorted(tactics.items()):
|
||||
inner_rows = []
|
||||
for (technique, subtechnique, id) in sorted(techniques):
|
||||
if subtechnique is None:
|
||||
if not subtechnique:
|
||||
inner_rows.append("%s %s" % (rutils.bold(technique), id))
|
||||
else:
|
||||
inner_rows.append("%s::%s %s" % (rutils.bold(technique), subtechnique, id))
|
||||
@@ -156,7 +154,7 @@ def render_attack(doc, ostream: StringIO):
|
||||
ostream.write("\n")
|
||||
|
||||
|
||||
def render_mbc(doc, ostream: StringIO):
|
||||
def render_mbc(doc: rd.ResultDocument, ostream: StringIO):
|
||||
"""
|
||||
example::
|
||||
|
||||
@@ -172,17 +170,14 @@ def render_mbc(doc, ostream: StringIO):
|
||||
"""
|
||||
objectives = collections.defaultdict(set)
|
||||
for rule in rutils.capability_rules(doc):
|
||||
if not rule["meta"].get("mbc"):
|
||||
continue
|
||||
|
||||
for mbc in rule["meta"]["mbc"]:
|
||||
objectives[mbc["objective"]].add((mbc["behavior"], mbc.get("method"), mbc["id"]))
|
||||
for mbc in rule.meta.mbc:
|
||||
objectives[mbc.objective].add((mbc.behavior, mbc.method, mbc.id))
|
||||
|
||||
rows = []
|
||||
for objective, behaviors in sorted(objectives.items()):
|
||||
inner_rows = []
|
||||
for (behavior, method, id) in sorted(behaviors):
|
||||
if method is None:
|
||||
if not method:
|
||||
inner_rows.append("%s [%s]" % (rutils.bold(behavior), id))
|
||||
else:
|
||||
inner_rows.append("%s::%s [%s]" % (rutils.bold(behavior), method, id))
|
||||
@@ -200,7 +195,7 @@ def render_mbc(doc, ostream: StringIO):
|
||||
ostream.write("\n")
|
||||
|
||||
|
||||
def render_default(doc):
|
||||
def render_default(doc: rd.ResultDocument):
|
||||
ostream = rutils.StringIO()
|
||||
|
||||
render_meta(doc, ostream)
|
||||
@@ -215,5 +210,5 @@ def render_default(doc):
|
||||
|
||||
|
||||
def render(meta, rules: RuleSet, capabilities: MatchResults) -> str:
|
||||
doc = capa.render.result_document.convert_capabilities_to_result_document(meta, rules, capabilities)
|
||||
doc = rd.ResultDocument.from_capa(meta, rules, capabilities)
|
||||
return render_default(doc)
|
||||
|
||||
+2
-21
@@ -5,29 +5,10 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import json
|
||||
|
||||
import capa.render.result_document as rd
|
||||
from capa.rules import RuleSet
|
||||
from capa.engine import MatchResults
|
||||
from capa.render.result_document import convert_capabilities_to_result_document
|
||||
|
||||
|
||||
class CapaJsonObjectEncoder(json.JSONEncoder):
|
||||
"""JSON encoder that emits Python sets as sorted lists"""
|
||||
|
||||
def default(self, obj):
|
||||
if isinstance(obj, (list, dict, int, float, bool, type(None))) or isinstance(obj, str):
|
||||
return json.JSONEncoder.default(self, obj)
|
||||
elif isinstance(obj, set):
|
||||
return list(sorted(obj))
|
||||
else:
|
||||
# probably will TypeError
|
||||
return json.JSONEncoder.default(self, obj)
|
||||
|
||||
|
||||
def render(meta, rules: RuleSet, capabilities: MatchResults) -> str:
|
||||
return json.dumps(
|
||||
convert_capabilities_to_result_document(meta, rules, capabilities),
|
||||
cls=CapaJsonObjectEncoder,
|
||||
sort_keys=True,
|
||||
)
|
||||
return rd.ResultDocument.from_capa(meta, rules, capabilities).json(exclude_none=True)
|
||||
|
||||
+523
-294
@@ -5,324 +5,553 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import copy
|
||||
import datetime
|
||||
from typing import Any, Dict, Tuple, Union, Optional
|
||||
|
||||
from pydantic import Field, BaseModel
|
||||
|
||||
import capa.rules
|
||||
import capa.engine
|
||||
import capa.render.utils
|
||||
import capa.features.common
|
||||
import capa.features.freeze as frz
|
||||
import capa.features.address
|
||||
from capa.rules import RuleSet
|
||||
from capa.engine import MatchResults
|
||||
from capa.helpers import assert_never
|
||||
|
||||
|
||||
def convert_statement_to_result_document(statement):
|
||||
"""
|
||||
"statement": {
|
||||
"type": "or"
|
||||
},
|
||||
|
||||
"statement": {
|
||||
"max": 9223372036854775808,
|
||||
"min": 2,
|
||||
"type": "range"
|
||||
},
|
||||
"""
|
||||
statement_type = statement.name.lower()
|
||||
result = {"type": statement_type}
|
||||
if statement.description:
|
||||
result["description"] = statement.description
|
||||
|
||||
if statement_type == "some" and statement.count == 0:
|
||||
result["type"] = "optional"
|
||||
elif statement_type == "some":
|
||||
result["count"] = statement.count
|
||||
elif statement_type == "range":
|
||||
result["min"] = statement.min
|
||||
result["max"] = statement.max
|
||||
result["child"] = convert_feature_to_result_document(statement.child)
|
||||
elif statement_type == "subscope":
|
||||
result["subscope"] = statement.scope
|
||||
|
||||
return result
|
||||
class FrozenModel(BaseModel):
|
||||
class Config:
|
||||
frozen = True
|
||||
|
||||
|
||||
def convert_feature_to_result_document(feature):
|
||||
"""
|
||||
"feature": {
|
||||
"number": 6,
|
||||
"type": "number"
|
||||
},
|
||||
|
||||
"feature": {
|
||||
"api": "ws2_32.WSASocket",
|
||||
"type": "api"
|
||||
},
|
||||
|
||||
"feature": {
|
||||
"match": "create TCP socket",
|
||||
"type": "match"
|
||||
},
|
||||
|
||||
"feature": {
|
||||
"characteristic": [
|
||||
"loop",
|
||||
true
|
||||
],
|
||||
"type": "characteristic"
|
||||
},
|
||||
"""
|
||||
result = {"type": feature.name, feature.name: feature.get_value_str()}
|
||||
if feature.description:
|
||||
result["description"] = feature.description
|
||||
if feature.name in ("regex", "substring"):
|
||||
result["matches"] = feature.matches
|
||||
return result
|
||||
class Sample(FrozenModel):
|
||||
md5: str
|
||||
sha1: str
|
||||
sha256: str
|
||||
path: str
|
||||
|
||||
|
||||
def convert_node_to_result_document(node):
|
||||
"""
|
||||
"node": {
|
||||
"type": "statement",
|
||||
"statement": { ... }
|
||||
},
|
||||
class BasicBlockLayout(FrozenModel):
|
||||
address: frz.Address
|
||||
|
||||
"node": {
|
||||
"type": "feature",
|
||||
"feature": { ... }
|
||||
},
|
||||
"""
|
||||
|
||||
if isinstance(node, capa.engine.Statement):
|
||||
return {
|
||||
"type": "statement",
|
||||
"statement": convert_statement_to_result_document(node),
|
||||
}
|
||||
elif isinstance(node, capa.features.common.Feature):
|
||||
return {
|
||||
"type": "feature",
|
||||
"feature": convert_feature_to_result_document(node),
|
||||
}
|
||||
class FunctionLayout(FrozenModel):
|
||||
address: frz.Address
|
||||
matched_basic_blocks: Tuple[BasicBlockLayout, ...]
|
||||
|
||||
|
||||
class Layout(FrozenModel):
|
||||
functions: Tuple[FunctionLayout, ...]
|
||||
|
||||
|
||||
class LibraryFunction(FrozenModel):
|
||||
address: frz.Address
|
||||
name: str
|
||||
|
||||
|
||||
class FunctionFeatureCount(FrozenModel):
|
||||
address: frz.Address
|
||||
count: int
|
||||
|
||||
|
||||
class FeatureCounts(FrozenModel):
|
||||
file: int
|
||||
functions: Tuple[FunctionFeatureCount, ...]
|
||||
|
||||
|
||||
class Analysis(FrozenModel):
|
||||
format: str
|
||||
arch: str
|
||||
os: str
|
||||
extractor: str
|
||||
rules: Tuple[str, ...]
|
||||
base_address: frz.Address
|
||||
layout: Layout
|
||||
feature_counts: FeatureCounts
|
||||
library_functions: Tuple[LibraryFunction, ...]
|
||||
|
||||
|
||||
class Metadata(FrozenModel):
|
||||
timestamp: datetime.datetime
|
||||
version: str
|
||||
argv: Tuple[str, ...]
|
||||
sample: Sample
|
||||
analysis: Analysis
|
||||
|
||||
@classmethod
|
||||
def from_capa(cls, meta: Any) -> "Metadata":
|
||||
return cls(
|
||||
timestamp=meta["timestamp"],
|
||||
version=meta["version"],
|
||||
argv=meta["argv"] if "argv" in meta else None,
|
||||
sample=Sample(
|
||||
md5=meta["sample"]["md5"],
|
||||
sha1=meta["sample"]["sha1"],
|
||||
sha256=meta["sample"]["sha256"],
|
||||
path=meta["sample"]["path"],
|
||||
),
|
||||
analysis=Analysis(
|
||||
format=meta["analysis"]["format"],
|
||||
arch=meta["analysis"]["arch"],
|
||||
os=meta["analysis"]["os"],
|
||||
extractor=meta["analysis"]["extractor"],
|
||||
rules=meta["analysis"]["rules"],
|
||||
base_address=frz.Address.from_capa(meta["analysis"]["base_address"]),
|
||||
layout=Layout(
|
||||
functions=[
|
||||
FunctionLayout(
|
||||
address=frz.Address.from_capa(address),
|
||||
matched_basic_blocks=[
|
||||
BasicBlockLayout(address=frz.Address.from_capa(bb)) for bb in f["matched_basic_blocks"]
|
||||
],
|
||||
)
|
||||
for address, f in meta["analysis"]["layout"]["functions"].items()
|
||||
]
|
||||
),
|
||||
feature_counts=FeatureCounts(
|
||||
file=meta["analysis"]["feature_counts"]["file"],
|
||||
functions=[
|
||||
FunctionFeatureCount(address=frz.Address.from_capa(address), count=count)
|
||||
for address, count in meta["analysis"]["feature_counts"]["functions"].items()
|
||||
],
|
||||
),
|
||||
library_functions=[
|
||||
LibraryFunction(address=frz.Address.from_capa(address), name=name)
|
||||
for address, name in meta["analysis"]["library_functions"].items()
|
||||
],
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class StatementModel(FrozenModel):
|
||||
...
|
||||
|
||||
|
||||
class AndStatement(StatementModel):
|
||||
type = "and"
|
||||
description: Optional[str]
|
||||
|
||||
|
||||
class OrStatement(StatementModel):
|
||||
type = "or"
|
||||
description: Optional[str]
|
||||
|
||||
|
||||
class NotStatement(StatementModel):
|
||||
type = "not"
|
||||
description: Optional[str]
|
||||
|
||||
|
||||
class SomeStatement(StatementModel):
|
||||
type = "some"
|
||||
description: Optional[str]
|
||||
count: int
|
||||
|
||||
|
||||
class OptionalStatement(StatementModel):
|
||||
type = "optional"
|
||||
description: Optional[str]
|
||||
|
||||
|
||||
class RangeStatement(StatementModel):
|
||||
type = "range"
|
||||
description: Optional[str]
|
||||
min: int
|
||||
max: int
|
||||
child: frz.Feature
|
||||
|
||||
|
||||
class SubscopeStatement(StatementModel):
|
||||
type = "subscope"
|
||||
description: Optional[str]
|
||||
scope = capa.rules.Scope
|
||||
|
||||
|
||||
Statement = Union[
|
||||
OptionalStatement,
|
||||
AndStatement,
|
||||
OrStatement,
|
||||
NotStatement,
|
||||
SomeStatement,
|
||||
RangeStatement,
|
||||
SubscopeStatement,
|
||||
]
|
||||
|
||||
|
||||
class StatementNode(FrozenModel):
|
||||
type = "statement"
|
||||
statement: Statement
|
||||
|
||||
|
||||
def statement_from_capa(node: capa.engine.Statement) -> Statement:
|
||||
if isinstance(node, capa.engine.And):
|
||||
return AndStatement(description=node.description)
|
||||
|
||||
elif isinstance(node, capa.engine.Or):
|
||||
return OrStatement(description=node.description)
|
||||
|
||||
elif isinstance(node, capa.engine.Not):
|
||||
return NotStatement(description=node.description)
|
||||
|
||||
elif isinstance(node, capa.engine.Some):
|
||||
if node.count == 0:
|
||||
return OptionalStatement(description=node.description)
|
||||
|
||||
else:
|
||||
return SomeStatement(
|
||||
description=node.description,
|
||||
count=node.count,
|
||||
)
|
||||
|
||||
elif isinstance(node, capa.engine.Range):
|
||||
return RangeStatement(
|
||||
description=node.description,
|
||||
min=node.min,
|
||||
max=node.max,
|
||||
child=frz.feature_from_capa(node.child),
|
||||
)
|
||||
|
||||
elif isinstance(node, capa.engine.Subscope):
|
||||
return SubscopeStatement(
|
||||
description=node.description,
|
||||
scope=capa.rules.Scope(node.scope),
|
||||
)
|
||||
|
||||
else:
|
||||
raise RuntimeError("unexpected match node type")
|
||||
raise NotImplementedError(f"statement_from_capa({type(node)}) not implemented")
|
||||
|
||||
|
||||
def convert_match_to_result_document(rules, capabilities, result):
|
||||
class FeatureNode(FrozenModel):
|
||||
type = "feature"
|
||||
feature: frz.Feature
|
||||
|
||||
|
||||
Node = Union[StatementNode, FeatureNode]
|
||||
|
||||
|
||||
def node_from_capa(node: Union[capa.engine.Statement, capa.engine.Feature]) -> Node:
|
||||
if isinstance(node, capa.engine.Statement):
|
||||
return StatementNode(statement=statement_from_capa(node))
|
||||
|
||||
elif isinstance(node, capa.engine.Feature):
|
||||
return FeatureNode(feature=frz.feature_from_capa(node))
|
||||
|
||||
else:
|
||||
assert_never(node)
|
||||
|
||||
|
||||
class Match(BaseModel):
|
||||
"""
|
||||
convert the given Result instance into a common, Python-native data structure.
|
||||
this will become part of the "result document" format that can be emitted to JSON.
|
||||
args:
|
||||
success: did the node match?
|
||||
node: the logic node or feature node.
|
||||
children: any children of the logic node. not relevant for features, can be empty.
|
||||
locations: where the feature matched. not relevant for logic nodes (except range), can be empty.
|
||||
captures: captured values from the string/regex feature, and the locations of those values.
|
||||
"""
|
||||
doc = {
|
||||
"success": bool(result.success),
|
||||
"node": convert_node_to_result_document(result.statement),
|
||||
"children": [convert_match_to_result_document(rules, capabilities, child) for child in result.children],
|
||||
}
|
||||
|
||||
# logic expression, like `and`, don't have locations - their children do.
|
||||
# so only add `locations` to feature nodes.
|
||||
if isinstance(result.statement, capa.features.common.Feature):
|
||||
if bool(result.success):
|
||||
doc["locations"] = result.locations
|
||||
elif isinstance(result.statement, capa.engine.Range):
|
||||
if bool(result.success):
|
||||
doc["locations"] = result.locations
|
||||
success: bool
|
||||
node: Node
|
||||
children: Tuple["Match", ...]
|
||||
locations: Tuple[frz.Address, ...]
|
||||
captures: Dict[str, Tuple[frz.Address, ...]]
|
||||
|
||||
# if we have a `match` statement, then we're referencing another rule or namespace.
|
||||
# this could be an external rule (written by a human), or
|
||||
# rule generated to support a subscope (basic block, etc.)
|
||||
# we still want to include the matching logic in this tree.
|
||||
#
|
||||
# so, we need to lookup the other rule results
|
||||
# and then filter those down to the address used here.
|
||||
# finally, splice that logic into this tree.
|
||||
if (
|
||||
doc["node"]["type"] == "feature"
|
||||
and doc["node"]["feature"]["type"] == "match"
|
||||
# only add subtree on success,
|
||||
# because there won't be results for the other rule on failure.
|
||||
and doc["success"]
|
||||
):
|
||||
@classmethod
|
||||
def from_capa(
|
||||
cls,
|
||||
rules: RuleSet,
|
||||
capabilities: MatchResults,
|
||||
result: capa.engine.Result,
|
||||
) -> "Match":
|
||||
success = bool(result)
|
||||
|
||||
name = doc["node"]["feature"]["match"]
|
||||
node = node_from_capa(result.statement)
|
||||
children = [Match.from_capa(rules, capabilities, child) for child in result.children]
|
||||
|
||||
if name in rules:
|
||||
# this is a rule that we're matching
|
||||
#
|
||||
# pull matches from the referenced rule into our tree here.
|
||||
rule_name = doc["node"]["feature"]["match"]
|
||||
# logic expression, like `and`, don't have locations - their children do.
|
||||
# so only add `locations` to feature nodes.
|
||||
locations = []
|
||||
if isinstance(node, FeatureNode) and success:
|
||||
locations = list(map(frz.Address.from_capa, result.locations))
|
||||
elif isinstance(node, StatementNode) and isinstance(node.statement, RangeStatement) and success:
|
||||
locations = list(map(frz.Address.from_capa, result.locations))
|
||||
|
||||
captures = {}
|
||||
if isinstance(result.statement, (capa.features.common._MatchedSubstring, capa.features.common._MatchedRegex)):
|
||||
captures = {
|
||||
capture: list(map(frz.Address.from_capa, locs)) for capture, locs in result.statement.matches.items()
|
||||
}
|
||||
|
||||
# if we have a `match` statement, then we're referencing another rule or namespace.
|
||||
# this could be an external rule (written by a human), or
|
||||
# rule generated to support a subscope (basic block, etc.)
|
||||
# we still want to include the matching logic in this tree.
|
||||
#
|
||||
# so, we need to lookup the other rule results
|
||||
# and then filter those down to the address used here.
|
||||
# finally, splice that logic into this tree.
|
||||
if (
|
||||
isinstance(node, FeatureNode)
|
||||
and isinstance(node.feature, frz.features.MatchFeature)
|
||||
# only add subtree on success,
|
||||
# because there won't be results for the other rule on failure.
|
||||
and success
|
||||
):
|
||||
name = node.feature.match
|
||||
|
||||
if name in rules:
|
||||
# this is a rule that we're matching
|
||||
#
|
||||
# pull matches from the referenced rule into our tree here.
|
||||
rule_name = name
|
||||
rule = rules[rule_name]
|
||||
rule_matches = {address: result for (address, result) in capabilities[rule_name]}
|
||||
|
||||
if rule.is_subscope_rule():
|
||||
# for a subscope rule, fixup the node to be a scope node, rather than a match feature node.
|
||||
#
|
||||
# e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block`
|
||||
#
|
||||
# note! replace `node`
|
||||
node = StatementNode(
|
||||
statement=SubscopeStatement(
|
||||
scope=rule.meta["scope"],
|
||||
)
|
||||
)
|
||||
|
||||
for location in result.locations:
|
||||
children.append(Match.from_capa(rules, capabilities, rule_matches[location]))
|
||||
else:
|
||||
# this is a namespace that we're matching
|
||||
#
|
||||
# check for all rules in the namespace,
|
||||
# seeing if they matched.
|
||||
# if so, pull their matches into our match tree here.
|
||||
ns_name = name
|
||||
ns_rules = rules.rules_by_namespace[ns_name]
|
||||
|
||||
for rule in ns_rules:
|
||||
if rule.name in capabilities:
|
||||
# the rule matched, so splice results into our tree here.
|
||||
#
|
||||
# note, there's a shortcoming in our result document schema here:
|
||||
# we lose the name of the rule that matched in a namespace.
|
||||
# for example, if we have a statement: `match: runtime/dotnet`
|
||||
# and we get matches, we can say the following:
|
||||
#
|
||||
# match: runtime/dotnet @ 0x0
|
||||
# or:
|
||||
# import: mscoree._CorExeMain @ 0x402000
|
||||
#
|
||||
# however, we lose the fact that it was rule
|
||||
# "compiled to the .NET platform"
|
||||
# that contained this logic and did the match.
|
||||
#
|
||||
# we could introduce an intermediate node here.
|
||||
# this would be a breaking change and require updates to the renderers.
|
||||
# in the meantime, the above might be sufficient.
|
||||
rule_matches = {address: result for (address, result) in capabilities[rule.name]}
|
||||
for location in result.locations:
|
||||
# doc[locations] contains all matches for the given namespace.
|
||||
# for example, the feature might be `match: anti-analysis/packer`
|
||||
# which matches against "generic unpacker" and "UPX".
|
||||
# in this case, doc[locations] contains locations for *both* of these.
|
||||
#
|
||||
# rule_matches contains the matches for the specific rule.
|
||||
# this is a subset of doc[locations].
|
||||
#
|
||||
# so, grab only the locations for current rule.
|
||||
if location in rule_matches:
|
||||
children.append(Match.from_capa(rules, capabilities, rule_matches[location]))
|
||||
|
||||
return cls(
|
||||
success=success,
|
||||
node=node,
|
||||
children=children,
|
||||
locations=locations,
|
||||
captures=captures,
|
||||
)
|
||||
|
||||
|
||||
def parse_parts_id(s: str):
|
||||
id = ""
|
||||
parts = s.split("::")
|
||||
if len(parts) > 0:
|
||||
last = parts.pop()
|
||||
last, _, id = last.rpartition(" ")
|
||||
id = id.lstrip("[").rstrip("]")
|
||||
parts.append(last)
|
||||
return parts, id
|
||||
|
||||
|
||||
class AttackSpec(FrozenModel):
|
||||
"""
|
||||
given an ATT&CK spec like: `Tactic::Technique::Subtechnique [Identifier]`
|
||||
e.g., `Execution::Command and Scripting Interpreter::Python [T1059.006]`
|
||||
|
||||
args:
|
||||
tactic: like `Tactic` above, perhaps "Execution"
|
||||
technique: like `Technique` above, perhaps "Command and Scripting Interpreter"
|
||||
subtechnique: like `Subtechnique` above, perhaps "Python"
|
||||
id: like `Identifier` above, perhaps "T1059.006"
|
||||
"""
|
||||
|
||||
parts: Tuple[str, ...]
|
||||
tactic: str
|
||||
technique: str
|
||||
subtechnique: str
|
||||
id: str
|
||||
|
||||
@classmethod
|
||||
def from_str(cls, s) -> "AttackSpec":
|
||||
tactic = ""
|
||||
technique = ""
|
||||
subtechnique = ""
|
||||
parts, id = parse_parts_id(s)
|
||||
if len(parts) > 0:
|
||||
tactic = parts[0]
|
||||
if len(parts) > 1:
|
||||
technique = parts[1]
|
||||
if len(parts) > 2:
|
||||
subtechnique = parts[2]
|
||||
|
||||
return cls(
|
||||
parts=parts,
|
||||
tactic=tactic,
|
||||
technique=technique,
|
||||
subtechnique=subtechnique,
|
||||
id=id,
|
||||
)
|
||||
|
||||
|
||||
class MBCSpec(FrozenModel):
|
||||
"""
|
||||
given an MBC spec like: `Objective::Behavior::Method [Identifier]`
|
||||
e.g., `Collection::Input Capture::Mouse Events [E1056.m01]`
|
||||
|
||||
args:
|
||||
objective: like `Objective` above, perhaps "Collection"
|
||||
behavior: like `Behavior` above, perhaps "Input Capture"
|
||||
method: like `Method` above, perhaps "Mouse Events"
|
||||
id: like `Identifier` above, perhaps "E1056.m01"
|
||||
"""
|
||||
|
||||
parts: Tuple[str, ...]
|
||||
objective: str
|
||||
behavior: str
|
||||
method: str
|
||||
id: str
|
||||
|
||||
@classmethod
|
||||
def from_str(cls, s) -> "MBCSpec":
|
||||
objective = ""
|
||||
behavior = ""
|
||||
method = ""
|
||||
parts, id = parse_parts_id(s)
|
||||
if len(parts) > 0:
|
||||
objective = parts[0]
|
||||
if len(parts) > 1:
|
||||
behavior = parts[1]
|
||||
if len(parts) > 2:
|
||||
method = parts[2]
|
||||
|
||||
return cls(
|
||||
parts=parts,
|
||||
objective=objective,
|
||||
behavior=behavior,
|
||||
method=method,
|
||||
id=id,
|
||||
)
|
||||
|
||||
|
||||
class MaecMetadata(FrozenModel):
|
||||
analysis_conclusion: Optional[str] = Field(None, alias="analysis-conclusion")
|
||||
analysis_conclusion_ov: Optional[str] = Field(None, alias="analysis-conclusion-ov")
|
||||
malware_family: Optional[str] = Field(None, alias="malware-family")
|
||||
malware_category: Optional[str] = Field(None, alias="malware-category")
|
||||
malware_category_ov: Optional[str] = Field(None, alias="malware-category-ov")
|
||||
|
||||
class Config:
|
||||
frozen = True
|
||||
allow_population_by_field_name = True
|
||||
|
||||
|
||||
class RuleMetadata(FrozenModel):
|
||||
name: str
|
||||
namespace: Optional[str]
|
||||
authors: Tuple[str, ...]
|
||||
scope: capa.rules.Scope
|
||||
attack: Tuple[AttackSpec, ...] = Field(alias="att&ck")
|
||||
mbc: Tuple[MBCSpec, ...]
|
||||
references: Tuple[str, ...]
|
||||
examples: Tuple[str, ...]
|
||||
description: str
|
||||
|
||||
lib: bool = Field(False, alias="lib")
|
||||
is_subscope_rule: bool = Field(False, alias="capa/subscope")
|
||||
maec: MaecMetadata
|
||||
|
||||
@classmethod
|
||||
def from_capa(cls, rule: capa.rules.Rule) -> "RuleMetadata":
|
||||
return cls(
|
||||
name=rule.meta.get("name"),
|
||||
namespace=rule.meta.get("namespace"),
|
||||
authors=rule.meta.get("authors"),
|
||||
scope=capa.rules.Scope(rule.meta.get("scope")),
|
||||
attack=list(map(AttackSpec.from_str, rule.meta.get("att&ck", []))),
|
||||
mbc=list(map(MBCSpec.from_str, rule.meta.get("mbc", []))),
|
||||
references=rule.meta.get("references", []),
|
||||
examples=rule.meta.get("examples", []),
|
||||
description=rule.meta.get("description", ""),
|
||||
lib=rule.meta.get("lib", False),
|
||||
capa_subscope=rule.meta.get("capa/subscope", False),
|
||||
maec=MaecMetadata(
|
||||
analysis_conclusion=rule.meta.get("maec/analysis-conclusion"),
|
||||
analysis_conclusion_ov=rule.meta.get("maec/analysis-conclusion-ov"),
|
||||
malware_family=rule.meta.get("maec/malware-family"),
|
||||
malware_category=rule.meta.get("maec/malware-category"),
|
||||
malware_category_ov=rule.meta.get("maec/malware-category-ov"),
|
||||
),
|
||||
)
|
||||
|
||||
class Config:
|
||||
frozen = True
|
||||
allow_population_by_field_name = True
|
||||
|
||||
|
||||
class RuleMatches(BaseModel):
|
||||
"""
|
||||
args:
|
||||
meta: the metadata from the rule
|
||||
source: the raw rule text
|
||||
"""
|
||||
|
||||
meta: RuleMetadata
|
||||
source: str
|
||||
matches: Tuple[Tuple[frz.Address, Match], ...]
|
||||
|
||||
|
||||
class ResultDocument(BaseModel):
|
||||
meta: Metadata
|
||||
rules: Dict[str, RuleMatches]
|
||||
|
||||
@classmethod
|
||||
def from_capa(cls, meta, rules: RuleSet, capabilities: MatchResults) -> "ResultDocument":
|
||||
rule_matches: Dict[str, RuleMatches] = {}
|
||||
for rule_name, matches in capabilities.items():
|
||||
rule = rules[rule_name]
|
||||
rule_matches = {address: result for (address, result) in capabilities[rule_name]}
|
||||
|
||||
if rule.meta.get("capa/subscope-rule"):
|
||||
# for a subscope rule, fixup the node to be a scope node, rather than a match feature node.
|
||||
#
|
||||
# e.g. `contain loop/30c4c78e29bf4d54894fc74f664c62e8` -> `basic block`
|
||||
scope = rule.meta["scope"]
|
||||
doc["node"] = {
|
||||
"type": "statement",
|
||||
"statement": {
|
||||
"type": "subscope",
|
||||
"subscope": scope,
|
||||
},
|
||||
}
|
||||
continue
|
||||
|
||||
for location in doc["locations"]:
|
||||
doc["children"].append(convert_match_to_result_document(rules, capabilities, rule_matches[location]))
|
||||
else:
|
||||
# this is a namespace that we're matching
|
||||
#
|
||||
# check for all rules in the namespace,
|
||||
# seeing if they matched.
|
||||
# if so, pull their matches into our match tree here.
|
||||
ns_name = doc["node"]["feature"]["match"]
|
||||
ns_rules = rules.rules_by_namespace[ns_name]
|
||||
rule_matches[rule_name] = RuleMatches(
|
||||
meta=RuleMetadata.from_capa(rule),
|
||||
source=rule.definition,
|
||||
matches=[
|
||||
(frz.Address.from_capa(addr), Match.from_capa(rules, capabilities, match))
|
||||
for addr, match in matches
|
||||
],
|
||||
)
|
||||
|
||||
for rule in ns_rules:
|
||||
if rule.name in capabilities:
|
||||
# the rule matched, so splice results into our tree here.
|
||||
#
|
||||
# note, there's a shortcoming in our result document schema here:
|
||||
# we lose the name of the rule that matched in a namespace.
|
||||
# for example, if we have a statement: `match: runtime/dotnet`
|
||||
# and we get matches, we can say the following:
|
||||
#
|
||||
# match: runtime/dotnet @ 0x0
|
||||
# or:
|
||||
# import: mscoree._CorExeMain @ 0x402000
|
||||
#
|
||||
# however, we lose the fact that it was rule
|
||||
# "compiled to the .NET platform"
|
||||
# that contained this logic and did the match.
|
||||
#
|
||||
# we could introduce an intermediate node here.
|
||||
# this would be a breaking change and require updates to the renderers.
|
||||
# in the meantime, the above might be sufficient.
|
||||
rule_matches = {address: result for (address, result) in capabilities[rule.name]}
|
||||
for location in doc["locations"]:
|
||||
# doc[locations] contains all matches for the given namespace.
|
||||
# for example, the feature might be `match: anti-analysis/packer`
|
||||
# which matches against "generic unpacker" and "UPX".
|
||||
# in this case, doc[locations] contains locations for *both* of these.
|
||||
#
|
||||
# rule_matches contains the matches for the specific rule.
|
||||
# this is a subset of doc[locations].
|
||||
#
|
||||
# so, grab only the locations for current rule.
|
||||
if location in rule_matches:
|
||||
doc["children"].append(
|
||||
convert_match_to_result_document(rules, capabilities, rule_matches[location])
|
||||
)
|
||||
|
||||
return doc
|
||||
|
||||
|
||||
def convert_meta_to_result_document(meta):
|
||||
# make a copy so that we don't modify the given parameter
|
||||
meta = copy.deepcopy(meta)
|
||||
|
||||
attacks = meta.get("att&ck", [])
|
||||
meta["att&ck"] = [parse_canonical_attack(attack) for attack in attacks]
|
||||
mbcs = meta.get("mbc", [])
|
||||
meta["mbc"] = [parse_canonical_mbc(mbc) for mbc in mbcs]
|
||||
return meta
|
||||
|
||||
|
||||
def parse_canonical_attack(attack: str):
|
||||
"""
|
||||
parse capa's canonical ATT&CK representation: `Tactic::Technique::Subtechnique [Identifier]`
|
||||
"""
|
||||
tactic = ""
|
||||
technique = ""
|
||||
subtechnique = ""
|
||||
parts, id = capa.render.utils.parse_parts_id(attack)
|
||||
if len(parts) > 0:
|
||||
tactic = parts[0]
|
||||
if len(parts) > 1:
|
||||
technique = parts[1]
|
||||
if len(parts) > 2:
|
||||
subtechnique = parts[2]
|
||||
|
||||
return {
|
||||
"parts": parts,
|
||||
"id": id,
|
||||
"tactic": tactic,
|
||||
"technique": technique,
|
||||
"subtechnique": subtechnique,
|
||||
}
|
||||
|
||||
|
||||
def parse_canonical_mbc(mbc: str):
|
||||
"""
|
||||
parse capa's canonical MBC representation: `Objective::Behavior::Method [Identifier]`
|
||||
"""
|
||||
objective = ""
|
||||
behavior = ""
|
||||
method = ""
|
||||
parts, id = capa.render.utils.parse_parts_id(mbc)
|
||||
if len(parts) > 0:
|
||||
objective = parts[0]
|
||||
if len(parts) > 1:
|
||||
behavior = parts[1]
|
||||
if len(parts) > 2:
|
||||
method = parts[2]
|
||||
|
||||
return {
|
||||
"parts": parts,
|
||||
"id": id,
|
||||
"objective": objective,
|
||||
"behavior": behavior,
|
||||
"method": method,
|
||||
}
|
||||
|
||||
|
||||
def convert_capabilities_to_result_document(meta, rules: RuleSet, capabilities: MatchResults):
|
||||
"""
|
||||
convert the given rule set and capabilities result to a common, Python-native data structure.
|
||||
this format can be directly emitted to JSON, or passed to the other `capa.render.*.render()` routines
|
||||
to render as text.
|
||||
|
||||
see examples of substructures in above routines.
|
||||
|
||||
schema:
|
||||
|
||||
```json
|
||||
{
|
||||
"meta": {...},
|
||||
"rules": {
|
||||
$rule-name: {
|
||||
"meta": {...copied from rule.meta...},
|
||||
"matches": {
|
||||
$address: {...match details...},
|
||||
...
|
||||
}
|
||||
},
|
||||
...
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Args:
|
||||
meta (Dict[str, Any]):
|
||||
rules (RuleSet):
|
||||
capabilities (Dict[str, List[Tuple[int, Result]]]):
|
||||
"""
|
||||
doc = {
|
||||
"meta": meta,
|
||||
"rules": {},
|
||||
}
|
||||
|
||||
for rule_name, matches in capabilities.items():
|
||||
rule = rules[rule_name]
|
||||
|
||||
if rule.meta.get("capa/subscope-rule"):
|
||||
continue
|
||||
|
||||
rule_meta = convert_meta_to_result_document(rule.meta)
|
||||
|
||||
doc["rules"][rule_name] = {
|
||||
"meta": rule_meta,
|
||||
"source": rule.definition,
|
||||
"matches": {
|
||||
addr: convert_match_to_result_document(rules, capabilities, match) for (addr, match) in matches
|
||||
},
|
||||
}
|
||||
|
||||
return doc
|
||||
return ResultDocument(meta=Metadata.from_capa(meta), rules=rule_matches)
|
||||
|
||||
+14
-24
@@ -7,9 +7,12 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import io
|
||||
from typing import Union, Iterator
|
||||
|
||||
import termcolor
|
||||
|
||||
import capa.render.result_document as rd
|
||||
|
||||
|
||||
def bold(s: str) -> str:
|
||||
"""draw attention to the given string"""
|
||||
@@ -29,42 +32,29 @@ def hex(n: int) -> str:
|
||||
return "0x%X" % n
|
||||
|
||||
|
||||
def parse_parts_id(s: str):
    """
    split a string like `Part::Part::Part [ID]` into its parts and its ID.

    the string is split on `::`. within the final part, the text after the last space
    is taken as the ID (with leading `[` and trailing `]` characters removed)
    and the text before that space stays as the final part.

    note that when the final part contains no space at all,
    the whole of it becomes the ID and the final part is the empty string.

    returns: a tuple of (list of parts, ID string).
    """
    *leading, tail = s.split("::")

    # everything after the last space of the final part is the bracketed ID.
    tail, _, ident = tail.rpartition(" ")
    ident = ident.lstrip("[").rstrip("]")

    return leading + [tail], ident
|
||||
|
||||
|
||||
def format_parts_id(data):
|
||||
def format_parts_id(data: Union[rd.AttackSpec, rd.MBCSpec]):
|
||||
"""
|
||||
format canonical representation of ATT&CK/MBC parts and ID
|
||||
"""
|
||||
return "%s [%s]" % ("::".join(data["parts"]), data["id"])
|
||||
return "%s [%s]" % ("::".join(data.parts), data.id)
|
||||
|
||||
|
||||
def capability_rules(doc):
|
||||
def capability_rules(doc: rd.ResultDocument) -> Iterator[rd.RuleMatches]:
|
||||
"""enumerate the rules in (namespace, name) order that are 'capability' rules (not lib/subscope/disposition/etc)."""
|
||||
for (_, _, rule) in sorted(
|
||||
map(lambda rule: (rule["meta"].get("namespace", ""), rule["meta"]["name"], rule), doc["rules"].values())
|
||||
):
|
||||
if rule["meta"].get("lib"):
|
||||
for (_, _, rule) in sorted(map(lambda rule: (rule.meta.namespace or "", rule.meta.name, rule), doc.rules.values())):
|
||||
if rule.meta.lib:
|
||||
continue
|
||||
if rule["meta"].get("capa/subscope"):
|
||||
if rule.meta.is_subscope_rule:
|
||||
continue
|
||||
if rule["meta"].get("maec/analysis-conclusion"):
|
||||
if rule.meta.maec.analysis_conclusion:
|
||||
continue
|
||||
if rule["meta"].get("maec/analysis-conclusion-ov"):
|
||||
if rule.meta.maec.analysis_conclusion_ov:
|
||||
continue
|
||||
if rule["meta"].get("maec/malware-family"):
|
||||
if rule.meta.maec.malware_family:
|
||||
continue
|
||||
if rule["meta"].get("maec/malware-category"):
|
||||
if rule.meta.maec.malware_category:
|
||||
continue
|
||||
if rule["meta"].get("maec/malware-category-ov"):
|
||||
if rule.meta.maec.malware_category_ov:
|
||||
continue
|
||||
|
||||
yield rule
|
||||
|
||||
+59
-29
@@ -22,16 +22,46 @@ Unless required by applicable law or agreed to in writing, software distributed
|
||||
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and limitations under the License.
|
||||
"""
|
||||
from tarfile import FIFOTYPE
|
||||
|
||||
import tabulate
|
||||
import dnfile.mdtable
|
||||
import dncil.clr.token
|
||||
|
||||
import capa.rules
|
||||
import capa.render.utils as rutils
|
||||
import capa.features.freeze as frz
|
||||
import capa.render.result_document
|
||||
import capa.render.result_document as rd
|
||||
from capa.rules import RuleSet
|
||||
from capa.engine import MatchResults
|
||||
|
||||
|
||||
def render_meta(ostream, doc):
|
||||
def format_address(address: frz.Address) -> str:
    """
    render the given address as a human-readable string, based on its `type`:

      - ABSOLUTE: the value in hex, via `rutils.hex`
      - RELATIVE: `base address+` followed by the value in hex
      - FILE: `file+` followed by the value in hex
      - DN_TOKEN: `name[rid]`, where the value is wrapped in a `dncil.clr.token.Token`
        and the name comes from dnfile's table map entry for `token.table`
      - DN_TOKEN_OFFSET: like DN_TOKEN, for a (token, offset) value, followed by `+` and the offset in hex
      - NO_ADDRESS: the literal string `global`

    raises:
      ValueError: when the address type is none of the above.
    """

    def describe_token(raw_token) -> str:
        # shared by both .NET token cases: table name plus row id.
        tok = dncil.clr.token.Token(raw_token)
        table_name = dnfile.mdtable.ClrMetaDataTableFactory._table_number_map[tok.table].name
        return f"{table_name}[{tok.rid}]"

    kind = address.type

    if kind == frz.AddressType.ABSOLUTE:
        return rutils.hex(address.value)

    if kind == frz.AddressType.RELATIVE:
        return f"base address+{rutils.hex(address.value)}"

    if kind == frz.AddressType.FILE:
        return f"file+{rutils.hex(address.value)}"

    if kind == frz.AddressType.DN_TOKEN:
        return describe_token(address.value)

    if kind == frz.AddressType.DN_TOKEN_OFFSET:
        raw_token, offset = address.value
        return f"{describe_token(raw_token)}+{rutils.hex(offset)}"

    if kind == frz.AddressType.NO_ADDRESS:
        return "global"

    raise ValueError("unexpected address type")
|
||||
|
||||
|
||||
def render_meta(ostream, doc: rd.ResultDocument):
|
||||
"""
|
||||
like:
|
||||
|
||||
@@ -51,31 +81,31 @@ def render_meta(ostream, doc):
|
||||
total feature count 1918
|
||||
"""
|
||||
rows = [
|
||||
("md5", doc["meta"]["sample"]["md5"]),
|
||||
("sha1", doc["meta"]["sample"]["sha1"]),
|
||||
("sha256", doc["meta"]["sample"]["sha256"]),
|
||||
("path", doc["meta"]["sample"]["path"]),
|
||||
("timestamp", doc["meta"]["timestamp"]),
|
||||
("capa version", doc["meta"]["version"]),
|
||||
("os", doc["meta"]["analysis"]["os"]),
|
||||
("format", doc["meta"]["analysis"]["format"]),
|
||||
("arch", doc["meta"]["analysis"]["arch"]),
|
||||
("extractor", doc["meta"]["analysis"]["extractor"]),
|
||||
("base address", hex(doc["meta"]["analysis"]["base_address"])),
|
||||
("rules", "\n".join(doc["meta"]["analysis"]["rules"])),
|
||||
("function count", len(doc["meta"]["analysis"]["feature_counts"]["functions"])),
|
||||
("library function count", len(doc["meta"]["analysis"]["library_functions"])),
|
||||
("md5", doc.meta.sample.md5),
|
||||
("sha1", doc.meta.sample.sha1),
|
||||
("sha256", doc.meta.sample.sha256),
|
||||
("path", doc.meta.sample.path),
|
||||
("timestamp", doc.meta.timestamp),
|
||||
("capa version", doc.meta.version),
|
||||
("os", doc.meta.analysis.os),
|
||||
("format", doc.meta.analysis.format),
|
||||
("arch", doc.meta.analysis.arch),
|
||||
("extractor", doc.meta.analysis.extractor),
|
||||
("base address", format_address(doc.meta.analysis.base_address)),
|
||||
("rules", "\n".join(doc.meta.analysis.rules)),
|
||||
("function count", len(doc.meta.analysis.feature_counts.functions)),
|
||||
("library function count", len(doc.meta.analysis.library_functions)),
|
||||
(
|
||||
"total feature count",
|
||||
doc["meta"]["analysis"]["feature_counts"]["file"]
|
||||
+ sum(doc["meta"]["analysis"]["feature_counts"]["functions"].values()),
|
||||
doc.meta.analysis.feature_counts.file
|
||||
+ sum(map(lambda f: f.count, doc.meta.analysis.feature_counts.functions)),
|
||||
),
|
||||
]
|
||||
|
||||
ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
|
||||
|
||||
|
||||
def render_rules(ostream, doc):
|
||||
def render_rules(ostream, doc: rd.ResultDocument):
|
||||
"""
|
||||
like:
|
||||
|
||||
@@ -88,28 +118,29 @@ def render_rules(ostream, doc):
|
||||
"""
|
||||
had_match = False
|
||||
for rule in rutils.capability_rules(doc):
|
||||
count = len(rule["matches"])
|
||||
count = len(rule.matches)
|
||||
if count == 1:
|
||||
capability = rutils.bold(rule["meta"]["name"])
|
||||
capability = rutils.bold(rule.meta.name)
|
||||
else:
|
||||
capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)
|
||||
capability = "%s (%d matches)" % (rutils.bold(rule.meta.name), count)
|
||||
|
||||
ostream.writeln(capability)
|
||||
had_match = True
|
||||
|
||||
rows = []
|
||||
for key in ("namespace", "description", "scope"):
|
||||
if key == "name" or key not in rule["meta"]:
|
||||
v = getattr(rule.meta, key)
|
||||
if not v:
|
||||
continue
|
||||
|
||||
v = rule["meta"][key]
|
||||
if isinstance(v, list) and len(v) == 1:
|
||||
v = v[0]
|
||||
|
||||
rows.append((key, v))
|
||||
|
||||
if rule["meta"]["scope"] != capa.rules.FILE_SCOPE:
|
||||
locations = doc["rules"][rule["meta"]["name"]]["matches"].keys()
|
||||
rows.append(("matches", "\n".join(map(rutils.hex, locations))))
|
||||
if rule.meta.scope != capa.rules.FILE_SCOPE:
|
||||
locations = list(map(lambda m: m[0], doc.rules[rule.meta.name].matches))
|
||||
rows.append(("matches", "\n".join(map(format_address, locations))))
|
||||
|
||||
ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
|
||||
ostream.write("\n")
|
||||
@@ -118,7 +149,7 @@ def render_rules(ostream, doc):
|
||||
ostream.writeln(rutils.bold("no capabilities found"))
|
||||
|
||||
|
||||
def render_verbose(doc):
|
||||
def render_verbose(doc: rd.ResultDocument):
|
||||
ostream = rutils.StringIO()
|
||||
|
||||
render_meta(ostream, doc)
|
||||
@@ -131,5 +162,4 @@ def render_verbose(doc):
|
||||
|
||||
|
||||
def render(meta, rules: RuleSet, capabilities: MatchResults) -> str:
|
||||
doc = capa.render.result_document.convert_capabilities_to_result_document(meta, rules, capabilities)
|
||||
return render_verbose(doc)
|
||||
return render_verbose(rd.ResultDocument.from_capa(meta, rules, capabilities))
|
||||
|
||||
+171
-121
@@ -6,102 +6,135 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
from typing import Dict, List, Iterable
|
||||
|
||||
import tabulate
|
||||
|
||||
import capa.rules
|
||||
import capa.render.utils as rutils
|
||||
import capa.render.verbose
|
||||
import capa.features.common
|
||||
import capa.render.result_document
|
||||
import capa.features.freeze as frz
|
||||
import capa.features.address
|
||||
import capa.render.result_document as rd
|
||||
import capa.features.freeze.features as frzf
|
||||
from capa.rules import RuleSet
|
||||
from capa.engine import MatchResults
|
||||
|
||||
|
||||
def render_locations(ostream, match):
|
||||
def render_locations(ostream, locations: Iterable[frz.Address]):
|
||||
import capa.render.verbose as v
|
||||
|
||||
# it's possible to have an empty locations array here,
|
||||
# such as when we're in MODE_FAILURE and showing the logic
|
||||
# under a `not` statement (which will have no matched locations).
|
||||
locations = list(sorted(match.get("locations", [])))
|
||||
locations = list(sorted(locations))
|
||||
|
||||
if len(locations) == 0:
|
||||
return
|
||||
|
||||
ostream.write(" @ ")
|
||||
|
||||
if len(locations) == 1:
|
||||
ostream.write(" @ ")
|
||||
ostream.write(rutils.hex(locations[0]))
|
||||
ostream.write(v.format_address(locations[0]))
|
||||
|
||||
elif len(locations) > 4:
|
||||
# don't display too many locations, because it becomes very noisy.
|
||||
# probably only the first handful of locations will be useful for inspection.
|
||||
ostream.write(", ".join(map(v.format_address, locations[0:4])))
|
||||
ostream.write(", and %d more..." % (len(locations) - 4))
|
||||
|
||||
elif len(locations) > 1:
|
||||
ostream.write(" @ ")
|
||||
if len(locations) > 4:
|
||||
# don't display too many locations, because it becomes very noisy.
|
||||
# probably only the first handful of locations will be useful for inspection.
|
||||
ostream.write(", ".join(map(rutils.hex, locations[0:4])))
|
||||
ostream.write(", and %d more..." % (len(locations) - 4))
|
||||
else:
|
||||
ostream.write(", ".join(map(rutils.hex, locations)))
|
||||
ostream.write(", ".join(map(v.format_address, locations)))
|
||||
|
||||
else:
|
||||
raise RuntimeError("unreachable")
|
||||
|
||||
|
||||
def render_statement(ostream, match, statement, indent=0):
|
||||
def render_statement(ostream, match: rd.Match, statement: rd.Statement, indent=0):
|
||||
ostream.write(" " * indent)
|
||||
if statement["type"] in ("and", "or", "optional", "not", "subscope"):
|
||||
if statement["type"] == "subscope":
|
||||
# emit `basic block:`
|
||||
# rather than `subscope:`
|
||||
ostream.write(statement["subscope"])
|
||||
else:
|
||||
# emit `and:`
|
||||
ostream.write(statement["type"])
|
||||
|
||||
if isinstance(statement, rd.SubscopeStatement):
|
||||
# emit `basic block:`
|
||||
# rather than `subscope:`
|
||||
ostream.write(statement.scope)
|
||||
|
||||
ostream.write(":")
|
||||
if statement.get("description"):
|
||||
ostream.write(" = %s" % statement["description"])
|
||||
if statement.description:
|
||||
ostream.write(" = %s" % statement.description)
|
||||
ostream.writeln("")
|
||||
elif statement["type"] == "some":
|
||||
ostream.write("%d or more:" % (statement["count"]))
|
||||
if statement.get("description"):
|
||||
ostream.write(" = %s" % statement["description"])
|
||||
|
||||
elif isinstance(statement, (rd.AndStatement, rd.OrStatement, rd.OptionalStatement, rd.NotStatement)):
|
||||
# emit `and:` `or:` `optional:` `not:`
|
||||
ostream.write(statement.type)
|
||||
|
||||
ostream.write(":")
|
||||
if statement.description:
|
||||
ostream.write(" = %s" % statement.description)
|
||||
ostream.writeln("")
|
||||
elif statement["type"] == "range":
|
||||
|
||||
elif isinstance(statement, rd.SomeStatement):
|
||||
ostream.write("%d or more:" % (statement.count))
|
||||
|
||||
if statement.description:
|
||||
ostream.write(" = %s" % statement.description)
|
||||
ostream.writeln("")
|
||||
|
||||
elif isinstance(statement, rd.RangeStatement):
|
||||
# `range` is a weird node, it's almost a hybrid of statement+feature.
|
||||
# it is a specific feature repeated multiple times.
|
||||
# there's no additional logic in the feature part, just the existence of a feature.
|
||||
# so, we have to inline some of the feature rendering here.
|
||||
|
||||
child = statement["child"]
|
||||
child = statement.child
|
||||
value = getattr(child, child.type)
|
||||
|
||||
if value:
|
||||
if isinstance(child, frzf.StringFeature):
|
||||
value = '"%s"' % capa.features.common.escape_string(value)
|
||||
|
||||
if child[child["type"]]:
|
||||
if child["type"] == "string":
|
||||
value = '"%s"' % capa.features.common.escape_string(child[child["type"]])
|
||||
else:
|
||||
value = child[child["type"]]
|
||||
value = rutils.bold2(value)
|
||||
if child.get("description"):
|
||||
ostream.write("count(%s(%s = %s)): " % (child["type"], value, child["description"]))
|
||||
|
||||
if child.description:
|
||||
ostream.write("count(%s(%s = %s)): " % (child.type, value, child.description))
|
||||
else:
|
||||
ostream.write("count(%s(%s)): " % (child["type"], value))
|
||||
ostream.write("count(%s(%s)): " % (child.type, value))
|
||||
else:
|
||||
ostream.write("count(%s): " % child["type"])
|
||||
ostream.write("count(%s): " % child.type)
|
||||
|
||||
if statement["max"] == statement["min"]:
|
||||
ostream.write("%d" % (statement["min"]))
|
||||
elif statement["min"] == 0:
|
||||
ostream.write("%d or fewer" % (statement["max"]))
|
||||
elif statement["max"] == (1 << 64 - 1):
|
||||
ostream.write("%d or more" % (statement["min"]))
|
||||
if statement.max == statement.min:
|
||||
ostream.write("%d" % (statement.min))
|
||||
elif statement.min == 0:
|
||||
ostream.write("%d or fewer" % (statement.max))
|
||||
elif statement.max == (1 << 64 - 1):
|
||||
ostream.write("%d or more" % (statement.min))
|
||||
else:
|
||||
ostream.write("between %d and %d" % (statement["min"], statement["max"]))
|
||||
ostream.write("between %d and %d" % (statement.min, statement.max))
|
||||
|
||||
if statement.get("description"):
|
||||
ostream.write(" = %s" % statement["description"])
|
||||
render_locations(ostream, match)
|
||||
if statement.description:
|
||||
ostream.write(" = %s" % statement.description)
|
||||
render_locations(ostream, match.locations)
|
||||
ostream.writeln("")
|
||||
|
||||
else:
|
||||
raise RuntimeError("unexpected match statement type: " + str(statement))
|
||||
|
||||
|
||||
def render_string_value(s):
|
||||
def render_string_value(s: str) -> str:
|
||||
return '"%s"' % capa.features.common.escape_string(s)
|
||||
|
||||
|
||||
def render_feature(ostream, match, feature, indent=0):
|
||||
def render_feature(ostream, match: rd.Match, feature: frzf.Feature, indent=0):
|
||||
ostream.write(" " * indent)
|
||||
|
||||
key = feature["type"]
|
||||
value = feature[feature["type"]]
|
||||
key = feature.type
|
||||
if isinstance(feature, frzf.ImportFeature):
|
||||
# fixup access to Python reserved name
|
||||
value = feature.import_
|
||||
if isinstance(feature, frzf.ClassFeature):
|
||||
value = feature.class_
|
||||
else:
|
||||
value = getattr(feature, key)
|
||||
|
||||
if key not in ("regex", "substring"):
|
||||
# like:
|
||||
@@ -115,12 +148,12 @@ def render_feature(ostream, match, feature, indent=0):
|
||||
if value:
|
||||
ostream.write(rutils.bold2(value))
|
||||
|
||||
if "description" in feature:
|
||||
if feature.description:
|
||||
ostream.write(capa.rules.DESCRIPTION_SEPARATOR)
|
||||
ostream.write(feature["description"])
|
||||
ostream.write(feature.description)
|
||||
|
||||
if key not in ("os", "arch"):
|
||||
render_locations(ostream, match)
|
||||
render_locations(ostream, match.locations)
|
||||
ostream.write("\n")
|
||||
else:
|
||||
# like:
|
||||
@@ -132,19 +165,19 @@ def render_feature(ostream, match, feature, indent=0):
|
||||
ostream.write(value)
|
||||
ostream.write("\n")
|
||||
|
||||
for match, locations in sorted(feature["matches"].items(), key=lambda p: p[0]):
|
||||
for capture, locations in sorted(match.captures.items()):
|
||||
ostream.write(" " * (indent + 1))
|
||||
ostream.write("- ")
|
||||
ostream.write(rutils.bold2(render_string_value(match)))
|
||||
render_locations(ostream, {"locations": locations})
|
||||
ostream.write(rutils.bold2(render_string_value(capture)))
|
||||
render_locations(ostream, locations)
|
||||
ostream.write("\n")
|
||||
|
||||
|
||||
def render_node(ostream, match, node, indent=0):
|
||||
if node["type"] == "statement":
|
||||
render_statement(ostream, match, node["statement"], indent=indent)
|
||||
elif node["type"] == "feature":
|
||||
render_feature(ostream, match, node["feature"], indent=indent)
|
||||
def render_node(ostream, match: rd.Match, node: rd.Node, indent=0):
|
||||
if isinstance(node, rd.StatementNode):
|
||||
render_statement(ostream, match, node.statement, indent=indent)
|
||||
elif isinstance(node, rd.FeatureNode):
|
||||
render_feature(ostream, match, node.feature, indent=indent)
|
||||
else:
|
||||
raise RuntimeError("unexpected node type: " + str(node))
|
||||
|
||||
@@ -157,42 +190,45 @@ MODE_SUCCESS = "success"
|
||||
MODE_FAILURE = "failure"
|
||||
|
||||
|
||||
def render_match(ostream, match, indent=0, mode=MODE_SUCCESS):
|
||||
def render_match(ostream, match: rd.Match, indent=0, mode=MODE_SUCCESS):
|
||||
child_mode = mode
|
||||
if mode == MODE_SUCCESS:
|
||||
# display only nodes that evaluated successfully.
|
||||
if not match["success"]:
|
||||
if not match.success:
|
||||
return
|
||||
|
||||
# optional statement with no successful children is empty
|
||||
if match["node"].get("statement", {}).get("type") == "optional" and not any(
|
||||
map(lambda m: m["success"], match["children"])
|
||||
):
|
||||
return
|
||||
if isinstance(match.node, rd.StatementNode) and isinstance(match.node.statement, rd.OptionalStatement):
|
||||
if not any(map(lambda m: m.success, match.children)):
|
||||
return
|
||||
|
||||
# not statement, so invert the child mode to show failed evaluations
|
||||
if match["node"].get("statement", {}).get("type") == "not":
|
||||
if isinstance(match.node, rd.StatementNode) and isinstance(match.node.statement, rd.NotStatement):
|
||||
child_mode = MODE_FAILURE
|
||||
|
||||
elif mode == MODE_FAILURE:
|
||||
# display only nodes that did not evaluate to True
|
||||
if match["success"]:
|
||||
if match.success:
|
||||
return
|
||||
|
||||
# optional statement with successful children is not relevant
|
||||
if match["node"].get("statement", {}).get("type") == "optional" and any(
|
||||
map(lambda m: m["success"], match["children"])
|
||||
):
|
||||
return
|
||||
if isinstance(match.node, rd.StatementNode) and isinstance(match.node.statement, rd.OptionalStatement):
|
||||
if any(map(lambda m: m.success, match.children)):
|
||||
return
|
||||
|
||||
# not statement, so invert the child mode to show successful evaluations
|
||||
if match["node"].get("statement", {}).get("type") == "not":
|
||||
if isinstance(match.node, rd.StatementNode) and isinstance(match.node.statement, rd.NotStatement):
|
||||
child_mode = MODE_SUCCESS
|
||||
else:
|
||||
raise RuntimeError("unexpected mode: " + mode)
|
||||
|
||||
render_node(ostream, match, match["node"], indent=indent)
|
||||
render_node(ostream, match, match.node, indent=indent)
|
||||
|
||||
for child in match["children"]:
|
||||
for child in match.children:
|
||||
render_match(ostream, child, indent=indent + 1, mode=child_mode)
|
||||
|
||||
|
||||
def render_rules(ostream, doc):
|
||||
def render_rules(ostream, doc: rd.ResultDocument):
|
||||
"""
|
||||
like:
|
||||
|
||||
@@ -208,76 +244,91 @@ def render_rules(ostream, doc):
|
||||
api: kernel32.GetLastError @ 0x10004A87
|
||||
api: kernel32.OutputDebugString @ 0x10004767, 0x10004787, 0x10004816, 0x10004895
|
||||
"""
|
||||
functions_by_bb = {}
|
||||
for function, info in doc["meta"]["analysis"]["layout"]["functions"].items():
|
||||
for bb in info["matched_basic_blocks"]:
|
||||
functions_by_bb[bb] = function
|
||||
functions_by_bb: Dict[capa.features.address.Address, capa.features.address.Address] = {}
|
||||
for finfo in doc.meta.analysis.layout.functions:
|
||||
faddress = finfo.address.to_capa()
|
||||
|
||||
for bb in finfo.matched_basic_blocks:
|
||||
bbaddress = bb.address.to_capa()
|
||||
functions_by_bb[bbaddress] = faddress
|
||||
|
||||
had_match = False
|
||||
|
||||
for (_, _, rule) in sorted(
|
||||
map(lambda rule: (rule["meta"].get("namespace", ""), rule["meta"]["name"], rule), doc["rules"].values())
|
||||
):
|
||||
for (_, _, rule) in sorted(map(lambda rule: (rule.meta.namespace or "", rule.meta.name, rule), doc.rules.values())):
|
||||
# default scope hides things like lib rules, malware-category rules, etc.
|
||||
# but in vverbose mode, we really want to show everything.
|
||||
#
|
||||
# still ignore subscope rules because they're stitched into the final document.
|
||||
if rule["meta"].get("capa/subscope"):
|
||||
if rule.meta.is_subscope_rule:
|
||||
continue
|
||||
|
||||
count = len(rule["matches"])
|
||||
count = len(rule.matches)
|
||||
if count == 1:
|
||||
capability = rutils.bold(rule["meta"]["name"])
|
||||
capability = rutils.bold(rule.meta.name)
|
||||
else:
|
||||
capability = "%s (%d matches)" % (rutils.bold(rule["meta"]["name"]), count)
|
||||
capability = "%s (%d matches)" % (rutils.bold(rule.meta.name), count)
|
||||
|
||||
ostream.writeln(capability)
|
||||
had_match = True
|
||||
|
||||
rows = []
|
||||
for key in capa.rules.META_KEYS:
|
||||
if key == "name" or key not in rule["meta"]:
|
||||
continue
|
||||
rows.append(("namespace", rule.meta.namespace))
|
||||
|
||||
if key == "examples":
|
||||
# I can't think of a reason that an analyst would pivot to the concrete example
|
||||
# directly from the capa output.
|
||||
# the more likely flow is to review the rule and go from there.
|
||||
# so, don't make the output messy by showing the examples.
|
||||
continue
|
||||
if rule.meta.maec.analysis_conclusion or rule.meta.maec.analysis_conclusion_ov:
|
||||
rows.append(
|
||||
(
|
||||
"maec/analysis-conclusion",
|
||||
rule.meta.maec.analysis_conclusion or rule.meta.maec.analysis_conclusion_ov,
|
||||
)
|
||||
)
|
||||
|
||||
v = rule["meta"][key]
|
||||
if not v:
|
||||
continue
|
||||
if rule.meta.maec.malware_family:
|
||||
rows.append(("maec/malware-family", rule.meta.maec.malware_family))
|
||||
|
||||
if key in ("att&ck", "mbc"):
|
||||
v = [rutils.format_parts_id(vv) for vv in v]
|
||||
if rule.meta.maec.malware_category or rule.meta.maec.malware_category:
|
||||
rows.append(
|
||||
("maec/malware-category", rule.meta.maec.malware_category or rule.meta.maec.malware_category_ov)
|
||||
)
|
||||
|
||||
if isinstance(v, list) and len(v) == 1:
|
||||
v = v[0]
|
||||
elif isinstance(v, list) and len(v) > 1:
|
||||
v = ", ".join(v)
|
||||
rows.append((key, v))
|
||||
rows.append(("author", ", ".join(rule.meta.authors)))
|
||||
|
||||
rows.append(("scope", rule.meta.scope.value))
|
||||
|
||||
if rule.meta.attack:
|
||||
rows.append(("att&ck", ", ".join([rutils.format_parts_id(v) for v in rule.meta.attack])))
|
||||
|
||||
if rule.meta.mbc:
|
||||
rows.append(("mbc", ", ".join([rutils.format_parts_id(v) for v in rule.meta.mbc])))
|
||||
|
||||
if rule.meta.references:
|
||||
rows.append(("references", ", ".join(rule.meta.references)))
|
||||
|
||||
if rule.meta.description:
|
||||
rows.append(("description", rule.meta.description))
|
||||
|
||||
ostream.writeln(tabulate.tabulate(rows, tablefmt="plain"))
|
||||
|
||||
if rule["meta"]["scope"] == capa.rules.FILE_SCOPE:
|
||||
matches = list(doc["rules"][rule["meta"]["name"]]["matches"].values())
|
||||
if rule.meta.scope == capa.rules.FILE_SCOPE:
|
||||
matches = doc.rules[rule.meta.name].matches
|
||||
if len(matches) != 1:
|
||||
# i think there should only ever be one match per file-scope rule,
|
||||
# because we do the file-scope evaluation a single time.
|
||||
# but i'm not 100% sure if this is/will always be true.
|
||||
# so, let's be explicit about our assumptions and raise an exception if they fail.
|
||||
raise RuntimeError("unexpected file scope match count: %d" % (len(matches)))
|
||||
render_match(ostream, matches[0], indent=0)
|
||||
first_address, first_match = matches[0]
|
||||
render_match(ostream, first_match, indent=0)
|
||||
else:
|
||||
for location, match in sorted(doc["rules"][rule["meta"]["name"]]["matches"].items()):
|
||||
ostream.write(rule["meta"]["scope"])
|
||||
for location, match in sorted(doc.rules[rule.meta.name].matches):
|
||||
ostream.write(rule.meta.scope)
|
||||
ostream.write(" @ ")
|
||||
ostream.write(rutils.hex(location))
|
||||
ostream.write(capa.render.verbose.format_address(location))
|
||||
|
||||
if rule["meta"]["scope"] == capa.rules.BASIC_BLOCK_SCOPE:
|
||||
ostream.write(" in function " + rutils.hex(functions_by_bb[location]))
|
||||
if rule.meta.scope == capa.rules.BASIC_BLOCK_SCOPE:
|
||||
ostream.write(
|
||||
" in function "
|
||||
+ capa.render.verbose.format_address(frz.Address.from_capa(functions_by_bb[location.to_capa()]))
|
||||
)
|
||||
|
||||
ostream.write("\n")
|
||||
render_match(ostream, match, indent=1)
|
||||
@@ -287,7 +338,7 @@ def render_rules(ostream, doc):
|
||||
ostream.writeln(rutils.bold("no capabilities found"))
|
||||
|
||||
|
||||
def render_vverbose(doc):
|
||||
def render_vverbose(doc: rd.ResultDocument):
|
||||
ostream = rutils.StringIO()
|
||||
|
||||
capa.render.verbose.render_meta(ostream, doc)
|
||||
@@ -300,5 +351,4 @@ def render_vverbose(doc):
|
||||
|
||||
|
||||
def render(meta, rules: RuleSet, capabilities: MatchResults) -> str:
|
||||
doc = capa.render.result_document.convert_capabilities_to_result_document(meta, rules, capabilities)
|
||||
return render_vverbose(doc)
|
||||
return render_vverbose(rd.ResultDocument.from_capa(meta, rules, capabilities))
|
||||
|
||||
+8
-6
@@ -12,7 +12,6 @@ import uuid
|
||||
import codecs
|
||||
import logging
|
||||
import binascii
|
||||
import functools
|
||||
import collections
|
||||
from enum import Enum
|
||||
|
||||
@@ -40,6 +39,7 @@ import capa.features.common
|
||||
import capa.features.basicblock
|
||||
from capa.engine import Statement, FeatureSet
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, Feature
|
||||
from capa.features.address import Address
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -48,7 +48,6 @@ logger = logging.getLogger(__name__)
|
||||
META_KEYS = (
|
||||
"name",
|
||||
"namespace",
|
||||
"rule-category",
|
||||
"maec/analysis-conclusion",
|
||||
"maec/analysis-conclusion-ov",
|
||||
"maec/malware-family",
|
||||
@@ -692,6 +691,9 @@ class Rule:
|
||||
for new_rule in self._extract_subscope_rules_rec(child):
|
||||
yield new_rule
|
||||
|
||||
def is_subscope_rule(self):
    """report whether this rule's metadata has a truthy `capa/subscope-rule` entry."""
    flag = self.meta.get("capa/subscope-rule", False)
    return bool(flag)
|
||||
|
||||
def extract_subscope_rules(self):
|
||||
"""
|
||||
scan through the statements of this rule,
|
||||
@@ -1253,7 +1255,7 @@ class RuleSet:
|
||||
# at lower scope, e.g. function scope.
|
||||
# so, we find all dependencies of all rules, and later will filter them down.
|
||||
for rule in rules:
|
||||
if rule.meta.get("capa/subscope-rule", False):
|
||||
if rule.is_subscope_rule():
|
||||
continue
|
||||
|
||||
scope_rules.update(get_rules_and_dependencies(rules, rule.name))
|
||||
@@ -1306,7 +1308,7 @@ class RuleSet:
|
||||
break
|
||||
return RuleSet(list(rules_filtered))
|
||||
|
||||
def match(self, scope: Scope, features: FeatureSet, va: int) -> Tuple[FeatureSet, ceng.MatchResults]:
|
||||
def match(self, scope: Scope, features: FeatureSet, addr: Address) -> Tuple[FeatureSet, ceng.MatchResults]:
|
||||
"""
|
||||
match rules from this ruleset at the given scope against the given features.
|
||||
|
||||
@@ -1338,7 +1340,7 @@ class RuleSet:
|
||||
# first, match against the set of rules that have at least one
|
||||
# feature shared with our feature set.
|
||||
candidate_rules = [self.rules[name] for name in candidate_rule_names]
|
||||
features2, easy_matches = ceng.match(candidate_rules, features, va)
|
||||
features2, easy_matches = ceng.match(candidate_rules, features, addr)
|
||||
|
||||
# note that we've stored the updated feature set in `features2`.
|
||||
# this contains a superset of the features in `features`;
|
||||
@@ -1357,7 +1359,7 @@ class RuleSet:
|
||||
# that we can't really make any guesses about.
|
||||
# these are rules with hard features, like substring/regex/bytes and match statements.
|
||||
hard_rules = [self.rules[name] for name in hard_rule_names]
|
||||
features3, hard_matches = ceng.match(hard_rules, features2, va)
|
||||
features3, hard_matches = ceng.match(hard_rules, features2, addr)
|
||||
|
||||
# note that above, we probably are skipping matching a bunch of
|
||||
# rules that definitely would never hit.
|
||||
|
||||
+6
-14
@@ -68,6 +68,7 @@ import capa
|
||||
import capa.main
|
||||
import capa.rules
|
||||
import capa.render.json
|
||||
import capa.render.result_document as rd
|
||||
|
||||
logger = logging.getLogger("capa")
|
||||
|
||||
@@ -126,19 +127,14 @@ def get_capa_results(args):
|
||||
"error": "unexpected error: %s" % (e),
|
||||
}
|
||||
|
||||
meta = capa.main.collect_metadata("", path, "", extractor)
|
||||
meta = capa.main.collect_metadata([], path, [], extractor)
|
||||
capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)
|
||||
meta["analysis"].update(counts)
|
||||
meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities)
|
||||
|
||||
return {
|
||||
"path": path,
|
||||
"status": "ok",
|
||||
"ok": {
|
||||
"meta": meta,
|
||||
"capabilities": capabilities,
|
||||
},
|
||||
}
|
||||
doc = rd.ResultDocument.from_capa(meta, rules, capabilities)
|
||||
|
||||
return {"path": path, "status": "ok", "ok": doc.dict(exclude_none=True)}
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
@@ -205,11 +201,7 @@ def main(argv=None):
|
||||
if result["status"] == "error":
|
||||
logger.warning(result["error"])
|
||||
elif result["status"] == "ok":
|
||||
meta = result["ok"]["meta"]
|
||||
capabilities = result["ok"]["capabilities"]
|
||||
# our renderer expects to emit a json document for a single sample
|
||||
# so we deserialize the json document, store it in a larger dict, and we'll subsequently re-encode.
|
||||
results[result["path"]] = json.loads(capa.render.json.render(meta, rules, capabilities))
|
||||
results[result["path"]] = rd.ResultDocument.parse_obj(result["ok"]).json(exclude_none=True)
|
||||
else:
|
||||
raise ValueError("unexpected status: %s" % (result["status"]))
|
||||
|
||||
|
||||
@@ -535,7 +535,7 @@ def convert_rules(rules, namespaces, cround):
|
||||
|
||||
rule_name = convert_rule_name(rule.name)
|
||||
|
||||
if rule.meta.get("capa/subscope-rule", False):
|
||||
if rule.is_subscope_rule():
|
||||
logger.info("skipping sub scope rule capa: " + rule.name)
|
||||
continue
|
||||
|
||||
|
||||
+2
-5
@@ -800,15 +800,12 @@ def lint_rule(ctx: Context, rule: Rule):
|
||||
# this is by far the most common reason to be in the nursery,
|
||||
# and ends up just producing a lot of noise.
|
||||
if not (is_nursery_rule(rule) and len(violations) == 1 and violations[0].name == "missing examples"):
|
||||
category = rule.meta.get("rule-category")
|
||||
|
||||
print("")
|
||||
print(
|
||||
"%s%s %s"
|
||||
"%s%s"
|
||||
% (
|
||||
" (nursery) " if is_nursery_rule(rule) else "",
|
||||
rule.name,
|
||||
("(%s)" % category) if category else "",
|
||||
)
|
||||
)
|
||||
|
||||
@@ -904,7 +901,7 @@ def lint(ctx: Context):
|
||||
with tqdm.contrib.logging.tqdm_logging_redirect(ctx.rules.rules.items(), unit="rule") as pbar:
|
||||
with redirecting_print_to_tqdm():
|
||||
for name, rule in pbar:
|
||||
if rule.meta.get("capa/subscope-rule", False):
|
||||
if rule.is_subscope_rule():
|
||||
continue
|
||||
|
||||
pbar.set_description(width("linting rule: %s" % (name), 48))
|
||||
|
||||
@@ -53,6 +53,7 @@ import sys
|
||||
import logging
|
||||
import argparse
|
||||
import collections
|
||||
from typing import Dict
|
||||
|
||||
import colorama
|
||||
|
||||
@@ -63,14 +64,16 @@ import capa.helpers
|
||||
import capa.features
|
||||
import capa.exceptions
|
||||
import capa.render.utils as rutils
|
||||
import capa.render.verbose
|
||||
import capa.features.freeze
|
||||
import capa.render.result_document
|
||||
import capa.render.result_document as rd
|
||||
from capa.helpers import get_file_taste
|
||||
from capa.features.freeze import Address
|
||||
|
||||
logger = logging.getLogger("capa.show-capabilities-by-function")
|
||||
|
||||
|
||||
def render_matches_by_function(doc):
|
||||
def render_matches_by_function(doc: rd.ResultDocument):
|
||||
"""
|
||||
like:
|
||||
|
||||
@@ -89,32 +92,34 @@ def render_matches_by_function(doc):
|
||||
- send HTTP request
|
||||
- connect to HTTP server
|
||||
"""
|
||||
functions_by_bb = {}
|
||||
for function, info in doc["meta"]["analysis"]["layout"]["functions"].items():
|
||||
for bb in info["matched_basic_blocks"]:
|
||||
functions_by_bb[bb] = function
|
||||
functions_by_bb: Dict[Address, Address] = {}
|
||||
for finfo in doc.meta.analysis.layout.functions:
|
||||
faddress = finfo.address
|
||||
|
||||
for bb in finfo.matched_basic_blocks:
|
||||
bbaddress = bb.address
|
||||
functions_by_bb[bbaddress] = faddress
|
||||
|
||||
ostream = rutils.StringIO()
|
||||
|
||||
matches_by_function = collections.defaultdict(set)
|
||||
for rule in rutils.capability_rules(doc):
|
||||
if rule["meta"]["scope"] == capa.rules.FUNCTION_SCOPE:
|
||||
for va in rule["matches"].keys():
|
||||
matches_by_function[va].add(rule["meta"]["name"])
|
||||
elif rule["meta"]["scope"] == capa.rules.BASIC_BLOCK_SCOPE:
|
||||
for va in rule["matches"].keys():
|
||||
function = functions_by_bb[va]
|
||||
matches_by_function[function].add(rule["meta"]["name"])
|
||||
if rule.meta.scope == capa.rules.FUNCTION_SCOPE:
|
||||
for addr, _ in rule.matches:
|
||||
matches_by_function[addr].add(rule.meta.name)
|
||||
elif rule.meta.scope == capa.rules.BASIC_BLOCK_SCOPE:
|
||||
for addr, _ in rule.matches:
|
||||
function = functions_by_bb[addr]
|
||||
matches_by_function[function].add(rule.meta.name)
|
||||
else:
|
||||
# file scope
|
||||
pass
|
||||
|
||||
for va, feature_count in sorted(doc["meta"]["analysis"]["feature_counts"]["functions"].items()):
|
||||
va = int(va)
|
||||
if not matches_by_function.get(va, {}):
|
||||
for f in doc.meta.analysis.feature_counts.functions:
|
||||
if not matches_by_function.get(f.address, {}):
|
||||
continue
|
||||
ostream.writeln("function at 0x%X with %d features: " % (va, feature_count))
|
||||
for rule_name in sorted(matches_by_function[va]):
|
||||
# report the function currently being rendered (`f`), not the stale `addr` left over from the rule-match loops above
ostream.writeln("function at %s with %d features: " % (capa.render.verbose.format_address(f.address), f.count))
|
||||
for rule_name in sorted(matches_by_function[f.address]):
|
||||
ostream.writeln(" - " + rule_name)
|
||||
|
||||
return ostream.getvalue()
|
||||
@@ -187,7 +192,7 @@ def main(argv=None):
|
||||
# - when not an interactive session, and disable coloring
|
||||
# renderers should use coloring and assume it will be stripped out if necessary.
|
||||
colorama.init()
|
||||
doc = capa.render.result_document.convert_capabilities_to_result_document(meta, rules, capabilities)
|
||||
doc = rd.ResultDocument.from_capa(meta, rules, capabilities)
|
||||
print(render_matches_by_function(doc))
|
||||
colorama.deinit()
|
||||
|
||||
|
||||
+55
-41
@@ -76,13 +76,19 @@ import capa.engine
|
||||
import capa.helpers
|
||||
import capa.features
|
||||
import capa.exceptions
|
||||
import capa.render.verbose as v
|
||||
import capa.features.common
|
||||
import capa.features.freeze
|
||||
import capa.features.extractors.base_extractor
|
||||
from capa.helpers import log_unsupported_runtime_error
|
||||
|
||||
logger = logging.getLogger("capa.show-features")
|
||||
|
||||
|
||||
def format_address(addr: capa.features.address.Address) -> str:
    """
    render the given capa address as text.

    the address is first converted via `capa.features.freeze.Address.from_capa`
    and the result is handed to `format_address` from `capa.render.verbose` (imported here as `v`).
    """
    frozen = capa.features.freeze.Address.from_capa(addr)
    return v.format_address(frozen)
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
if argv is None:
|
||||
argv = sys.argv[1:]
|
||||
@@ -90,7 +96,7 @@ def main(argv=None):
|
||||
parser = argparse.ArgumentParser(description="Show the features that capa extracts from the given sample")
|
||||
capa.main.install_common_args(parser, wanted={"format", "sample", "signatures", "backend"})
|
||||
|
||||
parser.add_argument("-F", "--function", type=lambda x: int(x, 0x10), help="Show features for specific function")
|
||||
parser.add_argument("-F", "--function", type=str, help="Show features for specific function")
|
||||
args = parser.parse_args(args=argv)
|
||||
capa.main.handle_common_args(args)
|
||||
|
||||
@@ -122,36 +128,31 @@ def main(argv=None):
|
||||
log_unsupported_runtime_error()
|
||||
return -1
|
||||
|
||||
for feature, va in extractor.extract_global_features():
|
||||
if va:
|
||||
print("global: 0x%08x: %s" % (va, feature))
|
||||
else:
|
||||
print("global: 0x00000000: %s" % (feature))
|
||||
for feature, addr in extractor.extract_global_features():
|
||||
print("global: %s: %s" % (format_address(addr), feature))
|
||||
|
||||
if not args.function:
|
||||
for feature, va in extractor.extract_file_features():
|
||||
if va:
|
||||
print("file: 0x%08x: %s" % (va, feature))
|
||||
else:
|
||||
print("file: 0x00000000: %s" % (feature))
|
||||
for feature, addr in extractor.extract_file_features():
|
||||
print("file: %s: %s" % (format_address(addr), feature))
|
||||
|
||||
functions = extractor.get_functions()
|
||||
function_handles = extractor.get_functions()
|
||||
|
||||
if args.function:
|
||||
if args.format == "freeze":
|
||||
functions = tuple(filter(lambda f: f == args.function, functions))
|
||||
# `-F/--function` is parsed as a string, so match on the rendered address,
# the same way the non-freeze branch does; comparing the address object
# itself against that string does not select the requested function.
function_handles = tuple(filter(lambda fh: format_address(fh.address) == args.function, function_handles))
|
||||
else:
|
||||
functions = tuple(filter(lambda f: int(f) == args.function, functions))
|
||||
function_handles = tuple(filter(lambda fh: format_address(fh.address) == args.function, function_handles))
|
||||
|
||||
if args.function not in [int(f) for f in functions]:
|
||||
print("0x%X not a function" % args.function)
|
||||
if args.function not in [format_address(fh.address) for fh in function_handles]:
|
||||
print("%s not a function" % args.function)
|
||||
return -1
|
||||
|
||||
if len(functions) == 0:
|
||||
print("0x%X not a function")
|
||||
if len(function_handles) == 0:
|
||||
# interpolate the requested address into the message;
# passing it as a second print() argument would emit a literal "%s".
print("%s not a function" % args.function)
|
||||
return -1
|
||||
|
||||
print_features(functions, extractor)
|
||||
print_features(function_handles, extractor)
|
||||
|
||||
return 0
|
||||
|
||||
@@ -167,58 +168,71 @@ def ida_main():
|
||||
extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor()
|
||||
|
||||
if not function:
|
||||
for feature, va in extractor.extract_file_features():
|
||||
if va:
|
||||
print("file: 0x%08x: %s" % (va, feature))
|
||||
else:
|
||||
print("file: 0x00000000: %s" % (feature))
|
||||
for feature, addr in extractor.extract_file_features():
|
||||
print("file: %s: %s" % (format_address(addr), feature))
|
||||
return
|
||||
|
||||
functions = extractor.get_functions()
|
||||
function_handles = extractor.get_functions()
|
||||
|
||||
if function:
|
||||
functions = tuple(filter(lambda f: f.start_ea == function, functions))
|
||||
function_handles = tuple(filter(lambda fh: fh.inner.start_ea == function, function_handles))
|
||||
|
||||
if len(functions) == 0:
|
||||
if len(function_handles) == 0:
|
||||
print("0x%X not a function" % function)
|
||||
return -1
|
||||
|
||||
print_features(functions, extractor)
|
||||
print_features(function_handles, extractor)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def print_features(functions, extractor):
|
||||
def print_features(functions, extractor: capa.features.extractors.base_extractor.FeatureExtractor):
|
||||
for f in functions:
|
||||
function_address = int(f)
|
||||
|
||||
if extractor.is_library_function(function_address):
|
||||
function_name = extractor.get_function_name(function_address)
|
||||
logger.debug("skipping library function 0x%x (%s)", function_address, function_name)
|
||||
if extractor.is_library_function(f.address):
|
||||
function_name = extractor.get_function_name(f.address)
|
||||
logger.debug("skipping library function %s (%s)", format_address(f.address), function_name)
|
||||
continue
|
||||
|
||||
print("func: 0x%08x" % (function_address))
|
||||
print("func: %s" % (format_address(f.address)))
|
||||
|
||||
for feature, va in extractor.extract_function_features(f):
|
||||
for feature, addr in extractor.extract_function_features(f):
|
||||
if capa.features.common.is_global_feature(feature):
|
||||
continue
|
||||
|
||||
print("func: 0x%08x: %s" % (va, feature))
|
||||
if f.address != addr:
|
||||
print(" func: %s: %s -> %s" % (format_address(f.address), feature, format_address(addr)))
|
||||
else:
|
||||
print(" func: %s: %s" % (format_address(f.address), feature))
|
||||
|
||||
for bb in extractor.get_basic_blocks(f):
|
||||
for feature, va in extractor.extract_basic_block_features(f, bb):
|
||||
for feature, addr in extractor.extract_basic_block_features(f, bb):
|
||||
if capa.features.common.is_global_feature(feature):
|
||||
continue
|
||||
|
||||
print("bb : 0x%08x: %s" % (va, feature))
|
||||
if bb.address != addr:
|
||||
print(" bb: %s: %s -> %s" % (format_address(bb.address), feature, format_address(addr)))
|
||||
else:
|
||||
print(" bb: %s: %s" % (format_address(bb.address), feature))
|
||||
|
||||
for insn in extractor.get_instructions(f, bb):
|
||||
for feature, va in extractor.extract_insn_features(f, bb, insn):
|
||||
for feature, addr in extractor.extract_insn_features(f, bb, insn):
|
||||
if capa.features.common.is_global_feature(feature):
|
||||
continue
|
||||
|
||||
try:
|
||||
print("insn: 0x%08x: %s" % (va, feature))
|
||||
if insn.address != addr:
|
||||
print(
|
||||
" insn: %s: %s: %s -> %s"
|
||||
% (
|
||||
format_address(f.address),
|
||||
format_address(insn.address),
|
||||
feature,
|
||||
format_address(addr),
|
||||
)
|
||||
)
|
||||
else:
|
||||
print(" insn: %s: %s" % (format_address(insn.address), feature))
|
||||
|
||||
except UnicodeEncodeError:
|
||||
# may be an issue while piping to less and encountering non-ascii characters
|
||||
continue
|
||||
|
||||
@@ -28,6 +28,7 @@ requirements = [
|
||||
"pyelftools==0.28",
|
||||
"dnfile==0.11.0",
|
||||
"dncil==1.0.0",
|
||||
"pydantic==1.9.1",
|
||||
]
|
||||
|
||||
# this sets __version__
|
||||
|
||||
+93
-42
@@ -13,6 +13,7 @@ import binascii
|
||||
import itertools
|
||||
import contextlib
|
||||
import collections
|
||||
from typing import Set, Dict
|
||||
from functools import lru_cache
|
||||
|
||||
import pytest
|
||||
@@ -34,7 +35,11 @@ from capa.features.common import (
|
||||
FORMAT_DOTNET,
|
||||
Arch,
|
||||
Format,
|
||||
Feature,
|
||||
)
|
||||
from capa.features.address import Address
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||
from capa.features.extractors.dnfile.extractor import DnfileFeatureExtractor
|
||||
|
||||
CD = os.path.dirname(__file__)
|
||||
DOTNET_DIR = os.path.join(CD, "data", "dotnet")
|
||||
@@ -133,20 +138,35 @@ def get_smda_extractor(path):
|
||||
def get_pefile_extractor(path):
|
||||
import capa.features.extractors.pefile
|
||||
|
||||
return capa.features.extractors.pefile.PefileFeatureExtractor(path)
|
||||
extractor = capa.features.extractors.pefile.PefileFeatureExtractor(path)
|
||||
|
||||
# overload the extractor so that the fixture exposes `extractor.path`
|
||||
setattr(extractor, "path", path)
|
||||
|
||||
return extractor
|
||||
|
||||
|
||||
def get_dotnetfile_extractor(path):
|
||||
import capa.features.extractors.dotnetfile
|
||||
|
||||
return capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(path)
|
||||
extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(path)
|
||||
|
||||
# overload the extractor so that the fixture exposes `extractor.path`
|
||||
setattr(extractor, "path", path)
|
||||
|
||||
return extractor
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def get_dnfile_extractor(path):
|
||||
import capa.features.extractors.dnfile.extractor
|
||||
|
||||
return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)
|
||||
extractor = capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)
|
||||
|
||||
# overload the extractor so that the fixture exposes `extractor.path`
|
||||
setattr(extractor, "path", path)
|
||||
|
||||
return extractor
|
||||
|
||||
|
||||
def extract_global_features(extractor):
|
||||
@@ -165,35 +185,35 @@ def extract_file_features(extractor):
|
||||
|
||||
|
||||
# f may not be hashable (e.g. ida func_t) so cannot @lru_cache this
|
||||
def extract_function_features(extractor, f):
|
||||
def extract_function_features(extractor, fh):
|
||||
features = collections.defaultdict(set)
|
||||
for bb in extractor.get_basic_blocks(f):
|
||||
for insn in extractor.get_instructions(f, bb):
|
||||
for feature, va in extractor.extract_insn_features(f, bb, insn):
|
||||
for bb in extractor.get_basic_blocks(fh):
|
||||
for insn in extractor.get_instructions(fh, bb):
|
||||
for feature, va in extractor.extract_insn_features(fh, bb, insn):
|
||||
features[feature].add(va)
|
||||
for feature, va in extractor.extract_basic_block_features(f, bb):
|
||||
for feature, va in extractor.extract_basic_block_features(fh, bb):
|
||||
features[feature].add(va)
|
||||
for feature, va in extractor.extract_function_features(f):
|
||||
for feature, va in extractor.extract_function_features(fh):
|
||||
features[feature].add(va)
|
||||
return features
|
||||
|
||||
|
||||
# f may not be hashable (e.g. ida func_t) so cannot @lru_cache this
|
||||
def extract_basic_block_features(extractor, f, bb):
|
||||
def extract_basic_block_features(extractor, fh, bbh):
|
||||
features = collections.defaultdict(set)
|
||||
for insn in extractor.get_instructions(f, bb):
|
||||
for feature, va in extractor.extract_insn_features(f, bb, insn):
|
||||
for insn in extractor.get_instructions(fh, bbh):
|
||||
for feature, va in extractor.extract_insn_features(fh, bbh, insn):
|
||||
features[feature].add(va)
|
||||
for feature, va in extractor.extract_basic_block_features(f, bb):
|
||||
for feature, va in extractor.extract_basic_block_features(fh, bbh):
|
||||
features[feature].add(va)
|
||||
return features
|
||||
|
||||
|
||||
# f may not be hashable (e.g. ida func_t) so cannot @lru_cache this
|
||||
def extract_instruction_features(extractor, f, bb, insn):
|
||||
def extract_instruction_features(extractor, fh, bbh, ih) -> Dict[Feature, Set[Address]]:
|
||||
features = collections.defaultdict(set)
|
||||
for feature, va in extractor.extract_insn_features(f, bb, insn):
|
||||
features[feature].add(va)
|
||||
for feature, addr in extractor.extract_insn_features(fh, bbh, ih):
|
||||
features[feature].add(addr)
|
||||
return features
|
||||
|
||||
|
||||
@@ -257,6 +277,8 @@ def get_data_path_by_name(name):
|
||||
return os.path.join(DNFILE_TESTFILES, "hello-world", "hello-world.exe")
|
||||
elif name.startswith("_1c444"):
|
||||
return os.path.join(CD, "data", "dotnet", "1c444ebeba24dcba8628b7dfe5fec7c6.exe_")
|
||||
elif name.startswith("_692f"):
|
||||
return os.path.join(CD, "data", "dotnet", "692f7fd6d198e804d6af98eb9e390d61.exe_")
|
||||
else:
|
||||
raise ValueError("unexpected sample fixture: %s" % name)
|
||||
|
||||
@@ -325,24 +347,43 @@ def sample(request):
|
||||
return resolve_sample(request.param)
|
||||
|
||||
|
||||
def get_function(extractor, fva):
|
||||
for f in extractor.get_functions():
|
||||
if int(f) == fva:
|
||||
return f
|
||||
def get_function(extractor, fva: int) -> FunctionHandle:
    """
    find the function handle whose location equals `fva`.

    for a DnfileFeatureExtractor the handle's `inner.offset` is compared,
    for any other extractor the handle's `address` is compared.

    raises:
      ValueError: when no function handle matches.
    """
    uses_inner_offset = isinstance(extractor, DnfileFeatureExtractor)

    for candidate in extractor.get_functions():
        location = candidate.inner.offset if uses_inner_offset else candidate.address
        if location == fva:
            return candidate

    raise ValueError("function not found")
|
||||
|
||||
|
||||
def get_basic_block(extractor, f, va):
|
||||
for bb in extractor.get_basic_blocks(f):
|
||||
if int(bb) == va:
|
||||
return bb
|
||||
def get_function_by_token(extractor, token: int) -> FunctionHandle:
    """
    find the first function handle whose `address.token.value` equals `token`.

    raises:
      ValueError: when no function handle matches.
    """
    # private sentinel so that "nothing found" cannot be confused with a real handle
    missing = object()
    found = next((fh for fh in extractor.get_functions() if fh.address.token.value == token), missing)
    if found is missing:
        raise ValueError("function not found by token")
    return found
|
||||
|
||||
|
||||
def get_basic_block(extractor, fh: FunctionHandle, va: int) -> BBHandle:
    """
    find the basic block handle of function `fh` whose location equals `va`.

    for a DnfileFeatureExtractor the handle's `inner.offset` is compared,
    for any other extractor the handle's `address` is compared.

    raises:
      ValueError: when no basic block handle matches.
    """
    uses_inner_offset = isinstance(extractor, DnfileFeatureExtractor)

    for candidate in extractor.get_basic_blocks(fh):
        location = candidate.inner.offset if uses_inner_offset else candidate.address
        if location == va:
            return candidate

    raise ValueError("basic block not found")
|
||||
|
||||
|
||||
def get_instruction(extractor, f, bb, va):
|
||||
for insn in extractor.get_instructions(f, bb):
|
||||
if int(insn) == va:
|
||||
return insn
|
||||
def get_instruction(extractor, fh: FunctionHandle, bbh: BBHandle, va: int) -> InsnHandle:
    """
    find the instruction handle within basic block `bbh` of function `fh` whose location equals `va`.

    for a DnfileFeatureExtractor the handle's `inner.offset` is compared,
    for any other extractor the handle's `address` is compared.

    raises:
      ValueError: when no instruction handle matches.
    """
    uses_inner_offset = isinstance(extractor, DnfileFeatureExtractor)

    for candidate in extractor.get_instructions(fh, bbh):
        location = candidate.inner.offset if uses_inner_offset else candidate.address
        if location == va:
            return candidate

    raise ValueError("instruction not found")
|
||||
|
||||
|
||||
@@ -369,10 +410,10 @@ def resolve_scope(scope):
|
||||
iva = int(ispec.partition("=")[2], 0x10)
|
||||
|
||||
def inner_insn(extractor):
|
||||
f = get_function(extractor, fva)
|
||||
bb = get_basic_block(extractor, f, bbva)
|
||||
insn = get_instruction(extractor, f, bb, iva)
|
||||
features = extract_instruction_features(extractor, f, bb, insn)
|
||||
fh = get_function(extractor, fva)
|
||||
bbh = get_basic_block(extractor, fh, bbva)
|
||||
ih = get_instruction(extractor, fh, bbh, iva)
|
||||
features = extract_instruction_features(extractor, fh, bbh, ih)
|
||||
for k, vs in extract_global_features(extractor).items():
|
||||
features[k].update(vs)
|
||||
return features
|
||||
@@ -388,22 +429,25 @@ def resolve_scope(scope):
|
||||
bbva = int(bbspec.partition("=")[2], 0x10)
|
||||
|
||||
def inner_bb(extractor):
|
||||
f = get_function(extractor, fva)
|
||||
bb = get_basic_block(extractor, f, bbva)
|
||||
features = extract_basic_block_features(extractor, f, bb)
|
||||
fh = get_function(extractor, fva)
|
||||
bbh = get_basic_block(extractor, fh, bbva)
|
||||
features = extract_basic_block_features(extractor, fh, bbh)
|
||||
for k, vs in extract_global_features(extractor).items():
|
||||
features[k].update(vs)
|
||||
return features
|
||||
|
||||
inner_bb.__name__ = scope
|
||||
return inner_bb
|
||||
elif scope.startswith("function"):
|
||||
# like `function=0x401000`
|
||||
elif scope.startswith(("function", "token")):
|
||||
# like `function=0x401000` or `token=0x6000001`
|
||||
va = int(scope.partition("=")[2], 0x10)
|
||||
|
||||
def inner_function(extractor):
|
||||
f = get_function(extractor, va)
|
||||
features = extract_function_features(extractor, f)
|
||||
if scope.startswith("token"):
|
||||
fh = get_function_by_token(extractor, va)
|
||||
else:
|
||||
fh = get_function(extractor, va)
|
||||
features = extract_function_features(extractor, fh)
|
||||
for k, vs in extract_global_features(extractor).items():
|
||||
features[k].update(vs)
|
||||
return features
|
||||
@@ -705,6 +749,8 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted(
|
||||
True,
|
||||
),
|
||||
("_1c444", "function=0x2544", capa.features.common.Characteristic("unmanaged call"), False),
|
||||
# same as above but using token instead of function
|
||||
("_1c444", "token=0x6000088", capa.features.common.Characteristic("unmanaged call"), False),
|
||||
(
|
||||
"_1c444",
|
||||
"function=0x1F68, bb=0x1F68, insn=0x1FF9",
|
||||
@@ -846,10 +892,15 @@ def mixed_mode_64_dotnetfile_extractor():
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def hello_world_dnfile_extractor():
|
||||
def hello_world_dotnetfile_extractor():
|
||||
return get_dnfile_extractor(get_data_path_by_name("hello-world"))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def _1c444_dnfile_extractor():
|
||||
return get_dnfile_extractor(get_data_path_by_name("1c444..."))
|
||||
def _1c444_dotnetfile_extractor():
|
||||
return get_dnfile_extractor(get_data_path_by_name("_1c444"))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def _692f_dotnetfile_extractor():
|
||||
return get_dnfile_extractor(get_data_path_by_name("_692f"))
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import pytest
|
||||
import fixtures
|
||||
from fixtures import *
|
||||
from fixtures import parametrize
|
||||
|
||||
|
||||
@parametrize(
|
||||
"sample,scope,feature,expected",
|
||||
fixtures.FEATURE_PRESENCE_TESTS_DOTNET,
|
||||
indirect=["sample", "scope"],
|
||||
)
|
||||
def test_dnfile_features(sample, scope, feature, expected):
|
||||
fixtures.do_test_feature_presence(fixtures.get_dnfile_extractor, sample, scope, feature, expected)
|
||||
|
||||
|
||||
@parametrize(
|
||||
"extractor,function,expected",
|
||||
[
|
||||
("b9f5b_dotnetfile_extractor", "is_dotnet_file", True),
|
||||
("b9f5b_dotnetfile_extractor", "is_mixed_mode", False),
|
||||
("mixed_mode_64_dotnetfile_extractor", "is_mixed_mode", True),
|
||||
("b9f5b_dotnetfile_extractor", "get_entry_point", 0x6000007),
|
||||
("b9f5b_dotnetfile_extractor", "get_runtime_version", (2, 5)),
|
||||
("b9f5b_dotnetfile_extractor", "get_meta_version_string", "v2.0.50727"),
|
||||
],
|
||||
)
|
||||
def test_dnfile_extractor(request, extractor, function, expected):
|
||||
extractor_function = getattr(request.getfixturevalue(extractor), function)
|
||||
assert extractor_function() == expected
|
||||
+62
-81
@@ -6,6 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import textwrap
|
||||
from typing import List
|
||||
|
||||
from fixtures import *
|
||||
|
||||
@@ -17,49 +18,60 @@ import capa.features.insn
|
||||
import capa.features.common
|
||||
import capa.features.freeze
|
||||
import capa.features.basicblock
|
||||
import capa.features.extractors.null
|
||||
import capa.features.extractors.base_extractor
|
||||
from capa.features.address import AbsoluteVirtualAddress
|
||||
|
||||
EXTRACTOR = capa.features.extractors.base_extractor.NullFeatureExtractor(
|
||||
{
|
||||
"base address": 0x401000,
|
||||
"file features": [
|
||||
(0x402345, capa.features.common.Characteristic("embedded pe")),
|
||||
],
|
||||
"functions": {
|
||||
0x401000: {
|
||||
"features": [
|
||||
(0x401000, capa.features.common.Characteristic("indirect call")),
|
||||
],
|
||||
"basic blocks": {
|
||||
0x401000: {
|
||||
"features": [
|
||||
(0x401000, capa.features.common.Characteristic("tight loop")),
|
||||
],
|
||||
"instructions": {
|
||||
0x401000: {
|
||||
"features": [
|
||||
(0x401000, capa.features.insn.Mnemonic("xor")),
|
||||
(0x401000, capa.features.common.Characteristic("nzxor")),
|
||||
],
|
||||
},
|
||||
0x401002: {
|
||||
"features": [
|
||||
(0x401002, capa.features.insn.Mnemonic("mov")),
|
||||
],
|
||||
},
|
||||
},
|
||||
EXTRACTOR = capa.features.extractors.null.NullFeatureExtractor(
|
||||
base_address=AbsoluteVirtualAddress(0x401000),
|
||||
global_features=[],
|
||||
file_features=[
|
||||
(AbsoluteVirtualAddress(0x402345), capa.features.common.Characteristic("embedded pe")),
|
||||
],
|
||||
functions={
|
||||
AbsoluteVirtualAddress(0x401000): capa.features.extractors.null.FunctionFeatures(
|
||||
features=[
|
||||
(AbsoluteVirtualAddress(0x401000), capa.features.common.Characteristic("indirect call")),
|
||||
],
|
||||
basic_blocks={
|
||||
AbsoluteVirtualAddress(0x401000): capa.features.extractors.null.BasicBlockFeatures(
|
||||
features=[
|
||||
(AbsoluteVirtualAddress(0x401000), capa.features.common.Characteristic("tight loop")),
|
||||
],
|
||||
instructions={
|
||||
AbsoluteVirtualAddress(0x401000): capa.features.extractors.null.InstructionFeatures(
|
||||
features=[
|
||||
(AbsoluteVirtualAddress(0x401000), capa.features.insn.Mnemonic("xor")),
|
||||
(AbsoluteVirtualAddress(0x401000), capa.features.common.Characteristic("nzxor")),
|
||||
],
|
||||
),
|
||||
AbsoluteVirtualAddress(0x401002): capa.features.extractors.null.InstructionFeatures(
|
||||
features=[
|
||||
(AbsoluteVirtualAddress(0x401002), capa.features.insn.Mnemonic("mov")),
|
||||
],
|
||||
),
|
||||
},
|
||||
},
|
||||
),
|
||||
},
|
||||
},
|
||||
}
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def addresses(s) -> List[Address]:
    """
    collect the `.address` attribute of every item in `s` and return them as a sorted list.
    """
    return sorted(item.address for item in s)
|
||||
|
||||
|
||||
def test_null_feature_extractor():
|
||||
assert list(EXTRACTOR.get_functions()) == [0x401000]
|
||||
assert list(EXTRACTOR.get_basic_blocks(0x401000)) == [0x401000]
|
||||
assert list(EXTRACTOR.get_instructions(0x401000, 0x0401000)) == [0x401000, 0x401002]
|
||||
fh = FunctionHandle(AbsoluteVirtualAddress(0x401000), None)
|
||||
bbh = BBHandle(AbsoluteVirtualAddress(0x401000), None)
|
||||
|
||||
assert addresses(EXTRACTOR.get_functions()) == [AbsoluteVirtualAddress(0x401000)]
|
||||
assert addresses(EXTRACTOR.get_basic_blocks(fh)) == [AbsoluteVirtualAddress(0x401000)]
|
||||
assert addresses(EXTRACTOR.get_instructions(fh, bbh)) == [
|
||||
AbsoluteVirtualAddress(0x401000),
|
||||
AbsoluteVirtualAddress(0x401002),
|
||||
]
|
||||
|
||||
rules = capa.rules.RuleSet(
|
||||
[
|
||||
@@ -85,63 +97,33 @@ def test_null_feature_extractor():
|
||||
|
||||
|
||||
def compare_extractors(a, b):
|
||||
"""
|
||||
args:
|
||||
a (capa.features.extractors.NullFeatureExtractor)
|
||||
b (capa.features.extractors.NullFeatureExtractor)
|
||||
"""
|
||||
|
||||
# TODO: ordering of these things probably doesn't work yet
|
||||
|
||||
assert list(a.extract_file_features()) == list(b.extract_file_features())
|
||||
assert list(a.get_functions()) == list(b.get_functions())
|
||||
|
||||
assert addresses(a.get_functions()) == addresses(b.get_functions())
|
||||
for f in a.get_functions():
|
||||
assert list(a.get_basic_blocks(f)) == list(b.get_basic_blocks(f))
|
||||
assert list(a.extract_function_features(f)) == list(b.extract_function_features(f))
|
||||
assert addresses(a.get_basic_blocks(f)) == addresses(b.get_basic_blocks(f))
|
||||
assert list(sorted(set(a.extract_function_features(f)))) == list(sorted(set(b.extract_function_features(f))))
|
||||
|
||||
for bb in a.get_basic_blocks(f):
|
||||
assert list(a.get_instructions(f, bb)) == list(b.get_instructions(f, bb))
|
||||
assert list(a.extract_basic_block_features(f, bb)) == list(b.extract_basic_block_features(f, bb))
|
||||
|
||||
for insn in a.get_instructions(f, bb):
|
||||
assert list(a.extract_insn_features(f, bb, insn)) == list(b.extract_insn_features(f, bb, insn))
|
||||
|
||||
|
||||
def compare_extractors_viv_null(viv_ext, null_ext):
|
||||
"""
|
||||
almost identical to compare_extractors but adds casts to ints since the VivisectFeatureExtractor returns objects
|
||||
and NullFeatureExtractor returns ints
|
||||
|
||||
args:
|
||||
viv_ext (capa.features.extractors.viv.extractor.VivisectFeatureExtractor)
|
||||
null_ext (capa.features.extractors.NullFeatureExtractor)
|
||||
"""
|
||||
assert list(viv_ext.extract_file_features()) == list(null_ext.extract_file_features())
|
||||
assert list(map(int, viv_ext.get_functions())) == list(null_ext.get_functions())
|
||||
for f in viv_ext.get_functions():
|
||||
assert list(map(int, viv_ext.get_basic_blocks(f))) == list(null_ext.get_basic_blocks(int(f)))
|
||||
assert list(viv_ext.extract_function_features(f)) == list(null_ext.extract_function_features(int(f)))
|
||||
|
||||
for bb in viv_ext.get_basic_blocks(f):
|
||||
assert list(map(int, viv_ext.get_instructions(f, bb))) == list(null_ext.get_instructions(int(f), int(bb)))
|
||||
assert list(viv_ext.extract_basic_block_features(f, bb)) == list(
|
||||
null_ext.extract_basic_block_features(int(f), int(bb))
|
||||
assert addresses(a.get_instructions(f, bb)) == addresses(b.get_instructions(f, bb))
|
||||
assert list(sorted(set(a.extract_basic_block_features(f, bb)))) == list(
|
||||
sorted(set(b.extract_basic_block_features(f, bb)))
|
||||
)
|
||||
|
||||
for insn in viv_ext.get_instructions(f, bb):
|
||||
assert list(viv_ext.extract_insn_features(f, bb, insn)) == list(
|
||||
null_ext.extract_insn_features(int(f), int(bb), int(insn))
|
||||
for insn in a.get_instructions(f, bb):
|
||||
assert list(sorted(set(a.extract_insn_features(f, bb, insn)))) == list(
|
||||
sorted(set(b.extract_insn_features(f, bb, insn)))
|
||||
)
|
||||
|
||||
|
||||
def test_freeze_s_roundtrip():
|
||||
def test_freeze_str_roundtrip():
|
||||
load = capa.features.freeze.loads
|
||||
dump = capa.features.freeze.dumps
|
||||
reanimated = load(dump(EXTRACTOR))
|
||||
compare_extractors(EXTRACTOR, reanimated)
|
||||
|
||||
|
||||
def test_freeze_b_roundtrip():
|
||||
def test_freeze_bytes_roundtrip():
|
||||
load = capa.features.freeze.load
|
||||
dump = capa.features.freeze.dump
|
||||
reanimated = load(dump(EXTRACTOR))
|
||||
@@ -149,9 +131,7 @@ def test_freeze_b_roundtrip():
|
||||
|
||||
|
||||
def roundtrip_feature(feature):
|
||||
serialize = capa.features.freeze.serialize_feature
|
||||
deserialize = capa.features.freeze.deserialize_feature
|
||||
assert feature == deserialize(serialize(feature))
|
||||
assert feature == capa.features.freeze.feature_from_capa(feature).to_capa()
|
||||
|
||||
|
||||
def test_serialize_features():
|
||||
@@ -166,6 +146,7 @@ def test_serialize_features():
|
||||
roundtrip_feature(capa.features.file.Export("BaseThreadInitThunk"))
|
||||
roundtrip_feature(capa.features.file.Import("kernel32.IsWow64Process"))
|
||||
roundtrip_feature(capa.features.file.Import("#11"))
|
||||
roundtrip_feature(capa.features.insn.OperandOffset(0, 0x8))
|
||||
|
||||
|
||||
def test_freeze_sample(tmpdir, z9324d_extractor):
|
||||
@@ -184,4 +165,4 @@ def test_freeze_load_sample(tmpdir, z9324d_extractor):
|
||||
with open(o.strpath, "rb") as f:
|
||||
null_extractor = capa.features.freeze.load(f.read())
|
||||
|
||||
compare_extractors_viv_null(z9324d_extractor, null_extractor)
|
||||
compare_extractors(z9324d_extractor, null_extractor)
|
||||
|
||||
+29
-3
@@ -11,6 +11,7 @@ import textwrap
|
||||
|
||||
import fixtures
|
||||
from fixtures import *
|
||||
from fixtures import _692f_dotnetfile_extractor, _1c444_dotnetfile_extractor
|
||||
|
||||
import capa.main
|
||||
import capa.rules
|
||||
@@ -438,6 +439,31 @@ def test_json_meta(capsys):
|
||||
assert capa.main.main([path, "-j"]) == 0
|
||||
std = capsys.readouterr()
|
||||
std_json = json.loads(std.out)
|
||||
# remember: json can't have integer keys :-(
|
||||
assert str(0x10001010) in std_json["meta"]["analysis"]["layout"]["functions"]
|
||||
assert 0x10001179 in std_json["meta"]["analysis"]["layout"]["functions"][str(0x10001010)]["matched_basic_blocks"]
|
||||
|
||||
assert {"type": "absolute", "value": 0x10001010} in list(
|
||||
map(lambda f: f["address"], std_json["meta"]["analysis"]["layout"]["functions"])
|
||||
)
|
||||
|
||||
for addr, info in std_json["meta"]["analysis"]["layout"]["functions"]:
|
||||
if addr == ["absolute", 0x10001010]:
|
||||
assert {"address": ["absolute", 0x10001179]} in info["matched_basic_blocks"]
|
||||
|
||||
|
||||
def test_main_dotnet(_1c444_dotnetfile_extractor):
|
||||
# tests rules can be loaded successfully and all output modes
|
||||
path = _1c444_dotnetfile_extractor.path
|
||||
assert capa.main.main([path, "-vv"]) == 0
|
||||
assert capa.main.main([path, "-v"]) == 0
|
||||
assert capa.main.main([path, "-j"]) == 0
|
||||
assert capa.main.main([path, "-q"]) == 0
|
||||
assert capa.main.main([path]) == 0
|
||||
|
||||
|
||||
def test_main_dotnet2(_692f_dotnetfile_extractor):
|
||||
# tests rules can be loaded successfully and all output modes
|
||||
path = _692f_dotnetfile_extractor.path
|
||||
assert capa.main.main([path, "-vv"]) == 0
|
||||
assert capa.main.main([path, "-v"]) == 0
|
||||
assert capa.main.main([path, "-j"]) == 0
|
||||
assert capa.main.main([path, "-q"]) == 0
|
||||
assert capa.main.main([path]) == 0
|
||||
|
||||
+19
-13
@@ -28,6 +28,9 @@ def test_render_meta_attack():
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
scope: function
|
||||
authors:
|
||||
- foo
|
||||
att&ck:
|
||||
- {:s}
|
||||
features:
|
||||
@@ -37,13 +40,13 @@ def test_render_meta_attack():
|
||||
)
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
rule_meta = capa.render.result_document.convert_meta_to_result_document(r.meta)
|
||||
attack = rule_meta["att&ck"][0]
|
||||
rule_meta = capa.render.result_document.RuleMetadata.from_capa(r)
|
||||
attack = rule_meta.attack[0]
|
||||
|
||||
assert attack["id"] == id
|
||||
assert attack["tactic"] == tactic
|
||||
assert attack["technique"] == technique
|
||||
assert attack["subtechnique"] == subtechnique
|
||||
assert attack.id == id
|
||||
assert attack.tactic == tactic
|
||||
assert attack.technique == technique
|
||||
assert attack.subtechnique == subtechnique
|
||||
|
||||
assert capa.render.utils.format_parts_id(attack) == canonical
|
||||
|
||||
@@ -61,6 +64,9 @@ def test_render_meta_mbc():
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
scope: function
|
||||
authors:
|
||||
- foo
|
||||
mbc:
|
||||
- {:s}
|
||||
features:
|
||||
@@ -70,12 +76,12 @@ def test_render_meta_mbc():
|
||||
)
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
rule_meta = capa.render.result_document.convert_meta_to_result_document(r.meta)
|
||||
attack = rule_meta["mbc"][0]
|
||||
rule_meta = capa.render.result_document.RuleMetadata.from_capa(r)
|
||||
mbc = rule_meta.mbc[0]
|
||||
|
||||
assert attack["id"] == id
|
||||
assert attack["objective"] == objective
|
||||
assert attack["behavior"] == behavior
|
||||
assert attack["method"] == method
|
||||
assert mbc.id == id
|
||||
assert mbc.objective == objective
|
||||
assert mbc.behavior == behavior
|
||||
assert mbc.method == method
|
||||
|
||||
assert capa.render.utils.format_parts_id(attack) == canonical
|
||||
assert capa.render.utils.format_parts_id(mbc) == canonical
|
||||
|
||||
Reference in New Issue
Block a user