*: remove /x32 and /x64 flavors from number and offset features

This commit is contained in:
Willi Ballenthin
2022-04-05 10:35:41 -06:00
parent f246a01484
commit ecabd557a7
9 changed files with 15 additions and 202 deletions

View File

@@ -98,33 +98,23 @@ class Result:
class Feature(abc.ABC):
def __init__(self, value: Union[str, int, bytes], bitness=None, description=None):
def __init__(self, value: Union[str, int, bytes], description=None):
"""
Args:
value (any): the value of the feature, such as the number or string.
bitness (str): one of the VALID_BITNESS values, or None.
When None, then the feature applies to any bitness.
Modifies the feature name from `feature` to `feature/bitness`, like `offset/x32`.
description (str): a human-readable description that explains the feature value.
"""
super(Feature, self).__init__()
if bitness is not None:
if bitness not in VALID_BITNESS:
raise ValueError("bitness '%s' must be one of %s" % (bitness, VALID_BITNESS))
self.name = self.__class__.__name__.lower() + "/" + bitness
else:
self.name = self.__class__.__name__.lower()
self.name = self.__class__.__name__.lower()
self.value = value
self.bitness = bitness
self.description = description
def __hash__(self):
return hash((self.name, self.value, self.bitness))
return hash((self.name, self.value))
def __eq__(self, other):
return self.name == other.name and self.value == other.value and self.bitness == other.bitness
return self.name == other.name and self.value == other.value
def get_value_str(self) -> str:
"""
@@ -153,10 +143,7 @@ class Feature(abc.ABC):
return Result(self in ctx, self, [], locations=ctx.get(self, []))
def freeze_serialize(self):
if self.bitness is not None:
return (self.__class__.__name__, [self.value, {"bitness": self.bitness}])
else:
return (self.__class__.__name__, [self.value])
return (self.__class__.__name__, [self.value])
@classmethod
def freeze_deserialize(cls, args):
@@ -400,13 +387,6 @@ class Bytes(Feature):
return cls(*[codecs.decode(x, "hex") for x in args])
# identifiers for supported bitness names that tweak a feature
# for example, offset/x32
BITNESS_X32 = "x32"
BITNESS_X64 = "x64"
VALID_BITNESS = (BITNESS_X32, BITNESS_X64)
# other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types
ARCH_I386 = "i386"
ARCH_AMD64 = "amd64"

View File

@@ -13,39 +13,13 @@ import idautils
import capa.features.extractors.helpers
import capa.features.extractors.ida.helpers
from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import (
BITNESS_X32,
BITNESS_X64,
MAX_BYTES_FEATURE_SIZE,
THUNK_CHAIN_DEPTH_DELTA,
Bytes,
String,
Characteristic,
)
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
# byte range within the first and returning basic blocks, this helps to reduce FP features
SECURITY_COOKIE_BYTES_DELTA = 0x40
def get_bitness(ctx):
"""
fetch the BITNESS_* constant for the currently open workspace.
via Tamir Bahar/@tmr232
https://reverseengineering.stackexchange.com/a/11398/17194
"""
if "bitness" not in ctx:
info = idaapi.get_inf_structure()
if info.is_64bit():
ctx["bitness"] = BITNESS_X64
elif info.is_32bit():
ctx["bitness"] = BITNESS_X32
else:
raise ValueError("unexpected bitness")
return ctx["bitness"]
def get_imports(ctx):
if "imports_cache" not in ctx:
ctx["imports_cache"] = capa.features.extractors.ida.helpers.get_file_imports()
@@ -159,7 +133,6 @@ def extract_insn_number_features(f, bb, insn):
const = op.addr
yield Number(const), insn.ea
yield Number(const, bitness=get_bitness(f.ctx)), insn.ea
yield OperandNumber(i, const), insn.ea
@@ -234,7 +207,6 @@ def extract_insn_offset_features(f, bb, insn):
op_off = capa.features.extractors.helpers.twos_complement(op_off, 32)
yield Offset(op_off), insn.ea
yield Offset(op_off, bitness=get_bitness(f.ctx)), insn.ea
yield OperandOffset(i, op_off), insn.ea

View File

@@ -6,15 +6,7 @@ from smda.common.SmdaReport import SmdaReport
import capa.features.extractors.helpers
from capa.features.insn import API, Number, Offset, Mnemonic
from capa.features.common import (
BITNESS_X32,
BITNESS_X64,
MAX_BYTES_FEATURE_SIZE,
THUNK_CHAIN_DEPTH_DELTA,
Bytes,
String,
Characteristic,
)
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
# byte range within the first and returning basic blocks, this helps to reduce FP features
@@ -23,16 +15,6 @@ PATTERN_HEXNUM = re.compile(r"[+\-] (?P<num>0x[a-fA-F0-9]+)")
PATTERN_SINGLENUM = re.compile(r"[+\-] (?P<num>[0-9])")
def get_bitness(smda_report):
if smda_report.architecture == "intel":
if smda_report.bitness == 32:
return BITNESS_X32
elif smda_report.bitness == 64:
return BITNESS_X64
else:
raise NotImplementedError
def extract_insn_api_features(f, bb, insn):
"""parse API features from the given instruction."""
if insn.offset in f.apirefs:
@@ -89,7 +71,6 @@ def extract_insn_number_features(f, bb, insn):
value = int(operand, 16) & ((1 << f.smda_report.bitness) - 1)
yield Number(value), insn.offset
yield Number(value, bitness=get_bitness(f.smda_report)), insn.offset
except:
continue
@@ -232,7 +213,6 @@ def extract_insn_offset_features(f, bb, insn):
number = int(number_int.group("num"))
number = -1 * number if number_int.group().startswith("-") else number
yield Offset(number), insn.offset
yield Offset(number, bitness=get_bitness(f.smda_report)), insn.offset
def is_security_cookie(f, bb, insn):

View File

@@ -18,15 +18,7 @@ import envi.archs.amd64.disasm
import capa.features.extractors.helpers
import capa.features.extractors.viv.helpers
from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import (
BITNESS_X32,
BITNESS_X64,
MAX_BYTES_FEATURE_SIZE,
THUNK_CHAIN_DEPTH_DELTA,
Bytes,
String,
Characteristic,
)
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
@@ -34,14 +26,6 @@ from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_i
SECURITY_COOKIE_BYTES_DELTA = 0x40
def get_bitness(vw):
bitness = vw.getMeta("Architecture")
if bitness == "i386":
return BITNESS_X32
elif bitness == "amd64":
return BITNESS_X64
def interface_extract_instruction_XXX(f, bb, insn):
"""
parse features from the given instruction.
@@ -553,7 +537,6 @@ def extract_op_number_features(f, bb, insn, i, oper):
return
yield Number(v), insn.va
yield Number(v, bitness=get_bitness(f.vw)), insn.va
yield OperandNumber(i, v), insn.va
@@ -582,7 +565,6 @@ def extract_op_offset_features(f, bb, insn, i, oper):
v = oper.disp
yield Offset(v), insn.va
yield Offset(v, bitness=get_bitness(f.vw)), insn.va
yield OperandOffset(i, v), insn.va
# like: [esi + ecx + 16384]
@@ -594,7 +576,6 @@ def extract_op_offset_features(f, bb, insn, i, oper):
v = oper.disp
yield Offset(v), insn.va
yield Offset(v, bitness=get_bitness(f.vw)), insn.va
yield OperandOffset(i, v), insn.va

View File

@@ -22,16 +22,16 @@ class API(Feature):
class Number(Feature):
def __init__(self, value: int, bitness=None, description=None):
super(Number, self).__init__(value, bitness=bitness, description=description)
def __init__(self, value: int, description=None):
super(Number, self).__init__(value, description=description)
def get_value_str(self):
return capa.render.utils.hex(self.value)
class Offset(Feature):
def __init__(self, value: int, bitness=None, description=None):
super(Offset, self).__init__(value, bitness=bitness, description=description)
def __init__(self, value: int, description=None):
super(Offset, self).__init__(value, description=description)
def get_value_str(self):
return capa.render.utils.hex(self.value)
@@ -53,7 +53,7 @@ class _Operand(Feature, abc.ABC):
self.index = index
def __hash__(self):
return hash((self.name, self.value, self.bitness))
return hash((self.name, self.value))
def __eq__(self, other):
return super().__eq__(other) and self.index == other.index

View File

@@ -257,20 +257,8 @@ def parse_feature(key: str):
return capa.features.common.Bytes
elif key == "number":
return capa.features.insn.Number
elif key.startswith("number/"):
bitness = key.partition("/")[2]
# the other handlers here return constructors for features,
# and we want to as well,
# however, we need to preconfigure one of the arguments (`bitness`).
# so, instead we return a partially-applied function that
# provides `bitness` to the feature constructor.
# it forwards any other arguments provided to the closure along to the constructor.
return functools.partial(capa.features.insn.Number, bitness=bitness)
elif key == "offset":
return capa.features.insn.Offset
elif key.startswith("offset/"):
bitness = key.partition("/")[2]
return functools.partial(capa.features.insn.Offset, bitness=bitness)
elif key == "mnemonic":
return capa.features.insn.Mnemonic
elif key == "basic blocks":

View File

@@ -43,7 +43,7 @@ import capa.rules
import capa.engine
import capa.features
import capa.features.insn
from capa.features.common import BITNESS_X32, BITNESS_X64, String
from capa.features.common import String
logger = logging.getLogger("capa2yara")

View File

@@ -22,19 +22,7 @@ import capa.features.file
import capa.features.insn
import capa.features.common
import capa.features.basicblock
from capa.features.common import (
OS,
OS_LINUX,
ARCH_I386,
FORMAT_PE,
ARCH_AMD64,
FORMAT_ELF,
OS_WINDOWS,
BITNESS_X32,
BITNESS_X64,
Arch,
Format,
)
from capa.features.common import OS, OS_LINUX, ARCH_I386, FORMAT_PE, ARCH_AMD64, FORMAT_ELF, OS_WINDOWS, Arch, Format
CD = os.path.dirname(__file__)
@@ -431,10 +419,6 @@ FEATURE_PRESENCE_TESTS = sorted(
# insn/number: stack adjustments
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xC), False),
("mimikatz", "function=0x40105D", capa.features.insn.Number(0x10), False),
# insn/number: bitness flavors
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True),
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, bitness=BITNESS_X32), True),
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, bitness=BITNESS_X64), False),
# insn/number: negative
("mimikatz", "function=0x401553", capa.features.insn.Number(0xFFFFFFFF), True),
("mimikatz", "function=0x43e543", capa.features.insn.Number(0xFFFFFFF0), True),
@@ -450,10 +434,6 @@ FEATURE_PRESENCE_TESTS = sorted(
# insn/offset: negative
("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x1), True),
("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x2), True),
# insn/offset: bitness flavors
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True),
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, bitness=BITNESS_X32), True),
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, bitness=BITNESS_X64), False),
# insn/api
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True),

View File

@@ -23,8 +23,6 @@ from capa.features.common import (
ARCH_AMD64,
FORMAT_ELF,
OS_WINDOWS,
BITNESS_X32,
BITNESS_X64,
Arch,
Format,
String,
@@ -531,39 +529,6 @@ def test_invalid_number():
)
def test_number_bitness():
r = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- number/x32: 2
"""
)
)
assert r.evaluate({Number(2, bitness=BITNESS_X32): {1}}) == True
assert r.evaluate({Number(2): {1}}) == False
assert r.evaluate({Number(2, bitness=BITNESS_X64): {1}}) == False
def test_number_bitness_symbol():
r = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- number/x32: 2 = some constant
"""
)
)
assert r.evaluate({Number(2, bitness=BITNESS_X32, description="some constant"): {1}}) == True
def test_offset_symbol():
rule = textwrap.dedent(
"""
@@ -609,39 +574,6 @@ def test_count_offset_symbol():
assert r.evaluate({Offset(0x100, description="symbol name"): {1, 2, 3}}) == True
def test_offset_bitness():
r = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- offset/x32: 2
"""
)
)
assert r.evaluate({Offset(2, bitness=BITNESS_X32): {1}}) == True
assert r.evaluate({Offset(2): {1}}) == False
assert r.evaluate({Offset(2, bitness=BITNESS_X64): {1}}) == False
def test_offset_bitness_symbol():
r = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- offset/x32: 2 = some constant
"""
)
)
assert r.evaluate({Offset(2, bitness=BITNESS_X32, description="some constant"): {1}}) == True
def test_invalid_offset():
with pytest.raises(capa.rules.InvalidRule):
r = capa.rules.Rule.from_yaml(