*: remove /x32 and /x64 flavors from number and offset features

This commit is contained in:
Willi Ballenthin
2022-04-05 10:35:41 -06:00
parent f246a01484
commit ecabd557a7
9 changed files with 15 additions and 202 deletions

View File

@@ -98,33 +98,23 @@ class Result:
class Feature(abc.ABC): class Feature(abc.ABC):
def __init__(self, value: Union[str, int, bytes], bitness=None, description=None): def __init__(self, value: Union[str, int, bytes], description=None):
""" """
Args: Args:
value (any): the value of the feature, such as the number or string. value (any): the value of the feature, such as the number or string.
bitness (str): one of the VALID_BITNESS values, or None.
When None, then the feature applies to any bitness.
Modifies the feature name from `feature` to `feature/bitness`, like `offset/x32`.
description (str): a human-readable description that explains the feature value. description (str): a human-readable description that explains the feature value.
""" """
super(Feature, self).__init__() super(Feature, self).__init__()
self.name = self.__class__.__name__.lower()
if bitness is not None:
if bitness not in VALID_BITNESS:
raise ValueError("bitness '%s' must be one of %s" % (bitness, VALID_BITNESS))
self.name = self.__class__.__name__.lower() + "/" + bitness
else:
self.name = self.__class__.__name__.lower()
self.value = value self.value = value
self.bitness = bitness
self.description = description self.description = description
def __hash__(self): def __hash__(self):
return hash((self.name, self.value, self.bitness)) return hash((self.name, self.value))
def __eq__(self, other): def __eq__(self, other):
return self.name == other.name and self.value == other.value and self.bitness == other.bitness return self.name == other.name and self.value == other.value
def get_value_str(self) -> str: def get_value_str(self) -> str:
""" """
@@ -153,10 +143,7 @@ class Feature(abc.ABC):
return Result(self in ctx, self, [], locations=ctx.get(self, [])) return Result(self in ctx, self, [], locations=ctx.get(self, []))
def freeze_serialize(self): def freeze_serialize(self):
if self.bitness is not None: return (self.__class__.__name__, [self.value])
return (self.__class__.__name__, [self.value, {"bitness": self.bitness}])
else:
return (self.__class__.__name__, [self.value])
@classmethod @classmethod
def freeze_deserialize(cls, args): def freeze_deserialize(cls, args):
@@ -400,13 +387,6 @@ class Bytes(Feature):
return cls(*[codecs.decode(x, "hex") for x in args]) return cls(*[codecs.decode(x, "hex") for x in args])
# identifiers for supported bitness names that tweak a feature
# for example, offset/x32
BITNESS_X32 = "x32"
BITNESS_X64 = "x64"
VALID_BITNESS = (BITNESS_X32, BITNESS_X64)
# other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types # other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types
ARCH_I386 = "i386" ARCH_I386 = "i386"
ARCH_AMD64 = "amd64" ARCH_AMD64 = "amd64"

View File

@@ -13,39 +13,13 @@ import idautils
import capa.features.extractors.helpers import capa.features.extractors.helpers
import capa.features.extractors.ida.helpers import capa.features.extractors.ida.helpers
from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import ( from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
BITNESS_X32,
BITNESS_X64,
MAX_BYTES_FEATURE_SIZE,
THUNK_CHAIN_DEPTH_DELTA,
Bytes,
String,
Characteristic,
)
# security cookie checks may perform non-zeroing XORs, these are expected within a certain # security cookie checks may perform non-zeroing XORs, these are expected within a certain
# byte range within the first and returning basic blocks, this helps to reduce FP features # byte range within the first and returning basic blocks, this helps to reduce FP features
SECURITY_COOKIE_BYTES_DELTA = 0x40 SECURITY_COOKIE_BYTES_DELTA = 0x40
def get_bitness(ctx):
"""
fetch the BITNESS_* constant for the currently open workspace.
via Tamir Bahar/@tmr232
https://reverseengineering.stackexchange.com/a/11398/17194
"""
if "bitness" not in ctx:
info = idaapi.get_inf_structure()
if info.is_64bit():
ctx["bitness"] = BITNESS_X64
elif info.is_32bit():
ctx["bitness"] = BITNESS_X32
else:
raise ValueError("unexpected bitness")
return ctx["bitness"]
def get_imports(ctx): def get_imports(ctx):
if "imports_cache" not in ctx: if "imports_cache" not in ctx:
ctx["imports_cache"] = capa.features.extractors.ida.helpers.get_file_imports() ctx["imports_cache"] = capa.features.extractors.ida.helpers.get_file_imports()
@@ -159,7 +133,6 @@ def extract_insn_number_features(f, bb, insn):
const = op.addr const = op.addr
yield Number(const), insn.ea yield Number(const), insn.ea
yield Number(const, bitness=get_bitness(f.ctx)), insn.ea
yield OperandNumber(i, const), insn.ea yield OperandNumber(i, const), insn.ea
@@ -234,7 +207,6 @@ def extract_insn_offset_features(f, bb, insn):
op_off = capa.features.extractors.helpers.twos_complement(op_off, 32) op_off = capa.features.extractors.helpers.twos_complement(op_off, 32)
yield Offset(op_off), insn.ea yield Offset(op_off), insn.ea
yield Offset(op_off, bitness=get_bitness(f.ctx)), insn.ea
yield OperandOffset(i, op_off), insn.ea yield OperandOffset(i, op_off), insn.ea

View File

@@ -6,15 +6,7 @@ from smda.common.SmdaReport import SmdaReport
import capa.features.extractors.helpers import capa.features.extractors.helpers
from capa.features.insn import API, Number, Offset, Mnemonic from capa.features.insn import API, Number, Offset, Mnemonic
from capa.features.common import ( from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
BITNESS_X32,
BITNESS_X64,
MAX_BYTES_FEATURE_SIZE,
THUNK_CHAIN_DEPTH_DELTA,
Bytes,
String,
Characteristic,
)
# security cookie checks may perform non-zeroing XORs, these are expected within a certain # security cookie checks may perform non-zeroing XORs, these are expected within a certain
# byte range within the first and returning basic blocks, this helps to reduce FP features # byte range within the first and returning basic blocks, this helps to reduce FP features
@@ -23,16 +15,6 @@ PATTERN_HEXNUM = re.compile(r"[+\-] (?P<num>0x[a-fA-F0-9]+)")
PATTERN_SINGLENUM = re.compile(r"[+\-] (?P<num>[0-9])") PATTERN_SINGLENUM = re.compile(r"[+\-] (?P<num>[0-9])")
def get_bitness(smda_report):
if smda_report.architecture == "intel":
if smda_report.bitness == 32:
return BITNESS_X32
elif smda_report.bitness == 64:
return BITNESS_X64
else:
raise NotImplementedError
def extract_insn_api_features(f, bb, insn): def extract_insn_api_features(f, bb, insn):
"""parse API features from the given instruction.""" """parse API features from the given instruction."""
if insn.offset in f.apirefs: if insn.offset in f.apirefs:
@@ -89,7 +71,6 @@ def extract_insn_number_features(f, bb, insn):
value = int(operand, 16) & ((1 << f.smda_report.bitness) - 1) value = int(operand, 16) & ((1 << f.smda_report.bitness) - 1)
yield Number(value), insn.offset yield Number(value), insn.offset
yield Number(value, bitness=get_bitness(f.smda_report)), insn.offset
except: except:
continue continue
@@ -232,7 +213,6 @@ def extract_insn_offset_features(f, bb, insn):
number = int(number_int.group("num")) number = int(number_int.group("num"))
number = -1 * number if number_int.group().startswith("-") else number number = -1 * number if number_int.group().startswith("-") else number
yield Offset(number), insn.offset yield Offset(number), insn.offset
yield Offset(number, bitness=get_bitness(f.smda_report)), insn.offset
def is_security_cookie(f, bb, insn): def is_security_cookie(f, bb, insn):

View File

@@ -18,15 +18,7 @@ import envi.archs.amd64.disasm
import capa.features.extractors.helpers import capa.features.extractors.helpers
import capa.features.extractors.viv.helpers import capa.features.extractors.viv.helpers
from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import ( from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
BITNESS_X32,
BITNESS_X64,
MAX_BYTES_FEATURE_SIZE,
THUNK_CHAIN_DEPTH_DELTA,
Bytes,
String,
Characteristic,
)
from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call
# security cookie checks may perform non-zeroing XORs, these are expected within a certain # security cookie checks may perform non-zeroing XORs, these are expected within a certain
@@ -34,14 +26,6 @@ from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_i
SECURITY_COOKIE_BYTES_DELTA = 0x40 SECURITY_COOKIE_BYTES_DELTA = 0x40
def get_bitness(vw):
bitness = vw.getMeta("Architecture")
if bitness == "i386":
return BITNESS_X32
elif bitness == "amd64":
return BITNESS_X64
def interface_extract_instruction_XXX(f, bb, insn): def interface_extract_instruction_XXX(f, bb, insn):
""" """
parse features from the given instruction. parse features from the given instruction.
@@ -553,7 +537,6 @@ def extract_op_number_features(f, bb, insn, i, oper):
return return
yield Number(v), insn.va yield Number(v), insn.va
yield Number(v, bitness=get_bitness(f.vw)), insn.va
yield OperandNumber(i, v), insn.va yield OperandNumber(i, v), insn.va
@@ -582,7 +565,6 @@ def extract_op_offset_features(f, bb, insn, i, oper):
v = oper.disp v = oper.disp
yield Offset(v), insn.va yield Offset(v), insn.va
yield Offset(v, bitness=get_bitness(f.vw)), insn.va
yield OperandOffset(i, v), insn.va yield OperandOffset(i, v), insn.va
# like: [esi + ecx + 16384] # like: [esi + ecx + 16384]
@@ -594,7 +576,6 @@ def extract_op_offset_features(f, bb, insn, i, oper):
v = oper.disp v = oper.disp
yield Offset(v), insn.va yield Offset(v), insn.va
yield Offset(v, bitness=get_bitness(f.vw)), insn.va
yield OperandOffset(i, v), insn.va yield OperandOffset(i, v), insn.va

View File

@@ -22,16 +22,16 @@ class API(Feature):
class Number(Feature): class Number(Feature):
def __init__(self, value: int, bitness=None, description=None): def __init__(self, value: int, description=None):
super(Number, self).__init__(value, bitness=bitness, description=description) super(Number, self).__init__(value, description=description)
def get_value_str(self): def get_value_str(self):
return capa.render.utils.hex(self.value) return capa.render.utils.hex(self.value)
class Offset(Feature): class Offset(Feature):
def __init__(self, value: int, bitness=None, description=None): def __init__(self, value: int, description=None):
super(Offset, self).__init__(value, bitness=bitness, description=description) super(Offset, self).__init__(value, description=description)
def get_value_str(self): def get_value_str(self):
return capa.render.utils.hex(self.value) return capa.render.utils.hex(self.value)
@@ -53,7 +53,7 @@ class _Operand(Feature, abc.ABC):
self.index = index self.index = index
def __hash__(self): def __hash__(self):
return hash((self.name, self.value, self.bitness)) return hash((self.name, self.value))
def __eq__(self, other): def __eq__(self, other):
return super().__eq__(other) and self.index == other.index return super().__eq__(other) and self.index == other.index

View File

@@ -257,20 +257,8 @@ def parse_feature(key: str):
return capa.features.common.Bytes return capa.features.common.Bytes
elif key == "number": elif key == "number":
return capa.features.insn.Number return capa.features.insn.Number
elif key.startswith("number/"):
bitness = key.partition("/")[2]
# the other handlers here return constructors for features,
# and we want to as well,
# however, we need to preconfigure one of the arguments (`bitness`).
# so, instead we return a partially-applied function that
# provides `bitness` to the feature constructor.
# it forwards any other arguments provided to the closure along to the constructor.
return functools.partial(capa.features.insn.Number, bitness=bitness)
elif key == "offset": elif key == "offset":
return capa.features.insn.Offset return capa.features.insn.Offset
elif key.startswith("offset/"):
bitness = key.partition("/")[2]
return functools.partial(capa.features.insn.Offset, bitness=bitness)
elif key == "mnemonic": elif key == "mnemonic":
return capa.features.insn.Mnemonic return capa.features.insn.Mnemonic
elif key == "basic blocks": elif key == "basic blocks":

View File

@@ -43,7 +43,7 @@ import capa.rules
import capa.engine import capa.engine
import capa.features import capa.features
import capa.features.insn import capa.features.insn
from capa.features.common import BITNESS_X32, BITNESS_X64, String from capa.features.common import String
logger = logging.getLogger("capa2yara") logger = logging.getLogger("capa2yara")

View File

@@ -22,19 +22,7 @@ import capa.features.file
import capa.features.insn import capa.features.insn
import capa.features.common import capa.features.common
import capa.features.basicblock import capa.features.basicblock
from capa.features.common import ( from capa.features.common import OS, OS_LINUX, ARCH_I386, FORMAT_PE, ARCH_AMD64, FORMAT_ELF, OS_WINDOWS, Arch, Format
OS,
OS_LINUX,
ARCH_I386,
FORMAT_PE,
ARCH_AMD64,
FORMAT_ELF,
OS_WINDOWS,
BITNESS_X32,
BITNESS_X64,
Arch,
Format,
)
CD = os.path.dirname(__file__) CD = os.path.dirname(__file__)
@@ -431,10 +419,6 @@ FEATURE_PRESENCE_TESTS = sorted(
# insn/number: stack adjustments # insn/number: stack adjustments
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xC), False), ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xC), False),
("mimikatz", "function=0x40105D", capa.features.insn.Number(0x10), False), ("mimikatz", "function=0x40105D", capa.features.insn.Number(0x10), False),
# insn/number: bitness flavors
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True),
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, bitness=BITNESS_X32), True),
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, bitness=BITNESS_X64), False),
# insn/number: negative # insn/number: negative
("mimikatz", "function=0x401553", capa.features.insn.Number(0xFFFFFFFF), True), ("mimikatz", "function=0x401553", capa.features.insn.Number(0xFFFFFFFF), True),
("mimikatz", "function=0x43e543", capa.features.insn.Number(0xFFFFFFF0), True), ("mimikatz", "function=0x43e543", capa.features.insn.Number(0xFFFFFFF0), True),
@@ -450,10 +434,6 @@ FEATURE_PRESENCE_TESTS = sorted(
# insn/offset: negative # insn/offset: negative
("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x1), True), ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x1), True),
("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x2), True), ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x2), True),
# insn/offset: bitness flavors
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True),
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, bitness=BITNESS_X32), True),
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, bitness=BITNESS_X64), False),
# insn/api # insn/api
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True), ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True), ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True),

View File

@@ -23,8 +23,6 @@ from capa.features.common import (
ARCH_AMD64, ARCH_AMD64,
FORMAT_ELF, FORMAT_ELF,
OS_WINDOWS, OS_WINDOWS,
BITNESS_X32,
BITNESS_X64,
Arch, Arch,
Format, Format,
String, String,
@@ -531,39 +529,6 @@ def test_invalid_number():
) )
def test_number_bitness():
r = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- number/x32: 2
"""
)
)
assert r.evaluate({Number(2, bitness=BITNESS_X32): {1}}) == True
assert r.evaluate({Number(2): {1}}) == False
assert r.evaluate({Number(2, bitness=BITNESS_X64): {1}}) == False
def test_number_bitness_symbol():
r = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- number/x32: 2 = some constant
"""
)
)
assert r.evaluate({Number(2, bitness=BITNESS_X32, description="some constant"): {1}}) == True
def test_offset_symbol(): def test_offset_symbol():
rule = textwrap.dedent( rule = textwrap.dedent(
""" """
@@ -609,39 +574,6 @@ def test_count_offset_symbol():
assert r.evaluate({Offset(0x100, description="symbol name"): {1, 2, 3}}) == True assert r.evaluate({Offset(0x100, description="symbol name"): {1, 2, 3}}) == True
def test_offset_bitness():
r = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- offset/x32: 2
"""
)
)
assert r.evaluate({Offset(2, bitness=BITNESS_X32): {1}}) == True
assert r.evaluate({Offset(2): {1}}) == False
assert r.evaluate({Offset(2, bitness=BITNESS_X64): {1}}) == False
def test_offset_bitness_symbol():
r = capa.rules.Rule.from_yaml(
textwrap.dedent(
"""
rule:
meta:
name: test rule
features:
- offset/x32: 2 = some constant
"""
)
)
assert r.evaluate({Offset(2, bitness=BITNESS_X32, description="some constant"): {1}}) == True
def test_invalid_offset(): def test_invalid_offset():
with pytest.raises(capa.rules.InvalidRule): with pytest.raises(capa.rules.InvalidRule):
r = capa.rules.Rule.from_yaml( r = capa.rules.Rule.from_yaml(