diff --git a/capa/features/common.py b/capa/features/common.py index 7f25e8d4..b86f86e4 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -98,33 +98,23 @@ class Result: class Feature(abc.ABC): - def __init__(self, value: Union[str, int, bytes], bitness=None, description=None): + def __init__(self, value: Union[str, int, bytes], description=None): """ Args: value (any): the value of the feature, such as the number or string. - bitness (str): one of the VALID_BITNESS values, or None. - When None, then the feature applies to any bitness. - Modifies the feature name from `feature` to `feature/bitness`, like `offset/x32`. description (str): a human-readable description that explains the feature value. """ super(Feature, self).__init__() - - if bitness is not None: - if bitness not in VALID_BITNESS: - raise ValueError("bitness '%s' must be one of %s" % (bitness, VALID_BITNESS)) - self.name = self.__class__.__name__.lower() + "/" + bitness - else: - self.name = self.__class__.__name__.lower() + self.name = self.__class__.__name__.lower() self.value = value - self.bitness = bitness self.description = description def __hash__(self): - return hash((self.name, self.value, self.bitness)) + return hash((self.name, self.value)) def __eq__(self, other): - return self.name == other.name and self.value == other.value and self.bitness == other.bitness + return self.name == other.name and self.value == other.value def get_value_str(self) -> str: """ @@ -153,10 +143,7 @@ class Feature(abc.ABC): return Result(self in ctx, self, [], locations=ctx.get(self, [])) def freeze_serialize(self): - if self.bitness is not None: - return (self.__class__.__name__, [self.value, {"bitness": self.bitness}]) - else: - return (self.__class__.__name__, [self.value]) + return (self.__class__.__name__, [self.value]) @classmethod def freeze_deserialize(cls, args): @@ -400,13 +387,6 @@ class Bytes(Feature): return cls(*[codecs.decode(x, "hex") for x in args]) -# identifiers for supported 
bitness names that tweak a feature -# for example, offset/x32 -BITNESS_X32 = "x32" -BITNESS_X64 = "x64" -VALID_BITNESS = (BITNESS_X32, BITNESS_X64) - - # other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types ARCH_I386 = "i386" ARCH_AMD64 = "amd64" diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 306bb954..15942c78 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -13,39 +13,13 @@ import idautils import capa.features.extractors.helpers import capa.features.extractors.ida.helpers from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset -from capa.features.common import ( - BITNESS_X32, - BITNESS_X64, - MAX_BYTES_FEATURE_SIZE, - THUNK_CHAIN_DEPTH_DELTA, - Bytes, - String, - Characteristic, -) +from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic # security cookie checks may perform non-zeroing XORs, these are expected within a certain # byte range within the first and returning basic blocks, this helps to reduce FP features SECURITY_COOKIE_BYTES_DELTA = 0x40 -def get_bitness(ctx): - """ - fetch the BITNESS_* constant for the currently open workspace. 
- - via Tamir Bahar/@tmr232 - https://reverseengineering.stackexchange.com/a/11398/17194 - """ - if "bitness" not in ctx: - info = idaapi.get_inf_structure() - if info.is_64bit(): - ctx["bitness"] = BITNESS_X64 - elif info.is_32bit(): - ctx["bitness"] = BITNESS_X32 - else: - raise ValueError("unexpected bitness") - return ctx["bitness"] - - def get_imports(ctx): if "imports_cache" not in ctx: ctx["imports_cache"] = capa.features.extractors.ida.helpers.get_file_imports() @@ -159,7 +133,6 @@ def extract_insn_number_features(f, bb, insn): const = op.addr yield Number(const), insn.ea - yield Number(const, bitness=get_bitness(f.ctx)), insn.ea yield OperandNumber(i, const), insn.ea @@ -234,7 +207,6 @@ def extract_insn_offset_features(f, bb, insn): op_off = capa.features.extractors.helpers.twos_complement(op_off, 32) yield Offset(op_off), insn.ea - yield Offset(op_off, bitness=get_bitness(f.ctx)), insn.ea yield OperandOffset(i, op_off), insn.ea diff --git a/capa/features/extractors/smda/insn.py b/capa/features/extractors/smda/insn.py index e4b92132..309389e8 100644 --- a/capa/features/extractors/smda/insn.py +++ b/capa/features/extractors/smda/insn.py @@ -6,15 +6,7 @@ from smda.common.SmdaReport import SmdaReport import capa.features.extractors.helpers from capa.features.insn import API, Number, Offset, Mnemonic -from capa.features.common import ( - BITNESS_X32, - BITNESS_X64, - MAX_BYTES_FEATURE_SIZE, - THUNK_CHAIN_DEPTH_DELTA, - Bytes, - String, - Characteristic, -) +from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic # security cookie checks may perform non-zeroing XORs, these are expected within a certain # byte range within the first and returning basic blocks, this helps to reduce FP features @@ -23,16 +15,6 @@ PATTERN_HEXNUM = re.compile(r"[+\-] (?P<num>0x[a-fA-F0-9]+)") PATTERN_SINGLENUM = re.compile(r"[+\-] (?P<num>[0-9])") -def get_bitness(smda_report): - if smda_report.architecture == "intel": - if 
smda_report.bitness == 32: - return BITNESS_X32 - elif smda_report.bitness == 64: - return BITNESS_X64 - else: - raise NotImplementedError - - def extract_insn_api_features(f, bb, insn): """parse API features from the given instruction.""" if insn.offset in f.apirefs: @@ -89,7 +71,6 @@ def extract_insn_number_features(f, bb, insn): value = int(operand, 16) & ((1 << f.smda_report.bitness) - 1) yield Number(value), insn.offset - yield Number(value, bitness=get_bitness(f.smda_report)), insn.offset except: continue @@ -232,7 +213,6 @@ def extract_insn_offset_features(f, bb, insn): number = int(number_int.group("num")) number = -1 * number if number_int.group().startswith("-") else number yield Offset(number), insn.offset - yield Offset(number, bitness=get_bitness(f.smda_report)), insn.offset def is_security_cookie(f, bb, insn): diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 3ae065db..e721606f 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -18,15 +18,7 @@ import envi.archs.amd64.disasm import capa.features.extractors.helpers import capa.features.extractors.viv.helpers from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset -from capa.features.common import ( - BITNESS_X32, - BITNESS_X64, - MAX_BYTES_FEATURE_SIZE, - THUNK_CHAIN_DEPTH_DELTA, - Bytes, - String, - Characteristic, -) +from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call # security cookie checks may perform non-zeroing XORs, these are expected within a certain @@ -34,14 +26,6 @@ from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_i SECURITY_COOKIE_BYTES_DELTA = 0x40 -def get_bitness(vw): - bitness = vw.getMeta("Architecture") - if bitness == "i386": - return BITNESS_X32 - elif bitness == "amd64": - 
return BITNESS_X64 - - def interface_extract_instruction_XXX(f, bb, insn): """ parse features from the given instruction. @@ -553,7 +537,6 @@ def extract_op_number_features(f, bb, insn, i, oper): return yield Number(v), insn.va - yield Number(v, bitness=get_bitness(f.vw)), insn.va yield OperandNumber(i, v), insn.va @@ -582,7 +565,6 @@ def extract_op_offset_features(f, bb, insn, i, oper): v = oper.disp yield Offset(v), insn.va - yield Offset(v, bitness=get_bitness(f.vw)), insn.va yield OperandOffset(i, v), insn.va # like: [esi + ecx + 16384] @@ -594,7 +576,6 @@ def extract_op_offset_features(f, bb, insn, i, oper): v = oper.disp yield Offset(v), insn.va - yield Offset(v, bitness=get_bitness(f.vw)), insn.va yield OperandOffset(i, v), insn.va diff --git a/capa/features/insn.py b/capa/features/insn.py index 957450a3..029e8fbe 100644 --- a/capa/features/insn.py +++ b/capa/features/insn.py @@ -22,16 +22,16 @@ class API(Feature): class Number(Feature): - def __init__(self, value: int, bitness=None, description=None): - super(Number, self).__init__(value, bitness=bitness, description=description) + def __init__(self, value: int, description=None): + super(Number, self).__init__(value, description=description) def get_value_str(self): return capa.render.utils.hex(self.value) class Offset(Feature): - def __init__(self, value: int, bitness=None, description=None): - super(Offset, self).__init__(value, bitness=bitness, description=description) + def __init__(self, value: int, description=None): + super(Offset, self).__init__(value, description=description) def get_value_str(self): return capa.render.utils.hex(self.value) @@ -53,7 +53,7 @@ class _Operand(Feature, abc.ABC): self.index = index def __hash__(self): - return hash((self.name, self.value, self.bitness)) + return hash((self.name, self.value)) def __eq__(self, other): return super().__eq__(other) and self.index == other.index diff --git a/capa/rules.py b/capa/rules.py index d776df4e..f42b56a4 100644 --- a/capa/rules.py 
+++ b/capa/rules.py @@ -257,20 +257,8 @@ def parse_feature(key: str): return capa.features.common.Bytes elif key == "number": return capa.features.insn.Number - elif key.startswith("number/"): - bitness = key.partition("/")[2] - # the other handlers here return constructors for features, - # and we want to as well, - # however, we need to preconfigure one of the arguments (`bitness`). - # so, instead we return a partially-applied function that - # provides `bitness` to the feature constructor. - # it forwards any other arguments provided to the closure along to the constructor. - return functools.partial(capa.features.insn.Number, bitness=bitness) elif key == "offset": return capa.features.insn.Offset - elif key.startswith("offset/"): - bitness = key.partition("/")[2] - return functools.partial(capa.features.insn.Offset, bitness=bitness) elif key == "mnemonic": return capa.features.insn.Mnemonic elif key == "basic blocks": diff --git a/scripts/capa2yara.py b/scripts/capa2yara.py index bdc4521a..a4662f70 100644 --- a/scripts/capa2yara.py +++ b/scripts/capa2yara.py @@ -43,7 +43,7 @@ import capa.rules import capa.engine import capa.features import capa.features.insn -from capa.features.common import BITNESS_X32, BITNESS_X64, String +from capa.features.common import String logger = logging.getLogger("capa2yara") diff --git a/tests/fixtures.py b/tests/fixtures.py index 8630bccf..105bef9b 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -22,19 +22,7 @@ import capa.features.file import capa.features.insn import capa.features.common import capa.features.basicblock -from capa.features.common import ( - OS, - OS_LINUX, - ARCH_I386, - FORMAT_PE, - ARCH_AMD64, - FORMAT_ELF, - OS_WINDOWS, - BITNESS_X32, - BITNESS_X64, - Arch, - Format, -) +from capa.features.common import OS, OS_LINUX, ARCH_I386, FORMAT_PE, ARCH_AMD64, FORMAT_ELF, OS_WINDOWS, Arch, Format CD = os.path.dirname(__file__) @@ -431,10 +419,6 @@ FEATURE_PRESENCE_TESTS = sorted( # insn/number: stack 
adjustments ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xC), False), ("mimikatz", "function=0x40105D", capa.features.insn.Number(0x10), False), - # insn/number: bitness flavors - ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, bitness=BITNESS_X32), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, bitness=BITNESS_X64), False), # insn/number: negative ("mimikatz", "function=0x401553", capa.features.insn.Number(0xFFFFFFFF), True), ("mimikatz", "function=0x43e543", capa.features.insn.Number(0xFFFFFFF0), True), @@ -450,10 +434,6 @@ FEATURE_PRESENCE_TESTS = sorted( # insn/offset: negative ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x1), True), ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x2), True), - # insn/offset: bitness flavors - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, bitness=BITNESS_X32), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, bitness=BITNESS_X64), False), # insn/api ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True), ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True), diff --git a/tests/test_rules.py b/tests/test_rules.py index 90b3b02a..93cc8be6 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -23,8 +23,6 @@ from capa.features.common import ( ARCH_AMD64, FORMAT_ELF, OS_WINDOWS, - BITNESS_X32, - BITNESS_X64, Arch, Format, String, @@ -531,39 +529,6 @@ def test_invalid_number(): ) -def test_number_bitness(): - r = capa.rules.Rule.from_yaml( - textwrap.dedent( - """ - rule: - meta: - name: test rule - features: - - number/x32: 2 - """ - ) - ) - assert r.evaluate({Number(2, bitness=BITNESS_X32): {1}}) == True - - assert r.evaluate({Number(2): {1}}) == False - 
assert r.evaluate({Number(2, bitness=BITNESS_X64): {1}}) == False - - -def test_number_bitness_symbol(): - r = capa.rules.Rule.from_yaml( - textwrap.dedent( - """ - rule: - meta: - name: test rule - features: - - number/x32: 2 = some constant - """ - ) - ) - assert r.evaluate({Number(2, bitness=BITNESS_X32, description="some constant"): {1}}) == True - - def test_offset_symbol(): rule = textwrap.dedent( """ @@ -609,39 +574,6 @@ def test_count_offset_symbol(): assert r.evaluate({Offset(0x100, description="symbol name"): {1, 2, 3}}) == True -def test_offset_bitness(): - r = capa.rules.Rule.from_yaml( - textwrap.dedent( - """ - rule: - meta: - name: test rule - features: - - offset/x32: 2 - """ - ) - ) - assert r.evaluate({Offset(2, bitness=BITNESS_X32): {1}}) == True - - assert r.evaluate({Offset(2): {1}}) == False - assert r.evaluate({Offset(2, bitness=BITNESS_X64): {1}}) == False - - -def test_offset_bitness_symbol(): - r = capa.rules.Rule.from_yaml( - textwrap.dedent( - """ - rule: - meta: - name: test rule - features: - - offset/x32: 2 = some constant - """ - ) - ) - assert r.evaluate({Offset(2, bitness=BITNESS_X32, description="some constant"): {1}}) == True - - def test_invalid_offset(): with pytest.raises(capa.rules.InvalidRule): r = capa.rules.Rule.from_yaml(