From f013815b2acccd16ee3af20b95aec75c2c361455 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Mon, 16 Aug 2021 12:21:25 -0600 Subject: [PATCH] features: rename legacy term `arch` to `bitness` makes space for upcoming feature `arch: ` for things like i386/amd64/aarch64 --- CHANGELOG.md | 2 ++ capa/features/common.py | 36 +++++++++++++-------------- capa/features/extractors/ida/insn.py | 22 ++++++++-------- capa/features/extractors/smda/insn.py | 14 +++++------ capa/features/extractors/viv/insn.py | 22 ++++++++-------- capa/features/insn.py | 8 +++--- capa/rules.py | 8 +++--- tests/fixtures.py | 16 ++++++------ tests/test_engine.py | 8 +++--- tests/test_rules.py | 24 +++++++++--------- 10 files changed, 81 insertions(+), 79 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 20c7e88d..d51b4bee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ ### Breaking Changes +- legacy term `arch` (i.e., "x32") is now called `bitness` @williballenthin + ### New Rules (20) - collection/webcam/capture-webcam-image johnk3r diff --git a/capa/features/common.py b/capa/features/common.py index c2851c8c..444a856f 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -21,11 +21,6 @@ MAX_BYTES_FEATURE_SIZE = 0x100 # thunks may be chained so we specify a delta to control the depth to which these chains are explored THUNK_CHAIN_DEPTH_DELTA = 5 -# identifiers for supported architectures names that tweak a feature -# for example, offset/x32 -ARCH_X32 = "x32" -ARCH_X64 = "x64" -VALID_ARCH = (ARCH_X32, ARCH_X64) OS_WINDOWS = "os/windows" OS_LINUX = "os/linux" @@ -61,33 +56,33 @@ def escape_string(s: str) -> str: class Feature: - def __init__(self, value: Union[str, int, bytes], arch=None, description=None): + def __init__(self, value: Union[str, int, bytes], bitness=None, description=None): """ Args: value (any): the value of the feature, such as the number or string. - arch (str): one of the VALID_ARCH values, or None. 
- When None, then the feature applies to any architecture. - Modifies the feature name from `feature` to `feature/arch`, like `offset/x32`. + bitness (str): one of the VALID_BITNESS values, or None. + When None, then the feature applies to any bitness. + Modifies the feature name from `feature` to `feature/bitness`, like `offset/x32`. description (str): a human-readable description that explains the feature value. """ super(Feature, self).__init__() - if arch is not None: - if arch not in VALID_ARCH: - raise ValueError("arch '%s' must be one of %s" % (arch, VALID_ARCH)) - self.name = self.__class__.__name__.lower() + "/" + arch + if bitness is not None: + if bitness not in VALID_BITNESS: + raise ValueError("bitness '%s' must be one of %s" % (bitness, VALID_BITNESS)) + self.name = self.__class__.__name__.lower() + "/" + bitness else: self.name = self.__class__.__name__.lower() self.value = value - self.arch = arch + self.bitness = bitness self.description = description def __hash__(self): - return hash((self.name, self.value, self.arch)) + return hash((self.name, self.value, self.bitness)) def __eq__(self, other): - return self.name == other.name and self.value == other.value and self.arch == other.arch + return self.name == other.name and self.value == other.value and self.bitness == other.bitness def get_value_str(self) -> str: """ @@ -114,8 +109,8 @@ class Feature: return capa.engine.Result(self in ctx, self, [], locations=ctx.get(self, [])) def freeze_serialize(self): - if self.arch is not None: - return (self.__class__.__name__, [self.value, {"arch": self.arch}]) + if self.bitness is not None: + return (self.__class__.__name__, [self.value, {"bitness": self.bitness}]) else: return (self.__class__.__name__, [self.value]) @@ -280,6 +275,11 @@ class Bytes(Feature): return cls(*[codecs.decode(x, "hex") for x in args]) +# identifiers for supported bitness names that tweak a feature +# for example, offset/x32 +BITNESS_X32 = "x32" +BITNESS_X64 = "x64" +VALID_BITNESS = 
(BITNESS_X32, BITNESS_X64) def is_global_feature(feature): """ is this a feature that is extracted at every scope? diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 82fb4715..8bfcd7fb 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -14,8 +14,8 @@ import capa.features.extractors.helpers import capa.features.extractors.ida.helpers from capa.features.insn import API, Number, Offset, Mnemonic from capa.features.common import ( - ARCH_X32, - ARCH_X64, + BITNESS_X32, + BITNESS_X64, MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, @@ -28,22 +28,22 @@ from capa.features.common import ( SECURITY_COOKIE_BYTES_DELTA = 0x40 -def get_arch(ctx): +def get_bitness(ctx): """ - fetch the ARCH_* constant for the currently open workspace. + fetch the BITNESS_* constant for the currently open workspace. via Tamir Bahar/@tmr232 https://reverseengineering.stackexchange.com/a/11398/17194 """ - if "arch" not in ctx: + if "bitness" not in ctx: info = idaapi.get_inf_structure() if info.is_64bit(): - ctx["arch"] = ARCH_X64 + ctx["bitness"] = BITNESS_X64 elif info.is_32bit(): - ctx["arch"] = ARCH_X32 + ctx["bitness"] = BITNESS_X32 else: - raise ValueError("unexpected architecture") - return ctx["arch"] + raise ValueError("unexpected bitness") + return ctx["bitness"] def get_imports(ctx): @@ -149,7 +149,7 @@ def extract_insn_number_features(f, bb, insn): const = op.addr yield Number(const), insn.ea - yield Number(const, arch=get_arch(f.ctx)), insn.ea + yield Number(const, bitness=get_bitness(f.ctx)), insn.ea def extract_insn_bytes_features(f, bb, insn): @@ -218,7 +218,7 @@ def extract_insn_offset_features(f, bb, insn): op_off = capa.features.extractors.helpers.twos_complement(op_off, 32) yield Offset(op_off), insn.ea - yield Offset(op_off, arch=get_arch(f.ctx)), insn.ea + yield Offset(op_off, bitness=get_bitness(f.ctx)), insn.ea def contains_stack_cookie_keywords(s): diff --git 
a/capa/features/extractors/smda/insn.py b/capa/features/extractors/smda/insn.py index da5ebbfa..d666728c 100644 --- a/capa/features/extractors/smda/insn.py +++ b/capa/features/extractors/smda/insn.py @@ -7,8 +7,8 @@ from smda.common.SmdaReport import SmdaReport import capa.features.extractors.helpers from capa.features.insn import API, Number, Offset, Mnemonic from capa.features.common import ( - ARCH_X32, - ARCH_X64, + BITNESS_X32, + BITNESS_X64, MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, @@ -23,12 +23,12 @@ PATTERN_HEXNUM = re.compile(r"[+\-] (?P0x[a-fA-F0-9]+)") PATTERN_SINGLENUM = re.compile(r"[+\-] (?P[0-9])") -def get_arch(smda_report): +def get_bitness(smda_report): if smda_report.architecture == "intel": if smda_report.bitness == 32: - return ARCH_X32 + return BITNESS_X32 elif smda_report.bitness == 64: - return ARCH_X64 + return BITNESS_X64 else: raise NotImplementedError @@ -85,7 +85,7 @@ def extract_insn_number_features(f, bb, insn): for operand in operands: try: yield Number(int(operand, 16)), insn.offset - yield Number(int(operand, 16), arch=get_arch(f.smda_report)), insn.offset + yield Number(int(operand, 16), bitness=get_bitness(f.smda_report)), insn.offset except: continue @@ -228,7 +228,7 @@ def extract_insn_offset_features(f, bb, insn): number = int(number_int.group("num")) number = -1 * number if number_int.group().startswith("-") else number yield Offset(number), insn.offset - yield Offset(number, arch=get_arch(f.smda_report)), insn.offset + yield Offset(number, bitness=get_bitness(f.smda_report)), insn.offset def is_security_cookie(f, bb, insn): diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index ffe9e3c9..5157b75b 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -19,8 +19,8 @@ import capa.features.extractors.helpers import capa.features.extractors.viv.helpers from capa.features.insn import API, Number, Offset, Mnemonic from 
capa.features.common import ( - ARCH_X32, - ARCH_X64, + BITNESS_X32, + BITNESS_X64, MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, @@ -34,12 +34,12 @@ from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_i SECURITY_COOKIE_BYTES_DELTA = 0x40 -def get_arch(vw): - arch = vw.getMeta("Architecture") - if arch == "i386": - return ARCH_X32 - elif arch == "amd64": - return ARCH_X64 +def get_bitness(vw): + bitness = vw.getMeta("Architecture") + if bitness == "i386": + return BITNESS_X32 + elif bitness == "amd64": + return BITNESS_X64 def interface_extract_instruction_XXX(f, bb, insn): @@ -193,7 +193,7 @@ def extract_insn_number_features(f, bb, insn): return yield Number(v), insn.va - yield Number(v, arch=get_arch(f.vw)), insn.va + yield Number(v, bitness=get_bitness(f.vw)), insn.va def derefs(vw, p): @@ -389,7 +389,7 @@ def extract_insn_offset_features(f, bb, insn): v = oper.disp yield Offset(v), insn.va - yield Offset(v, arch=get_arch(f.vw)), insn.va + yield Offset(v, bitness=get_bitness(f.vw)), insn.va # like: [esi + ecx + 16384] # reg ^ ^ @@ -400,7 +400,7 @@ def extract_insn_offset_features(f, bb, insn): v = oper.disp yield Offset(v), insn.va - yield Offset(v, arch=get_arch(f.vw)), insn.va + yield Offset(v, bitness=get_bitness(f.vw)), insn.va def is_security_cookie(f, bb, insn) -> bool: diff --git a/capa/features/insn.py b/capa/features/insn.py index 6c7e07ff..c5bc727e 100644 --- a/capa/features/insn.py +++ b/capa/features/insn.py @@ -21,16 +21,16 @@ class API(Feature): class Number(Feature): - def __init__(self, value: int, arch=None, description=None): - super(Number, self).__init__(value, arch=arch, description=description) + def __init__(self, value: int, bitness=None, description=None): + super(Number, self).__init__(value, bitness=bitness, description=description) def get_value_str(self): return capa.render.utils.hex(self.value) class Offset(Feature): - def __init__(self, value: int, arch=None, description=None): - super(Offset, 
self).__init__(value, arch=arch, description=description) + def __init__(self, value: int, bitness=None, description=None): + super(Offset, self).__init__(value, bitness=bitness, description=description) def get_value_str(self): return capa.render.utils.hex(self.value) diff --git a/capa/rules.py b/capa/rules.py index 8e6267bf..c40f992f 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -240,19 +240,19 @@ def parse_feature(key: str): elif key == "number": return capa.features.insn.Number elif key.startswith("number/"): - arch = key.partition("/")[2] + bitness = key.partition("/")[2] # the other handlers here return constructors for features, # and we want to as well, # however, we need to preconfigure one of the arguments (`arch`). # so, instead we return a partially-applied function that # provides `arch` to the feature constructor. # it forwards any other arguments provided to the closure along to the constructor. - return functools.partial(capa.features.insn.Number, arch=arch) + return functools.partial(capa.features.insn.Number, bitness=bitness) elif key == "offset": return capa.features.insn.Offset elif key.startswith("offset/"): - arch = key.partition("/")[2] - return functools.partial(capa.features.insn.Offset, arch=arch) + bitness = key.partition("/")[2] + return functools.partial(capa.features.insn.Offset, bitness=bitness) elif key == "mnemonic": return capa.features.insn.Mnemonic elif key == "basic blocks": diff --git a/tests/fixtures.py b/tests/fixtures.py index f6e59e25..16d1e0e5 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -22,8 +22,8 @@ import capa.features.insn import capa.features.common import capa.features.basicblock from capa.features.common import ( - ARCH_X32, - ARCH_X64, + BITNESS_X32, + BITNESS_X64, CHARACTERISTIC_PE, CHARACTERISTIC_ELF, CHARACTERISTIC_LINUX, @@ -390,10 +390,10 @@ FEATURE_PRESENCE_TESTS = sorted( # insn/number: stack adjustments ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xC), False), ("mimikatz", 
"function=0x40105D", capa.features.insn.Number(0x10), False), - # insn/number: arch flavors + # insn/number: bitness flavors ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X32), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X64), False), + ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, bitness=BITNESS_X32), True), + ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, bitness=BITNESS_X64), False), # insn/offset ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True), ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x4), True), @@ -406,10 +406,10 @@ FEATURE_PRESENCE_TESTS = sorted( # insn/offset: negative ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x1), True), ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x2), True), - # insn/offset: arch flavors + # insn/offset: bitness flavors ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X32), True), - ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X64), False), + ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, bitness=BITNESS_X32), True), + ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, bitness=BITNESS_X64), False), # insn/api ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True), ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True), diff --git a/tests/test_engine.py b/tests/test_engine.py index 642ddb49..57fffb8e 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -474,11 +474,11 @@ def test_match_namespace(): def test_render_number(): assert str(capa.features.insn.Number(1)) == "number(0x1)" - assert str(capa.features.insn.Number(1, 
arch=capa.features.common.ARCH_X32)) == "number/x32(0x1)" - assert str(capa.features.insn.Number(1, arch=capa.features.common.ARCH_X64)) == "number/x64(0x1)" + assert str(capa.features.insn.Number(1, bitness=capa.features.common.BITNESS_X32)) == "number/x32(0x1)" + assert str(capa.features.insn.Number(1, bitness=capa.features.common.BITNESS_X64)) == "number/x64(0x1)" def test_render_offset(): assert str(capa.features.insn.Offset(1)) == "offset(0x1)" - assert str(capa.features.insn.Offset(1, arch=capa.features.common.ARCH_X32)) == "offset/x32(0x1)" - assert str(capa.features.insn.Offset(1, arch=capa.features.common.ARCH_X64)) == "offset/x64(0x1)" + assert str(capa.features.insn.Offset(1, bitness=capa.features.common.BITNESS_X32)) == "offset/x32(0x1)" + assert str(capa.features.insn.Offset(1, bitness=capa.features.common.BITNESS_X64)) == "offset/x64(0x1)" diff --git a/tests/test_rules.py b/tests/test_rules.py index 95a39fb1..8e360c22 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -16,8 +16,8 @@ import capa.features.common from capa.features.file import FunctionName from capa.features.insn import Number, Offset from capa.features.common import ( - ARCH_X32, - ARCH_X64, + BITNESS_X32, + BITNESS_X64, FORMAT_PE, OS_WINDOWS, CHARACTERISTIC_PE, @@ -526,7 +526,7 @@ def test_invalid_number(): ) -def test_number_arch(): +def test_number_bitness(): r = capa.rules.Rule.from_yaml( textwrap.dedent( """ @@ -538,13 +538,13 @@ def test_number_arch(): """ ) ) - assert r.evaluate({Number(2, arch=ARCH_X32): {1}}) == True + assert r.evaluate({Number(2, bitness=BITNESS_X32): {1}}) == True assert r.evaluate({Number(2): {1}}) == False - assert r.evaluate({Number(2, arch=ARCH_X64): {1}}) == False + assert r.evaluate({Number(2, bitness=BITNESS_X64): {1}}) == False -def test_number_arch_symbol(): +def test_number_bitness_symbol(): r = capa.rules.Rule.from_yaml( textwrap.dedent( """ @@ -556,7 +556,7 @@ def test_number_arch_symbol(): """ ) ) - assert r.evaluate({Number(2, 
arch=ARCH_X32, description="some constant"): {1}}) == True + assert r.evaluate({Number(2, bitness=BITNESS_X32, description="some constant"): {1}}) == True def test_offset_symbol(): @@ -604,7 +604,7 @@ def test_count_offset_symbol(): assert r.evaluate({Offset(0x100, description="symbol name"): {1, 2, 3}}) == True -def test_offset_arch(): +def test_offset_bitness(): r = capa.rules.Rule.from_yaml( textwrap.dedent( """ @@ -616,13 +616,13 @@ def test_offset_arch(): """ ) ) - assert r.evaluate({Offset(2, arch=ARCH_X32): {1}}) == True + assert r.evaluate({Offset(2, bitness=BITNESS_X32): {1}}) == True assert r.evaluate({Offset(2): {1}}) == False - assert r.evaluate({Offset(2, arch=ARCH_X64): {1}}) == False + assert r.evaluate({Offset(2, bitness=BITNESS_X64): {1}}) == False -def test_offset_arch_symbol(): +def test_offset_bitness_symbol(): r = capa.rules.Rule.from_yaml( textwrap.dedent( """ @@ -634,7 +634,7 @@ def test_offset_arch_symbol(): """ ) ) - assert r.evaluate({Offset(2, arch=ARCH_X32, description="some constant"): {1}}) == True + assert r.evaluate({Offset(2, bitness=BITNESS_X32, description="some constant"): {1}}) == True def test_invalid_offset():