features: rename legacy term arch to bitness

This makes room for the upcoming `arch: ` feature, which will describe architectures such as i386/amd64/aarch64.
This commit is contained in:
William Ballenthin
2021-08-16 12:21:25 -06:00
parent d5c9a5cf3c
commit f013815b2a
10 changed files with 81 additions and 79 deletions

View File

@@ -13,6 +13,8 @@
### Breaking Changes ### Breaking Changes
- legacy term `arch` (e.g., "x32") is now called `bitness` @williballenthin
### New Rules (20) ### New Rules (20)
- collection/webcam/capture-webcam-image johnk3r - collection/webcam/capture-webcam-image johnk3r

View File

@@ -21,11 +21,6 @@ MAX_BYTES_FEATURE_SIZE = 0x100
# thunks may be chained so we specify a delta to control the depth to which these chains are explored # thunks may be chained so we specify a delta to control the depth to which these chains are explored
THUNK_CHAIN_DEPTH_DELTA = 5 THUNK_CHAIN_DEPTH_DELTA = 5
# identifiers for supported architectures names that tweak a feature
# for example, offset/x32
ARCH_X32 = "x32"
ARCH_X64 = "x64"
VALID_ARCH = (ARCH_X32, ARCH_X64)
OS_WINDOWS = "os/windows" OS_WINDOWS = "os/windows"
OS_LINUX = "os/linux" OS_LINUX = "os/linux"
@@ -61,33 +56,33 @@ def escape_string(s: str) -> str:
class Feature: class Feature:
def __init__(self, value: Union[str, int, bytes], arch=None, description=None): def __init__(self, value: Union[str, int, bytes], bitness=None, description=None):
""" """
Args: Args:
value (any): the value of the feature, such as the number or string. value (any): the value of the feature, such as the number or string.
arch (str): one of the VALID_ARCH values, or None. bitness (str): one of the VALID_BITNESS values, or None.
When None, then the feature applies to any architecture. When None, then the feature applies to any bitness.
Modifies the feature name from `feature` to `feature/arch`, like `offset/x32`. Modifies the feature name from `feature` to `feature/bitness`, like `offset/x32`.
description (str): a human-readable description that explains the feature value. description (str): a human-readable description that explains the feature value.
""" """
super(Feature, self).__init__() super(Feature, self).__init__()
if arch is not None: if bitness is not None:
if arch not in VALID_ARCH: if bitness not in VALID_BITNESS:
raise ValueError("arch '%s' must be one of %s" % (arch, VALID_ARCH)) raise ValueError("bitness '%s' must be one of %s" % (bitness, VALID_BITNESS))
self.name = self.__class__.__name__.lower() + "/" + arch self.name = self.__class__.__name__.lower() + "/" + bitness
else: else:
self.name = self.__class__.__name__.lower() self.name = self.__class__.__name__.lower()
self.value = value self.value = value
self.arch = arch self.bitness = bitness
self.description = description self.description = description
def __hash__(self): def __hash__(self):
return hash((self.name, self.value, self.arch)) return hash((self.name, self.value, self.bitness))
def __eq__(self, other): def __eq__(self, other):
return self.name == other.name and self.value == other.value and self.arch == other.arch return self.name == other.name and self.value == other.value and self.bitness == other.bitness
def get_value_str(self) -> str: def get_value_str(self) -> str:
""" """
@@ -114,8 +109,8 @@ class Feature:
return capa.engine.Result(self in ctx, self, [], locations=ctx.get(self, [])) return capa.engine.Result(self in ctx, self, [], locations=ctx.get(self, []))
def freeze_serialize(self): def freeze_serialize(self):
if self.arch is not None: if self.bitness is not None:
return (self.__class__.__name__, [self.value, {"arch": self.arch}]) return (self.__class__.__name__, [self.value, {"bitness": self.bitness}])
else: else:
return (self.__class__.__name__, [self.value]) return (self.__class__.__name__, [self.value])
@@ -280,6 +275,11 @@ class Bytes(Feature):
return cls(*[codecs.decode(x, "hex") for x in args]) return cls(*[codecs.decode(x, "hex") for x in args])
# identifiers for supported bitness names that tweak a feature
# for example, offset/x32
BITNESS_X32 = "x32"
BITNESS_X64 = "x64"
VALID_BITNESS = (BITNESS_X32, BITNESS_X64)
def is_global_feature(feature): def is_global_feature(feature):
""" """
is this a feature that is extracted at every scope? is this a feature that is extracted at every scope?

View File

@@ -14,8 +14,8 @@ import capa.features.extractors.helpers
import capa.features.extractors.ida.helpers import capa.features.extractors.ida.helpers
from capa.features.insn import API, Number, Offset, Mnemonic from capa.features.insn import API, Number, Offset, Mnemonic
from capa.features.common import ( from capa.features.common import (
ARCH_X32, BITNESS_X32,
ARCH_X64, BITNESS_X64,
MAX_BYTES_FEATURE_SIZE, MAX_BYTES_FEATURE_SIZE,
THUNK_CHAIN_DEPTH_DELTA, THUNK_CHAIN_DEPTH_DELTA,
Bytes, Bytes,
@@ -28,22 +28,22 @@ from capa.features.common import (
SECURITY_COOKIE_BYTES_DELTA = 0x40 SECURITY_COOKIE_BYTES_DELTA = 0x40
def get_arch(ctx): def get_bitness(ctx):
""" """
fetch the ARCH_* constant for the currently open workspace. fetch the BITNESS_* constant for the currently open workspace.
via Tamir Bahar/@tmr232 via Tamir Bahar/@tmr232
https://reverseengineering.stackexchange.com/a/11398/17194 https://reverseengineering.stackexchange.com/a/11398/17194
""" """
if "arch" not in ctx: if "bitness" not in ctx:
info = idaapi.get_inf_structure() info = idaapi.get_inf_structure()
if info.is_64bit(): if info.is_64bit():
ctx["arch"] = ARCH_X64 ctx["bitness"] = BITNESS_X64
elif info.is_32bit(): elif info.is_32bit():
ctx["arch"] = ARCH_X32 ctx["bitness"] = BITNESS_X32
else: else:
raise ValueError("unexpected architecture") raise ValueError("unexpected bitness")
return ctx["arch"] return ctx["bitness"]
def get_imports(ctx): def get_imports(ctx):
@@ -149,7 +149,7 @@ def extract_insn_number_features(f, bb, insn):
const = op.addr const = op.addr
yield Number(const), insn.ea yield Number(const), insn.ea
yield Number(const, arch=get_arch(f.ctx)), insn.ea yield Number(const, bitness=get_bitness(f.ctx)), insn.ea
def extract_insn_bytes_features(f, bb, insn): def extract_insn_bytes_features(f, bb, insn):
@@ -218,7 +218,7 @@ def extract_insn_offset_features(f, bb, insn):
op_off = capa.features.extractors.helpers.twos_complement(op_off, 32) op_off = capa.features.extractors.helpers.twos_complement(op_off, 32)
yield Offset(op_off), insn.ea yield Offset(op_off), insn.ea
yield Offset(op_off, arch=get_arch(f.ctx)), insn.ea yield Offset(op_off, bitness=get_bitness(f.ctx)), insn.ea
def contains_stack_cookie_keywords(s): def contains_stack_cookie_keywords(s):

View File

@@ -7,8 +7,8 @@ from smda.common.SmdaReport import SmdaReport
import capa.features.extractors.helpers import capa.features.extractors.helpers
from capa.features.insn import API, Number, Offset, Mnemonic from capa.features.insn import API, Number, Offset, Mnemonic
from capa.features.common import ( from capa.features.common import (
ARCH_X32, BITNESS_X32,
ARCH_X64, BITNESS_X64,
MAX_BYTES_FEATURE_SIZE, MAX_BYTES_FEATURE_SIZE,
THUNK_CHAIN_DEPTH_DELTA, THUNK_CHAIN_DEPTH_DELTA,
Bytes, Bytes,
@@ -23,12 +23,12 @@ PATTERN_HEXNUM = re.compile(r"[+\-] (?P<num>0x[a-fA-F0-9]+)")
PATTERN_SINGLENUM = re.compile(r"[+\-] (?P<num>[0-9])") PATTERN_SINGLENUM = re.compile(r"[+\-] (?P<num>[0-9])")
def get_arch(smda_report): def get_bitness(smda_report):
if smda_report.architecture == "intel": if smda_report.architecture == "intel":
if smda_report.bitness == 32: if smda_report.bitness == 32:
return ARCH_X32 return BITNESS_X32
elif smda_report.bitness == 64: elif smda_report.bitness == 64:
return ARCH_X64 return BITNESS_X64
else: else:
raise NotImplementedError raise NotImplementedError
@@ -85,7 +85,7 @@ def extract_insn_number_features(f, bb, insn):
for operand in operands: for operand in operands:
try: try:
yield Number(int(operand, 16)), insn.offset yield Number(int(operand, 16)), insn.offset
yield Number(int(operand, 16), arch=get_arch(f.smda_report)), insn.offset yield Number(int(operand, 16), bitness=get_bitness(f.smda_report)), insn.offset
except: except:
continue continue
@@ -228,7 +228,7 @@ def extract_insn_offset_features(f, bb, insn):
number = int(number_int.group("num")) number = int(number_int.group("num"))
number = -1 * number if number_int.group().startswith("-") else number number = -1 * number if number_int.group().startswith("-") else number
yield Offset(number), insn.offset yield Offset(number), insn.offset
yield Offset(number, arch=get_arch(f.smda_report)), insn.offset yield Offset(number, bitness=get_bitness(f.smda_report)), insn.offset
def is_security_cookie(f, bb, insn): def is_security_cookie(f, bb, insn):

View File

@@ -19,8 +19,8 @@ import capa.features.extractors.helpers
import capa.features.extractors.viv.helpers import capa.features.extractors.viv.helpers
from capa.features.insn import API, Number, Offset, Mnemonic from capa.features.insn import API, Number, Offset, Mnemonic
from capa.features.common import ( from capa.features.common import (
ARCH_X32, BITNESS_X32,
ARCH_X64, BITNESS_X64,
MAX_BYTES_FEATURE_SIZE, MAX_BYTES_FEATURE_SIZE,
THUNK_CHAIN_DEPTH_DELTA, THUNK_CHAIN_DEPTH_DELTA,
Bytes, Bytes,
@@ -34,12 +34,12 @@ from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_i
SECURITY_COOKIE_BYTES_DELTA = 0x40 SECURITY_COOKIE_BYTES_DELTA = 0x40
def get_arch(vw): def get_bitness(vw):
arch = vw.getMeta("Architecture") bitness = vw.getMeta("Architecture")
if arch == "i386": if bitness == "i386":
return ARCH_X32 return BITNESS_X32
elif arch == "amd64": elif bitness == "amd64":
return ARCH_X64 return BITNESS_X64
def interface_extract_instruction_XXX(f, bb, insn): def interface_extract_instruction_XXX(f, bb, insn):
@@ -193,7 +193,7 @@ def extract_insn_number_features(f, bb, insn):
return return
yield Number(v), insn.va yield Number(v), insn.va
yield Number(v, arch=get_arch(f.vw)), insn.va yield Number(v, bitness=get_bitness(f.vw)), insn.va
def derefs(vw, p): def derefs(vw, p):
@@ -389,7 +389,7 @@ def extract_insn_offset_features(f, bb, insn):
v = oper.disp v = oper.disp
yield Offset(v), insn.va yield Offset(v), insn.va
yield Offset(v, arch=get_arch(f.vw)), insn.va yield Offset(v, bitness=get_bitness(f.vw)), insn.va
# like: [esi + ecx + 16384] # like: [esi + ecx + 16384]
# reg ^ ^ # reg ^ ^
@@ -400,7 +400,7 @@ def extract_insn_offset_features(f, bb, insn):
v = oper.disp v = oper.disp
yield Offset(v), insn.va yield Offset(v), insn.va
yield Offset(v, arch=get_arch(f.vw)), insn.va yield Offset(v, bitness=get_bitness(f.vw)), insn.va
def is_security_cookie(f, bb, insn) -> bool: def is_security_cookie(f, bb, insn) -> bool:

View File

@@ -21,16 +21,16 @@ class API(Feature):
class Number(Feature): class Number(Feature):
def __init__(self, value: int, arch=None, description=None): def __init__(self, value: int, bitness=None, description=None):
super(Number, self).__init__(value, arch=arch, description=description) super(Number, self).__init__(value, bitness=bitness, description=description)
def get_value_str(self): def get_value_str(self):
return capa.render.utils.hex(self.value) return capa.render.utils.hex(self.value)
class Offset(Feature): class Offset(Feature):
def __init__(self, value: int, arch=None, description=None): def __init__(self, value: int, bitness=None, description=None):
super(Offset, self).__init__(value, arch=arch, description=description) super(Offset, self).__init__(value, bitness=bitness, description=description)
def get_value_str(self): def get_value_str(self):
return capa.render.utils.hex(self.value) return capa.render.utils.hex(self.value)

View File

@@ -240,19 +240,19 @@ def parse_feature(key: str):
elif key == "number": elif key == "number":
return capa.features.insn.Number return capa.features.insn.Number
elif key.startswith("number/"): elif key.startswith("number/"):
arch = key.partition("/")[2] bitness = key.partition("/")[2]
# the other handlers here return constructors for features, # the other handlers here return constructors for features,
# and we want to as well, # and we want to as well,
# however, we need to preconfigure one of the arguments (`arch`). # however, we need to preconfigure one of the arguments (`arch`).
# so, instead we return a partially-applied function that # so, instead we return a partially-applied function that
# provides `arch` to the feature constructor. # provides `arch` to the feature constructor.
# it forwards any other arguments provided to the closure along to the constructor. # it forwards any other arguments provided to the closure along to the constructor.
return functools.partial(capa.features.insn.Number, arch=arch) return functools.partial(capa.features.insn.Number, arch=bitness)
elif key == "offset": elif key == "offset":
return capa.features.insn.Offset return capa.features.insn.Offset
elif key.startswith("offset/"): elif key.startswith("offset/"):
arch = key.partition("/")[2] bitness = key.partition("/")[2]
return functools.partial(capa.features.insn.Offset, arch=arch) return functools.partial(capa.features.insn.Offset, arch=bitness)
elif key == "mnemonic": elif key == "mnemonic":
return capa.features.insn.Mnemonic return capa.features.insn.Mnemonic
elif key == "basic blocks": elif key == "basic blocks":

View File

@@ -22,8 +22,8 @@ import capa.features.insn
import capa.features.common import capa.features.common
import capa.features.basicblock import capa.features.basicblock
from capa.features.common import ( from capa.features.common import (
ARCH_X32, BITNESS_X32,
ARCH_X64, BITNESS_X64,
CHARACTERISTIC_PE, CHARACTERISTIC_PE,
CHARACTERISTIC_ELF, CHARACTERISTIC_ELF,
CHARACTERISTIC_LINUX, CHARACTERISTIC_LINUX,
@@ -390,10 +390,10 @@ FEATURE_PRESENCE_TESTS = sorted(
# insn/number: stack adjustments # insn/number: stack adjustments
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xC), False), ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xC), False),
("mimikatz", "function=0x40105D", capa.features.insn.Number(0x10), False), ("mimikatz", "function=0x40105D", capa.features.insn.Number(0x10), False),
# insn/number: arch flavors # insn/number: bitness flavors
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True), ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True),
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X32), True), ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, bitness=BITNESS_X32), True),
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X64), False), ("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, bitness=BITNESS_X64), False),
# insn/offset # insn/offset
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True), ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True),
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x4), True), ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x4), True),
@@ -406,10 +406,10 @@ FEATURE_PRESENCE_TESTS = sorted(
# insn/offset: negative # insn/offset: negative
("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x1), True), ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x1), True),
("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x2), True), ("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x2), True),
# insn/offset: arch flavors # insn/offset: bitness flavors
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True), ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True),
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X32), True), ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, bitness=BITNESS_X32), True),
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X64), False), ("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, bitness=BITNESS_X64), False),
# insn/api # insn/api
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True), ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True),
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True), ("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True),

View File

@@ -474,11 +474,11 @@ def test_match_namespace():
def test_render_number(): def test_render_number():
assert str(capa.features.insn.Number(1)) == "number(0x1)" assert str(capa.features.insn.Number(1)) == "number(0x1)"
assert str(capa.features.insn.Number(1, arch=capa.features.common.ARCH_X32)) == "number/x32(0x1)" assert str(capa.features.insn.Number(1, bitness=capa.features.common.BITNESS_X32)) == "number/x32(0x1)"
assert str(capa.features.insn.Number(1, arch=capa.features.common.ARCH_X64)) == "number/x64(0x1)" assert str(capa.features.insn.Number(1, bitness=capa.features.common.BITNESS_X64)) == "number/x64(0x1)"
def test_render_offset(): def test_render_offset():
assert str(capa.features.insn.Offset(1)) == "offset(0x1)" assert str(capa.features.insn.Offset(1)) == "offset(0x1)"
assert str(capa.features.insn.Offset(1, arch=capa.features.common.ARCH_X32)) == "offset/x32(0x1)" assert str(capa.features.insn.Offset(1, bitness=capa.features.common.BITNESS_X32)) == "offset/x32(0x1)"
assert str(capa.features.insn.Offset(1, arch=capa.features.common.ARCH_X64)) == "offset/x64(0x1)" assert str(capa.features.insn.Offset(1, bitness=capa.features.common.BITNESS_X64)) == "offset/x64(0x1)"

View File

@@ -16,8 +16,8 @@ import capa.features.common
from capa.features.file import FunctionName from capa.features.file import FunctionName
from capa.features.insn import Number, Offset from capa.features.insn import Number, Offset
from capa.features.common import ( from capa.features.common import (
ARCH_X32, BITNESS_X32,
ARCH_X64, BITNESS_X64,
FORMAT_PE, FORMAT_PE,
OS_WINDOWS, OS_WINDOWS,
CHARACTERISTIC_PE, CHARACTERISTIC_PE,
@@ -526,7 +526,7 @@ def test_invalid_number():
) )
def test_number_arch(): def test_number_bitness():
r = capa.rules.Rule.from_yaml( r = capa.rules.Rule.from_yaml(
textwrap.dedent( textwrap.dedent(
""" """
@@ -538,13 +538,13 @@ def test_number_arch():
""" """
) )
) )
assert r.evaluate({Number(2, arch=ARCH_X32): {1}}) == True assert r.evaluate({Number(2, bitness=BITNESS_X32): {1}}) == True
assert r.evaluate({Number(2): {1}}) == False assert r.evaluate({Number(2): {1}}) == False
assert r.evaluate({Number(2, arch=ARCH_X64): {1}}) == False assert r.evaluate({Number(2, bitness=BITNESS_X64): {1}}) == False
def test_number_arch_symbol(): def test_number_bitness_symbol():
r = capa.rules.Rule.from_yaml( r = capa.rules.Rule.from_yaml(
textwrap.dedent( textwrap.dedent(
""" """
@@ -556,7 +556,7 @@ def test_number_arch_symbol():
""" """
) )
) )
assert r.evaluate({Number(2, arch=ARCH_X32, description="some constant"): {1}}) == True assert r.evaluate({Number(2, bitness=BITNESS_X32, description="some constant"): {1}}) == True
def test_offset_symbol(): def test_offset_symbol():
@@ -604,7 +604,7 @@ def test_count_offset_symbol():
assert r.evaluate({Offset(0x100, description="symbol name"): {1, 2, 3}}) == True assert r.evaluate({Offset(0x100, description="symbol name"): {1, 2, 3}}) == True
def test_offset_arch(): def test_offset_bitness():
r = capa.rules.Rule.from_yaml( r = capa.rules.Rule.from_yaml(
textwrap.dedent( textwrap.dedent(
""" """
@@ -616,13 +616,13 @@ def test_offset_arch():
""" """
) )
) )
assert r.evaluate({Offset(2, arch=ARCH_X32): {1}}) == True assert r.evaluate({Offset(2, bitness=BITNESS_X32): {1}}) == True
assert r.evaluate({Offset(2): {1}}) == False assert r.evaluate({Offset(2): {1}}) == False
assert r.evaluate({Offset(2, arch=ARCH_X64): {1}}) == False assert r.evaluate({Offset(2, bitness=BITNESS_X64): {1}}) == False
def test_offset_arch_symbol(): def test_offset_bitness_symbol():
r = capa.rules.Rule.from_yaml( r = capa.rules.Rule.from_yaml(
textwrap.dedent( textwrap.dedent(
""" """
@@ -634,7 +634,7 @@ def test_offset_arch_symbol():
""" """
) )
) )
assert r.evaluate({Offset(2, arch=ARCH_X32, description="some constant"): {1}}) == True assert r.evaluate({Offset(2, bitness=BITNESS_X32, description="some constant"): {1}}) == True
def test_invalid_offset(): def test_invalid_offset():