From 4d915020a8e52f8ec1984cfcaaa1918c48887a8a Mon Sep 17 00:00:00 2001
From: Baptistin Boilot
Date: Sun, 27 Feb 2022 15:57:43 +0100
Subject: [PATCH] extractor: add characteristic(call $+5) feature extraction for vivisect and smda

---
 CHANGELOG.md                          |  1 +
 capa/features/extractors/smda/insn.py | 15 +++++++++++++++
 capa/features/extractors/viv/insn.py  | 19 +++++++++++++++++++
 tests/fixtures.py                     | 10 ++++++++++
 4 files changed, 45 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 160b9cc0..b7215c8d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@
 ### New Features
 
 - linter: validate ATT&CK/MBC categories and IDs #103 @kn0wl3dge
+- extractor: add characteristic "call $+5" feature #366 @kn0wl3dge
 
 ### Breaking Changes
 
diff --git a/capa/features/extractors/smda/insn.py b/capa/features/extractors/smda/insn.py
index a42db3ce..c15de2a4 100644
--- a/capa/features/extractors/smda/insn.py
+++ b/capa/features/extractors/smda/insn.py
@@ -280,6 +280,20 @@ def extract_insn_mnemonic_features(f, bb, insn):
     yield Mnemonic(insn.mnemonic), insn.offset
 
 
+def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, insn):
+    """
+    yield Characteristic("call $+5") for a call whose hex operand equals insn.offset + 5.
+    """
+    if insn.mnemonic != "call":
+        return
+
+    if not insn.operands.startswith("0x"):  # int(..., 16) below needs a hex literal
+        return
+
+    if int(insn.operands, 16) == insn.offset + 5:  # target is 5 bytes past this insn's offset
+        yield Characteristic("call $+5"), insn.offset
+
+
 def extract_insn_peb_access_characteristic_features(f, bb, insn):
     """
     parse peb access from the given function.
fs:[0x30] on x86, gs:[0x60] on x64
@@ -389,6 +403,7 @@ INSTRUCTION_HANDLERS = (
     extract_insn_offset_features,
     extract_insn_nzxor_characteristic_features,
     extract_insn_mnemonic_features,
+    extract_insn_obfs_call_plus_5_characteristic_features,
     extract_insn_peb_access_characteristic_features,
     extract_insn_cross_section_cflow,
     extract_insn_segment_access_features,
diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py
index 0580767c..a18ff986 100644
--- a/capa/features/extractors/viv/insn.py
+++ b/capa/features/extractors/viv/insn.py
@@ -453,6 +453,24 @@ def extract_insn_mnemonic_features(f, bb, insn):
     yield Mnemonic(insn.mnem), insn.va
 
 
+def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, insn):
+    """
+    yield Characteristic("call $+5") for a call whose first operand resolves to insn.va + 5.
+    """
+    if insn.mnem != "call":
+        return
+
+    if isinstance(insn.opers[0], envi.archs.i386.disasm.i386PcRelOper):  # compared via getOperValue()
+        if insn.va + 5 == insn.opers[0].getOperValue(insn):
+            yield Characteristic("call $+5"), insn.va
+
+    if isinstance(insn.opers[0], envi.archs.i386.disasm.i386ImmMemOper) or isinstance(
+        insn.opers[0], envi.archs.amd64.disasm.Amd64RipRelOper
+    ):  # compared via getOperAddr()
+        if insn.va + 5 == insn.opers[0].getOperAddr(insn):
+            yield Characteristic("call $+5"), insn.va
+
+
 def extract_insn_peb_access_characteristic_features(f, bb, insn):
     """
     parse peb access from the given function.
fs:[0x30] on x86, gs:[0x60] on x64
@@ -626,6 +644,7 @@ INSTRUCTION_HANDLERS = (
     extract_insn_offset_features,
     extract_insn_nzxor_characteristic_features,
     extract_insn_mnemonic_features,
+    extract_insn_obfs_call_plus_5_characteristic_features,
     extract_insn_peb_access_characteristic_features,
     extract_insn_cross_section_cflow,
     extract_insn_segment_access_features,
diff --git a/tests/fixtures.py b/tests/fixtures.py
index 5ec434cf..fc35a16b 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -220,6 +220,10 @@ def get_data_path_by_name(name):
         return os.path.join(CD, "data", "3b13b6f1d7cd14dc4a097a12e2e505c0a4cff495262261e2bfc991df238b9b04.dll_")
     elif name == "7351f.elf":
         return os.path.join(CD, "data", "7351f8a40c5450557b24622417fc478d.elf_")
+    elif name.startswith("79abd"):
+        return os.path.join(CD, "data", "79abd17391adc6251ecdc58d13d76baf.dll_")
+    elif name.startswith("946a9"):
+        return os.path.join(CD, "data", "946a99f36a46d335dec080d9a4371940.dll_")
     else:
         raise ValueError("unexpected sample fixture: %s" % name)
 
@@ -269,6 +273,10 @@ def get_sample_md5_by_name(name):
         return "56a6ffe6a02941028cc8235204eef31d"
     elif name == "7351f.elf":
         return "7351f8a40c5450557b24622417fc478d"
+    elif name.startswith("79abd"):
+        return "79abd17391adc6251ecdc58d13d76baf"
+    elif name.startswith("946a9"):
+        return "946a99f36a46d335dec080d9a4371940"
     else:
         raise ValueError("unexpected sample fixture: %s" % name)
 
@@ -561,6 +569,8 @@ FEATURE_PRESENCE_TESTS = sorted(
         ("7351f.elf", "file", Arch(ARCH_AMD64), True),
         ("7351f.elf", "function=0x408753", capa.features.common.String("/dev/null"), True),
         ("7351f.elf", "function=0x408753,bb=0x408781", capa.features.insn.API("open"), True),
+        ("79abd...", "function=0x10002385,bb=0x10002385", capa.features.common.Characteristic("call $+5"), True),
+        ("946a9...", "function=0x10001510,bb=0x100015c0", capa.features.common.Characteristic("call $+5"), True),
     ],
     # order tests by (file, item)
     # so that our LRU cache is most effective.