From c71cb55051e71c2453f58c4d93ac262908e1e511 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 14 Apr 2023 04:07:05 +0100 Subject: [PATCH 01/95] insn extractor: Add static api extraction using .symtab --- capa/features/extractors/viv/insn.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index d324f31e..2033bc81 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -120,6 +120,27 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato yield API(name[1:]), ih.address return + if imports == {}: + # no imports implies the binary was likely statically linked. + # therefore, we try to use the symbol's table to fetch the api names + name = f.vw.name_by_va.get(target) + if not name: + return + + name = name.split('.')[-1] + prefixes = [ + "__GI_", + "__libc_", + ] + + for prefix in prefixes: + if name.startswith(prefix): + yield API(name[len(prefix):]), ih.address + return + + yield API(name), ih.address + return + for _ in range(THUNK_CHAIN_DEPTH_DELTA): if target in imports: dll, symbol = imports[target] From 21f2cb6e6f518db8793e07c1bd9a3dbc2aa05602 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 14 Apr 2023 04:25:24 +0100 Subject: [PATCH 02/95] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b07df4c2..6641f1b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ # Change Log ## master (unreleased) - +- add support for api extraction from statically linked libraries. ### New Features ### Breaking Changes From 44254bfffe40b52a23fd5a44e47c274a243632f1 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer <16624109+yelhamer@users.noreply.github.com> Date: Mon, 17 Apr 2023 09:51:39 +0100 Subject: [PATCH 03/95] Update CHANGELOG.md Co-authored-by: Willi Ballenthin --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6641f1b2..4c9e9fc0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ # Change Log ## master (unreleased) -- add support for api extraction from statically linked libraries. +- extract function and API names from ELF symtab entries @yelhamer https://github.com/mandiant/capa-rules/issues/736 ### New Features ### Breaking Changes From 97c8fd052561eef1922461330d7b26ec20f479de Mon Sep 17 00:00:00 2001 From: Yacine Elhamer <16624109+yelhamer@users.noreply.github.com> Date: Fri, 21 Apr 2023 19:36:20 +0100 Subject: [PATCH 04/95] Update CHANGELOG.md Co-authored-by: Moritz --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c9e9fc0..99c1eb4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## master (unreleased) - extract function and API names from ELF symtab entries @yelhamer https://github.com/mandiant/capa-rules/issues/736 + ### New Features ### Breaking Changes From e7ccea44e7e462968e34d4daeadec4b4a89bb61b Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Sat, 22 Apr 2023 01:33:00 +0100 Subject: [PATCH 05/95] Shdr: add a constructor for vivisect's shdr representation --- capa/features/extractors/elf.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index e0dc596c..7c2f2d7a 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -90,6 +90,24 @@ class Shdr: link: int entsize: int buf: bytes + + @classmethod + def from_viv(cls, section, buf: bytes): + """ + construct a Shdr object from vivisect's representation of + section headers (Elf.Elf32Section or Elf.Elf64Section) + """ + return cls( + int(section.vsGetField('sh_name')), + int(section.vsGetField('sh_type')), + int(section.vsGetField('sh_flags')), + int(section.vsGetField('sh_addr')), + int(section.vsGetField('sh_offset')), + int(section.vsGetField('sh_size')), + int(section.vsGetField('sh_link')), + int(section.vsGetField('sh_entsize')), + buf, + ) class ELF: From b766d957b08a77c58487354644454f21b34b2909 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Sat, 22 Apr 2023 01:36:57 +0100 Subject: [PATCH 06/95] insn.py: rewire symbol parsing to use SymTab instead of vivisect --- capa/features/extractors/viv/insn.py | 42 +++++++++++++++++----------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 2033bc81..0476d1be 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -22,6 +22,7 @@ import capa.features.extractors.viv.helpers from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress +from capa.features.extractors.elf import Shdr, SymTab from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call @@ -120,26 +121,33 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato yield API(name[1:]), ih.address return - if imports == {}: - # no imports implies the binary was likely statically linked. - # therefore, we try to use the symbol's table to fetch the api names - name = f.vw.name_by_va.get(target) - if not name: - return + if f.vw.metadata['Format'] == 'elf': + if not hasattr(extract_insn_api_features, 'symtab'): + # the symbol table gets stored as a function's attribute in order to avoid running + # this code everytime the call is made, thus preventing the computational overhead. + elf = f.vw.parsedbin + endian = '<' if elf.getEndian() == 0 else '>' + bitness = elf.bits - name = name.split('.')[-1] - prefixes = [ - "__GI_", - "__libc_", - ] + SHT_SYMTAB = 0x2 + for section in elf.sections: + if section.vsGetField('sh_info') & SHT_SYMTAB != 0: + strtab = elf.sections[section.vsGetField('sh_link')] + sh_symtab = Shdr.from_viv(section, elf.getSectionBytes(section.name)) + sh_strtab = Shdr.from_viv(strtab, elf.getSectionBytes(strtab.name)) - for prefix in prefixes: - if name.startswith(prefix): - yield API(name[len(prefix):]), ih.address - return + symtab = SymTab(endian, bitness, sh_symtab, sh_strtab) + extract_insn_api_features.symtab = symtab - yield API(name), ih.address - return + symtab = extract_insn_api_features.symtab + for symbol in symtab.get_symbols(): + sym_name = symtab.get_name(symbol) + sym_value = symbol.value + sym_info = symbol.info + + STT_FUNC = 0x2 + if sym_value == target and sym_info & STT_FUNC != 0: + yield API(sym_name), ih.address for _ in range(THUNK_CHAIN_DEPTH_DELTA): if target in imports: From b32a8ca510b0eb263f09ca092e50218485218767 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Sun, 23 Apr 2023 01:20:25 +0100 Subject: [PATCH 07/95] insn.py: Get the symtab api extractor to yield FunctionName features as well --- capa/features/extractors/viv/insn.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 0476d1be..4391037d 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -19,6 +19,7 @@ import envi.archs.amd64.disasm import capa.features.extractors.helpers import capa.features.extractors.viv.helpers +from capa.features.file import FunctionName from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress @@ -148,6 +149,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato STT_FUNC = 0x2 if sym_value == target and sym_info & STT_FUNC != 0: yield API(sym_name), ih.address + yield FunctionName(sym_name), ih.address for _ in range(THUNK_CHAIN_DEPTH_DELTA): if target in imports: From ee881ab82fe09163ec51bdcfb215b3eed6be994c Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Sun, 23 Apr 2023 02:31:11 +0100 Subject: [PATCH 08/95] code style: Fix the format of the committed code --- capa/features/extractors/elf.py | 20 ++++++++++---------- capa/features/extractors/viv/insn.py | 12 ++++++------ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 7c2f2d7a..0207543b 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -90,7 +90,7 @@ class Shdr: link: int entsize: int buf: bytes - + @classmethod def from_viv(cls, section, buf: bytes): """ @@ -98,16 +98,16 @@ class Shdr: section headers (Elf.Elf32Section or Elf.Elf64Section) """ return cls( - int(section.vsGetField('sh_name')), - int(section.vsGetField('sh_type')), - int(section.vsGetField('sh_flags')), - int(section.vsGetField('sh_addr')), - int(section.vsGetField('sh_offset')), - int(section.vsGetField('sh_size')), - int(section.vsGetField('sh_link')), - int(section.vsGetField('sh_entsize')), + int(section.vsGetField("sh_name")), + int(section.vsGetField("sh_type")), + int(section.vsGetField("sh_flags")), + int(section.vsGetField("sh_addr")), + int(section.vsGetField("sh_offset")), + int(section.vsGetField("sh_size")), + int(section.vsGetField("sh_link")), + int(section.vsGetField("sh_entsize")), buf, - ) + ) class ELF: diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 4391037d..9e8ee5fa 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -122,18 +122,18 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato yield API(name[1:]), ih.address return - if f.vw.metadata['Format'] == 'elf': - if not hasattr(extract_insn_api_features, 'symtab'): + if f.vw.metadata["Format"] == "elf": + if not hasattr(extract_insn_api_features, "symtab"): # the symbol table gets stored as a function's attribute in order to avoid running # this code everytime the call is made, thus preventing the computational overhead. elf = f.vw.parsedbin - endian = '<' if elf.getEndian() == 0 else '>' + endian = "<" if elf.getEndian() == 0 else ">" bitness = elf.bits SHT_SYMTAB = 0x2 for section in elf.sections: - if section.vsGetField('sh_info') & SHT_SYMTAB != 0: - strtab = elf.sections[section.vsGetField('sh_link')] + if section.vsGetField("sh_info") & SHT_SYMTAB != 0: + strtab = elf.sections[section.vsGetField("sh_link")] sh_symtab = Shdr.from_viv(section, elf.getSectionBytes(section.name)) sh_strtab = Shdr.from_viv(strtab, elf.getSectionBytes(strtab.name)) @@ -145,7 +145,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato sym_name = symtab.get_name(symbol) sym_value = symbol.value sym_info = symbol.info - + STT_FUNC = 0x2 if sym_value == target and sym_info & STT_FUNC != 0: yield API(sym_name), ih.address From 695508aa4c26247980ccb3280280bfa01529082e Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Tue, 25 Apr 2023 08:42:53 +0100 Subject: [PATCH 09/95] insn.py: Update extract_insn_api_features() to optimize by means of viv rather than function attributes --- capa/features/extractors/viv/insn.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 9e8ee5fa..730472e2 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -123,7 +123,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato return if f.vw.metadata["Format"] == "elf": - if not hasattr(extract_insn_api_features, "symtab"): + if "SymbolTable" not in f.vw.metadata: # the symbol table gets stored as a function's attribute in order to avoid running # this code everytime the call is made, thus preventing the computational overhead. elf = f.vw.parsedbin @@ -138,9 +138,9 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato sh_strtab = Shdr.from_viv(strtab, elf.getSectionBytes(strtab.name)) symtab = SymTab(endian, bitness, sh_symtab, sh_strtab) - extract_insn_api_features.symtab = symtab + f.vw.metadata["SymbolTable"] = symtab - symtab = extract_insn_api_features.symtab + symtab = f.vw.metadata["SymbolTable"] for symbol in symtab.get_symbols(): sym_name = symtab.get_name(symbol) sym_value = symbol.value From c7b65cfe8a208d4472295885a675016d0c3a927a Mon Sep 17 00:00:00 2001 From: Yacine Elhamer <16624109+yelhamer@users.noreply.github.com> Date: Tue, 25 Apr 2023 17:23:32 +0100 Subject: [PATCH 10/95] Shdr constructor: Use direct member access to get vstruct's section header information Co-authored-by: Willi Ballenthin --- capa/features/extractors/elf.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 0207543b..24c58030 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -98,14 +98,14 @@ class Shdr: section headers (Elf.Elf32Section or Elf.Elf64Section) """ return cls( - int(section.vsGetField("sh_name")), - int(section.vsGetField("sh_type")), - int(section.vsGetField("sh_flags")), - int(section.vsGetField("sh_addr")), - int(section.vsGetField("sh_offset")), - int(section.vsGetField("sh_size")), - int(section.vsGetField("sh_link")), - int(section.vsGetField("sh_entsize")), + section.sh_name, + section.sh_type, + section.sh_flags, + section.sh_addr, + section.sh_offset, + section.sh_size, + section.sh_link, + section.sh_entsize, buf, ) From 64ef2c8a65bddb1c625f730f90f6e54f7e5b86a9 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 1 Jun 2023 01:50:06 +0100 Subject: [PATCH 11/95] add tests for vivisect's usage of debug symbols --- tests/fixtures.py | 8 ++++++++ tests/test_viv_features.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 04c9c53b..b339c994 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -761,6 +761,14 @@ FEATURE_PRESENCE_TESTS = sorted( key=lambda t: (t[0], t[1]), ) +# this list should be merged into the one above (FEATURE_PRESENSE_TESTS) +# once the debug symbol functionality has been added to all backends +FEATURE_SYMTAB_FUNC_TESTS = [ + ("2bf18d", "function=0x4027b3,bb=0x402861,insn=0x40286d", capa.features.insn.API("__GI_connect"), True), + ("2bf18d", "function=0x4027b3,bb=0x402861,insn=0x40286d", capa.features.insn.API("connect"), True), + ("2bf18d", "function=0x4027b3,bb=0x402861,insn=0x40286d", capa.features.insn.API("__libc_connect"), True), +] + FEATURE_PRESENCE_TESTS_DOTNET = sorted( [ ("b9f5b", "file", Arch(ARCH_I386), True), diff --git a/tests/test_viv_features.py b/tests/test_viv_features.py index fcf49c84..58ce5ace 100644 --- a/tests/test_viv_features.py +++ b/tests/test_viv_features.py @@ -11,7 +11,7 @@ from fixtures import * @fixtures.parametrize( "sample,scope,feature,expected", - fixtures.FEATURE_PRESENCE_TESTS, + fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS, indirect=["sample", "scope"], ) def test_viv_features(sample, scope, feature, expected): From f10a43abe648eb261f39b7af635e5086c6dff09a Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 1 Jun 2023 02:02:40 +0100 Subject: [PATCH 12/95] fix style issues --- tests/fixtures.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index b339c994..5f02116c 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -764,9 +764,24 @@ FEATURE_PRESENCE_TESTS = sorted( # this list should be merged into the one above (FEATURE_PRESENSE_TESTS) # once the debug symbol functionality has been added to all backends FEATURE_SYMTAB_FUNC_TESTS = [ - ("2bf18d", "function=0x4027b3,bb=0x402861,insn=0x40286d", capa.features.insn.API("__GI_connect"), True), - ("2bf18d", "function=0x4027b3,bb=0x402861,insn=0x40286d", capa.features.insn.API("connect"), True), - ("2bf18d", "function=0x4027b3,bb=0x402861,insn=0x40286d", capa.features.insn.API("__libc_connect"), True), + ( + "2bf18d", + "function=0x4027b3,bb=0x402861,insn=0x40286d", + capa.features.insn.API("__GI_connect"), + True, + ), + ( + "2bf18d", + "function=0x4027b3,bb=0x402861,insn=0x40286d", + capa.features.insn.API("connect"), + True, + ), + ( + "2bf18d", + "function=0x4027b3,bb=0x402861,insn=0x40286d", + capa.features.insn.API("__libc_connect"), + True, + ), ] FEATURE_PRESENCE_TESTS_DOTNET = sorted( From 0d42ac39120c5056d70aa0f82292c06af5536e29 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 1 Jun 2023 02:14:25 +0100 Subject: [PATCH 13/95] add missing function-name feature testing --- tests/fixtures.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/fixtures.py b/tests/fixtures.py index 5f02116c..5f79b451 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -770,18 +770,36 @@ FEATURE_SYMTAB_FUNC_TESTS = [ capa.features.insn.API("__GI_connect"), True, ), + ( + "2bf18d", + "function=0x4027b3,bb=0x402861,insn=0x40286d", + capa.features.file.FunctionName("__GI_connect"), + True, + ), ( "2bf18d", "function=0x4027b3,bb=0x402861,insn=0x40286d", capa.features.insn.API("connect"), True, ), + ( + "2bf18d", + "function=0x4027b3,bb=0x402861,insn=0x40286d", + capa.features.file.FunctionName("connect"), + True, + ), ( "2bf18d", "function=0x4027b3,bb=0x402861,insn=0x40286d", capa.features.insn.API("__libc_connect"), True, ), + ( + "2bf18d", + "function=0x4027b3,bb=0x402861,insn=0x40286d", + capa.features.file.FunctionName("__libc_connect"), + True, + ), ] FEATURE_PRESENCE_TESTS_DOTNET = sorted( From 57386812f92c7275d33026333a6f0f58b9f25bce Mon Sep 17 00:00:00 2001 From: Yacine Elhamer <16624109+yelhamer@users.noreply.github.com> Date: Thu, 1 Jun 2023 10:26:21 +0100 Subject: [PATCH 14/95] use ELF class member instead of vsGetField() Co-authored-by: Willi Ballenthin --- capa/features/extractors/viv/insn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 730472e2..cf5263ce 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -132,7 +132,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato SHT_SYMTAB = 0x2 for section in elf.sections: - if section.vsGetField("sh_info") & SHT_SYMTAB != 0: + if section.sh_info & SHT_SYMTAB: strtab = elf.sections[section.vsGetField("sh_link")] sh_symtab = Shdr.from_viv(section, elf.getSectionBytes(section.name)) sh_strtab = Shdr.from_viv(strtab, elf.getSectionBytes(strtab.name)) From ffb1cb31288c76d7796f991e997f62f3eea66146 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer <16624109+yelhamer@users.noreply.github.com> Date: Thu, 1 Jun 2023 10:26:40 +0100 Subject: [PATCH 15/95] rename strtab to strtab_section Co-authored-by: Willi Ballenthin --- capa/features/extractors/viv/insn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index cf5263ce..d5492741 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -133,7 +133,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato SHT_SYMTAB = 0x2 for section in elf.sections: if section.sh_info & SHT_SYMTAB: - strtab = elf.sections[section.vsGetField("sh_link")] + strtab_section = elf.sections[section.vsGetField("sh_link")] sh_symtab = Shdr.from_viv(section, elf.getSectionBytes(section.name)) sh_strtab = Shdr.from_viv(strtab, elf.getSectionBytes(strtab.name)) From ab089c024de9cc7004bf3f51ea525c7241bb438f Mon Sep 17 00:00:00 2001 From: Yacine Elhamer <16624109+yelhamer@users.noreply.github.com> Date: Thu, 1 Jun 2023 11:46:39 +0100 Subject: [PATCH 16/95] fetch section data by offset (not name) Co-authored-by: Willi Ballenthin --- capa/features/extractors/viv/insn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index d5492741..ce2c2f01 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -134,8 +134,8 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato for section in elf.sections: if section.sh_info & SHT_SYMTAB: strtab_section = elf.sections[section.vsGetField("sh_link")] - sh_symtab = Shdr.from_viv(section, elf.getSectionBytes(section.name)) - sh_strtab = Shdr.from_viv(strtab, elf.getSectionBytes(strtab.name)) + sh_symtab = Shdr.from_viv(section, elf.readAtOffset(section.sh_offset, section.sh_size)) + sh_strtab = Shdr.from_viv(strtab, elf.readAtOffset(strtab.sh_offset, strtab.sh_size)) symtab = SymTab(endian, bitness, sh_symtab, sh_strtab) f.vw.metadata["SymbolTable"] = symtab From f9291d4e501bd0b5224cbe2395dcf8c32f880742 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 1 Jun 2023 12:45:10 +0100 Subject: [PATCH 17/95] extract symtab-api names before processing library functions --- capa/features/extractors/viv/insn.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index ce2c2f01..74f0132a 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -111,17 +111,6 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato if not target: return - if viv_utils.flirt.is_library_function(f.vw, target): - name = viv_utils.get_function_name(f.vw, target) - yield API(name), ih.address - if name.startswith("_"): - # some linkers may prefix linked routines with a `_` to avoid name collisions. - # extract features for both the mangled and un-mangled representations. - # e.g. `_fwrite` -> `fwrite` - # see: https://stackoverflow.com/a/2628384/87207 - yield API(name[1:]), ih.address - return - if f.vw.metadata["Format"] == "elf": if "SymbolTable" not in f.vw.metadata: # the symbol table gets stored as a function's attribute in order to avoid running @@ -151,6 +140,17 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato yield API(sym_name), ih.address yield FunctionName(sym_name), ih.address + if viv_utils.flirt.is_library_function(f.vw, target): + name = viv_utils.get_function_name(f.vw, target) + yield API(name), ih.address + if name.startswith("_"): + # some linkers may prefix linked routines with a `_` to avoid name collisions. + # extract features for both the mangled and un-mangled representations. + # e.g. `_fwrite` -> `fwrite` + # see: https://stackoverflow.com/a/2628384/87207 + yield API(name[1:]), ih.address + return + for _ in range(THUNK_CHAIN_DEPTH_DELTA): if target in imports: dll, symbol = imports[target] From 994edf66fee042fb5f99345bb6205f7f31a9d327 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 1 Jun 2023 12:45:49 +0100 Subject: [PATCH 18/95] return the target's address for the function-name feature --- capa/features/extractors/viv/insn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 74f0132a..489e95c7 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -138,7 +138,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato STT_FUNC = 0x2 if sym_value == target and sym_info & STT_FUNC != 0: yield API(sym_name), ih.address - yield FunctionName(sym_name), ih.address + yield FunctionName(sym_name), target if viv_utils.flirt.is_library_function(f.vw, target): name = viv_utils.get_function_name(f.vw, target) From 445214b23bd63799a83132c8243185f57529d8c4 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Wed, 31 May 2023 13:30:55 +0530 Subject: [PATCH 19/95] Update Metadata type in capa main --- CHANGELOG.md | 1 + capa/ida/helpers.py | 60 +++++++++--------- capa/ida/plugin/form.py | 5 +- capa/main.py | 77 ++++++++++++++---------- capa/render/result_document.py | 69 +++++++++------------ scripts/bulk-process.py | 4 +- scripts/capa_as_library.py | 4 +- scripts/show-capabilities-by-function.py | 4 +- tests/test_result_document.py | 2 +- 9 files changed, 117 insertions(+), 109 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18b951c9..54270b1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ - better handle exceptional cases when parsing ELF files [#1458](https://github.com/mandiant/capa/issues/1458) [@Aayush-Goel-04](https://github.com/aayush-goel-04) - Improved testing coverage for Binary Ninja Backend [#1446](https://github.com/mandiant/capa/issues/1446) [@Aayush-Goel-04](https://github.com/aayush-goel-04) - Add logging and print redirect to tqdm for capa main [#749](https://github.com/mandiant/capa/issues/749) [@Aayush-Goel-04](https://github.com/aayush-goel-04) +- Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat - extractor: fix binja installation path detection does not work with Python 3.11 ### capa explorer IDA Pro plugin diff --git a/capa/ida/helpers.py b/capa/ida/helpers.py index fbd502fe..46e8907e 100644 --- a/capa/ida/helpers.py +++ b/capa/ida/helpers.py @@ -140,37 +140,39 @@ def collect_metadata(rules): else: os = "unknown os" - return { - "timestamp": datetime.datetime.now().isoformat(), - "argv": [], - "sample": { - "md5": md5, - "sha1": "", # not easily accessible - "sha256": sha256, - "path": idaapi.get_input_file_path(), - }, - "analysis": { - "format": idaapi.get_file_type_name(), - "arch": arch, - "os": os, - "extractor": "ida", - "rules": rules, - "base_address": idaapi.get_imagebase(), - "layout": { - # this is updated after capabilities have been collected. - # will look like: - # - # "functions": { 0x401000: { "matched_basic_blocks": [ 0x401000, 0x401005, ... ] }, ... } + return capa.render.result_document.Metadata.from_capa( + { + "timestamp": datetime.datetime.now().isoformat(), + "argv": [], + "sample": { + "md5": md5, + "sha1": "", # not easily accessible + "sha256": sha256, + "path": idaapi.get_input_file_path(), }, - # ignore these for now - not used by IDA plugin. - "feature_counts": { - "file": {}, - "functions": {}, + "analysis": { + "format": idaapi.get_file_type_name(), + "arch": arch, + "os": os, + "extractor": "ida", + "rules": rules, + "base_address": idaapi.get_imagebase(), + "layout": { + # this is updated after capabilities have been collected. + # will look like: + # + # "functions": { 0x401000: { "matched_basic_blocks": [ 0x401000, 0x401005, ... ] }, ... } + }, + # ignore these for now - not used by IDA plugin. + "feature_counts": { + "file": {}, + "functions": {}, + }, + "library_functions": {}, }, - "library_functions": {}, - }, - "version": capa.version.__version__, - } + "version": capa.version.__version__, + } + ) class IDAIO: diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index 30f41f9f..cf1434fa 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -771,8 +771,9 @@ class CapaExplorerForm(idaapi.PluginForm): try: meta = capa.ida.helpers.collect_metadata([settings.user[CAPA_SETTINGS_RULE_PATH]]) capabilities, counts = capa.main.find_capabilities(ruleset, extractor, disable_progress=True) - meta["analysis"].update(counts) - meta["analysis"]["layout"] = capa.main.compute_layout(ruleset, extractor, capabilities) + meta.analysis.__dict__.update(counts) + meta.analysis.__dict__.update(capa.main.compute_layout(ruleset, extractor, capabilities)) + meta = capa.render.result_document.Metadata.from_capa(meta.dict()) except UserCancelledError: logger.info("User cancelled analysis.") return False diff --git a/capa/main.py b/capa/main.py index b305673c..722597a5 100644 --- a/capa/main.py +++ b/capa/main.py @@ -41,6 +41,8 @@ import capa.features.common import capa.features.freeze import capa.render.vverbose import capa.features.extractors +import capa.render.result_document +import capa.render.result_document as rdoc import capa.features.extractors.common import capa.features.extractors.pefile import capa.features.extractors.dnfile_ @@ -315,6 +317,11 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro all_file_matches.items(), ) } + meta["feature_counts"] = rdoc.FeatureCounts.from_capa(meta["feature_counts"]) + meta["library_functions"] = tuple( + rdoc.LibraryFunction(address=capa.features.freeze.Address.from_capa(address), name=name) + for address, name in meta["library_functions"].items() + ) return matches, meta @@ -739,7 +746,7 @@ def collect_metadata( os_: str, rules_path: List[str], extractor: capa.features.extractors.base_extractor.FeatureExtractor, -): +) -> rdoc.Metadata: md5 = hashlib.md5() sha1 = hashlib.sha1() sha256 = hashlib.sha256() @@ -758,34 +765,39 @@ def collect_metadata( arch = get_arch(sample_path) os_ = get_os(sample_path) if os_ == OS_AUTO else os_ - return { - "timestamp": datetime.datetime.now().isoformat(), - "version": capa.version.__version__, - "argv": argv, - "sample": { - "md5": md5.hexdigest(), - "sha1": sha1.hexdigest(), - "sha256": sha256.hexdigest(), - "path": os.path.normpath(sample_path), - }, - "analysis": { - "format": format_, - "arch": arch, - "os": os_, - "extractor": extractor.__class__.__name__, - "rules": rules_path, - "base_address": extractor.get_base_address(), - "layout": { - # this is updated after capabilities have been collected. - # will look like: - # - # "functions": { 0x401000: { "matched_basic_blocks": [ 0x401000, 0x401005, ... ] }, ... } + return rdoc.Metadata.from_capa( + { + "timestamp": datetime.datetime.now().isoformat(), + "version": capa.version.__version__, + "argv": argv, + "sample": { + "md5": md5.hexdigest(), + "sha1": sha1.hexdigest(), + "sha256": sha256.hexdigest(), + "path": os.path.normpath(sample_path), }, - }, - } + "analysis": { + "format": format_, + "arch": arch, + "os": os_, + "extractor": extractor.__class__.__name__, + "rules": rules_path, + "base_address": extractor.get_base_address(), + "layout": { + "functions": {}, + # this is updated after capabilities have been collected. + # will look like: + # + # "functions": { 0x401000: { "matched_basic_blocks": [ 0x401000, 0x401005, ... ] }, ... } + }, + "feature_counts": {"file": 0, "functions": {}}, + "library_functions": {}, + }, + } + ) -def compute_layout(rules, extractor, capabilities): +def compute_layout(rules, extractor, capabilities) -> Dict[str, rdoc.Layout]: """ compute a metadata structure that links basic blocks to the functions in which they're found. @@ -822,7 +834,7 @@ def compute_layout(rules, extractor, capabilities): } } - return layout + return {"layout": rdoc.Layout.from_capa(layout)} def install_common_args(parser, wanted=None): @@ -1198,7 +1210,7 @@ def main(argv=None): return E_FILE_LIMITATION # TODO: #1411 use a real type, not a dict here. - meta: Dict[str, Any] + meta: rdoc.Metadata capabilities: MatchResults counts: Dict[str, Any] @@ -1255,15 +1267,15 @@ def main(argv=None): meta = collect_metadata(argv, args.sample, args.format, args.os, args.rules, extractor) capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet) - meta["analysis"].update(counts) - meta["analysis"]["layout"] = compute_layout(rules, extractor, capabilities) + + meta.analysis.__dict__.update(counts) + meta.analysis.__dict__.update(compute_layout(rules, extractor, capabilities)) if has_file_limitation(rules, capabilities): # bail if capa encountered file limitation e.g. a packed binary # do show the output in verbose mode, though. if not (args.verbose or args.vverbose or args.json): return E_FILE_LIMITATION - if args.json: print(capa.render.json.render(meta, rules, capabilities)) elif args.vverbose: @@ -1308,7 +1320,8 @@ def ida_main(): meta = capa.ida.helpers.collect_metadata([rules_path]) capabilities, counts = find_capabilities(rules, capa.features.extractors.ida.extractor.IdaFeatureExtractor()) - meta["analysis"].update(counts) + + meta.analysis.__dict__.update(counts) if has_file_limitation(rules, capabilities, is_standalone=False): capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis") diff --git a/capa/render/result_document.py b/capa/render/result_document.py index cef49d12..9680144d 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -47,6 +47,20 @@ class FunctionLayout(FrozenModel): class Layout(FrozenModel): functions: Tuple[FunctionLayout, ...] + @classmethod + def from_capa(cls, layout: dict) -> "Layout": + return cls( + functions=tuple( + FunctionLayout( + address=frz.Address.from_capa(address), + matched_basic_blocks=tuple( + BasicBlockLayout(address=frz.Address.from_capa(bb)) for bb in f["matched_basic_blocks"] + ), + ) + for address, f in layout["functions"].items() + ) + ) + class LibraryFunction(FrozenModel): address: frz.Address @@ -62,6 +76,16 @@ class FeatureCounts(FrozenModel): file: int functions: Tuple[FunctionFeatureCount, ...] + @classmethod + def from_capa(cls, feature_counts: dict) -> "FeatureCounts": + return cls( + file=feature_counts["file"], + functions=tuple( + FunctionFeatureCount(address=frz.Address.from_capa(address), count=count) + for address, count in feature_counts["functions"].items() + ), + ) + class Analysis(FrozenModel): format: str @@ -83,7 +107,7 @@ class Metadata(FrozenModel): analysis: Analysis @classmethod - def from_capa(cls, meta: Any) -> "Metadata": + def from_capa(cls, meta: dict) -> "Metadata": return cls( timestamp=meta["timestamp"], version=meta["version"], @@ -126,41 +150,6 @@ class Metadata(FrozenModel): ), ) - def to_capa(self) -> Dict[str, Any]: - capa_meta = { - "timestamp": self.timestamp.isoformat(), - "version": self.version, - "sample": { - "md5": self.sample.md5, - "sha1": self.sample.sha1, - "sha256": self.sample.sha256, - "path": self.sample.path, - }, - "analysis": { - "format": self.analysis.format, - "arch": self.analysis.arch, - "os": self.analysis.os, - "extractor": self.analysis.extractor, - "rules": self.analysis.rules, - "base_address": self.analysis.base_address.to_capa(), - "layout": { - "functions": { - f.address.to_capa(): { - "matched_basic_blocks": [bb.address.to_capa() for bb in f.matched_basic_blocks] - } - for f in self.analysis.layout.functions - } - }, - "feature_counts": { - "file": self.analysis.feature_counts.file, - "functions": {fc.address.to_capa(): fc.count for fc in self.analysis.feature_counts.functions}, - }, - "library_functions": {lf.address.to_capa(): lf.name for lf in self.analysis.library_functions}, - }, - } - - return capa_meta - class CompoundStatementType: AND = "and" @@ -659,10 +648,12 @@ class ResultDocument(FrozenModel): ), ) + if isinstance(meta, Metadata): + return ResultDocument(meta=meta, rules=rule_matches) + return ResultDocument(meta=Metadata.from_capa(meta), rules=rule_matches) - def to_capa(self) -> Tuple[Dict, Dict]: - meta = self.meta.to_capa() + def to_capa(self) -> Tuple[Metadata, Dict]: capabilities: Dict[ str, List[Tuple[capa.features.address.Address, capa.features.common.Result]] ] = collections.defaultdict(list) @@ -678,4 +669,4 @@ class ResultDocument(FrozenModel): capabilities[rule_name].append((addr.to_capa(), result)) - return meta, capabilities + return self.meta, capabilities diff --git a/scripts/bulk-process.py b/scripts/bulk-process.py index 51834a9a..6c9114da 100644 --- a/scripts/bulk-process.py +++ b/scripts/bulk-process.py @@ -131,8 +131,8 @@ def get_capa_results(args): meta = capa.main.collect_metadata([], path, format, os_, [], extractor) capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True) - meta["analysis"].update(counts) - meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities) + meta.analysis.__dict__.update(counts) + meta.analysis.__dict__.update(capa.main.compute_layout(rules, extractor, capabilities)) doc = rd.ResultDocument.from_capa(meta, rules, capabilities) diff --git a/scripts/capa_as_library.py b/scripts/capa_as_library.py index f15ca3b4..f97dc2de 100644 --- a/scripts/capa_as_library.py +++ b/scripts/capa_as_library.py @@ -172,8 +172,8 @@ def capa_details(rules_path, file_path, output_format="dictionary"): # collect metadata (used only to make rendering more complete) meta = capa.main.collect_metadata([], file_path, FORMAT_AUTO, OS_AUTO, rules_path, extractor) - meta["analysis"].update(counts) - meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities) + meta.analysis.__dict__.update(counts) + meta.analysis.__dict__.update(capa.main.compute_layout(rules, extractor, capabilities)) capa_output: Any = False if output_format == "dictionary": diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py index 3f37269b..5ed38a46 100644 --- a/scripts/show-capabilities-by-function.py +++ b/scripts/show-capabilities-by-function.py @@ -178,8 +178,8 @@ def main(argv=None): meta = capa.main.collect_metadata(argv, args.sample, format_, args.os, args.rules, extractor) capabilities, counts = capa.main.find_capabilities(rules, extractor) - meta["analysis"].update(counts) - meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities) + meta.analysis.__dict__.update(counts) + meta.analysis.__dict__.update(capa.main.compute_layout(rules, extractor, capabilities)) if capa.main.has_file_limitation(rules, capabilities): # bail if capa encountered file limitation e.g. a packed binary diff --git a/tests/test_result_document.py b/tests/test_result_document.py index bd074c6b..5ae9af26 100644 --- a/tests/test_result_document.py +++ b/tests/test_result_document.py @@ -282,5 +282,5 @@ def test_rdoc_to_capa(): rd = rdoc.ResultDocument.parse_file(path) meta, capabilites = rd.to_capa() - assert isinstance(meta, dict) + assert isinstance(meta, rdoc.Metadata) assert isinstance(capabilites, dict) From 0d9e74028eff26d8e6e2906b9d5d2f391e66ebb2 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Fri, 2 Jun 2023 01:19:42 +0530 Subject: [PATCH 20/95] Update Metadata --- capa/ida/plugin/form.py | 1 - capa/render/result_document.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index cf1434fa..f74c0c76 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -773,7 +773,6 @@ class CapaExplorerForm(idaapi.PluginForm): capabilities, counts = capa.main.find_capabilities(ruleset, extractor, disable_progress=True) meta.analysis.__dict__.update(counts) meta.analysis.__dict__.update(capa.main.compute_layout(ruleset, extractor, capabilities)) - meta = capa.render.result_document.Metadata.from_capa(meta.dict()) except UserCancelledError: logger.info("User cancelled analysis.") return False diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 9680144d..d1656e1f 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -631,7 +631,7 @@ class ResultDocument(FrozenModel): rules: Dict[str, RuleMatches] @classmethod - def from_capa(cls, meta, rules: RuleSet, capabilities: MatchResults) -> "ResultDocument": + def from_capa(cls, meta: Metadata, rules: RuleSet, capabilities: MatchResults) -> "ResultDocument": rule_matches: Dict[str, RuleMatches] = {} for rule_name, matches in capabilities.items(): rule = rules[rule_name] From 8d1e1cc54c9e4a0326609a9832fed706decd926e Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 1 Jun 2023 21:56:34 +0100 Subject: [PATCH 21/95] fix strtab naming --- capa/features/extractors/viv/insn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 489e95c7..9c31394e 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -124,7 +124,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato if section.sh_info & SHT_SYMTAB: strtab_section = elf.sections[section.vsGetField("sh_link")] sh_symtab = Shdr.from_viv(section, elf.readAtOffset(section.sh_offset, section.sh_size)) - sh_strtab = Shdr.from_viv(strtab, elf.readAtOffset(strtab.sh_offset, strtab.sh_size)) + sh_strtab = Shdr.from_viv(strtab_section, elf.readAtOffset(strtab.sh_offset, strtab.sh_size)) symtab = SymTab(endian, bitness, sh_symtab, sh_strtab) f.vw.metadata["SymbolTable"] = symtab From d85d01eea161c1210c2cca8414f1d675f295bc9f Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 1 Jun 2023 22:15:47 +0100 Subject: [PATCH 22/95] use the function-handle's cache instead of the VivWorkspace file metadata --- capa/features/extractors/viv/extractor.py | 3 ++- capa/features/extractors/viv/insn.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/capa/features/extractors/viv/extractor.py b/capa/features/extractors/viv/extractor.py index 4d877ab2..ac936142 100644 --- a/capa/features/extractors/viv/extractor.py +++ b/capa/features/extractors/viv/extractor.py @@ -49,8 +49,9 @@ class VivisectFeatureExtractor(FeatureExtractor): yield from capa.features.extractors.viv.file.extract_features(self.vw, self.buf) def get_functions(self) -> Iterator[FunctionHandle]: + cache = {} for va in sorted(self.vw.getFunctions()): - yield FunctionHandle(address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va)) + yield FunctionHandle(address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va), ctx={"cache": cache}) def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: yield from capa.features.extractors.viv.function.extract_features(fh) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 9c31394e..cf3dfa61 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -112,7 +112,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato return if f.vw.metadata["Format"] == "elf": - if "SymbolTable" not in f.vw.metadata: + if "symtab" not in fh.ctx["cache"]: # the symbol table gets stored as a function's attribute in order to avoid running # this code everytime the call is made, thus preventing the computational overhead. elf = f.vw.parsedbin @@ -127,9 +127,9 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato sh_strtab = Shdr.from_viv(strtab_section, elf.readAtOffset(strtab.sh_offset, strtab.sh_size)) symtab = SymTab(endian, bitness, sh_symtab, sh_strtab) - f.vw.metadata["SymbolTable"] = symtab + fh.ctx["cache"]["symtab"] = symtab - symtab = f.vw.metadata["SymbolTable"] + symtab = fh.ctx["cache"]["symtab"] for symbol in symtab.get_symbols(): sym_name = symtab.get_name(symbol) sym_value = symbol.value From 1cec768521bc59ed988d8a4c4596df571309a26d Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 1 Jun 2023 22:20:23 +0100 Subject: [PATCH 23/95] fix strtab renaming error --- capa/features/extractors/viv/insn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index cf3dfa61..cfb2fcc0 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -124,7 +124,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato if section.sh_info & SHT_SYMTAB: strtab_section = elf.sections[section.vsGetField("sh_link")] sh_symtab = Shdr.from_viv(section, elf.readAtOffset(section.sh_offset, section.sh_size)) - sh_strtab = Shdr.from_viv(strtab_section, elf.readAtOffset(strtab.sh_offset, strtab.sh_size)) + sh_strtab = Shdr.from_viv(strtab_section, elf.readAtOffset(strtab_section.sh_offset, strtab_section.sh_size)) symtab = SymTab(endian, bitness, sh_symtab, sh_strtab) fh.ctx["cache"]["symtab"] = symtab From 236c1c9d17a20947058ce5e3a7ed9e34b4e403b9 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Fri, 2 Jun 2023 10:40:47 +0200 Subject: [PATCH 24/95] tests: refine the IDA test runner ref #1364 --- .github/mypy/mypy.ini | 3 ++ tests/test_ida_features.py | 84 ++++++++++++++++++++++++++++++++------ 2 files changed, 75 insertions(+), 12 deletions(-) diff --git a/.github/mypy/mypy.ini b/.github/mypy/mypy.ini index c80af3de..603f2e42 100644 --- a/.github/mypy/mypy.ini +++ b/.github/mypy/mypy.ini @@ -42,6 +42,9 @@ ignore_missing_imports = True [mypy-idautils.*] ignore_missing_imports = True +[mypy-ida_auto.*] +ignore_missing_imports = True + [mypy-ida_bytes.*] ignore_missing_imports = True diff --git a/tests/test_ida_features.py b/tests/test_ida_features.py index b6917262..99e7d5a1 100644 --- a/tests/test_ida_features.py +++ b/tests/test_ida_features.py @@ -1,5 +1,50 @@ -# run this script from within IDA with ./tests/data/mimikatz.exe open +""" +run this script from within IDA to test the IDA feature extractor. +you must have loaded a file referenced by a test case in order +for this to do anything meaningful. for example, mimikatz.exe from testfiles. + +you can invoke from the command line like this: + + & 'C:\\Program Files\\IDA Pro 8.2\\idat.exe' \ + -S"C:\\Exclusions\\code\\capa\\tests\\test_ida_features.py --CAPA_AUTOEXIT=true" \ + -A \ + -Lidalog \ + 'C:\\Exclusions\\code\\capa\\tests\\data\\mimikatz.exe_' + +if you invoke from the command line, and provide the script argument `--CAPA_AUTOEXIT=true`, +then the script will exit IDA after running the tests. + +the output (in idalog) will look like this: + +``` +Loading processor module C:\\Program Files\\IDA Pro 8.2\\procs\\pc.dll for metapc...Initializing processor module metapc...OK +Loading type libraries... +Autoanalysis subsystem has been initialized. +Database for file 'mimikatz.exe_' has been loaded. +-------------------------------------------------------------------------------- +PASS: test_ida_feature_counts/mimikatz-function=0x40E5C2-basic block-7 +PASS: test_ida_feature_counts/mimikatz-function=0x4702FD-characteristic(calls from)-0 +SKIP: test_ida_features/294b8d...-function=0x404970,bb=0x404970,insn=0x40499F-string(\r\n\x00:ht)-False +SKIP: test_ida_features/64d9f-function=0x10001510,bb=0x100015B0-offset(0x4000)-True +... +SKIP: test_ida_features/pma16-01-function=0x404356,bb=0x4043B9-arch(i386)-True +PASS: test_ida_features/mimikatz-file-import(cabinet.FCIAddFile)-True +DONE +C:\\Exclusions\\code\\capa\\tests\\test_ida_features.py: Traceback (most recent call last): + File "C:\\Program Files\\IDA Pro 8.2\\python\\3\\ida_idaapi.py", line 588, in IDAPython_ExecScript + exec(code, g) + File "C:/Exclusions/code/capa/tests/test_ida_features.py", line 120, in + sys.exit(0) +SystemExit: 0 + -> OK +Flushing buffers, please wait...ok +``` + +Look for lines that start with "FAIL" to identify test failures. +""" +import io import sys +import inspect import logging import os.path import binascii @@ -35,8 +80,6 @@ def check_input_file(wanted): def get_ida_extractor(_path): - check_input_file("5f66b82558ca92e54e77f216ef4c066c") - # have to import this inline so pytest doesn't bail outside of IDA import capa.features.extractors.ida.extractor @@ -45,13 +88,15 @@ def get_ida_extractor(_path): @pytest.mark.skip(reason="IDA Pro tests must be run within IDA") def test_ida_features(): + # we're guaranteed to be in a function here, so there's a stack frame + this_name = inspect.currentframe().f_code.co_name # type: ignore for sample, scope, feature, expected in fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_PRESENCE_TESTS_IDA: id = fixtures.make_test_id((sample, scope, feature, expected)) try: check_input_file(fixtures.get_sample_md5_by_name(sample)) except RuntimeError: - print(f"SKIP {id}") + yield this_name, id, "skip", None continue scope = fixtures.resolve_scope(scope) @@ -60,21 +105,24 @@ def test_ida_features(): try: fixtures.do_test_feature_presence(get_ida_extractor, sample, scope, feature, expected) except Exception as e: - print(f"FAIL {id}") - traceback.print_exc() + f = io.StringIO() + traceback.print_exc(file=f) + yield this_name, id, "fail", f.getvalue() else: - print(f"OK {id}") + yield this_name, id, "pass", None @pytest.mark.skip(reason="IDA Pro tests must be run within IDA") def test_ida_feature_counts(): + # we're guaranteed to be in a function here, so there's a stack frame + this_name = inspect.currentframe().f_code.co_name # type: ignore for sample, scope, feature, expected in fixtures.FEATURE_COUNT_TESTS: id = fixtures.make_test_id((sample, scope, feature, expected)) try: check_input_file(fixtures.get_sample_md5_by_name(sample)) except RuntimeError: - print(f"SKIP {id}") + yield this_name, id, "skip", None continue scope = fixtures.resolve_scope(scope) @@ -83,13 +131,19 @@ def test_ida_feature_counts(): try: fixtures.do_test_feature_count(get_ida_extractor, sample, scope, feature, expected) except Exception as e: - print(f"FAIL {id}") - traceback.print_exc() + f = io.StringIO() + traceback.print_exc(file=f) + yield this_name, id, "fail", f.getvalue() else: - print(f"OK {id}") + yield this_name, id, "pass", None if __name__ == "__main__": + import idc + import ida_auto + + ida_auto.auto_wait() + print("-" * 80) # invoke all functions in this module that start with `test_` @@ -100,6 +154,12 @@ if __name__ == "__main__": test = getattr(sys.modules[__name__], name) logger.debug("invoking test: %s", name) sys.stderr.flush() - test() + for name, id, state, info in test(): + print(f"{state.upper()}: {name}/{id}") + if info: + print(info) print("DONE") + + if "--CAPA_AUTOEXIT=true" in idc.ARGV: + sys.exit(0) From 8b867836e9060133147ca403373b59487a42688e Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Fri, 2 Jun 2023 10:45:05 +0200 Subject: [PATCH 25/95] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fe6ae2b..6d6ebee6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ - Improved testing coverage for Binary Ninja Backend [#1446](https://github.com/mandiant/capa/issues/1446) [@Aayush-Goel-04](https://github.com/aayush-goel-04) - Add logging and print redirect to tqdm for capa main [#749](https://github.com/mandiant/capa/issues/749) [@Aayush-Goel-04](https://github.com/aayush-goel-04) - extractor: fix binja installation path detection does not work with Python 3.11 +- tests: refine the IDA test runner script #1513 @williballenthin ### capa explorer IDA Pro plugin From 5ded85f46e11805bac2ff2316cce7a4ad1674118 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Fri, 2 Jun 2023 14:54:36 +0530 Subject: [PATCH 26/95] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ffa504a0..822d5b03 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Utility script to detect feature overlap between new and existing CAPA rules [#1451](https://github.com/mandiant/capa/issues/1451) [@Aayush-Goel-04](https://github.com/aayush-goel-04) ### Breaking Changes +- Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat ### New Rules (7) @@ -27,7 +28,6 @@ - better handle exceptional cases when parsing ELF files [#1458](https://github.com/mandiant/capa/issues/1458) [@Aayush-Goel-04](https://github.com/aayush-goel-04) - Improved testing coverage for Binary Ninja Backend [#1446](https://github.com/mandiant/capa/issues/1446) [@Aayush-Goel-04](https://github.com/aayush-goel-04) - Add logging and print redirect to tqdm for capa main [#749](https://github.com/mandiant/capa/issues/749) [@Aayush-Goel-04](https://github.com/aayush-goel-04) -- Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat - extractor: fix binja installation path detection does not work with Python 3.11 ### capa explorer IDA Pro plugin From dde76e301df8fe3a01b26504e890bf38f3c334dd Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 2 Jun 2023 12:15:05 +0100 Subject: [PATCH 27/95] add a method to construct SymTab objects from Elf objects --- capa/features/extractors/elf.py | 32 ++++++++++++---------------- capa/features/extractors/viv/insn.py | 14 +----------- 2 files changed, 15 insertions(+), 31 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index a32b5761..7818f507 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -91,24 +91,6 @@ class Shdr: entsize: int buf: bytes - @classmethod - def from_viv(cls, section, buf: bytes): - """ - construct a Shdr object from vivisect's representation of - section headers (Elf.Elf32Section or Elf.Elf64Section) - """ - return cls( - section.sh_name, - section.sh_type, - section.sh_flags, - section.sh_addr, - section.sh_offset, - section.sh_size, - section.sh_link, - section.sh_entsize, - buf, - ) - class ELF: def __init__(self, f: BinaryIO): @@ -713,6 +695,20 @@ class SymTab: for symbol in self.symbols: yield symbol + @classmethod + def from_Elf(cls, ElfBinary) -> "SymTab": + endian = "<" if ElfBinary.getEndian() == 0 else ">" + bitness = ElfBinary.bits + + SHT_SYMTAB = 0x2 + for section in ElfBinary.sections: + if section.sh_info & SHT_SYMTAB: + strtab_section = ElfBinary.sections[section.vsGetField("sh_link")] + sh_symtab = Shdr.from_viv(section, ElfBinary.readAtOffset(section.sh_offset, section.sh_size)) + sh_strtab = Shdr.from_viv(strtab_section, ElfBinary.readAtOffset(strtab_section.sh_offset, strtab_section.sh_size)) + + return cls(endian, bitness, sh_symtab, sh_strtab) + def guess_os_from_osabi(elf: ELF) -> Optional[OS]: return elf.ei_osabi diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index cfb2fcc0..73bc4548 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -115,19 +115,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato if "symtab" not in fh.ctx["cache"]: # the symbol table gets stored as a function's attribute in order to avoid running # this code everytime the call is made, thus preventing the computational overhead. - elf = f.vw.parsedbin - endian = "<" if elf.getEndian() == 0 else ">" - bitness = elf.bits - - SHT_SYMTAB = 0x2 - for section in elf.sections: - if section.sh_info & SHT_SYMTAB: - strtab_section = elf.sections[section.vsGetField("sh_link")] - sh_symtab = Shdr.from_viv(section, elf.readAtOffset(section.sh_offset, section.sh_size)) - sh_strtab = Shdr.from_viv(strtab_section, elf.readAtOffset(strtab_section.sh_offset, strtab_section.sh_size)) - - symtab = SymTab(endian, bitness, sh_symtab, sh_strtab) - fh.ctx["cache"]["symtab"] = symtab + fh.ctx["cache"]["symtab"] = SymTab.from_Elf(f.vw.parsedbin) symtab = fh.ctx["cache"]["symtab"] for symbol in symtab.get_symbols(): From 9467ee6f103599e9ebc585a2ad68e832bb7fbdfd Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 2 Jun 2023 14:42:04 +0100 Subject: [PATCH 28/95] add FunctionName extraction at the function scope --- capa/features/extractors/viv/function.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/capa/features/extractors/viv/function.py b/capa/features/extractors/viv/function.py index 50d5792e..ab35fa7b 100644 --- a/capa/features/extractors/viv/function.py +++ b/capa/features/extractors/viv/function.py @@ -30,6 +30,23 @@ def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[Tuple[Feature raise NotImplementedError +def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: + if fh.inner.vw.metadata["Format"] == "Elf": + # the file's symbol table gets added to the metadata of the vivisect workspace. + # this is in order to eliminate the computational overhead of refetching symtab each time. + fh.ctx["cache"]["symtab"] = SymTab.from_Elf(fh.inner.vw.parsedbin) + + symtab = fh.ctx["cache"]["symtab"] + for symbol in symtab.get_symbols(): + sym_name = symtab.get_name(symbol) + sym_value = symbol.value + sym_info = symbol.info + + STT_FUNC = 0x2 + if sym_value == fh.address and sym_info & STT_FUNC != 0: + yield FunctionName(sym_name), fh.address + + def extract_function_calls_to(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: f: viv_utils.Function = fhandle.inner for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE): @@ -79,4 +96,8 @@ def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: yield feature, addr -FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop) +FUNCTION_HANDLERS = ( + extract_function_symtab_names, + extract_function_calls_to, + extract_function_loop, +) From 41c512624bd2315922f9732d29ef4b02efc99a27 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 2 Jun 2023 14:44:51 +0100 Subject: [PATCH 29/95] update symtab-based FunctionName feature extraction --- tests/fixtures.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 5f79b451..2f074f4b 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -770,24 +770,12 @@ FEATURE_SYMTAB_FUNC_TESTS = [ capa.features.insn.API("__GI_connect"), True, ), - ( - "2bf18d", - "function=0x4027b3,bb=0x402861,insn=0x40286d", - capa.features.file.FunctionName("__GI_connect"), - True, - ), ( "2bf18d", "function=0x4027b3,bb=0x402861,insn=0x40286d", capa.features.insn.API("connect"), True, ), - ( - "2bf18d", - "function=0x4027b3,bb=0x402861,insn=0x40286d", - capa.features.file.FunctionName("connect"), - True, - ), ( "2bf18d", "function=0x4027b3,bb=0x402861,insn=0x40286d", @@ -796,7 +784,19 @@ FEATURE_SYMTAB_FUNC_TESTS = [ ), ( "2bf18d", - "function=0x4027b3,bb=0x402861,insn=0x40286d", + "function=0x40286d", + capa.features.file.FunctionName("__GI_connect"), + True, + ), + ( + "2bf18d", + "function=0x40286d", + capa.features.file.FunctionName("connect"), + True, + ), + ( + "2bf18d", + "function=0x40286d", capa.features.file.FunctionName("__libc_connect"), True, ), From 0b834a162345997af6fc5222de7f5f0b4a509e64 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer <16624109+yelhamer@users.noreply.github.com> Date: Fri, 2 Jun 2023 15:56:14 +0100 Subject: [PATCH 30/95] delete functionName extraction at instruction level Co-authored-by: Willi Ballenthin --- capa/features/extractors/viv/insn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 73bc4548..c5bcd4ca 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -126,7 +126,6 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato STT_FUNC = 0x2 if sym_value == target and sym_info & STT_FUNC != 0: yield API(sym_name), ih.address - yield FunctionName(sym_name), target if viv_utils.flirt.is_library_function(f.vw, target): name = viv_utils.get_function_name(f.vw, target) From 4976375d749094ff0bd2989feb85e4ec3b25eb6a Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 2 Jun 2023 16:30:17 +0100 Subject: [PATCH 31/95] elf.py: fix identation error --- capa/features/extractors/elf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 7818f507..1fb809fc 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -707,7 +707,7 @@ class SymTab: sh_symtab = Shdr.from_viv(section, ElfBinary.readAtOffset(section.sh_offset, section.sh_size)) sh_strtab = Shdr.from_viv(strtab_section, ElfBinary.readAtOffset(strtab_section.sh_offset, strtab_section.sh_size)) - return cls(endian, bitness, sh_symtab, sh_strtab) + return cls(endian, bitness, sh_symtab, sh_strtab) def guess_os_from_osabi(elf: ELF) -> Optional[OS]: From 151ef95b79edcae6b746c5db8436a3e611cb1a20 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 2 Jun 2023 17:14:44 +0100 Subject: [PATCH 32/95] remove usage of vsGetField --- capa/features/extractors/elf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 1fb809fc..4a5fd0e7 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -703,7 +703,7 @@ class SymTab: SHT_SYMTAB = 0x2 for section in ElfBinary.sections: if section.sh_info & SHT_SYMTAB: - strtab_section = ElfBinary.sections[section.vsGetField("sh_link")] + strtab_section = ElfBinary.sections[section.sh_link] sh_symtab = Shdr.from_viv(section, ElfBinary.readAtOffset(section.sh_offset, section.sh_size)) sh_strtab = Shdr.from_viv(strtab_section, ElfBinary.readAtOffset(strtab_section.sh_offset, strtab_section.sh_size)) From 764fda8e7b1feaee6aa5c6d58cd88a39a1733a8c Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 2 Jun 2023 17:57:37 +0100 Subject: [PATCH 33/95] add missing Shdr.from_viv() method --- capa/features/extractors/elf.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 4a5fd0e7..4aaf0d8e 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -91,6 +91,21 @@ class Shdr: entsize: int buf: bytes + @classmethod + def from_viv(cls, section, buf: bytes): + return cls( + section.sh_name, + section.sh_type, + section.sh_flags, + section.sh_addr, + section.sh_offset, + section.sh_size, + section.sh_link, + section.sh_entsize, + buf, + ) + + class ELF: def __init__(self, f: BinaryIO): From 6b2710ac7ecb0f176b4f8a4399fd3ebb395862ea Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 2 Jun 2023 22:43:58 +0100 Subject: [PATCH 34/95] fix broken logic in extract_function_symtab_names() --- capa/features/extractors/viv/function.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/capa/features/extractors/viv/function.py b/capa/features/extractors/viv/function.py index ab35fa7b..a75f9311 100644 --- a/capa/features/extractors/viv/function.py +++ b/capa/features/extractors/viv/function.py @@ -34,17 +34,18 @@ def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[Tuple[Feature, if fh.inner.vw.metadata["Format"] == "Elf": # the file's symbol table gets added to the metadata of the vivisect workspace. # this is in order to eliminate the computational overhead of refetching symtab each time. - fh.ctx["cache"]["symtab"] = SymTab.from_Elf(fh.inner.vw.parsedbin) + if "symtab" not in fh.ctx["cache"]: + fh.ctx["cache"]["symtab"] = SymTab.from_Elf(fh.inner.vw.parsedbin) - symtab = fh.ctx["cache"]["symtab"] - for symbol in symtab.get_symbols(): - sym_name = symtab.get_name(symbol) - sym_value = symbol.value - sym_info = symbol.info + symtab = fh.ctx["cache"]["symtab"] + for symbol in symtab.get_symbols(): + sym_name = symtab.get_name(symbol) + sym_value = symbol.value + sym_info = symbol.info - STT_FUNC = 0x2 - if sym_value == fh.address and sym_info & STT_FUNC != 0: - yield FunctionName(sym_name), fh.address + STT_FUNC = 0x2 + if sym_value == fh.address and sym_info & STT_FUNC != 0: + yield FunctionName(sym_name), fh.address def extract_function_calls_to(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: From 5b903ca4f3d7ff20def0a2c60d388d544bd3560b Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 2 Jun 2023 23:19:14 +0100 Subject: [PATCH 35/95] add error handling to SymTab and its callers --- capa/features/extractors/elf.py | 9 ++++++++- capa/features/extractors/viv/function.py | 20 ++++++++++++-------- capa/features/extractors/viv/insn.py | 20 ++++++++++++-------- 3 files changed, 32 insertions(+), 17 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 4aaf0d8e..133b50e9 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -722,7 +722,14 @@ class SymTab: sh_symtab = Shdr.from_viv(section, ElfBinary.readAtOffset(section.sh_offset, section.sh_size)) sh_strtab = Shdr.from_viv(strtab_section, ElfBinary.readAtOffset(strtab_section.sh_offset, strtab_section.sh_size)) - return cls(endian, bitness, sh_symtab, sh_strtab) + try: + return cls(endian, bitness, sh_symtab, sh_strtab) + except NameError: + return None + except: + # all exceptions that could be encountered by + # cls._parse() imply a faulty symbol's table. + raise CorruptElfFile("malformed symbol's table") def guess_os_from_osabi(elf: ELF) -> Optional[OS]: diff --git a/capa/features/extractors/viv/function.py b/capa/features/extractors/viv/function.py index a75f9311..8e0fafaf 100644 --- a/capa/features/extractors/viv/function.py +++ b/capa/features/extractors/viv/function.py @@ -35,17 +35,21 @@ def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[Tuple[Feature, # the file's symbol table gets added to the metadata of the vivisect workspace. # this is in order to eliminate the computational overhead of refetching symtab each time. if "symtab" not in fh.ctx["cache"]: - fh.ctx["cache"]["symtab"] = SymTab.from_Elf(fh.inner.vw.parsedbin) + try: + fh.ctx["cache"]["symtab"] = SymTab.from_Elf(fh.inner.vw.parsedbin) + except: + fh.ctx["cache"]["symtab"] = None symtab = fh.ctx["cache"]["symtab"] - for symbol in symtab.get_symbols(): - sym_name = symtab.get_name(symbol) - sym_value = symbol.value - sym_info = symbol.info + if symtab: + for symbol in symtab.get_symbols(): + sym_name = symtab.get_name(symbol) + sym_value = symbol.value + sym_info = symbol.info - STT_FUNC = 0x2 - if sym_value == fh.address and sym_info & STT_FUNC != 0: - yield FunctionName(sym_name), fh.address + STT_FUNC = 0x2 + if sym_value == fh.address and sym_info & STT_FUNC != 0: + yield FunctionName(sym_name), fh.address def extract_function_calls_to(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index c5bcd4ca..d8d6edbe 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -115,17 +115,21 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato if "symtab" not in fh.ctx["cache"]: # the symbol table gets stored as a function's attribute in order to avoid running # this code everytime the call is made, thus preventing the computational overhead. - fh.ctx["cache"]["symtab"] = SymTab.from_Elf(f.vw.parsedbin) + try: + fh.ctx["cache"]["symtab"] = SymTab.from_Elf(f.vw.parsedbin) + except: + fh.ctx["cache"]["symtab"] = None symtab = fh.ctx["cache"]["symtab"] - for symbol in symtab.get_symbols(): - sym_name = symtab.get_name(symbol) - sym_value = symbol.value - sym_info = symbol.info + if symtab: + for symbol in symtab.get_symbols(): + sym_name = symtab.get_name(symbol) + sym_value = symbol.value + sym_info = symbol.info - STT_FUNC = 0x2 - if sym_value == target and sym_info & STT_FUNC != 0: - yield API(sym_name), ih.address + STT_FUNC = 0x2 + if sym_value == target and sym_info & STT_FUNC != 0: + yield API(sym_name), ih.address if viv_utils.flirt.is_library_function(f.vw, target): name = viv_utils.get_function_name(f.vw, target) From be5ada26eaf366b251a51398da06c48ff3c366d8 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Sat, 3 Jun 2023 01:12:56 +0100 Subject: [PATCH 36/95] fix code style --- capa/features/extractors/elf.py | 2 +- capa/features/extractors/viv/extractor.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 133b50e9..9008d3e9 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -92,7 +92,7 @@ class Shdr: buf: bytes @classmethod - def from_viv(cls, section, buf: bytes): + def from_viv(cls, section, buf: bytes) -> "Shdr": return cls( section.sh_name, section.sh_type, diff --git a/capa/features/extractors/viv/extractor.py b/capa/features/extractors/viv/extractor.py index ac936142..c73b24a2 100644 --- a/capa/features/extractors/viv/extractor.py +++ b/capa/features/extractors/viv/extractor.py @@ -51,7 +51,7 @@ class VivisectFeatureExtractor(FeatureExtractor): def get_functions(self) -> Iterator[FunctionHandle]: cache = {} for va in sorted(self.vw.getFunctions()): - yield FunctionHandle(address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va), ctx={"cache": cache}) + yield FunctionHandle(address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va), ctx={"cache":cache}) def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: yield from capa.features.extractors.viv.function.extract_features(fh) From b4870b120e36adee6eec10470239c637e6c71457 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Sat, 3 Jun 2023 15:33:49 +0530 Subject: [PATCH 37/95] Remove from_capa API for MetaData --- capa/ida/helpers.py | 67 ++++++------- capa/ida/plugin/form.py | 7 +- capa/main.py | 121 ++++++++++++----------- capa/render/result_document.py | 74 +------------- scripts/bulk-process.py | 6 +- scripts/capa_as_library.py | 7 +- scripts/show-capabilities-by-function.py | 6 +- 7 files changed, 112 insertions(+), 176 deletions(-) diff --git a/capa/ida/helpers.py b/capa/ida/helpers.py index 46e8907e..d66bfdd0 100644 --- a/capa/ida/helpers.py +++ b/capa/ida/helpers.py @@ -22,7 +22,8 @@ import capa import capa.version import capa.render.utils as rutils import capa.features.common -import capa.render.result_document +import capa.features.freeze +import capa.render.result_document as rdoc from capa.features.address import AbsoluteVirtualAddress logger = logging.getLogger("capa") @@ -140,38 +141,34 @@ def collect_metadata(rules): else: os = "unknown os" - return capa.render.result_document.Metadata.from_capa( - { - "timestamp": datetime.datetime.now().isoformat(), - "argv": [], - "sample": { - "md5": md5, - "sha1": "", # not easily accessible - "sha256": sha256, - "path": idaapi.get_input_file_path(), - }, - "analysis": { - "format": idaapi.get_file_type_name(), - "arch": arch, - "os": os, - "extractor": "ida", - "rules": rules, - "base_address": idaapi.get_imagebase(), - "layout": { - # this is updated after capabilities have been collected. - # will look like: - # - # "functions": { 0x401000: { "matched_basic_blocks": [ 0x401000, 0x401005, ... ] }, ... } - }, - # ignore these for now - not used by IDA plugin. - "feature_counts": { - "file": {}, - "functions": {}, - }, - "library_functions": {}, - }, - "version": capa.version.__version__, - } + return rdoc.Metadata( + timestamp=datetime.datetime.now(), + version=capa.version.__version__, + argv=(), + sample=rdoc.Sample( + md5=md5, + sha1="", # not easily accessible + sha256=sha256, + path=idaapi.get_input_file_path(), + ), + analysis=rdoc.Analysis( + format=idaapi.get_file_type_name(), + arch=arch, + os=os, + extractor="ida", + rules=rules, + base_address=capa.features.freeze.Address.from_capa(idaapi.get_imagebase()), + layout=rdoc.Layout( + functions=tuple() + # this is updated after capabilities have been collected. + # will look like: + # + # "functions": { 0x401000: { "matched_basic_blocks": [ 0x401000, 0x401005, ... ] }, ... } + ), + # ignore these for now - not used by IDA plugin. + feature_counts=rdoc.FeatureCounts(file=0, functions=tuple()), + library_functions=tuple(), + ), ) @@ -219,12 +216,12 @@ def idb_contains_cached_results() -> bool: return False -def load_and_verify_cached_results() -> Optional[capa.render.result_document.ResultDocument]: +def load_and_verify_cached_results() -> Optional[rdoc.ResultDocument]: """verifies that cached results have valid (mapped) addresses for the current database""" logger.debug("loading cached capa results from netnode '%s'", CAPA_NETNODE) n = netnode.Netnode(CAPA_NETNODE) - doc = capa.render.result_document.ResultDocument.parse_obj(json.loads(n[NETNODE_RESULTS])) + doc = rdoc.ResultDocument.parse_obj(json.loads(n[NETNODE_RESULTS])) for rule in rutils.capability_rules(doc): for location_, _ in rule.matches: diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index f74c0c76..72b33a66 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -771,8 +771,11 @@ class CapaExplorerForm(idaapi.PluginForm): try: meta = capa.ida.helpers.collect_metadata([settings.user[CAPA_SETTINGS_RULE_PATH]]) capabilities, counts = capa.main.find_capabilities(ruleset, extractor, disable_progress=True) - meta.analysis.__dict__.update(counts) - meta.analysis.__dict__.update(capa.main.compute_layout(ruleset, extractor, capabilities)) + + meta.analysis.feature_counts = counts["feature_counts"] + meta.analysis.library_functions = counts["library_functions"] + meta.analysis.layout = capa.main.compute_layout(ruleset, extractor, capabilities) + except UserCancelledError: logger.info("User cancelled analysis.") return False diff --git a/capa/main.py b/capa/main.py index 722597a5..76e7bffc 100644 --- a/capa/main.py +++ b/capa/main.py @@ -38,7 +38,7 @@ import capa.rules.cache import capa.render.default import capa.render.verbose import capa.features.common -import capa.features.freeze +import capa.features.freeze as frz import capa.render.vverbose import capa.features.extractors import capa.render.result_document @@ -247,13 +247,8 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro all_bb_matches = collections.defaultdict(list) # type: MatchResults all_insn_matches = collections.defaultdict(list) # type: MatchResults - meta = { - "feature_counts": { - "file": 0, - "functions": {}, - }, - "library_functions": {}, - } # type: Dict[str, Any] + feature_counts = rdoc.FeatureCounts(file=0, functions=tuple()) + library_functions: Tuple[rdoc.LibraryFunction, ...] = tuple() with redirecting_print_to_tqdm(disable_progress): with tqdm.contrib.logging.logging_redirect_tqdm(): @@ -272,8 +267,10 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro if extractor.is_library_function(f.address): function_name = extractor.get_function_name(f.address) logger.debug("skipping library function 0x%x (%s)", f.address, function_name) - meta["library_functions"][f.address] = function_name - n_libs = len(meta["library_functions"]) + library_functions += ( + rdoc.LibraryFunction(address=frz.Address.from_capa(f.address), name=function_name), + ) + n_libs = len(library_functions) percentage = round(100 * (n_libs / n_funcs)) if isinstance(pb, tqdm.tqdm): pb.set_postfix_str(f"skipped {n_libs} library functions ({percentage}%)") @@ -282,7 +279,9 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro function_matches, bb_matches, insn_matches, feature_count = find_code_capabilities( ruleset, extractor, f ) - meta["feature_counts"]["functions"][f.address] = feature_count + feature_counts.functions += ( + rdoc.FunctionFeatureCount(address=frz.Address.from_capa(f.address), count=feature_count), + ) logger.debug("analyzed function 0x%x and extracted %d features", f.address, feature_count) for rule_name, res in function_matches.items(): @@ -303,7 +302,7 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro capa.engine.index_rule_matches(function_and_lower_features, rule, locations) all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_and_lower_features) - meta["feature_counts"]["file"] = feature_count + feature_counts.file = feature_count matches = { rule_name: results @@ -317,11 +316,11 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro all_file_matches.items(), ) } - meta["feature_counts"] = rdoc.FeatureCounts.from_capa(meta["feature_counts"]) - meta["library_functions"] = tuple( - rdoc.LibraryFunction(address=capa.features.freeze.Address.from_capa(address), name=name) - for address, name in meta["library_functions"].items() - ) + + meta = { + "feature_counts": feature_counts, + "library_functions": library_functions, + } return matches, meta @@ -765,39 +764,37 @@ def collect_metadata( arch = get_arch(sample_path) os_ = get_os(sample_path) if os_ == OS_AUTO else os_ - return rdoc.Metadata.from_capa( - { - "timestamp": datetime.datetime.now().isoformat(), - "version": capa.version.__version__, - "argv": argv, - "sample": { - "md5": md5.hexdigest(), - "sha1": sha1.hexdigest(), - "sha256": sha256.hexdigest(), - "path": os.path.normpath(sample_path), - }, - "analysis": { - "format": format_, - "arch": arch, - "os": os_, - "extractor": extractor.__class__.__name__, - "rules": rules_path, - "base_address": extractor.get_base_address(), - "layout": { - "functions": {}, - # this is updated after capabilities have been collected. - # will look like: - # - # "functions": { 0x401000: { "matched_basic_blocks": [ 0x401000, 0x401005, ... ] }, ... } - }, - "feature_counts": {"file": 0, "functions": {}}, - "library_functions": {}, - }, - } + return rdoc.Metadata( + timestamp=datetime.datetime.now(), + version=capa.version.__version__, + argv=tuple(argv) if argv else None, + sample=rdoc.Sample( + md5=md5.hexdigest(), + sha1=sha1.hexdigest(), + sha256=sha256.hexdigest(), + path=os.path.normpath(sample_path), + ), + analysis=rdoc.Analysis( + format=format_, + arch=arch, + os=os_, + extractor=extractor.__class__.__name__, + rules=tuple(rules_path), + base_address=frz.Address.from_capa(extractor.get_base_address()), + layout=rdoc.Layout( + functions=tuple(), + # this is updated after capabilities have been collected. + # will look like: + # + # "functions": { 0x401000: { "matched_basic_blocks": [ 0x401000, 0x401005, ... ] }, ... } + ), + feature_counts=rdoc.FeatureCounts(file=0, functions=tuple()), + library_functions=tuple(), + ), ) -def compute_layout(rules, extractor, capabilities) -> Dict[str, rdoc.Layout]: +def compute_layout(rules, extractor, capabilities) -> rdoc.Layout: """ compute a metadata structure that links basic blocks to the functions in which they're found. @@ -822,19 +819,21 @@ def compute_layout(rules, extractor, capabilities) -> Dict[str, rdoc.Layout]: assert addr in functions_by_bb matched_bbs.add(addr) - layout = { - "functions": { - f: { - "matched_basic_blocks": [bb for bb in bbs if bb in matched_bbs] - # this object is open to extension in the future, + layout = rdoc.Layout( + functions=tuple( + rdoc.FunctionLayout( + address=frz.Address.from_capa(f), + matched_basic_blocks=tuple( + rdoc.BasicBlockLayout(address=frz.Address.from_capa(bb)) for bb in bbs if bb in matched_bbs + ) # this object is open to extension in the future, # such as with the function name, etc. - } + ) for f, bbs in bbs_by_function.items() if len([bb for bb in bbs if bb in matched_bbs]) > 0 - } - } + ) + ) - return {"layout": rdoc.Layout.from_capa(layout)} + return layout def install_common_args(parser, wanted=None): @@ -1226,7 +1225,7 @@ def main(argv=None): if format_ == FORMAT_FREEZE: # freeze format deserializes directly into an extractor with open(args.sample, "rb") as f: - extractor = capa.features.freeze.load(f.read()) + extractor = frz.load(f.read()) else: # all other formats we must create an extractor, # such as viv, binary ninja, etc. workspaces @@ -1268,8 +1267,9 @@ def main(argv=None): capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet) - meta.analysis.__dict__.update(counts) - meta.analysis.__dict__.update(compute_layout(rules, extractor, capabilities)) + meta.analysis.feature_counts = counts["feature_counts"] + meta.analysis.library_functions = counts["library_functions"] + meta.analysis.layout = compute_layout(rules, extractor, capabilities) if has_file_limitation(rules, capabilities): # bail if capa encountered file limitation e.g. a packed binary @@ -1321,7 +1321,8 @@ def ida_main(): capabilities, counts = find_capabilities(rules, capa.features.extractors.ida.extractor.IdaFeatureExtractor()) - meta.analysis.__dict__.update(counts) + meta.analysis.feature_counts = counts["feature_counts"] + meta.analysis.library_functions = counts["library_functions"] if has_file_limitation(rules, capabilities, is_standalone=False): capa.ida.helpers.inform_user_ida_ui("capa encountered warnings during analysis") diff --git a/capa/render/result_document.py b/capa/render/result_document.py index d1656e1f..11faf145 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -24,7 +24,6 @@ from capa.helpers import assert_never class FrozenModel(BaseModel): class Config: - frozen = True extra = "forbid" @@ -47,20 +46,6 @@ class FunctionLayout(FrozenModel): class Layout(FrozenModel): functions: Tuple[FunctionLayout, ...] - @classmethod - def from_capa(cls, layout: dict) -> "Layout": - return cls( - functions=tuple( - FunctionLayout( - address=frz.Address.from_capa(address), - matched_basic_blocks=tuple( - BasicBlockLayout(address=frz.Address.from_capa(bb)) for bb in f["matched_basic_blocks"] - ), - ) - for address, f in layout["functions"].items() - ) - ) - class LibraryFunction(FrozenModel): address: frz.Address @@ -76,16 +61,6 @@ class FeatureCounts(FrozenModel): file: int functions: Tuple[FunctionFeatureCount, ...] - @classmethod - def from_capa(cls, feature_counts: dict) -> "FeatureCounts": - return cls( - file=feature_counts["file"], - functions=tuple( - FunctionFeatureCount(address=frz.Address.from_capa(address), count=count) - for address, count in feature_counts["functions"].items() - ), - ) - class Analysis(FrozenModel): format: str @@ -106,50 +81,6 @@ class Metadata(FrozenModel): sample: Sample analysis: Analysis - @classmethod - def from_capa(cls, meta: dict) -> "Metadata": - return cls( - timestamp=meta["timestamp"], - version=meta["version"], - argv=meta["argv"] if "argv" in meta else None, - sample=Sample( - md5=meta["sample"]["md5"], - sha1=meta["sample"]["sha1"], - sha256=meta["sample"]["sha256"], - path=meta["sample"]["path"], - ), - analysis=Analysis( - format=meta["analysis"]["format"], - arch=meta["analysis"]["arch"], - os=meta["analysis"]["os"], - extractor=meta["analysis"]["extractor"], - rules=meta["analysis"]["rules"], - base_address=frz.Address.from_capa(meta["analysis"]["base_address"]), - layout=Layout( - functions=tuple( - FunctionLayout( - address=frz.Address.from_capa(address), - matched_basic_blocks=tuple( - BasicBlockLayout(address=frz.Address.from_capa(bb)) for bb in f["matched_basic_blocks"] - ), - ) - for address, f in meta["analysis"]["layout"]["functions"].items() - ) - ), - feature_counts=FeatureCounts( - file=meta["analysis"]["feature_counts"]["file"], - functions=tuple( - FunctionFeatureCount(address=frz.Address.from_capa(address), count=count) - for address, count in meta["analysis"]["feature_counts"]["functions"].items() - ), - ), - library_functions=tuple( - LibraryFunction(address=frz.Address.from_capa(address), name=name) - for address, name in meta["analysis"]["library_functions"].items() - ), - ), - ) - class CompoundStatementType: AND = "and" @@ -648,10 +579,7 @@ class ResultDocument(FrozenModel): ), ) - if isinstance(meta, Metadata): - return ResultDocument(meta=meta, rules=rule_matches) - - return ResultDocument(meta=Metadata.from_capa(meta), rules=rule_matches) + return ResultDocument(meta=meta, rules=rule_matches) def to_capa(self) -> Tuple[Metadata, Dict]: capabilities: Dict[ diff --git a/scripts/bulk-process.py b/scripts/bulk-process.py index 6c9114da..5e3ed0a1 100644 --- a/scripts/bulk-process.py +++ b/scripts/bulk-process.py @@ -131,8 +131,10 @@ def get_capa_results(args): meta = capa.main.collect_metadata([], path, format, os_, [], extractor) capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True) - meta.analysis.__dict__.update(counts) - meta.analysis.__dict__.update(capa.main.compute_layout(rules, extractor, capabilities)) + + meta.analysis.feature_counts = counts["feature_counts"] + meta.analysis.library_functions = counts["library_functions"] + meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities) doc = rd.ResultDocument.from_capa(meta, rules, capabilities) diff --git a/scripts/capa_as_library.py b/scripts/capa_as_library.py index f97dc2de..57657018 100644 --- a/scripts/capa_as_library.py +++ b/scripts/capa_as_library.py @@ -172,10 +172,13 @@ def capa_details(rules_path, file_path, output_format="dictionary"): # collect metadata (used only to make rendering more complete) meta = capa.main.collect_metadata([], file_path, FORMAT_AUTO, OS_AUTO, rules_path, extractor) - meta.analysis.__dict__.update(counts) - meta.analysis.__dict__.update(capa.main.compute_layout(rules, extractor, capabilities)) + + meta.analysis.feature_counts = counts["feature_counts"] + meta.analysis.library_functions = counts["library_functions"] + meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities) capa_output: Any = False + if output_format == "dictionary": # ...as python dictionary, simplified as textable but in dictionary doc = rd.ResultDocument.from_capa(meta, rules, capabilities) diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py index 5ed38a46..b58c7568 100644 --- a/scripts/show-capabilities-by-function.py +++ b/scripts/show-capabilities-by-function.py @@ -178,8 +178,10 @@ def main(argv=None): meta = capa.main.collect_metadata(argv, args.sample, format_, args.os, args.rules, extractor) capabilities, counts = capa.main.find_capabilities(rules, extractor) - meta.analysis.__dict__.update(counts) - meta.analysis.__dict__.update(capa.main.compute_layout(rules, extractor, capabilities)) + + meta.analysis.feature_counts = counts["feature_counts"] + meta.analysis.library_functions = counts["library_functions"] + meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities) if capa.main.has_file_limitation(rules, capabilities): # bail if capa encountered file limitation e.g. a packed binary From e971bc4044cc1f203f6d0bdcdd51389a20940a8f Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Mon, 5 Jun 2023 12:01:39 +0100 Subject: [PATCH 38/95] fix codestyle issues --- capa/features/extractors/elf.py | 5 +++-- capa/features/extractors/viv/extractor.py | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 9008d3e9..24a48e60 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -106,7 +106,6 @@ class Shdr: ) - class ELF: def __init__(self, f: BinaryIO): self.f = f @@ -720,7 +719,9 @@ class SymTab: if section.sh_info & SHT_SYMTAB: strtab_section = ElfBinary.sections[section.sh_link] sh_symtab = Shdr.from_viv(section, ElfBinary.readAtOffset(section.sh_offset, section.sh_size)) - sh_strtab = Shdr.from_viv(strtab_section, ElfBinary.readAtOffset(strtab_section.sh_offset, strtab_section.sh_size)) + sh_strtab = Shdr.from_viv( + strtab_section, ElfBinary.readAtOffset(strtab_section.sh_offset, strtab_section.sh_size) + ) try: return cls(endian, bitness, sh_symtab, sh_strtab) diff --git a/capa/features/extractors/viv/extractor.py b/capa/features/extractors/viv/extractor.py index c73b24a2..25e4b0de 100644 --- a/capa/features/extractors/viv/extractor.py +++ b/capa/features/extractors/viv/extractor.py @@ -51,7 +51,9 @@ class VivisectFeatureExtractor(FeatureExtractor): def get_functions(self) -> Iterator[FunctionHandle]: cache = {} for va in sorted(self.vw.getFunctions()): - yield FunctionHandle(address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va), ctx={"cache":cache}) + yield FunctionHandle( + address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va), ctx={"cache": cache} + ) def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: yield from capa.features.extractors.viv.function.extract_features(fh) From 65f18aecc8d2d7e26d69c5655cad27e4f4764784 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Mon, 5 Jun 2023 12:14:56 +0100 Subject: [PATCH 39/95] fix mypy typing issues --- capa/features/extractors/elf.py | 2 +- capa/features/extractors/viv/extractor.py | 4 ++-- capa/features/extractors/viv/function.py | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 24a48e60..1f23d53e 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -710,7 +710,7 @@ class SymTab: yield symbol @classmethod - def from_Elf(cls, ElfBinary) -> "SymTab": + def from_Elf(cls, ElfBinary) -> Optional["SymTab"]: endian = "<" if ElfBinary.getEndian() == 0 else ">" bitness = ElfBinary.bits diff --git a/capa/features/extractors/viv/extractor.py b/capa/features/extractors/viv/extractor.py index 25e4b0de..d73df10c 100644 --- a/capa/features/extractors/viv/extractor.py +++ b/capa/features/extractors/viv/extractor.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import List, Tuple, Iterator +from typing import Dict, List, Tuple, Iterator, Any import viv_utils import viv_utils.flirt @@ -49,7 +49,7 @@ class VivisectFeatureExtractor(FeatureExtractor): yield from capa.features.extractors.viv.file.extract_features(self.vw, self.buf) def get_functions(self) -> Iterator[FunctionHandle]: - cache = {} + cache: Dict[str, Any] = {} for va in sorted(self.vw.getFunctions()): yield FunctionHandle( address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va), ctx={"cache": cache} diff --git a/capa/features/extractors/viv/function.py b/capa/features/extractors/viv/function.py index 8e0fafaf..dcdaf5a5 100644 --- a/capa/features/extractors/viv/function.py +++ b/capa/features/extractors/viv/function.py @@ -11,9 +11,11 @@ import envi import viv_utils import vivisect.const +from capa.features.file import FunctionName from capa.features.common import Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress from capa.features.extractors import loops +from capa.features.extractors.elf import SymTab from capa.features.extractors.base_extractor import FunctionHandle From 103b384c094108706acc24d6cabc21df9a677646 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Mon, 5 Jun 2023 12:17:27 +0100 Subject: [PATCH 40/95] fix viv/extractor.py codestyle imports --- capa/features/extractors/viv/extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/viv/extractor.py b/capa/features/extractors/viv/extractor.py index d73df10c..16b97ef3 100644 --- a/capa/features/extractors/viv/extractor.py +++ b/capa/features/extractors/viv/extractor.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Dict, List, Tuple, Iterator, Any +from typing import Any, Dict, List, Tuple, Iterator import viv_utils import viv_utils.flirt From 9b0fb74d9434832ab3bfde1189445eaf4ead3d3f Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Mon, 5 Jun 2023 13:36:50 +0100 Subject: [PATCH 41/95] fix typo: "Elf" to "elf" --- capa/features/extractors/viv/function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/viv/function.py b/capa/features/extractors/viv/function.py index dcdaf5a5..87403a65 100644 --- a/capa/features/extractors/viv/function.py +++ b/capa/features/extractors/viv/function.py @@ -33,7 +33,7 @@ def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[Tuple[Feature def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: - if fh.inner.vw.metadata["Format"] == "Elf": + if fh.inner.vw.metadata["Format"] == "elf": # the file's symbol table gets added to the metadata of the vivisect workspace. # this is in order to eliminate the computational overhead of refetching symtab each time. if "symtab" not in fh.ctx["cache"]: From 5b260c00f4f25482090ffaedd22c0a2e2f8efc25 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Mon, 5 Jun 2023 13:37:19 +0100 Subject: [PATCH 42/95] fix symtab FunctionName feature scope address --- tests/fixtures.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 2f074f4b..84e40209 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -784,19 +784,19 @@ FEATURE_SYMTAB_FUNC_TESTS = [ ), ( "2bf18d", - "function=0x40286d", + "function=0x4088a4", capa.features.file.FunctionName("__GI_connect"), True, ), ( "2bf18d", - "function=0x40286d", + "function=0x4088a4", capa.features.file.FunctionName("connect"), True, ), ( "2bf18d", - "function=0x40286d", + "function=0x4088a4", capa.features.file.FunctionName("__libc_connect"), True, ), From b2080cdfbca18170f6238db29578e39f7ca4d42d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Jun 2023 14:58:02 +0000 Subject: [PATCH 43/95] build(deps-dev): bump types-requests from 2.28.1 to 2.31.0.1 Bumps [types-requests](https://github.com/python/typeshed) from 2.28.1 to 2.31.0.1. - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-requests dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2f8c5348..e83ba483 100644 --- a/setup.py +++ b/setup.py @@ -89,7 +89,7 @@ setuptools.setup( "types-tabulate==0.9.0.1", "types-termcolor==1.1.4", "types-psutil==5.8.23", - "types_requests==2.28.1", + "types_requests==2.31.0.1", "types-protobuf==4.23.0.1", ], "build": [ From 28c24c9d4875d8626458aac1befa59a754421daa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Jun 2023 14:58:17 +0000 Subject: [PATCH 44/95] build(deps-dev): bump requests from 2.28.0 to 2.31.0 Bumps [requests](https://github.com/psf/requests) from 2.28.0 to 2.31.0. - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.28.0...v2.31.0) --- updated-dependencies: - dependency-name: requests dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2f8c5348..8aa5309a 100644 --- a/setup.py +++ b/setup.py @@ -80,7 +80,7 @@ setuptools.setup( "mypy==1.3.0", "psutil==5.9.2", "stix2==3.0.1", - "requests==2.28.0", + "requests==2.31.0", "mypy-protobuf==3.4.0", # type stubs for mypy "types-backports==0.1.3", From 28b2cd5117bd0d7ea4fcdd4e949d0faf2c235688 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Jun 2023 14:58:21 +0000 Subject: [PATCH 45/95] build(deps-dev): bump pytest-cov from 4.0.0 to 4.1.0 Bumps [pytest-cov](https://github.com/pytest-dev/pytest-cov) from 4.0.0 to 4.1.0. - [Changelog](https://github.com/pytest-dev/pytest-cov/blob/master/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest-cov/compare/v4.0.0...v4.1.0) --- updated-dependencies: - dependency-name: pytest-cov dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2f8c5348..2265fe1b 100644 --- a/setup.py +++ b/setup.py @@ -72,7 +72,7 @@ setuptools.setup( "pytest==7.3.1", "pytest-sugar==0.9.4", "pytest-instafail==0.5.0", - "pytest-cov==4.0.0", + "pytest-cov==4.1.0", "pycodestyle==2.10.0", "ruff==0.0.270", "black==23.3.0", From c6815ef1266b2aad11a3786138d0e6e00a3c2278 Mon Sep 17 00:00:00 2001 From: Aayush Goel <81844215+Aayush-Goel-04@users.noreply.github.com> Date: Tue, 6 Jun 2023 13:02:30 +0530 Subject: [PATCH 46/95] Update Model and FrozenModel Class --- capa/main.py | 1 - capa/render/result_document.py | 24 +++++++++++++++--------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/capa/main.py b/capa/main.py index 76e7bffc..bdf0cec3 100644 --- a/capa/main.py +++ b/capa/main.py @@ -1208,7 +1208,6 @@ def main(argv=None): logger.debug("file limitation short circuit, won't analyze fully.") return E_FILE_LIMITATION - # TODO: #1411 use a real type, not a dict here. meta: rdoc.Metadata capabilities: MatchResults counts: Dict[str, Any] diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 11faf145..21957ddf 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -24,45 +24,51 @@ from capa.helpers import assert_never class FrozenModel(BaseModel): class Config: + frozen = True extra = "forbid" -class Sample(FrozenModel): +class Model(BaseModel): + class Config: + extra = "forbid" + + +class Sample(Model): md5: str sha1: str sha256: str path: str -class BasicBlockLayout(FrozenModel): +class BasicBlockLayout(Model): address: frz.Address -class FunctionLayout(FrozenModel): +class FunctionLayout(Model): address: frz.Address matched_basic_blocks: Tuple[BasicBlockLayout, ...] -class Layout(FrozenModel): +class Layout(Model): functions: Tuple[FunctionLayout, ...] -class LibraryFunction(FrozenModel): +class LibraryFunction(Model): address: frz.Address name: str -class FunctionFeatureCount(FrozenModel): +class FunctionFeatureCount(Model): address: frz.Address count: int -class FeatureCounts(FrozenModel): +class FeatureCounts(Model): file: int functions: Tuple[FunctionFeatureCount, ...] -class Analysis(FrozenModel): +class Analysis(Model): format: str arch: str os: str @@ -74,7 +80,7 @@ class Analysis(FrozenModel): library_functions: Tuple[LibraryFunction, ...] -class Metadata(FrozenModel): +class Metadata(Model): timestamp: datetime.datetime version: str argv: Optional[Tuple[str, ...]] From 2fe0713faa80ebdda8a98b37078f255ee0a58d18 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 7 Jun 2023 10:17:28 +0000 Subject: [PATCH 47/95] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 188e6552..5eb25253 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 188e65528ec496eaaa792c3470cb4ab680a1b156 +Subproject commit 5eb25253656e4425377fda84eb3e5291e9ef3d4a From 0559e61af1558f80fbd230e5ae77e44cbc40e198 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Thu, 8 Jun 2023 08:41:14 +0000 Subject: [PATCH 48/95] Sync capa rules submodule --- CHANGELOG.md | 3 ++- README.md | 2 +- rules | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a2778e4..7a736a60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ ### Breaking Changes - Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat -### New Rules (7) +### New Rules (8) - load-code/shellcode/execute-shellcode-via-windows-callback-function ervin.ocampo@mandiant.com jakub.jozwiak@mandiant.com - nursery/execute-shellcode-via-indirect-call ronnie.salomonsen@mandiant.com @@ -18,6 +18,7 @@ - communication/mailslot/read-from-mailslot nick.simonian@mandiant.com - nursery/hash-data-using-sha512managed-in-dotnet jonathanlepore@google.com - nursery/compiled-with-exescript jonathanlepore@google.com +- nursery/check-for-sandbox-via-mac-address-ouis-in-dotnet jonathanlepore@google.com - ### Bug Fixes diff --git a/README.md b/README.md index 16d561bc..809a5651 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-799-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-800-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index 5eb25253..5f433fdf 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 5eb25253656e4425377fda84eb3e5291e9ef3d4a +Subproject commit 5f433fdf8ea03b592db035b6b0c934bf04bb0812 From 8cc16e8de9fa0156d88151d094d9239d6e66775f Mon Sep 17 00:00:00 2001 From: Xusheng Date: Fri, 9 Jun 2023 11:34:03 +0800 Subject: [PATCH 49/95] Update the stack string detection with BN's builtin outlining of constant expressions --- CHANGELOG.md | 1 + capa/features/extractors/binja/basicblock.py | 72 +++++++++++++++++++- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a2778e4..68950d3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - ### Bug Fixes +- extractor: update Binary Ninja stack string detection after the new constant outlining feature #1473 @xusheng6 - extractor: update vivisect Arch extraction #1334 @mr-tz - extractor: avoid Binary Ninja exception when analyzing certain files #1441 @xusheng6 - symtab: fix struct.unpack() format for 64-bit ELF files @yelhamer diff --git a/capa/features/extractors/binja/basicblock.py b/capa/features/extractors/binja/basicblock.py index ff464b1d..e354669d 100644 --- a/capa/features/extractors/binja/basicblock.py +++ b/capa/features/extractors/binja/basicblock.py @@ -11,10 +11,13 @@ import string import struct from typing import Tuple, Iterator -from binaryninja import Function +from binaryninja import Function, Settings from binaryninja import BasicBlock as BinjaBasicBlock from binaryninja import ( BinaryView, + DataBuffer, + SymbolType, + RegisterValueType, VariableSourceType, MediumLevelILSetVar, MediumLevelILOperation, @@ -28,6 +31,66 @@ from capa.features.basicblock import BasicBlock from capa.features.extractors.helpers import MIN_STACKSTRING_LEN from capa.features.extractors.base_extractor import BBHandle, FunctionHandle +use_const_outline: bool = False +settings: Settings = Settings() +if settings.contains("analysis.outlining.builtins") and settings.get_bool("analysis.outlining.builtins"): + use_const_outline = True + + +def get_printable_len_ascii(s: bytes) -> int: + """Return string length if all operand bytes are ascii or utf16-le printable""" + count = 0 + for c in s: + if c == 0: + return count + if c < 127 and chr(c) in string.printable: + count += 1 + return count + + +def get_printable_len_wide(s: bytes) -> int: + """Return string length if all operand bytes are ascii or utf16-le printable""" + if all(c == 0x00 for c in s[1::2]): + return get_printable_len_ascii(s[::2]) + return 0 + + +def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int: + bv: BinaryView = f.view + + if il.operation != MediumLevelILOperation.MLIL_CALL: + return 0 + + target = il.dest + if target.operation not in [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]: + return 0 + + addr = target.value.value + sym = bv.get_symbol_at(addr) + if not sym or sym.type != SymbolType.LibraryFunctionSymbol: + return 0 + + if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]: + return 0 + + if len(il.params) < 2: + return 0 + + dest = il.params[0] + if dest.operation != MediumLevelILOperation.MLIL_ADDRESS_OF: + return 0 + + var = dest.src + if var.source_type != VariableSourceType.StackVariableSourceType: + return 0 + + src = il.params[1] + if src.value.type != RegisterValueType.ConstantDataAggregateValue: + return 0 + + s = f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, src.value.value) + return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s))) + def get_printable_len(il: MediumLevelILSetVar) -> int: """Return string length if all operand bytes are ascii or utf16-le printable""" @@ -82,8 +145,11 @@ def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool: """ count = 0 for il in bb: - if is_mov_imm_to_stack(il): - count += get_printable_len(il) + if use_const_outline: + count += get_stack_string_len(f, il) + else: + if is_mov_imm_to_stack(il): + count += get_printable_len(il) if count > MIN_STACKSTRING_LEN: return True From a74911e926b7b0bff815d620b54415da7cd3b476 Mon Sep 17 00:00:00 2001 From: Xusheng Date: Fri, 9 Jun 2023 13:41:31 +0800 Subject: [PATCH 50/95] Add a test that asserts on the binja version --- CHANGELOG.md | 1 + tests/test_binja_features.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68950d3c..210664d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - ### Bug Fixes +- extractor: add a Binary Ninja test that asserts its version #1487 @xusheng6 - extractor: update Binary Ninja stack string detection after the new constant outlining feature #1473 @xusheng6 - extractor: update vivisect Arch extraction #1334 @mr-tz - extractor: avoid Binary Ninja exception when analyzing certain files #1441 @xusheng6 diff --git a/tests/test_binja_features.py b/tests/test_binja_features.py index 06e91ff1..04c8a49e 100644 --- a/tests/test_binja_features.py +++ b/tests/test_binja_features.py @@ -55,3 +55,9 @@ def test_standalone_binja_backend(): CD = os.path.dirname(__file__) test_path = os.path.join(CD, "..", "tests", "data", "Practical Malware Analysis Lab 01-01.exe_") assert capa.main.main([test_path, "-b", capa.main.BACKEND_BINJA]) == 0 + + +@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed") +def test_binja_version(): + version = binaryninja.core_version_info() + assert version.major == 3 and version.minor == 4 From cc993b67a3ec0dc2695fdc424e048e4d9f418f04 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Mon, 12 Jun 2023 06:58:29 +0000 Subject: [PATCH 51/95] Sync capa rules submodule --- CHANGELOG.md | 3 ++- README.md | 2 +- rules | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a736a60..c553d088 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ ### Breaking Changes - Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat -### New Rules (8) +### New Rules (9) - load-code/shellcode/execute-shellcode-via-windows-callback-function ervin.ocampo@mandiant.com jakub.jozwiak@mandiant.com - nursery/execute-shellcode-via-indirect-call ronnie.salomonsen@mandiant.com @@ -19,6 +19,7 @@ - nursery/hash-data-using-sha512managed-in-dotnet jonathanlepore@google.com - nursery/compiled-with-exescript jonathanlepore@google.com - nursery/check-for-sandbox-via-mac-address-ouis-in-dotnet jonathanlepore@google.com +- host-interaction/hardware/enumerate-devices-by-category @mr-tz - ### Bug Fixes diff --git a/README.md b/README.md index 809a5651..8bfa9207 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-800-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-801-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index 5f433fdf..baab4e37 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 5f433fdf8ea03b592db035b6b0c934bf04bb0812 +Subproject commit baab4e37d3bf7749980663b41a36c89cb9fdadcc From 85a7c87830f35ea1289787898b05f31d2d846938 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Mon, 12 Jun 2023 12:18:23 +0000 Subject: [PATCH 52/95] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index baab4e37..1ecaa98d 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit baab4e37d3bf7749980663b41a36c89cb9fdadcc +Subproject commit 1ecaa98de4a2040d10b519c6b9a8a8228d417655 From 43b2ee3c5281f3ec0f3834a34769e7e10af9ede3 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Mon, 12 Jun 2023 12:28:18 +0000 Subject: [PATCH 53/95] Sync capa rules submodule --- README.md | 2 +- rules | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8bfa9207..809a5651 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-801-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-800-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index 1ecaa98d..368a27e7 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 1ecaa98de4a2040d10b519c6b9a8a8228d417655 +Subproject commit 368a27e739cdedfa37588ff8176a809159aa562b From 366c55231e2f6e4bece1b1441da4a5f8cc44b3d3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Jun 2023 14:58:23 +0000 Subject: [PATCH 54/95] build(deps): bump pydantic from 1.10.7 to 1.10.9 Bumps [pydantic](https://github.com/pydantic/pydantic) from 1.10.7 to 1.10.9. - [Release notes](https://github.com/pydantic/pydantic/releases) - [Changelog](https://github.com/pydantic/pydantic/blob/main/HISTORY.md) - [Commits](https://github.com/pydantic/pydantic/compare/v1.10.7...v1.10.9) --- updated-dependencies: - dependency-name: pydantic dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2f8c5348..0f4cdd5e 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ requirements = [ "pyelftools==0.29", "dnfile==0.13.0", "dncil==1.0.2", - "pydantic==1.10.7", + "pydantic==1.10.9", "protobuf==4.23.2", ] From 7ef78fdbce3959529eed5d8bd96b67348838f6e7 Mon Sep 17 00:00:00 2001 From: Stephen Eckels Date: Tue, 13 Jun 2023 14:00:06 -0400 Subject: [PATCH 55/95] explorer: optimize cache and extractor interface (#1470) * Optimize cache and extractor interface * Update changelog * Run linter formatters * Implement review feedback * Move rulegen extractor construction to tab change * Change rulegen cache construction behavior * Adjust return values for CR, format * Fix mypy errors * Format * Fix merge --------- Co-authored-by: Stephen Eckels --- CHANGELOG.md | 2 ++ capa/ida/plugin/cache.py | 69 ++++++++++++++++++++++------------------ capa/ida/plugin/form.py | 66 ++++++++++++++------------------------ 3 files changed, 63 insertions(+), 74 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69023a2c..8846b14f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -88,12 +88,14 @@ Thanks for all the support, especially to @xusheng6, @captainGeech42, @ggold7046 - nursery/contain-a-thread-local-storage-tls-section-in-dotnet michael.hunhoff@mandiant.com ### Bug Fixes +- extractor: interface of cache modified to prevent extracting file and global features multiple times @stevemk14ebr - extractor: removed '.dynsym' as the library name for ELF imports #1318 @stevemk14ebr - extractor: fix vivisect loop detection corner case #1310 @mr-tz - match: extend OS characteristic to match OS_ANY to all supported OSes #1324 @mike-hunhoff - extractor: fix IDA and vivisect string and bytes features overlap and tests #1327 #1336 @xusheng6 ### capa explorer IDA Pro plugin +- rule generator plugin now loads faster when jumping between functions @stevemk14ebr - fix exception when plugin loaded in IDA hosted under idat #1341 @mike-hunhoff - improve embedded PE detection performance and reduce FP potential #1344 @mike-hunhoff diff --git a/capa/ida/plugin/cache.py b/capa/ida/plugin/cache.py index fd34824e..5226df9f 100644 --- a/capa/ida/plugin/cache.py +++ b/capa/ida/plugin/cache.py @@ -48,7 +48,8 @@ class CapaRuleGenFeatureCacheNode: class CapaRuleGenFeatureCache: - def __init__(self, fh_list: List[FunctionHandle], extractor: CapaExplorerFeatureExtractor): + def __init__(self, extractor: CapaExplorerFeatureExtractor): + self.extractor = extractor self.global_features: FeatureSet = collections.defaultdict(set) self.file_node: CapaRuleGenFeatureCacheNode = CapaRuleGenFeatureCacheNode(None, None) @@ -56,12 +57,11 @@ class CapaRuleGenFeatureCache: self.bb_nodes: Dict[Address, CapaRuleGenFeatureCacheNode] = {} self.insn_nodes: Dict[Address, CapaRuleGenFeatureCacheNode] = {} - self._find_global_features(extractor) - self._find_file_features(extractor) - self._find_function_and_below_features(fh_list, extractor) + self._find_global_features() + self._find_file_features() - def _find_global_features(self, extractor: CapaExplorerFeatureExtractor): - for feature, addr in extractor.extract_global_features(): + def _find_global_features(self): + for feature, addr in self.extractor.extract_global_features(): # not all global features may have virtual addresses. # if not, then at least ensure the feature shows up in the index. # the set of addresses will still be empty. @@ -71,46 +71,45 @@ class CapaRuleGenFeatureCache: if feature not in self.global_features: self.global_features[feature] = set() - def _find_file_features(self, extractor: CapaExplorerFeatureExtractor): + def _find_file_features(self): # not all file features may have virtual addresses. # if not, then at least ensure the feature shows up in the index. # the set of addresses will still be empty. - for feature, addr in extractor.extract_file_features(): + for feature, addr in self.extractor.extract_file_features(): if addr is not None: self.file_node.features[feature].add(addr) else: if feature not in self.file_node.features: self.file_node.features[feature] = set() - def _find_function_and_below_features(self, fh_list: List[FunctionHandle], extractor: CapaExplorerFeatureExtractor): - for fh in fh_list: - f_node: CapaRuleGenFeatureCacheNode = CapaRuleGenFeatureCacheNode(fh, self.file_node) + def _find_function_and_below_features(self, fh: FunctionHandle): + f_node: CapaRuleGenFeatureCacheNode = CapaRuleGenFeatureCacheNode(fh, self.file_node) - # extract basic block and below features - for bbh in extractor.get_basic_blocks(fh): - bb_node: CapaRuleGenFeatureCacheNode = CapaRuleGenFeatureCacheNode(bbh, f_node) + # extract basic block and below features + for bbh in self.extractor.get_basic_blocks(fh): + bb_node: CapaRuleGenFeatureCacheNode = CapaRuleGenFeatureCacheNode(bbh, f_node) - # extract instruction features - for ih in extractor.get_instructions(fh, bbh): - inode: CapaRuleGenFeatureCacheNode = CapaRuleGenFeatureCacheNode(ih, bb_node) + # extract instruction features + for ih in self.extractor.get_instructions(fh, bbh): + inode: CapaRuleGenFeatureCacheNode = CapaRuleGenFeatureCacheNode(ih, bb_node) - for feature, addr in extractor.extract_insn_features(fh, bbh, ih): - inode.features[feature].add(addr) + for feature, addr in self.extractor.extract_insn_features(fh, bbh, ih): + inode.features[feature].add(addr) - self.insn_nodes[inode.address] = inode + self.insn_nodes[inode.address] = inode - # extract basic block features - for feature, addr in extractor.extract_basic_block_features(fh, bbh): - bb_node.features[feature].add(addr) + # extract basic block features + for feature, addr in self.extractor.extract_basic_block_features(fh, bbh): + bb_node.features[feature].add(addr) - # store basic block features in cache and function parent - self.bb_nodes[bb_node.address] = bb_node + # store basic block features in cache and function parent + self.bb_nodes[bb_node.address] = bb_node - # extract function features - for feature, addr in extractor.extract_function_features(fh): - f_node.features[feature].add(addr) + # extract function features + for feature, addr in self.extractor.extract_function_features(fh): + f_node.features[feature].add(addr) - self.func_nodes[f_node.address] = f_node + self.func_nodes[f_node.address] = f_node def _find_instruction_capabilities( self, ruleset: RuleSet, insn: CapaRuleGenFeatureCacheNode @@ -155,7 +154,7 @@ class CapaRuleGenFeatureCache: def find_code_capabilities( self, ruleset: RuleSet, fh: FunctionHandle ) -> Tuple[FeatureSet, MatchResults, MatchResults, MatchResults]: - f_node: Optional[CapaRuleGenFeatureCacheNode] = self.func_nodes.get(fh.address, None) + f_node: Optional[CapaRuleGenFeatureCacheNode] = self._get_cached_func_node(fh) if f_node is None: return {}, {}, {}, {} @@ -195,8 +194,16 @@ class CapaRuleGenFeatureCache: _, matches = ruleset.match(Scope.FILE, features, NO_ADDRESS) return features, matches - def get_all_function_features(self, fh: FunctionHandle) -> FeatureSet: + def _get_cached_func_node(self, fh: FunctionHandle) -> Optional[CapaRuleGenFeatureCacheNode]: f_node: Optional[CapaRuleGenFeatureCacheNode] = self.func_nodes.get(fh.address, None) + if f_node is None: + # function is not in our cache, do extraction now + self._find_function_and_below_features(fh) + f_node = self.func_nodes.get(fh.address, None) + return f_node + + def get_all_function_features(self, fh: FunctionHandle) -> FeatureSet: + f_node: Optional[CapaRuleGenFeatureCacheNode] = self._get_cached_func_node(fh) if f_node is None: return {} diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index 72b33a66..07fbe69f 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -192,8 +192,10 @@ class CapaExplorerForm(idaapi.PluginForm): # caches used to speed up capa explorer analysis - these must be init to None self.resdoc_cache: Optional[capa.render.result_document.ResultDocument] = None self.program_analysis_ruleset_cache: Optional[capa.rules.RuleSet] = None - self.rulegen_ruleset_cache: Optional[capa.rules.RuleSet] = None + self.feature_extractor: Optional[CapaExplorerFeatureExtractor] = None + self.rulegen_feature_extractor: Optional[CapaExplorerFeatureExtractor] = None self.rulegen_feature_cache: Optional[CapaRuleGenFeatureCache] = None + self.rulegen_ruleset_cache: Optional[capa.rules.RuleSet] = None self.rulegen_current_function: Optional[FunctionHandle] = None # models @@ -727,13 +729,11 @@ class CapaExplorerForm(idaapi.PluginForm): update_wait_box(f"{text} ({self.process_count} of {self.process_total})") self.process_count += 1 - update_wait_box("initializing feature extractor") - try: - extractor = CapaExplorerFeatureExtractor() - extractor.indicator.progress.connect(slot_progress_feature_extraction) + self.feature_extractor = CapaExplorerFeatureExtractor() + self.feature_extractor.indicator.progress.connect(slot_progress_feature_extraction) except Exception as e: - logger.error("Failed to initialize feature extractor (error: %s).", e, exc_info=True) + logger.error("Failed to initialize feature extractor (error: %s)", e, exc_info=True) return False if ida_kernwin.user_cancelled(): @@ -743,7 +743,7 @@ class CapaExplorerForm(idaapi.PluginForm): update_wait_box("calculating analysis") try: - self.process_total += len(tuple(extractor.get_functions())) + self.process_total += len(tuple(self.feature_extractor.get_functions())) except Exception as e: logger.error("Failed to calculate analysis (error: %s).", e, exc_info=True) return False @@ -770,12 +770,13 @@ class CapaExplorerForm(idaapi.PluginForm): try: meta = capa.ida.helpers.collect_metadata([settings.user[CAPA_SETTINGS_RULE_PATH]]) - capabilities, counts = capa.main.find_capabilities(ruleset, extractor, disable_progress=True) + capabilities, counts = capa.main.find_capabilities( + ruleset, self.feature_extractor, disable_progress=True + ) meta.analysis.feature_counts = counts["feature_counts"] meta.analysis.library_functions = counts["library_functions"] - meta.analysis.layout = capa.main.compute_layout(ruleset, extractor, capabilities) - + meta.analysis.layout = capa.main.compute_layout(ruleset, self.feature_extractor, capabilities) except UserCancelledError: logger.info("User cancelled analysis.") return False @@ -978,26 +979,21 @@ class CapaExplorerForm(idaapi.PluginForm): # so we'll work with a local copy of the ruleset. ruleset = copy.deepcopy(self.rulegen_ruleset_cache) - # clear feature cache - if self.rulegen_feature_cache is not None: - self.rulegen_feature_cache = None - # clear cached function if self.rulegen_current_function is not None: self.rulegen_current_function = None - if ida_kernwin.user_cancelled(): - logger.info("User cancelled analysis.") - return False - - update_wait_box("Initializing feature extractor") - - try: - # must use extractor to get function, as capa analysis requires casted object - extractor = CapaExplorerFeatureExtractor() - except Exception as e: - logger.error("Failed to initialize feature extractor (error: %s)", e, exc_info=True) - return False + # these are init once objects, create on tab change + if self.rulegen_feature_cache is None or self.rulegen_feature_extractor is None: + try: + update_wait_box("performing one-time file analysis") + self.rulegen_feature_extractor = CapaExplorerFeatureExtractor() + self.rulegen_feature_cache = CapaRuleGenFeatureCache(self.rulegen_feature_extractor) + except Exception as e: + logger.error("Failed to initialize feature extractor (error: %s)", e, exc_info=True) + return False + else: + logger.info("Reusing prior rulegen cache") if ida_kernwin.user_cancelled(): logger.info("User cancelled analysis.") @@ -1009,7 +1005,7 @@ class CapaExplorerForm(idaapi.PluginForm): try: f = idaapi.get_func(idaapi.get_screen_ea()) if f is not None: - self.rulegen_current_function = extractor.get_function(f.start_ea) + self.rulegen_current_function = self.rulegen_feature_extractor.get_function(f.start_ea) except Exception as e: logger.error("Failed to resolve function at address 0x%X (error: %s)", f.start_ea, e, exc_info=True) return False @@ -1018,21 +1014,6 @@ class CapaExplorerForm(idaapi.PluginForm): logger.info("User cancelled analysis.") return False - # extract features - try: - fh_list: List[FunctionHandle] = [] - if self.rulegen_current_function is not None: - fh_list.append(self.rulegen_current_function) - - self.rulegen_feature_cache = CapaRuleGenFeatureCache(fh_list, extractor) - except Exception as e: - logger.error("Failed to extract features (error: %s)", e, exc_info=True) - return False - - if ida_kernwin.user_cancelled(): - logger.info("User cancelled analysis.") - return False - update_wait_box("generating function rule matches") all_function_features: FeatureSet = collections.defaultdict(set) @@ -1264,7 +1245,6 @@ class CapaExplorerForm(idaapi.PluginForm): elif index == 1: self.set_view_status_label(self.view_status_label_rulegen_cache) self.view_status_label_analysis_cache = status_prev - self.view_reset_button.setText("Clear") def slot_rulegen_editor_update(self): From 7a6b7c5ef07ee20bf1177218563d62d73ef48703 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Jun 2023 14:58:25 +0000 Subject: [PATCH 56/95] build(deps): bump ruamel-yaml from 0.17.28 to 0.17.32 Bumps [ruamel-yaml](https://sourceforge.net/p/ruamel-yaml/code/ci/default/tree) from 0.17.28 to 0.17.32. --- updated-dependencies: - dependency-name: ruamel-yaml dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2f8c5348..5b6d2527 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ requirements = [ "viv-utils[flirt]==0.7.9", "halo==0.0.31", "networkx==2.5.1", # newer versions no longer support py3.7. - "ruamel.yaml==0.17.28", + "ruamel.yaml==0.17.32", "vivisect==1.1.1", "pefile==2023.2.7", "pyelftools==0.29", From 33a3170bc4160dbfe925ef053e61823ece245996 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Thu, 22 Jun 2023 07:11:54 +0000 Subject: [PATCH 57/95] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 368a27e7..e1c0ac2f 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 368a27e739cdedfa37588ff8176a809159aa562b +Subproject commit e1c0ac2f1c1966bec08f6f9c627619698d6fc1b9 From a92d91e82a79e507742dafed714e8d9277d2df69 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Sat, 24 Jun 2023 08:21:24 +0000 Subject: [PATCH 58/95] Sync capa rules submodule --- CHANGELOG.md | 4 +++- README.md | 2 +- rules | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13502cd9..88182091 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ ### Breaking Changes - Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat -### New Rules (9) +### New Rules (11) - load-code/shellcode/execute-shellcode-via-windows-callback-function ervin.ocampo@mandiant.com jakub.jozwiak@mandiant.com - nursery/execute-shellcode-via-indirect-call ronnie.salomonsen@mandiant.com @@ -20,6 +20,8 @@ - nursery/compiled-with-exescript jonathanlepore@google.com - nursery/check-for-sandbox-via-mac-address-ouis-in-dotnet jonathanlepore@google.com - host-interaction/hardware/enumerate-devices-by-category @mr-tz +- host-interaction/service/continue-service @mr-tz +- host-interaction/service/pause-service @mr-tz - ### Bug Fixes diff --git a/README.md b/README.md index 809a5651..15a5e096 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-800-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-802-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index e1c0ac2f..08e0c617 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit e1c0ac2f1c1966bec08f6f9c627619698d6fc1b9 +Subproject commit 08e0c6178a9b7d2da56a2dcc964e9be3ce285a58 From 8429d6b8e2d46bec2dc32a78d046b4995541d84d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Jun 2023 14:58:26 +0000 Subject: [PATCH 59/95] build(deps-dev): bump mypy from 1.3.0 to 1.4.1 Bumps [mypy](https://github.com/python/mypy) from 1.3.0 to 1.4.1. - [Commits](https://github.com/python/mypy/compare/v1.3.0...v1.4.1) --- updated-dependencies: - dependency-name: mypy dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9231571e..2f059135 100644 --- a/setup.py +++ b/setup.py @@ -77,7 +77,7 @@ setuptools.setup( "ruff==0.0.270", "black==23.3.0", "isort==5.11.4", - "mypy==1.3.0", + "mypy==1.4.1", "psutil==5.9.2", "stix2==3.0.1", "requests==2.31.0", From 7c72b56a4e60f34b628217992e91f907c4b99d1a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Jun 2023 14:58:39 +0000 Subject: [PATCH 60/95] build(deps-dev): bump pytest from 7.3.1 to 7.4.0 Bumps [pytest](https://github.com/pytest-dev/pytest) from 7.3.1 to 7.4.0. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/7.3.1...7.4.0) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9231571e..4dc1f086 100644 --- a/setup.py +++ b/setup.py @@ -69,7 +69,7 @@ setuptools.setup( install_requires=requirements, extras_require={ "dev": [ - "pytest==7.3.1", + "pytest==7.4.0", "pytest-sugar==0.9.4", "pytest-instafail==0.5.0", "pytest-cov==4.1.0", From 8edeb0e6e8cf9f787ff9ff4f5a0ddbd88d3a30d1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 27 Jun 2023 07:33:03 +0000 Subject: [PATCH 61/95] build(deps-dev): bump ruff from 0.0.270 to 0.0.275 Bumps [ruff](https://github.com/astral-sh/ruff) from 0.0.270 to 0.0.275. - [Release notes](https://github.com/astral-sh/ruff/releases) - [Changelog](https://github.com/astral-sh/ruff/blob/main/BREAKING_CHANGES.md) - [Commits](https://github.com/astral-sh/ruff/compare/v0.0.270...v0.0.275) --- updated-dependencies: - dependency-name: ruff dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 45a8d732..4a67c68c 100644 --- a/setup.py +++ b/setup.py @@ -74,7 +74,7 @@ setuptools.setup( "pytest-instafail==0.5.0", "pytest-cov==4.1.0", "pycodestyle==2.10.0", - "ruff==0.0.270", + "ruff==0.0.275", "black==23.3.0", "isort==5.11.4", "mypy==1.4.1", From 4548303a0c17ee5e163298ede71514c64236c47d Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 28 Jun 2023 06:25:24 +0000 Subject: [PATCH 62/95] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 08e0c617..58ac3d72 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 08e0c6178a9b7d2da56a2dcc964e9be3ce285a58 +Subproject commit 58ac3d724bb3ec74b2d0030827d474d97adbf364 From 7ab2a9b163a0f139fcad3b2b640f15fc69f877e4 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Thu, 29 Jun 2023 09:47:46 +0000 Subject: [PATCH 63/95] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index a37873c8..ccf0d07d 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit a37873c8a571b515f2baaf19bfcfaff5c7ef5342 +Subproject commit ccf0d07d273da47ff3e00a066e1b109c97920b99 From 2cd6b8bdaccb95174c4f8ba3eb708a5ac85cae2a Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Thu, 29 Jun 2023 10:01:38 +0000 Subject: [PATCH 64/95] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index ccf0d07d..9d6a155b 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit ccf0d07d273da47ff3e00a066e1b109c97920b99 +Subproject commit 9d6a155b77f62f967bd859dffd1d262cd52a0e54 From 06dd6f45c006506710659aa745179ab28dbb527c Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Mon, 3 Jul 2023 07:54:42 +0000 Subject: [PATCH 65/95] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 58ac3d72..cb3bc24e 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 58ac3d724bb3ec74b2d0030827d474d97adbf364 +Subproject commit cb3bc24e7a33ffdecd74d85506da43eeb229b6c5 From 165fe87aca4d47b3a0645ec22456b7bbdc89221b Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Mon, 3 Jul 2023 14:04:39 +0000 Subject: [PATCH 66/95] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index 9d6a155b..78b620ba 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 9d6a155b77f62f967bd859dffd1d262cd52a0e54 +Subproject commit 78b620ba30d92689f2d98d6ad0e8a6c12553b4ed From 301d8425c1922a574fd09fc3b3e17cf44989a8f9 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Mon, 3 Jul 2023 14:05:01 +0000 Subject: [PATCH 67/95] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index 78b620ba..915f9d9d 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 78b620ba30d92689f2d98d6ad0e8a6c12553b4ed +Subproject commit 915f9d9d85d3104aeb4dd2fa1b7d9f023b3c43ed From 066e42e2711535f038f2c31802babf016d650ce9 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Mon, 3 Jul 2023 14:05:29 +0000 Subject: [PATCH 68/95] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index 915f9d9d..effa7ae9 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 915f9d9d85d3104aeb4dd2fa1b7d9f023b3c43ed +Subproject commit effa7ae91ee9ab13c949064ff24ffa7f3379f1e7 From 30950f129ec9cedcebf81ed57994950bf9039131 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 4 Jul 2023 08:54:40 +0000 Subject: [PATCH 69/95] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index effa7ae9..451d187c 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit effa7ae91ee9ab13c949064ff24ffa7f3379f1e7 +Subproject commit 451d187c1784ee2cb6e5e6d7bc32bce5e1c04f89 From 446114acc3524ae9356c2a1ffd2475e2de5672d5 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 4 Jul 2023 08:54:56 +0000 Subject: [PATCH 70/95] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index 451d187c..16e38a33 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 451d187c1784ee2cb6e5e6d7bc32bce5e1c04f89 +Subproject commit 16e38a33d183d0afb0ca0d0d1a311090e9c93be7 From 9a7ce0b04848348b10ef90329c904efa6e72cac4 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 4 Jul 2023 08:55:21 +0000 Subject: [PATCH 71/95] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index 16e38a33..76810b63 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 16e38a33d183d0afb0ca0d0d1a311090e9c93be7 +Subproject commit 76810b63f8bdf829d9b36133e961ea6c14967e8a From 2d55976cb4711c327340d2a439a68691ed870aac Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 5 Jul 2023 06:40:30 +0000 Subject: [PATCH 72/95] Sync capa rules submodule --- CHANGELOG.md | 3 ++- README.md | 2 +- rules | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 88182091..150f5494 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ ### Breaking Changes - Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat -### New Rules (11) +### New Rules (12) - load-code/shellcode/execute-shellcode-via-windows-callback-function ervin.ocampo@mandiant.com jakub.jozwiak@mandiant.com - nursery/execute-shellcode-via-indirect-call ronnie.salomonsen@mandiant.com @@ -22,6 +22,7 @@ - host-interaction/hardware/enumerate-devices-by-category @mr-tz - host-interaction/service/continue-service @mr-tz - host-interaction/service/pause-service @mr-tz +- persistence/exchange/act-as-exchange-transport-agent jakub.jozwiak@mandiant.com - ### Bug Fixes diff --git a/README.md b/README.md index 15a5e096..fc744088 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-802-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-803-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index cb3bc24e..71450724 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit cb3bc24e7a33ffdecd74d85506da43eeb229b6c5 +Subproject commit 71450724d331a5bcc57bf3d8c5dd950f72c8c2cd From 1e258c3bc263ababd79263e7df543d016a329f4c Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 5 Jul 2023 06:41:20 +0000 Subject: [PATCH 73/95] Sync capa rules submodule --- CHANGELOG.md | 3 ++- README.md | 2 +- rules | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 150f5494..d1c23988 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ ### Breaking Changes - Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat -### New Rules (12) +### New Rules (14) - load-code/shellcode/execute-shellcode-via-windows-callback-function ervin.ocampo@mandiant.com jakub.jozwiak@mandiant.com - nursery/execute-shellcode-via-indirect-call ronnie.salomonsen@mandiant.com @@ -23,6 +23,7 @@ - host-interaction/service/continue-service @mr-tz - host-interaction/service/pause-service @mr-tz - persistence/exchange/act-as-exchange-transport-agent jakub.jozwiak@mandiant.com +- host-interaction/file-system/create-virtual-file-system-in-dotnet jakub.jozwiak@mandiant.com - ### Bug Fixes diff --git a/README.md b/README.md index fc744088..7d73da68 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-803-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-804-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index 71450724..b46b6b26 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 71450724d331a5bcc57bf3d8c5dd950f72c8c2cd +Subproject commit b46b6b2687b9395dfa4e66ff5001122b8fed510d From 0c3c5e42ffacc632cf6cecfe0d9be323e1e62aed Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 5 Jul 2023 06:41:40 +0000 Subject: [PATCH 74/95] Sync capa rules submodule --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d1c23988..5aa96cb2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ ### Breaking Changes - Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat -### New Rules (14) +### New Rules (15) - load-code/shellcode/execute-shellcode-via-windows-callback-function ervin.ocampo@mandiant.com jakub.jozwiak@mandiant.com - nursery/execute-shellcode-via-indirect-call ronnie.salomonsen@mandiant.com @@ -24,6 +24,7 @@ - host-interaction/service/pause-service @mr-tz - persistence/exchange/act-as-exchange-transport-agent jakub.jozwiak@mandiant.com - host-interaction/file-system/create-virtual-file-system-in-dotnet jakub.jozwiak@mandiant.com +- compiler/cx_freeze/compiled-with-cx_freeze @mr-tz jakub.jozwiak@mandiant.com - ### Bug Fixes From 0a74eb671f1fedae9aa885fe50eeea1b90a50faa Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 5 Jul 2023 06:58:23 +0000 Subject: [PATCH 75/95] Sync capa rules submodule --- CHANGELOG.md | 3 ++- README.md | 2 +- rules | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5aa96cb2..a0b973cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ ### Breaking Changes - Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat -### New Rules (15) +### New Rules (16) - load-code/shellcode/execute-shellcode-via-windows-callback-function ervin.ocampo@mandiant.com jakub.jozwiak@mandiant.com - nursery/execute-shellcode-via-indirect-call ronnie.salomonsen@mandiant.com @@ -25,6 +25,7 @@ - persistence/exchange/act-as-exchange-transport-agent jakub.jozwiak@mandiant.com - host-interaction/file-system/create-virtual-file-system-in-dotnet jakub.jozwiak@mandiant.com - compiler/cx_freeze/compiled-with-cx_freeze @mr-tz jakub.jozwiak@mandiant.com +- communication/socket/create-vmci-socket jakub.jozwiak@mandiant.com - ### Bug Fixes diff --git a/README.md b/README.md index 7d73da68..b1d5a1bd 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-804-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-805-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index b46b6b26..6b449aa9 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit b46b6b2687b9395dfa4e66ff5001122b8fed510d +Subproject commit 6b449aa96f0e737dc0ed70c5f61ed5836c5f68f9 From 16ce6a5ef243adc132a483f897e761ca09d37f9e Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 5 Jul 2023 08:57:27 +0000 Subject: [PATCH 76/95] Sync capa rules submodule --- CHANGELOG.md | 5 ++++- README.md | 2 +- rules | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a0b973cc..0ecf391b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ ### Breaking Changes - Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat -### New Rules (16) +### New Rules (19) - load-code/shellcode/execute-shellcode-via-windows-callback-function ervin.ocampo@mandiant.com jakub.jozwiak@mandiant.com - nursery/execute-shellcode-via-indirect-call ronnie.salomonsen@mandiant.com @@ -26,6 +26,9 @@ - host-interaction/file-system/create-virtual-file-system-in-dotnet jakub.jozwiak@mandiant.com - compiler/cx_freeze/compiled-with-cx_freeze @mr-tz jakub.jozwiak@mandiant.com - communication/socket/create-vmci-socket jakub.jozwiak@mandiant.com +- persistence/office/act-as-excel-xll-add-in jakub.jozwiak@mandiant.com +- persistence/office/act-as-office-com-add-in jakub.jozwiak@mandiant.com +- persistence/office/act-as-word-wll-add-in jakub.jozwiak@mandiant.com - ### Bug Fixes diff --git a/README.md b/README.md index b1d5a1bd..2458b9b5 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-805-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-808-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index 6b449aa9..e541c244 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 6b449aa96f0e737dc0ed70c5f61ed5836c5f68f9 +Subproject commit e541c2444fa294452e0f908cdebb5f094495ad8c From a6763d8882540af6a128915a8cb38d248a66b762 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 5 Jul 2023 08:59:18 +0000 Subject: [PATCH 77/95] Sync capa rules submodule --- CHANGELOG.md | 3 ++- rules | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ecf391b..12a29261 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ ### Breaking Changes - Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat -### New Rules (19) +### New Rules (20) - load-code/shellcode/execute-shellcode-via-windows-callback-function ervin.ocampo@mandiant.com jakub.jozwiak@mandiant.com - nursery/execute-shellcode-via-indirect-call ronnie.salomonsen@mandiant.com @@ -29,6 +29,7 @@ - persistence/office/act-as-excel-xll-add-in jakub.jozwiak@mandiant.com - persistence/office/act-as-office-com-add-in jakub.jozwiak@mandiant.com - persistence/office/act-as-word-wll-add-in jakub.jozwiak@mandiant.com +- anti-analysis/anti-debugging/debugger-evasion/hide-thread-from-debugger michael.hunhoff@mandiant.com jakub.jozwiak@mandiant.com - ### Bug Fixes diff --git a/rules b/rules index e541c244..76eccb54 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit e541c2444fa294452e0f908cdebb5f094495ad8c +Subproject commit 76eccb548b502f83522d885c93256bfcd91ccc79 From 1a2e034ee03a29ae45e4253224717b81ade204e6 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Sat, 24 Jun 2023 10:31:14 +0200 Subject: [PATCH 78/95] update data via script --- CHANGELOG.md | 1 + scripts/linter-data.json | 25 ++++++++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 12a29261..2e78e731 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,7 @@ ### capa explorer IDA Pro plugin ### Development +- update ATT&CK/MBC data for linting #1568 @mr-tz ### Raw diffs - [capa v5.1.0...master](https://github.com/mandiant/capa/compare/v5.1.0...master) diff --git a/scripts/linter-data.json b/scripts/linter-data.json index 5b9eb2ab..3be54c62 100644 --- a/scripts/linter-data.json +++ b/scripts/linter-data.json @@ -54,6 +54,7 @@ "T1583.005": "Acquire Infrastructure::Botnet", "T1583.006": "Acquire Infrastructure::Web Services", "T1583.007": "Acquire Infrastructure::Serverless", + "T1583.008": "Acquire Infrastructure::Malvertising", "T1584": "Compromise Infrastructure", "T1584.001": "Compromise Infrastructure::Domains", "T1584.002": "Compromise Infrastructure::DNS Server", @@ -88,7 +89,8 @@ "T1608.003": "Stage Capabilities::Install Digital Certificate", "T1608.004": "Stage Capabilities::Drive-by Target", "T1608.005": "Stage Capabilities::Link Target", - "T1608.006": "Stage Capabilities::SEO Poisoning" + "T1608.006": "Stage Capabilities::SEO Poisoning", + "T1650": "Acquire Access" }, "Initial Access": { "T1078": "Valid Accounts", @@ -128,6 +130,7 @@ "T1059.006": "Command and Scripting Interpreter::Python", "T1059.007": "Command and Scripting Interpreter::JavaScript", "T1059.008": "Command and Scripting Interpreter::Network Device CLI", + "T1059.009": "Command and Scripting Interpreter::Cloud API", "T1072": "Software Deployment Tools", "T1106": "Native API", "T1129": "Shared Modules", @@ -145,7 +148,8 @@ "T1569.002": "System Services::Service Execution", "T1609": "Container Administration Command", "T1610": "Deploy Container", - "T1648": "Serverless Execution" + "T1648": "Serverless Execution", + "T1651": "Cloud Administration Command" }, "Persistence": { "T1037": "Boot or Logon Initialization Scripts", @@ -247,6 +251,7 @@ "T1556.005": "Modify Authentication Process::Reversible Encryption", "T1556.006": "Modify Authentication Process::Multi-Factor Authentication", "T1556.007": "Modify Authentication Process::Hybrid Identity", + "T1556.008": "Modify Authentication Process::Network Provider DLL", "T1574": "Hijack Execution Flow", "T1574.001": "Hijack Execution Flow::DLL Search Order Hijacking", "T1574.002": "Hijack Execution Flow::DLL Side-Loading", @@ -372,6 +377,8 @@ "T1027.007": "Obfuscated Files or Information::Dynamic API Resolution", "T1027.008": "Obfuscated Files or Information::Stripped Payloads", "T1027.009": "Obfuscated Files or Information::Embedded Payloads", + "T1027.010": "Obfuscated Files or Information::Command Obfuscation", + "T1027.011": "Obfuscated Files or Information::Fileless Storage", "T1036": "Masquerading", "T1036.001": "Masquerading::Invalid Code Signature", "T1036.002": "Masquerading::Right-to-Left Override", @@ -380,6 +387,7 @@ "T1036.005": "Masquerading::Match Legitimate Name or Location", "T1036.006": "Masquerading::Space after Filename", "T1036.007": "Masquerading::Double File Extension", + "T1036.008": "Masquerading::Masquerade File Type", "T1055": "Process Injection", "T1055.001": "Process Injection::Dynamic-link Library Injection", "T1055.002": "Process Injection::Portable Executable Injection", @@ -487,6 +495,7 @@ "T1556.005": "Modify Authentication Process::Reversible Encryption", "T1556.006": "Modify Authentication Process::Multi-Factor Authentication", "T1556.007": "Modify Authentication Process::Hybrid Identity", + "T1556.008": "Modify Authentication Process::Network Provider DLL", "T1562": "Impair Defenses", "T1562.001": "Impair Defenses::Disable or Modify Tools", "T1562.002": "Impair Defenses::Disable Windows Event Logging", @@ -497,6 +506,7 @@ "T1562.008": "Impair Defenses::Disable Cloud Logs", "T1562.009": "Impair Defenses::Safe Mode Boot", "T1562.010": "Impair Defenses::Downgrade Attack", + "T1562.011": "Impair Defenses::Spoof Security Alerting", "T1564": "Hide Artifacts", "T1564.001": "Hide Artifacts::Hidden Files and Directories", "T1564.002": "Hide Artifacts::Hidden Users", @@ -574,6 +584,7 @@ "T1552.005": "Unsecured Credentials::Cloud Instance Metadata API", "T1552.006": "Unsecured Credentials::Group Policy Preferences", "T1552.007": "Unsecured Credentials::Container API", + "T1552.008": "Unsecured Credentials::Chat Messages", "T1555": "Credentials from Password Stores", "T1555.001": "Credentials from Password Stores::Keychain", "T1555.002": "Credentials from Password Stores::Securityd Memory", @@ -588,6 +599,7 @@ "T1556.005": "Modify Authentication Process::Reversible Encryption", "T1556.006": "Modify Authentication Process::Multi-Factor Authentication", "T1556.007": "Modify Authentication Process::Hybrid Identity", + "T1556.008": "Modify Authentication Process::Network Provider DLL", "T1557": "Adversary-in-the-Middle", "T1557.001": "Adversary-in-the-Middle::LLMNR/NBT-NS Poisoning and SMB Relay", "T1557.002": "Adversary-in-the-Middle::ARP Cache Poisoning", @@ -630,7 +642,7 @@ "T1124": "System Time Discovery", "T1135": "Network Share Discovery", "T1201": "Password Policy Discovery", - "T1217": "Browser Bookmark Discovery", + "T1217": "Browser Information Discovery", "T1482": "Domain Trust Discovery", "T1497": "Virtualization/Sandbox Evasion", "T1497.001": "Virtualization/Sandbox Evasion::System Checks", @@ -646,7 +658,8 @@ "T1614.001": "System Location Discovery::System Language Discovery", "T1615": "Group Policy Discovery", "T1619": "Cloud Storage Object Discovery", - "T1622": "Debugger Evasion" + "T1622": "Debugger Evasion", + "T1652": "Device Driver Discovery" }, "Lateral Movement": { "T1021": "Remote Services", @@ -656,6 +669,7 @@ "T1021.004": "Remote Services::SSH", "T1021.005": "Remote Services::VNC", "T1021.006": "Remote Services::Windows Remote Management", + "T1021.007": "Remote Services::Cloud Services", "T1072": "Software Deployment Tools", "T1080": "Taint Shared Content", "T1091": "Replication Through Removable Media", @@ -768,7 +782,8 @@ "T1537": "Transfer Data to Cloud Account", "T1567": "Exfiltration Over Web Service", "T1567.001": "Exfiltration Over Web Service::Exfiltration to Code Repository", - "T1567.002": "Exfiltration Over Web Service::Exfiltration to Cloud Storage" + "T1567.002": "Exfiltration Over Web Service::Exfiltration to Cloud Storage", + "T1567.003": "Exfiltration Over Web Service::Exfiltration to Text Storage Sites" }, "Impact": { "T1485": "Data Destruction", From bb053561ef215034282fef0de11303811a07a6fb Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 5 Jul 2023 18:03:57 +0200 Subject: [PATCH 79/95] import-to-ida: decode MD5 to hex --- scripts/import-to-ida.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/import-to-ida.py b/scripts/import-to-ida.py index 058c2553..c0f788c6 100644 --- a/scripts/import-to-ida.py +++ b/scripts/import-to-ida.py @@ -28,6 +28,7 @@ Unless required by applicable law or agreed to in writing, software distributed is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ +import binascii import json import logging @@ -77,7 +78,7 @@ def main(): # # see: https://github.com/idapython/bin/issues/11 a = doc["meta"]["sample"]["md5"].lower() - b = ida_nalt.retrieve_input_file_md5().lower() + b = binascii.hexlify(ida_nalt.retrieve_input_file_md5()).decode("ascii").lower() if not a.startswith(b): logger.error("sample mismatch") return -2 From 169b3d60a81ff0ac8fc7be1ef2d1c67ea3292678 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 5 Jul 2023 18:04:15 +0200 Subject: [PATCH 80/95] import-to-ida: update to use v5 JSON format closes #1584 --- scripts/import-to-ida.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/import-to-ida.py b/scripts/import-to-ida.py index c0f788c6..fd762f58 100644 --- a/scripts/import-to-ida.py +++ b/scripts/import-to-ida.py @@ -94,8 +94,11 @@ def main(): name = rule["meta"]["name"] ns = rule["meta"].get("namespace", "") - for va in rule["matches"].keys(): - va = int(va) + for address, match in rule["matches"]: + if address["type"] != "absolute": + continue + + va = address["value"] rows.append((ns, name, va)) # order by (namespace, name) so that like things show up together From 19a5ef8a644327f2fe436a73ccea390adaf95c6e Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 5 Jul 2023 18:21:03 +0200 Subject: [PATCH 81/95] import-to-ida: use existing result document json parser --- scripts/import-to-ida.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/scripts/import-to-ida.py b/scripts/import-to-ida.py index fd762f58..8b9b3471 100644 --- a/scripts/import-to-ida.py +++ b/scripts/import-to-ida.py @@ -29,13 +29,16 @@ Unless required by applicable law or agreed to in writing, software distributed See the License for the specific language governing permissions and limitations under the License. """ import binascii -import json import logging import ida_nalt import ida_funcs import ida_kernwin +import capa.rules +import capa.features.freeze +import capa.render.result_document + logger = logging.getLogger("capa") @@ -65,40 +68,37 @@ def main(): if not path: return 0 - with open(path, "rb") as f: - doc = json.loads(f.read().decode("utf-8")) - - if "meta" not in doc or "rules" not in doc: - logger.error("doesn't appear to be a capa report") - return -1 + result_doc = capa.render.result_document.ResultDocument.parse_file(path) + meta, capabilities = result_doc.to_capa() # in IDA 7.4, the MD5 hash may be truncated, for example: # wanted: 84882c9d43e23d63b82004fae74ebb61 # found: b'84882C9D43E23D63B82004FAE74EBB6\x00' # # see: https://github.com/idapython/bin/issues/11 - a = doc["meta"]["sample"]["md5"].lower() + a = meta["sample"]["md5"].lower() b = binascii.hexlify(ida_nalt.retrieve_input_file_md5()).decode("ascii").lower() if not a.startswith(b): logger.error("sample mismatch") return -2 rows = [] - for rule in doc["rules"].values(): - if rule["meta"].get("lib"): + for name in capabilities.keys(): + rule = result_doc.rules[name] + if rule.meta.lib: continue - if rule["meta"].get("capa/subscope"): + if rule.meta.is_subscope_rule: continue - if rule["meta"]["scope"] != "function": + if rule.meta.scope != capa.rules.Scope.FUNCTION: continue - name = rule["meta"]["name"] - ns = rule["meta"].get("namespace", "") - for address, match in rule["matches"]: - if address["type"] != "absolute": + ns = rule.meta.namespace + + for address, _ in rule.matches: + if address.type != capa.features.freeze.AddressType.ABSOLUTE: continue - va = address["value"] + va = address.value rows.append((ns, name, va)) # order by (namespace, name) so that like things show up together From 694143ce6bf31eca8669c05ef51262d29a97aad3 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 5 Jul 2023 18:24:37 +0200 Subject: [PATCH 82/95] import-to-ida: use Metadata type not json document --- scripts/import-to-ida.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/import-to-ida.py b/scripts/import-to-ida.py index 8b9b3471..42c56445 100644 --- a/scripts/import-to-ida.py +++ b/scripts/import-to-ida.py @@ -28,8 +28,8 @@ Unless required by applicable law or agreed to in writing, software distributed is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import binascii import logging +import binascii import ida_nalt import ida_funcs @@ -76,7 +76,7 @@ def main(): # found: b'84882C9D43E23D63B82004FAE74EBB6\x00' # # see: https://github.com/idapython/bin/issues/11 - a = meta["sample"]["md5"].lower() + a = meta.sample.md5.lower() b = binascii.hexlify(ida_nalt.retrieve_input_file_md5()).decode("ascii").lower() if not a.startswith(b): logger.error("sample mismatch") From bf5b2612c89acfe6ae5923a610cf1bc9c1a3851b Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 5 Jul 2023 18:27:20 +0200 Subject: [PATCH 83/95] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9334beea..23294012 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ - Add logging and print redirect to tqdm for capa main [#749](https://github.com/mandiant/capa/issues/749) [@Aayush-Goel-04](https://github.com/aayush-goel-04) - extractor: fix binja installation path detection does not work with Python 3.11 - tests: refine the IDA test runner script #1513 @williballenthin +- import-to-ida: fix bug introduced with JSON report changes in v5 #1584 @williballenthin ### capa explorer IDA Pro plugin From 5d1e26a95e477ab125743d3fd28134b3a249a42a Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 5 Jul 2023 18:34:41 +0200 Subject: [PATCH 84/95] update minimum supported python version to 3.8 --- .github/workflows/publish.yml | 2 +- .github/workflows/tests.yml | 4 ++-- CHANGELOG.md | 1 + capa/helpers.py | 2 +- capa/ida/plugin/README.md | 4 ++-- capa/main.py | 4 ++-- setup.py | 2 +- 7 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 520e0894..002a7095 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -15,7 +15,7 @@ jobs: - name: Set up Python uses: actions/setup-python@d27e3f3d7c64b4bbf8e4abfb9b63b83e846e0435 # v4.5.0 with: - python-version: '3.7' + python-version: '3.8' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 64475f65..b6db661b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -69,7 +69,7 @@ jobs: matrix: os: [ubuntu-20.04, windows-2019, macos-11] # across all operating systems - python-version: ["3.7", "3.11"] + python-version: ["3.8", "3.11"] include: # on Ubuntu run these as well - os: ubuntu-20.04 @@ -104,7 +104,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.7", "3.11"] + python-version: ["3.8", "3.11"] steps: - name: Checkout capa with submodules # do only run if BN_SERIAL is available, have to do this in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118 diff --git a/CHANGELOG.md b/CHANGELOG.md index 9334beea..a001f47f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ ### Breaking Changes - Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat +- Python 3.8 is now the minimum supported Python version #1578 @williballenthin ### New Rules (21) diff --git a/capa/helpers.py b/capa/helpers.py index c03e0553..38bd2d56 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -155,7 +155,7 @@ def log_unsupported_runtime_error(): logger.error("-" * 80) logger.error(" Unsupported runtime or Python interpreter.") logger.error(" ") - logger.error(" capa supports running under Python 3.7 and higher.") + logger.error(" capa supports running under Python 3.8 and higher.") logger.error(" ") logger.error( " If you're seeing this message on the command line, please ensure you're running a supported Python version." diff --git a/capa/ida/plugin/README.md b/capa/ida/plugin/README.md index 6dd07002..4bf3616c 100644 --- a/capa/ida/plugin/README.md +++ b/capa/ida/plugin/README.md @@ -95,7 +95,7 @@ can update using the `Settings` button. ### Requirements -capa explorer supports Python versions >= 3.7.x and IDA Pro versions >= 7.4. The following IDA Pro versions have been tested: +capa explorer supports Python versions >= 3.8.x and IDA Pro versions >= 7.4. The following IDA Pro versions have been tested: * IDA 7.4 * IDA 7.5 @@ -105,7 +105,7 @@ capa explorer supports Python versions >= 3.7.x and IDA Pro versions >= 7.4. The * IDA 8.1 * IDA 8.2 -capa explorer is however limited to the Python versions supported by your IDA installation (which may not include all Python versions >= 3.7.x). +capa explorer is however limited to the Python versions supported by your IDA installation (which may not include all Python versions >= 3.8.x). If you encounter issues with your specific setup, please open a new [Issue](https://github.com/mandiant/capa/issues). diff --git a/capa/main.py b/capa/main.py index bdf0cec3..64b0509b 100644 --- a/capa/main.py +++ b/capa/main.py @@ -1072,8 +1072,8 @@ def handle_common_args(args): def main(argv=None): - if sys.version_info < (3, 7): - raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.7+") + if sys.version_info < (3, 8): + raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+") if argv is None: argv = sys.argv[1:] diff --git a/setup.py b/setup.py index 4a67c68c..52ad0a43 100644 --- a/setup.py +++ b/setup.py @@ -107,5 +107,5 @@ setuptools.setup( "Programming Language :: Python :: 3", "Topic :: Security", ], - python_requires=">=3.7", + python_requires=">=3.8", ) From dd2d5431a916e21eae99ea3d65cbfee0c449f77f Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 5 Jul 2023 18:44:12 +0200 Subject: [PATCH 85/95] setup: bump networkx to 3.1 since we now have python 3.8 as min version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 52ad0a43..85ce8ec0 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ requirements = [ "ida-settings==2.1.0", "viv-utils[flirt]==0.7.9", "halo==0.0.31", - "networkx==2.5.1", # newer versions no longer support py3.7. + "networkx==3.1", "ruamel.yaml==0.17.32", "vivisect==1.1.1", "pefile==2023.2.7", From 6a767600338a567ffcfc719c46b98e5fc51f0889 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 5 Jul 2023 18:55:32 +0200 Subject: [PATCH 86/95] render: use fancy boxes closes #1586 --- capa/render/default.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/capa/render/default.py b/capa/render/default.py index 76659252..1bf1cf6d 100644 --- a/capa/render/default.py +++ b/capa/render/default.py @@ -40,7 +40,7 @@ def render_meta(doc: rd.ResultDocument, ostream: StringIO): ("path", doc.meta.sample.path), ] - ostream.write(tabulate.tabulate(rows, tablefmt="psql")) + ostream.write(tabulate.tabulate(rows, tablefmt="mixed_outline")) ostream.write("\n") @@ -102,7 +102,7 @@ def render_capabilities(doc: rd.ResultDocument, ostream: StringIO): if rows: ostream.write( - tabulate.tabulate(rows, headers=[width("CAPABILITY", 50), width("NAMESPACE", 50)], tablefmt="psql") + tabulate.tabulate(rows, headers=[width("Capability", 50), width("Namespace", 50)], tablefmt="mixed_outline") ) ostream.write("\n") else: @@ -148,7 +148,7 @@ def render_attack(doc: rd.ResultDocument, ostream: StringIO): if rows: ostream.write( tabulate.tabulate( - rows, headers=[width("ATT&CK Tactic", 20), width("ATT&CK Technique", 80)], tablefmt="psql" + rows, headers=[width("ATT&CK Tactic", 20), width("ATT&CK Technique", 80)], tablefmt="mixed_grid" ) ) ostream.write("\n") @@ -190,7 +190,7 @@ def render_mbc(doc: rd.ResultDocument, ostream: StringIO): if rows: ostream.write( - tabulate.tabulate(rows, headers=[width("MBC Objective", 25), width("MBC Behavior", 75)], tablefmt="psql") + tabulate.tabulate(rows, headers=[width("MBC Objective", 25), width("MBC Behavior", 75)], tablefmt="mixed_grid") ) ostream.write("\n") From 2b6cc6fee2dfe7034a39a89d9f881abb4574f5ab Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 5 Jul 2023 18:57:37 +0200 Subject: [PATCH 87/95] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9334beea..ebc389e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### New Features - Utility script to detect feature overlap between new and existing CAPA rules [#1451](https://github.com/mandiant/capa/issues/1451) [@Aayush-Goel-04](https://github.com/aayush-goel-04) +- use fancy box drawing characters for default output #1586 @williballenthin ### Breaking Changes - Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat From 23ed0a5d9ddf88421e79c10a3928ed342afab30e Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 5 Jul 2023 19:06:33 +0200 Subject: [PATCH 88/95] main: don't leave behind traces of the progress bar --- CHANGELOG.md | 1 + capa/main.py | 2 +- scripts/lint.py | 2 +- scripts/profile-time.py | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9334beea..8ef51d71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ - Add logging and print redirect to tqdm for capa main [#749](https://github.com/mandiant/capa/issues/749) [@Aayush-Goel-04](https://github.com/aayush-goel-04) - extractor: fix binja installation path detection does not work with Python 3.11 - tests: refine the IDA test runner script #1513 @williballenthin +- output: don't leave behind traces of progress bar @williballenthin ### capa explorer IDA Pro plugin diff --git a/capa/main.py b/capa/main.py index bdf0cec3..af52b82d 100644 --- a/capa/main.py +++ b/capa/main.py @@ -262,7 +262,7 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro functions = list(extractor.get_functions()) n_funcs = len(functions) - pb = pbar(functions, desc="matching", unit=" functions", postfix="skipped 0 library functions") + pb = pbar(functions, desc="matching", unit=" functions", postfix="skipped 0 library functions", leave=False) for f in pb: if extractor.is_library_function(f.address): function_name = extractor.get_function_name(f.address) diff --git a/scripts/lint.py b/scripts/lint.py index a80d3e12..8348cdea 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -873,7 +873,7 @@ def lint(ctx: Context): ret = {} source_rules = [rule for rule in ctx.rules.rules.values() if not rule.is_subscope_rule()] - with tqdm.contrib.logging.tqdm_logging_redirect(source_rules, unit="rule") as pbar: + with tqdm.contrib.logging.tqdm_logging_redirect(source_rules, unit="rule", leave=False) as pbar: with capa.helpers.redirecting_print_to_tqdm(False): for rule in pbar: name = rule.name diff --git a/scripts/profile-time.py b/scripts/profile-time.py index 09d125d8..7ce28962 100644 --- a/scripts/profile-time.py +++ b/scripts/profile-time.py @@ -109,7 +109,7 @@ def main(argv=None): args.sample, args.format, args.os, capa.main.BACKEND_VIV, sig_paths, should_save_workspace=False ) - with tqdm.tqdm(total=args.number * args.repeat) as pbar: + with tqdm.tqdm(total=args.number * args.repeat, leave=False) as pbar: def do_iteration(): capa.perf.reset() From 9bcd7678a43210fc76485c7177ecb122ba1730d6 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 5 Jul 2023 19:14:15 +0200 Subject: [PATCH 89/95] main: fix console output on windows (in CI) --- capa/main.py | 16 ++++++++++++++++ setup.py | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/capa/main.py b/capa/main.py index bdf0cec3..b8730326 100644 --- a/capa/main.py +++ b/capa/main.py @@ -8,6 +8,7 @@ Unless required by applicable law or agreed to in writing, software distributed is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ +import io import os import sys import time @@ -997,6 +998,21 @@ def handle_common_args(args): codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None) + if isinstance(sys.stdout, io.TextIOWrapper) or hasattr(sys.stdout, "reconfigure"): + # from sys.stdout type hint: + # + # TextIO is used instead of more specific types for the standard streams, + # since they are often monkeypatched at runtime. At startup, the objects + # are initialized to instances of TextIOWrapper. + # + # To use methods from TextIOWrapper, use an isinstance check to ensure that + # the streams have not been overridden: + # + # if isinstance(sys.stdout, io.TextIOWrapper): + # sys.stdout.reconfigure(...) + sys.stdout.reconfigure(encoding="utf-8") + colorama.just_fix_windows_console() + if args.color == "always": colorama.init(strip=False) elif args.color == "auto": diff --git a/setup.py b/setup.py index 4a67c68c..f68e64d9 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ requirements = [ "tqdm==4.65.0", "pyyaml==6.0", "tabulate==0.9.0", - "colorama==0.4.5", + "colorama==0.4.6", "termcolor==2.3.0", "wcwidth==0.2.6", "ida-settings==2.1.0", From ba8040ace5b538168cb17e1e070dbc4b4cd108ba Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 5 Jul 2023 19:15:33 +0200 Subject: [PATCH 90/95] main: remove old codec registration for py3.7 --- capa/main.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/capa/main.py b/capa/main.py index 64b0509b..e2470d19 100644 --- a/capa/main.py +++ b/capa/main.py @@ -990,13 +990,6 @@ def handle_common_args(args): # disable vivisect-related logging, it's verbose and not relevant for capa users set_vivisect_log_level(logging.CRITICAL) - # Since Python 3.8 cp65001 is an alias to utf_8, but not for Python < 3.8 - # TODO: remove this code when only supporting Python 3.8+ - # https://stackoverflow.com/a/3259271/87207 - import codecs - - codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None) - if args.color == "always": colorama.init(strip=False) elif args.color == "auto": From b5a063b0d982be78d337cb819b59787d0c369c8c Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 5 Jul 2023 19:19:26 +0200 Subject: [PATCH 91/95] pep8 --- capa/render/default.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/capa/render/default.py b/capa/render/default.py index 1bf1cf6d..15e2a5e8 100644 --- a/capa/render/default.py +++ b/capa/render/default.py @@ -190,7 +190,9 @@ def render_mbc(doc: rd.ResultDocument, ostream: StringIO): if rows: ostream.write( - tabulate.tabulate(rows, headers=[width("MBC Objective", 25), width("MBC Behavior", 75)], tablefmt="mixed_grid") + tabulate.tabulate( + rows, headers=[width("MBC Objective", 25), width("MBC Behavior", 75)], tablefmt="mixed_grid" + ) ) ostream.write("\n") From 2e27745b5f481952ff1aa7b8654adf6683d74942 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 5 Jul 2023 19:30:55 +0200 Subject: [PATCH 92/95] setup: bump mypy hints for colorama --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f68e64d9..872c277c 100644 --- a/setup.py +++ b/setup.py @@ -84,7 +84,7 @@ setuptools.setup( "mypy-protobuf==3.4.0", # type stubs for mypy "types-backports==0.1.3", - "types-colorama==0.4.15", + "types-colorama==0.4.15.11", "types-PyYAML==6.0.8", "types-tabulate==0.9.0.1", "types-termcolor==1.1.4", From 49ffbdd54d7d331dea3dc259f964d08cb86e0248 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Thu, 6 Jul 2023 08:04:33 +0000 Subject: [PATCH 93/95] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index 76810b63..c2c61f05 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 76810b63f8bdf829d9b36133e961ea6c14967e8a +Subproject commit c2c61f05fbd8a7c3a6d5283dd05289507e0cbc2e From b57188e98c94b90b2b9ce47d3081fb8fd6870dc9 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Thu, 6 Jul 2023 08:17:32 +0000 Subject: [PATCH 94/95] Sync capa rules submodule --- CHANGELOG.md | 3 ++- README.md | 2 +- rules | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e78e731..9334beea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ ### Breaking Changes - Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat -### New Rules (20) +### New Rules (21) - load-code/shellcode/execute-shellcode-via-windows-callback-function ervin.ocampo@mandiant.com jakub.jozwiak@mandiant.com - nursery/execute-shellcode-via-indirect-call ronnie.salomonsen@mandiant.com @@ -30,6 +30,7 @@ - persistence/office/act-as-office-com-add-in jakub.jozwiak@mandiant.com - persistence/office/act-as-word-wll-add-in jakub.jozwiak@mandiant.com - anti-analysis/anti-debugging/debugger-evasion/hide-thread-from-debugger michael.hunhoff@mandiant.com jakub.jozwiak@mandiant.com +- host-interaction/memory/create-new-application-domain-in-dotnet jakub.jozwiak@mandiant.com - ### Bug Fixes diff --git a/README.md b/README.md index 2458b9b5..cd748a5d 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-808-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-809-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index 76eccb54..f109d758 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 76eccb548b502f83522d885c93256bfcd91ccc79 +Subproject commit f109d758ced8235892da97a5cfe31bcd6b09a4fa From 46ff798faed8feabb1722624266751442fc33be0 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Thu, 6 Jul 2023 09:26:23 +0000 Subject: [PATCH 95/95] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index c2c61f05..bc0c0fe2 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit c2c61f05fbd8a7c3a6d5283dd05289507e0cbc2e +Subproject commit bc0c0fe29a445be7da2a45c40e59cb9ad14651ec