From 8527d02dc8ea1aa1e798db808f4cc2b581df6761 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 6 Dec 2022 15:37:31 +0000 Subject: [PATCH 01/74] pylint fixes --- capa/features/common.py | 6 +++--- capa/features/insn.py | 1 + capa/main.py | 5 ++--- capa/optimizer.py | 2 +- scripts/show-features.py | 3 ++- setup.cfg | 7 +++++++ 6 files changed, 16 insertions(+), 8 deletions(-) diff --git a/capa/features/common.py b/capa/features/common.py index 67c9ed0d..a939f16e 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -11,7 +11,7 @@ import abc import codecs import logging import collections -from typing import TYPE_CHECKING, Set, Dict, List, Union, Optional, Sequence +from typing import TYPE_CHECKING, Set, Dict, List, Union, Optional if TYPE_CHECKING: # circular import, otherwise @@ -279,12 +279,12 @@ class Regex(String): flags |= re.IGNORECASE try: self.re = re.compile(pat, flags) - except re.error: + except re.error as exc: if value.endswith("/i"): value = value[: -len("i")] raise ValueError( "invalid regular expression: %s it should use Python syntax, try it at https://pythex.org" % value - ) + ) from exc def evaluate(self, ctx, short_circuit=True): capa.perf.counters["evaluate.feature"] += 1 diff --git a/capa/features/insn.py b/capa/features/insn.py index 50dd6133..7d01b7c6 100644 --- a/capa/features/insn.py +++ b/capa/features/insn.py @@ -72,6 +72,7 @@ class Offset(Feature): super(Offset, self).__init__(value, description=description) def get_value_str(self): + assert isinstance(self.value, int) return hex(self.value) diff --git a/capa/main.py b/capa/main.py index 373172b2..e9a2b728 100644 --- a/capa/main.py +++ b/capa/main.py @@ -461,6 +461,7 @@ def get_workspace(path, format_, sigpaths): # lazy import enables us to not require viv if user wants SMDA, for example. import viv_utils + import viv_utils.flirt logger.debug("generating vivisect workspace for: %s", path) # TODO should not be auto at this point, anymore @@ -1143,9 +1144,7 @@ def main(argv=None): def ida_main(): - import capa.rules import capa.ida.helpers - import capa.render.default import capa.features.extractors.ida.extractor logging.basicConfig(level=logging.INFO) @@ -1166,7 +1165,7 @@ def ida_main(): rules_path = os.path.join(get_default_root(), "rules") logger.debug("rule path: %s", rules_path) - rules = get_rules(rules_path) + rules = get_rules([rules_path]) rules = capa.rules.RuleSet(rules) meta = capa.ida.helpers.collect_metadata([rules_path]) diff --git a/capa/optimizer.py b/capa/optimizer.py index 0408bf07..997abd6c 100644 --- a/capa/optimizer.py +++ b/capa/optimizer.py @@ -47,7 +47,7 @@ def optimize_statement(statement): if isinstance(statement, (ceng.And, ceng.Or, ceng.Some)): # has .children - statement.children = sorted(statement.children, key=lambda n: get_node_cost(n)) + statement.children = sorted(statement.children, key=get_node_cost) return elif isinstance(statement, (ceng.Not, ceng.Range)): # has .child diff --git a/scripts/show-features.py b/scripts/show-features.py index f07dcf75..00c1eb05 100644 --- a/scripts/show-features.py +++ b/scripts/show-features.py @@ -79,6 +79,7 @@ import capa.exceptions import capa.render.verbose as v import capa.features.common import capa.features.freeze +import capa.features.address import capa.features.extractors.base_extractor from capa.helpers import log_unsupported_runtime_error @@ -108,7 +109,7 @@ def main(argv=None): try: sig_paths = capa.main.get_signatures(args.signatures) - except (IOError) as e: + except IOError as e: logger.error("%s", str(e)) return -1 diff --git a/setup.cfg b/setup.cfg index 5e0292f4..87eef850 100644 --- a/setup.cfg +++ b/setup.cfg @@ -19,3 +19,10 @@ test = pytest ignore = E203, E302, E402, E501, E712, E722, E731, W291, W503 max-line-length = 180 statistics = True + + +[pylint.FORMAT] +max-line-length = 180 + +[pylint] +disable = missing-docstring,invalid-name,import-outside-toplevel,redefined-outer-name,consider-using-f-string \ No newline at end of file From 1caeb248ca36940de663b50a81edc0a06d462baa Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 6 Dec 2022 16:02:21 +0000 Subject: [PATCH 02/74] pylint: fix old-style super calls --- capa/engine.py | 14 ++++----- capa/features/basicblock.py | 2 +- capa/features/common.py | 30 ++++++++++---------- capa/features/extractors/base_extractor.py | 2 +- capa/features/extractors/dnfile/extractor.py | 2 +- capa/features/extractors/dnfile_.py | 2 +- capa/features/extractors/dotnetfile.py | 2 +- capa/features/extractors/elffile.py | 2 +- capa/features/extractors/ida/extractor.py | 2 +- capa/features/extractors/pefile.py | 2 +- capa/features/extractors/smda/extractor.py | 2 +- capa/features/extractors/viv/extractor.py | 2 +- capa/features/file.py | 8 +++--- capa/features/insn.py | 18 ++++++------ capa/ida/helpers.py | 2 +- capa/ida/plugin/__init__.py | 2 +- capa/ida/plugin/form.py | 14 ++++----- capa/ida/plugin/hooks.py | 2 +- capa/ida/plugin/item.py | 28 ++++++++---------- capa/ida/plugin/model.py | 2 +- capa/ida/plugin/proxy.py | 4 +-- capa/ida/plugin/view.py | 16 +++++------ capa/rules.py | 28 +++++++++--------- scripts/lint.py | 2 +- scripts/setup-linter-dependencies.py | 2 +- 25 files changed, 95 insertions(+), 97 deletions(-) diff --git a/capa/engine.py b/capa/engine.py index 0b45dc06..bd26f454 100644 --- a/capa/engine.py +++ b/capa/engine.py @@ -38,7 +38,7 @@ class Statement: """ def __init__(self, description=None): - super(Statement, self).__init__() + super().__init__() self.name = self.__class__.__name__ self.description = description @@ -90,7 +90,7 @@ class And(Statement): """ def __init__(self, children, description=None): - super(And, self).__init__(description=description) + super().__init__(description=description) self.children = children def evaluate(self, ctx, short_circuit=True): @@ -123,7 +123,7 @@ class Or(Statement): """ def __init__(self, children, description=None): - super(Or, self).__init__(description=description) + super().__init__(description=description) self.children = children def evaluate(self, ctx, short_circuit=True): @@ -150,7 +150,7 @@ class Not(Statement): """match only if the child evaluates to False.""" def __init__(self, child, description=None): - super(Not, self).__init__(description=description) + super().__init__(description=description) self.child = child def evaluate(self, ctx, short_circuit=True): @@ -172,7 +172,7 @@ class Some(Statement): """ def __init__(self, count, children, description=None): - super(Some, self).__init__(description=description) + super().__init__(description=description) self.count = count self.children = children @@ -208,7 +208,7 @@ class Range(Statement): """match if the child is contained in the ctx set with a count in the given range.""" def __init__(self, child, min=None, max=None, description=None): - super(Range, self).__init__(description=description) + super().__init__(description=description) self.child = child self.min = min if min is not None else 0 self.max = max if max is not None else (1 << 64 - 1) @@ -237,7 +237,7 @@ class Subscope(Statement): """ def __init__(self, scope, child, description=None): - super(Subscope, self).__init__(description=description) + super().__init__(description=description) self.scope = scope self.child = child diff --git a/capa/features/basicblock.py b/capa/features/basicblock.py index a7a2d15c..09f1b26d 100644 --- a/capa/features/basicblock.py +++ b/capa/features/basicblock.py @@ -11,7 +11,7 @@ from capa.features.common import Feature class BasicBlock(Feature): def __init__(self, description=None): - super(BasicBlock, self).__init__(None, description=description) + super().__init__(0, description=description) def __str__(self): return "basic block" diff --git a/capa/features/common.py b/capa/features/common.py index a939f16e..a8dca781 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -81,7 +81,7 @@ class Result: children: List["Result"], locations: Optional[Set[Address]] = None, ): - super(Result, self).__init__() + super().__init__() self.success = success self.statement = statement self.children = children @@ -110,7 +110,7 @@ class Feature(abc.ABC): value (any): the value of the feature, such as the number or string. description (str): a human-readable description that explains the feature value. """ - super(Feature, self).__init__() + super().__init__() self.name = self.__class__.__name__.lower() self.value = value @@ -165,33 +165,33 @@ class Feature(abc.ABC): class MatchedRule(Feature): def __init__(self, value: str, description=None): - super(MatchedRule, self).__init__(value, description=description) + super().__init__(value, description=description) self.name = "match" class Characteristic(Feature): def __init__(self, value: str, description=None): - super(Characteristic, self).__init__(value, description=description) + super().__init__(value, description=description) class String(Feature): def __init__(self, value: str, description=None): - super(String, self).__init__(value, description=description) + super().__init__(value, description=description) class Class(Feature): def __init__(self, value: str, description=None): - super(Class, self).__init__(value, description=description) + super().__init__(value, description=description) class Namespace(Feature): def __init__(self, value: str, description=None): - super(Namespace, self).__init__(value, description=description) + super().__init__(value, description=description) class Substring(String): def __init__(self, value: str, description=None): - super(Substring, self).__init__(value, description=description) + super().__init__(value, description=description) self.value = value def evaluate(self, ctx, short_circuit=True): @@ -253,7 +253,7 @@ class _MatchedSubstring(Substring): substring: the substring feature that matches. match: mapping from matching string to its locations. """ - super(_MatchedSubstring, self).__init__(str(substring.value), description=substring.description) + super().__init__(str(substring.value), description=substring.description) # we want this to collide with the name of `Substring` above, # so that it works nicely with the renderers. self.name = "substring" @@ -269,7 +269,7 @@ class _MatchedSubstring(Substring): class Regex(String): def __init__(self, value: str, description=None): - super(Regex, self).__init__(value, description=description) + super().__init__(value, description=description) self.value = value pat = self.value[len("/") : -len("/")] @@ -350,7 +350,7 @@ class _MatchedRegex(Regex): regex: the regex feature that matches. matches: mapping from matching string to its locations. """ - super(_MatchedRegex, self).__init__(str(regex.value), description=regex.description) + super().__init__(str(regex.value), description=regex.description) # we want this to collide with the name of `Regex` above, # so that it works nicely with the renderers. self.name = "regex" @@ -373,7 +373,7 @@ class StringFactory: class Bytes(Feature): def __init__(self, value: bytes, description=None): - super(Bytes, self).__init__(value, description=description) + super().__init__(value, description=description) self.value = value def evaluate(self, ctx, **kwargs): @@ -403,7 +403,7 @@ VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY) class Arch(Feature): def __init__(self, value: str, description=None): - super(Arch, self).__init__(value, description=description) + super().__init__(value, description=description) self.name = "arch" @@ -418,7 +418,7 @@ VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS, OS_ANY}) class OS(Feature): def __init__(self, value: str, description=None): - super(OS, self).__init__(value, description=description) + super().__init__(value, description=description) self.name = "os" @@ -436,7 +436,7 @@ FORMAT_UNKNOWN = "unknown" class Format(Feature): def __init__(self, value: str, description=None): - super(Format, self).__init__(value, description=description) + super().__init__(value, description=description) self.name = "format" diff --git a/capa/features/extractors/base_extractor.py b/capa/features/extractors/base_extractor.py index df1f706d..3be983ed 100644 --- a/capa/features/extractors/base_extractor.py +++ b/capa/features/extractors/base_extractor.py @@ -87,7 +87,7 @@ class FeatureExtractor: # for example, the Vivisect feature extract might require the vw and/or path. # this base class doesn't know what to do with that info, though. # - super(FeatureExtractor, self).__init__() + super().__init__() @abc.abstractmethod def get_base_address(self) -> Union[AbsoluteVirtualAddress, capa.features.address._NoAddress]: diff --git a/capa/features/extractors/dnfile/extractor.py b/capa/features/extractors/dnfile/extractor.py index 4f23a34c..b5f707c9 100644 --- a/capa/features/extractors/dnfile/extractor.py +++ b/capa/features/extractors/dnfile/extractor.py @@ -23,7 +23,7 @@ from capa.features.extractors.dnfile.helpers import get_dotnet_managed_method_bo class DnfileFeatureExtractor(FeatureExtractor): def __init__(self, path: str): - super(DnfileFeatureExtractor, self).__init__() + super().__init__() self.pe: dnfile.dnPE = dnfile.dnPE(path) # pre-compute these because we'll yield them at *every* scope. diff --git a/capa/features/extractors/dnfile_.py b/capa/features/extractors/dnfile_.py index 7a459bec..998ea209 100644 --- a/capa/features/extractors/dnfile_.py +++ b/capa/features/extractors/dnfile_.py @@ -60,7 +60,7 @@ GLOBAL_HANDLERS = ( class DnfileFeatureExtractor(FeatureExtractor): def __init__(self, path: str): - super(DnfileFeatureExtractor, self).__init__() + super().__init__() self.path: str = path self.pe: dnfile.dnPE = dnfile.dnPE(path) diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index b9c9f00a..ef6f9f07 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -144,7 +144,7 @@ GLOBAL_HANDLERS = ( class DotnetFileFeatureExtractor(FeatureExtractor): def __init__(self, path: str): - super(DotnetFileFeatureExtractor, self).__init__() + super().__init__() self.path: str = path self.pe: dnfile.dnPE = dnfile.dnPE(path) diff --git a/capa/features/extractors/elffile.py b/capa/features/extractors/elffile.py index 9077f97c..4810bb5f 100644 --- a/capa/features/extractors/elffile.py +++ b/capa/features/extractors/elffile.py @@ -110,7 +110,7 @@ GLOBAL_HANDLERS = ( class ElfFeatureExtractor(FeatureExtractor): def __init__(self, path: str): - super(ElfFeatureExtractor, self).__init__() + super().__init__() self.path = path with open(self.path, "rb") as f: self.elf = ELFFile(io.BytesIO(f.read())) diff --git a/capa/features/extractors/ida/extractor.py b/capa/features/extractors/ida/extractor.py index a09d8fe3..1a587fa6 100644 --- a/capa/features/extractors/ida/extractor.py +++ b/capa/features/extractors/ida/extractor.py @@ -23,7 +23,7 @@ from capa.features.extractors.base_extractor import BBHandle, InsnHandle, Functi class IdaFeatureExtractor(FeatureExtractor): def __init__(self): - super(IdaFeatureExtractor, self).__init__() + super().__init__() self.global_features: List[Tuple[Feature, Address]] = [] self.global_features.extend(capa.features.extractors.ida.global_.extract_os()) self.global_features.extend(capa.features.extractors.ida.global_.extract_arch()) diff --git a/capa/features/extractors/pefile.py b/capa/features/extractors/pefile.py index e6449096..dbdf72ac 100644 --- a/capa/features/extractors/pefile.py +++ b/capa/features/extractors/pefile.py @@ -172,7 +172,7 @@ GLOBAL_HANDLERS = ( class PefileFeatureExtractor(FeatureExtractor): def __init__(self, path: str): - super(PefileFeatureExtractor, self).__init__() + super().__init__() self.path = path self.pe = pefile.PE(path) diff --git a/capa/features/extractors/smda/extractor.py b/capa/features/extractors/smda/extractor.py index 9dbfc44e..b586ba03 100644 --- a/capa/features/extractors/smda/extractor.py +++ b/capa/features/extractors/smda/extractor.py @@ -15,7 +15,7 @@ from capa.features.extractors.base_extractor import BBHandle, InsnHandle, Functi class SmdaFeatureExtractor(FeatureExtractor): def __init__(self, smda_report: SmdaReport, path): - super(SmdaFeatureExtractor, self).__init__() + super().__init__() self.smda_report = smda_report self.path = path with open(self.path, "rb") as f: diff --git a/capa/features/extractors/viv/extractor.py b/capa/features/extractors/viv/extractor.py index a99f9e75..53683f66 100644 --- a/capa/features/extractors/viv/extractor.py +++ b/capa/features/extractors/viv/extractor.py @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) class VivisectFeatureExtractor(FeatureExtractor): def __init__(self, vw, path): - super(VivisectFeatureExtractor, self).__init__() + super().__init__() self.vw = vw self.path = path with open(self.path, "rb") as f: diff --git a/capa/features/file.py b/capa/features/file.py index a9b4598e..735464c6 100644 --- a/capa/features/file.py +++ b/capa/features/file.py @@ -12,19 +12,19 @@ from capa.features.common import Feature class Export(Feature): def __init__(self, value: str, description=None): # value is export name - super(Export, self).__init__(value, description=description) + super().__init__(value, description=description) class Import(Feature): def __init__(self, value: str, description=None): # value is import name - super(Import, self).__init__(value, description=description) + super().__init__(value, description=description) class Section(Feature): def __init__(self, value: str, description=None): # value is section name - super(Section, self).__init__(value, description=description) + super().__init__(value, description=description) class FunctionName(Feature): @@ -32,7 +32,7 @@ class FunctionName(Feature): def __init__(self, name: str, description=None): # value is function name - super(FunctionName, self).__init__(name, description=description) + super().__init__(name, description=description) # override the name property set by `capa.features.Feature` # that would be `functionname` (note missing dash) self.name = "function-name" diff --git a/capa/features/insn.py b/capa/features/insn.py index 7d01b7c6..c21178b7 100644 --- a/capa/features/insn.py +++ b/capa/features/insn.py @@ -21,13 +21,13 @@ def hex(n: int) -> str: class API(Feature): def __init__(self, name: str, description=None): - super(API, self).__init__(name, description=description) + super().__init__(name, description=description) class _AccessFeature(Feature, abc.ABC): # superclass: don't use directly def __init__(self, value: str, access: Optional[str] = None, description: Optional[str] = None): - super(_AccessFeature, self).__init__(value, description=description) + super().__init__(value, description=description) if access is not None: if access not in VALID_FEATURE_ACCESS: raise ValueError("%s access type %s not valid" % (self.name, access)) @@ -47,12 +47,12 @@ class _AccessFeature(Feature, abc.ABC): class Property(_AccessFeature): def __init__(self, value: str, access: Optional[str] = None, description=None): - super(Property, self).__init__(value, access=access, description=description) + super().__init__(value, access=access, description=description) class Number(Feature): def __init__(self, value: Union[int, float], description=None): - super(Number, self).__init__(value, description=description) + super().__init__(value, description=description) def get_value_str(self): if isinstance(self.value, int): @@ -69,7 +69,7 @@ MAX_STRUCTURE_SIZE = 0x10000 class Offset(Feature): def __init__(self, value: int, description=None): - super(Offset, self).__init__(value, description=description) + super().__init__(value, description=description) def get_value_str(self): assert isinstance(self.value, int) @@ -78,7 +78,7 @@ class Offset(Feature): class Mnemonic(Feature): def __init__(self, value: str, description=None): - super(Mnemonic, self).__init__(value, description=description) + super().__init__(value, description=description) # max number of operands to consider for a given instrucion. @@ -92,7 +92,7 @@ class _Operand(Feature, abc.ABC): # superclass: don't use directly # subclasses should set self.name and provide the value string formatter def __init__(self, index: int, value: int, description=None): - super(_Operand, self).__init__(value, description=description) + super().__init__(value, description=description) self.index = index def __hash__(self): @@ -108,7 +108,7 @@ class OperandNumber(_Operand): # operand[i].number: 0x12 def __init__(self, index: int, value: int, description=None): - super(OperandNumber, self).__init__(index, value, description=description) + super().__init__(index, value, description=description) self.name = self.NAMES[index] def get_value_str(self) -> str: @@ -122,7 +122,7 @@ class OperandOffset(_Operand): # operand[i].offset: 0x12 def __init__(self, index: int, value: int, description=None): - super(OperandOffset, self).__init__(index, value, description=description) + super().__init__(index, value, description=description) self.name = self.NAMES[index] def get_value_str(self) -> str: diff --git a/capa/ida/helpers.py b/capa/ida/helpers.py index 27b22a99..eb3151d9 100644 --- a/capa/ida/helpers.py +++ b/capa/ida/helpers.py @@ -170,7 +170,7 @@ class IDAIO: """ def __init__(self): - super(IDAIO, self).__init__() + super().__init__() self.offset = 0 def seek(self, offset, whence=0): diff --git a/capa/ida/plugin/__init__.py b/capa/ida/plugin/__init__.py index 6100f3db..4ffc09af 100644 --- a/capa/ida/plugin/__init__.py +++ b/capa/ida/plugin/__init__.py @@ -93,7 +93,7 @@ class OnUpdatedActionsHook(ida_kernwin.UI_Hooks): """register a callback to be invoked each time the UI actions are updated""" def __init__(self, cb): - super(OnUpdatedActionsHook, self).__init__() + super().__init__() self.cb = cb def updated_actions(self): diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index 41d6ed94..e3c588bc 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -155,7 +155,7 @@ class CapaExplorerProgressIndicator(QtCore.QObject): def __init__(self): """initialize signal object""" - super(CapaExplorerProgressIndicator, self).__init__() + super().__init__() def update(self, text): """emit progress update @@ -174,18 +174,18 @@ class CapaExplorerFeatureExtractor(capa.features.extractors.ida.extractor.IdaFea """ def __init__(self): - super(CapaExplorerFeatureExtractor, self).__init__() + super().__init__() self.indicator = CapaExplorerProgressIndicator() def extract_function_features(self, fh: FunctionHandle): self.indicator.update("function at 0x%X" % fh.inner.start_ea) - return super(CapaExplorerFeatureExtractor, self).extract_function_features(fh) + return super().extract_function_features(fh) class QLineEditClicked(QtWidgets.QLineEdit): def __init__(self, content, parent=None): """ """ - super(QLineEditClicked, self).__init__(content, parent) + super().__init__(content, parent) def mouseReleaseEvent(self, e): """ """ @@ -204,7 +204,7 @@ class QLineEditClicked(QtWidgets.QLineEdit): class CapaSettingsInputDialog(QtWidgets.QDialog): def __init__(self, title, parent=None): """ """ - super(CapaSettingsInputDialog, self).__init__(parent) + super().__init__(parent) self.setWindowTitle(title) self.setMinimumWidth(500) @@ -241,7 +241,7 @@ class CapaExplorerForm(idaapi.PluginForm): def __init__(self, name, option=Options.DEFAULT): """initialize form elements""" - super(CapaExplorerForm, self).__init__() + super().__init__() self.form_title = name self.process_total = 0 @@ -305,7 +305,7 @@ class CapaExplorerForm(idaapi.PluginForm): def Show(self): """creates form if not already create, else brings plugin to front""" - return super(CapaExplorerForm, self).Show( + return super().Show( self.form_title, options=( idaapi.PluginForm.WOPN_TAB diff --git a/capa/ida/plugin/hooks.py b/capa/ida/plugin/hooks.py index 9043f989..23d87821 100644 --- a/capa/ida/plugin/hooks.py +++ b/capa/ida/plugin/hooks.py @@ -16,7 +16,7 @@ class CapaExplorerIdaHooks(idaapi.UI_Hooks): @param screen_ea_changed_hook: function hook for IDA screen ea changed @param action_hooks: dict of IDA action handles """ - super(CapaExplorerIdaHooks, self).__init__() + super().__init__() self.screen_ea_changed_hook = screen_ea_changed_hook self.process_action_hooks = action_hooks diff --git a/capa/ida/plugin/item.py b/capa/ida/plugin/item.py index 5cbc4b24..159333a4 100644 --- a/capa/ida/plugin/item.py +++ b/capa/ida/plugin/item.py @@ -181,7 +181,7 @@ class CapaExplorerRuleItem(CapaExplorerDataItem): @param source: rule source (tooltip) """ display = self.fmt % (name, count) if count > 1 else name - super(CapaExplorerRuleItem, self).__init__(parent, [display, "", namespace], can_check) + super().__init__(parent, [display, "", namespace], can_check) self._source = source @property @@ -200,7 +200,7 @@ class CapaExplorerRuleMatchItem(CapaExplorerDataItem): @param display: text to display in UI @param source: rule match source to display (tooltip) """ - super(CapaExplorerRuleMatchItem, self).__init__(parent, [display, "", ""]) + super().__init__(parent, [display, "", ""]) self._source = source @property @@ -222,14 +222,12 @@ class CapaExplorerFunctionItem(CapaExplorerDataItem): """ assert isinstance(location, AbsoluteVirtualAddress) ea = int(location) - super(CapaExplorerFunctionItem, self).__init__( - parent, [self.fmt % idaapi.get_name(ea), ea_to_hex(ea), ""], can_check - ) + super().__init__(parent, [self.fmt % idaapi.get_name(ea), ea_to_hex(ea), ""], can_check) @property def info(self): """return function name""" - info = super(CapaExplorerFunctionItem, self).info + info = super().info display = info_to_name(info) return display if display else info @@ -255,7 +253,7 @@ class CapaExplorerSubscopeItem(CapaExplorerDataItem): @param parent: parent node @param scope: subscope name """ - super(CapaExplorerSubscopeItem, self).__init__(parent, [self.fmt % scope, "", ""]) + super().__init__(parent, [self.fmt % scope, "", ""]) class CapaExplorerBlockItem(CapaExplorerDataItem): @@ -271,7 +269,7 @@ class CapaExplorerBlockItem(CapaExplorerDataItem): """ assert isinstance(location, AbsoluteVirtualAddress) ea = int(location) - super(CapaExplorerBlockItem, self).__init__(parent, [self.fmt % ea, ea_to_hex(ea), ""]) + super().__init__(parent, [self.fmt % ea, ea_to_hex(ea), ""]) class CapaExplorerInstructionItem(CapaExplorerBlockItem): @@ -298,9 +296,7 @@ class CapaExplorerDefaultItem(CapaExplorerDataItem): assert isinstance(location, AbsoluteVirtualAddress) ea = int(location) - super(CapaExplorerDefaultItem, self).__init__( - parent, [display, ea_to_hex(ea) if ea is not None else "", details] - ) + super().__init__(parent, [display, ea_to_hex(ea) if ea is not None else "", details]) class CapaExplorerFeatureItem(CapaExplorerDataItem): @@ -319,9 +315,9 @@ class CapaExplorerFeatureItem(CapaExplorerDataItem): if location: assert isinstance(location, (AbsoluteVirtualAddress, FileOffsetAddress)) ea = int(location) - super(CapaExplorerFeatureItem, self).__init__(parent, [display, ea_to_hex(ea), details]) + super().__init__(parent, [display, ea_to_hex(ea), details]) else: - super(CapaExplorerFeatureItem, self).__init__(parent, [display, "", details]) + super().__init__(parent, [display, "", details]) class CapaExplorerInstructionViewItem(CapaExplorerFeatureItem): @@ -339,7 +335,7 @@ class CapaExplorerInstructionViewItem(CapaExplorerFeatureItem): assert isinstance(location, AbsoluteVirtualAddress) ea = int(location) details = capa.ida.helpers.get_disasm_line(ea) - super(CapaExplorerInstructionViewItem, self).__init__(parent, display, location=location, details=details) + super().__init__(parent, display, location=location, details=details) self.ida_highlight = idc.get_color(ea, idc.CIC_ITEM) @@ -365,7 +361,7 @@ class CapaExplorerByteViewItem(CapaExplorerFeatureItem): byte_snap = codecs.encode(byte_snap, "hex").upper() details = " ".join([byte_snap[i : i + 2].decode() for i in range(0, len(byte_snap), 2)]) - super(CapaExplorerByteViewItem, self).__init__(parent, display, location=location, details=details) + super().__init__(parent, display, location=location, details=details) self.ida_highlight = idc.get_color(ea, idc.CIC_ITEM) @@ -382,5 +378,5 @@ class CapaExplorerStringViewItem(CapaExplorerFeatureItem): assert isinstance(location, (AbsoluteVirtualAddress, FileOffsetAddress)) ea = int(location) - super(CapaExplorerStringViewItem, self).__init__(parent, display, location=location, details=value) + super().__init__(parent, display, location=location, details=value) self.ida_highlight = idc.get_color(ea, idc.CIC_ITEM) diff --git a/capa/ida/plugin/model.py b/capa/ida/plugin/model.py index 05ac83fb..71b51ca2 100644 --- a/capa/ida/plugin/model.py +++ b/capa/ida/plugin/model.py @@ -51,7 +51,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel): def __init__(self, parent=None): """initialize model""" - super(CapaExplorerDataModel, self).__init__(parent) + super().__init__(parent) # root node does not have parent, contains header columns self.root_node = CapaExplorerDataItem(None, ["Rule Information", "Address", "Details"]) diff --git a/capa/ida/plugin/proxy.py b/capa/ida/plugin/proxy.py index f61a0eb0..ae490d87 100644 --- a/capa/ida/plugin/proxy.py +++ b/capa/ida/plugin/proxy.py @@ -22,7 +22,7 @@ class CapaExplorerRangeProxyModel(QtCore.QSortFilterProxyModel): def __init__(self, parent=None): """initialize proxy filter""" - super(CapaExplorerRangeProxyModel, self).__init__(parent) + super().__init__(parent) self.min_ea = None self.max_ea = None @@ -145,7 +145,7 @@ class CapaExplorerSearchProxyModel(QtCore.QSortFilterProxyModel): def __init__(self, parent=None): """ """ - super(CapaExplorerSearchProxyModel, self).__init__(parent) + super().__init__(parent) self.query = "" self.setFilterKeyColumn(-1) # all columns diff --git a/capa/ida/plugin/view.py b/capa/ida/plugin/view.py index ef656736..9fadaff9 100644 --- a/capa/ida/plugin/view.py +++ b/capa/ida/plugin/view.py @@ -179,7 +179,7 @@ class CapaExplorerRulegenPreview(QtWidgets.QTextEdit): def __init__(self, parent=None): """ """ - super(CapaExplorerRulegenPreview, self).__init__(parent) + super().__init__(parent) self.setFont(QtGui.QFont("Courier", weight=QtGui.QFont.Bold)) self.setLineWrapMode(QtWidgets.QTextEdit.NoWrap) @@ -284,7 +284,7 @@ class CapaExplorerRulegenPreview(QtWidgets.QTextEdit): self.set_selection(select_start_ppos, select_end_ppos, len(self.toPlainText())) self.verticalScrollBar().setSliderPosition(scroll_ppos) else: - super(CapaExplorerRulegenPreview, self).keyPressEvent(e) + super().keyPressEvent(e) def count_previous_lines_from_block(self, block): """calculate number of lines preceding block""" @@ -310,7 +310,7 @@ class CapaExplorerRulegenEditor(QtWidgets.QTreeWidget): def __init__(self, preview, parent=None): """ """ - super(CapaExplorerRulegenEditor, self).__init__(parent) + super().__init__(parent) self.preview = preview @@ -374,18 +374,18 @@ class CapaExplorerRulegenEditor(QtWidgets.QTreeWidget): def dragMoveEvent(self, e): """ """ - super(CapaExplorerRulegenEditor, self).dragMoveEvent(e) + super().dragMoveEvent(e) def dragEventEnter(self, e): """ """ - super(CapaExplorerRulegenEditor, self).dragEventEnter(e) + super().dragEventEnter(e) def dropEvent(self, e): """ """ if not self.indexAt(e.pos()).isValid(): return - super(CapaExplorerRulegenEditor, self).dropEvent(e) + super().dropEvent(e) self.update_preview() expand_tree(self.invisibleRootItem()) @@ -784,7 +784,7 @@ class CapaExplorerRulegenEditor(QtWidgets.QTreeWidget): class CapaExplorerRulegenFeatures(QtWidgets.QTreeWidget): def __init__(self, editor, parent=None): """ """ - super(CapaExplorerRulegenFeatures, self).__init__(parent) + super().__init__(parent) self.parent_items = {} self.editor = editor @@ -1072,7 +1072,7 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView): def __init__(self, model, parent=None): """initialize view""" - super(CapaExplorerQtreeView, self).__init__(parent) + super().__init__(parent) self.setModel(model) diff --git a/capa/rules.py b/capa/rules.py index 5da1f312..8db3d277 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -158,7 +158,7 @@ SUPPORTED_FEATURES[FUNCTION_SCOPE].update(SUPPORTED_FEATURES[BASIC_BLOCK_SCOPE]) class InvalidRule(ValueError): def __init__(self, msg): - super(InvalidRule, self).__init__() + super().__init__() self.msg = msg def __str__(self): @@ -170,7 +170,7 @@ class InvalidRule(ValueError): class InvalidRuleWithPath(InvalidRule): def __init__(self, path, msg): - super(InvalidRuleWithPath, self).__init__(msg) + super().__init__(msg) self.path = path self.msg = msg self.__cause__ = None @@ -181,7 +181,7 @@ class InvalidRuleWithPath(InvalidRule): class InvalidRuleSet(ValueError): def __init__(self, msg): - super(InvalidRuleSet, self).__init__() + super().__init__() self.msg = msg def __str__(self): @@ -539,14 +539,15 @@ def build_statements(d, scope: str): index = key[len("operand[") : -len("].number")] try: index = int(index) - except ValueError: - raise InvalidRule("operand index must be an integer") + except ValueError as e: + raise InvalidRule("operand index must be an integer") from e value, description = parse_description(d[key], key, d.get("description")) + assert isinstance(value, int) try: feature = capa.features.insn.OperandNumber(index, value, description=description) except ValueError as e: - raise InvalidRule(str(e)) + raise InvalidRule(str(e)) from e ensure_feature_valid_for_scope(scope, feature) return feature @@ -554,14 +555,15 @@ def build_statements(d, scope: str): index = key[len("operand[") : -len("].offset")] try: index = int(index) - except ValueError: - raise InvalidRule("operand index must be an integer") + except ValueError as e: + raise InvalidRule("operand index must be an integer") from e value, description = parse_description(d[key], key, d.get("description")) + assert isinstance(value, int) try: feature = capa.features.insn.OperandOffset(index, value, description=description) except ValueError as e: - raise InvalidRule(str(e)) + raise InvalidRule(str(e)) from e ensure_feature_valid_for_scope(scope, feature) return feature @@ -581,7 +583,7 @@ def build_statements(d, scope: str): try: feature = capa.features.insn.Property(value, access=access, description=description) except ValueError as e: - raise InvalidRule(str(e)) + raise InvalidRule(str(e)) from e ensure_feature_valid_for_scope(scope, feature) return feature @@ -591,7 +593,7 @@ def build_statements(d, scope: str): try: feature = Feature(value, description=description) except ValueError as e: - raise InvalidRule(str(e)) + raise InvalidRule(str(e)) from e ensure_feature_valid_for_scope(scope, feature) return feature @@ -606,7 +608,7 @@ def second(s: List[Any]) -> Any: class Rule: def __init__(self, name: str, scope: str, statement: Statement, meta, definition=""): - super(Rule, self).__init__() + super().__init__() self.name = name self.scope = scope self.statement = statement @@ -1067,7 +1069,7 @@ class RuleSet: """ def __init__(self, rules: List[Rule]): - super(RuleSet, self).__init__() + super().__init__() ensure_rules_are_unique(rules) diff --git a/scripts/lint.py b/scripts/lint.py index 467ad3cf..b3593f80 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -248,7 +248,7 @@ class InvalidAttckOrMbcTechnique(Lint): """ def __init__(self): - super(InvalidAttckOrMbcTechnique, self).__init__() + super().__init__() try: with open(f"{os.path.dirname(__file__)}/linter-data.json", "rb") as fd: diff --git a/scripts/setup-linter-dependencies.py b/scripts/setup-linter-dependencies.py index 326a684c..021c0e31 100644 --- a/scripts/setup-linter-dependencies.py +++ b/scripts/setup-linter-dependencies.py @@ -151,7 +151,7 @@ class MbcExtractor(MitreExtractor): def _get_tactics(self) -> List[Dict]: """Override _get_tactics to edit the tactic name for Micro-objective""" - tactics = super(MbcExtractor, self)._get_tactics() + tactics = super()._get_tactics() # We don't want the Micro-objective string inside objective names for tactic in tactics: tactic["name"] = tactic["name"].replace(" Micro-objective", "") From 78172b5f5bcebc068e5acb50c7c13656ae9ad62b Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 6 Dec 2022 16:06:08 +0000 Subject: [PATCH 03/74] rules: pylint --- capa/rules.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/capa/rules.py b/capa/rules.py index 8db3d277..8287eba0 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -233,23 +233,23 @@ def parse_range(s: str): min_spec = min_spec.strip() max_spec = max_spec.strip() - min = None + min_ = None if min_spec: - min = parse_int(min_spec) - if min < 0: + min_ = parse_int(min_spec) + if min_ < 0: raise InvalidRule("range min less than zero") - max = None + max_ = None if max_spec: - max = parse_int(max_spec) - if max < 0: + max_ = parse_int(max_spec) + if max_ < 0: raise InvalidRule("range max less than zero") - if min is not None and max is not None: - if max < min: + if min_ is not None and max_ is not None: + if max_ < min_: raise InvalidRule("range max less than min") - return min, max + return min_, max_ def parse_feature(key: str): @@ -828,9 +828,9 @@ class Rule: _ = RuleMetadata.from_capa(rule) return rule except InvalidRule as e: - raise InvalidRuleWithPath(path, str(e)) + raise InvalidRuleWithPath(path, str(e)) from e except pydantic.ValidationError as e: - raise InvalidRuleWithPath(path, str(e)) + raise InvalidRuleWithPath(path, str(e)) from e def to_yaml(self): # reformat the yaml document with a common style. @@ -1319,13 +1319,13 @@ class RuleSet: for k, v in rule.meta.items(): if isinstance(v, str) and tag in v: logger.debug('using rule "%s" and dependencies, found tag in meta.%s: %s', rule.name, k, v) - rules_filtered.update(set(capa.rules.get_rules_and_dependencies(rules, rule.name))) + rules_filtered.update(set(get_rules_and_dependencies(rules, rule.name))) break if isinstance(v, list): for vv in v: if tag in vv: logger.debug('using rule "%s" and dependencies, found tag in meta.%s: %s', rule.name, k, vv) - rules_filtered.update(set(capa.rules.get_rules_and_dependencies(rules, rule.name))) + rules_filtered.update(set(get_rules_and_dependencies(rules, rule.name))) break return RuleSet(list(rules_filtered)) From a10abfebde0a86c2f7324ce9a2668a27c1ebec37 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 6 Dec 2022 16:23:10 +0000 Subject: [PATCH 04/74] main: pylint --- capa/main.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/capa/main.py b/capa/main.py index e9a2b728..1157a474 100644 --- a/capa/main.py +++ b/capa/main.py @@ -332,7 +332,7 @@ def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalon logger.warning("-" * 80) for line in file_limitation_rule.meta.get("description", "").split("\n"): - logger.warning(" " + line) + logger.warning(" %s", line) logger.warning(" Identified via rule: %s", file_limitation_rule.name) if is_standalone: logger.warning(" ") @@ -433,7 +433,7 @@ def get_default_signatures() -> List[str]: logger.debug("signatures path: %s", sigs_path) ret = [] - for root, dirs, files in os.walk(sigs_path): + for root, _, files in os.walk(sigs_path): for file in files: if not (file.endswith(".pat") or file.endswith(".pat.gz") or file.endswith(".sig")): continue @@ -588,7 +588,7 @@ def get_rules(rule_paths: List[str], disable_progress=False) -> List[Rule]: rule_file_paths.append(rule_path) elif os.path.isdir(rule_path): logger.debug("reading rules from directory %s", rule_path) - for root, dirs, files in os.walk(rule_path): + for root, _, files in os.walk(rule_path): if ".git" in root: # the .github directory contains CI config in capa-rules # this includes some .yml files @@ -639,7 +639,7 @@ def get_signatures(sigs_path): paths.append(sigs_path) elif os.path.isdir(sigs_path): logger.debug("reading signatures from directory %s", os.path.abspath(os.path.normpath(sigs_path))) - for root, dirs, files in os.walk(sigs_path): + for root, _, files in os.walk(sigs_path): for file in files: if file.endswith((".pat", ".pat.gz", ".sig")): sig_path = os.path.join(root, file) @@ -730,7 +730,7 @@ def compute_layout(rules, extractor, capabilities): for rule_name, matches in capabilities.items(): rule = rules[rule_name] if rule.meta.get("scope") == capa.rules.BASIC_BLOCK_SCOPE: - for (addr, match) in matches: + for (addr, _) in matches: assert addr in functions_by_bb matched_bbs.add(addr) @@ -1024,7 +1024,7 @@ def main(argv=None): # during the load of the RuleSet, we extract subscope statements into their own rules # that are subsequently `match`ed upon. this inflates the total rule count. # so, filter out the subscope rules when reporting total number of loaded rules. - len([i for i in filter(lambda r: not r.is_subscope_rule(), rules.rules.values())]), + len(list(filter(lambda r: not r.is_subscope_rule(), rules.rules.values()))), ) if args.tag: rules = rules.filter_rules_by_meta(args.tag) @@ -1144,7 +1144,9 @@ def main(argv=None): def ida_main(): + import capa.rules import capa.ida.helpers + import capa.render.default import capa.features.extractors.ida.extractor logging.basicConfig(level=logging.INFO) From 473d0daf582c8648bd42ddc73ae5e15be192fe5c Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 7 Dec 2022 11:41:05 +0000 Subject: [PATCH 05/74] render: pylint --- capa/render/result_document.py | 47 +++++++++++++++++----------------- capa/render/vverbose.py | 2 +- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 6ae18ead..71bdb6bd 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -15,6 +15,7 @@ import capa.engine import capa.features.common import capa.features.freeze as frz import capa.features.address +import capa.features.freeze.features as frzf from capa.rules import RuleSet from capa.engine import MatchResults from capa.helpers import assert_never @@ -99,27 +100,27 @@ class Metadata(FrozenModel): rules=meta["analysis"]["rules"], base_address=frz.Address.from_capa(meta["analysis"]["base_address"]), layout=Layout( - functions=[ + functions=tuple( FunctionLayout( address=frz.Address.from_capa(address), - matched_basic_blocks=[ + matched_basic_blocks=tuple( BasicBlockLayout(address=frz.Address.from_capa(bb)) for bb in f["matched_basic_blocks"] - ], + ), ) for address, f in meta["analysis"]["layout"]["functions"].items() - ] + ) ), feature_counts=FeatureCounts( file=meta["analysis"]["feature_counts"]["file"], - functions=[ + functions=tuple( FunctionFeatureCount(address=frz.Address.from_capa(address), count=count) for address, count in meta["analysis"]["feature_counts"]["functions"].items() - ], + ), ), - library_functions=[ + library_functions=tuple( LibraryFunction(address=frz.Address.from_capa(address), name=name) for address, name in meta["analysis"]["library_functions"].items() - ], + ), ), ) @@ -137,18 +138,18 @@ class StatementModel(FrozenModel): class CompoundStatement(StatementModel): type: str - description: Optional[str] + description: Optional[str] = None class SomeStatement(StatementModel): type = "some" - description: Optional[str] + description: Optional[str] = None count: int class RangeStatement(StatementModel): type = "range" - description: Optional[str] + description: Optional[str] = None min: int max: int child: frz.Feature @@ -156,7 +157,7 @@ class RangeStatement(StatementModel): class SubscopeStatement(StatementModel): type = "subscope" - description: Optional[str] + description: Optional[str] = None scope: capa.rules.Scope @@ -277,7 +278,7 @@ class Match(BaseModel): # finally, splice that logic into this tree. if ( isinstance(node, FeatureNode) - and isinstance(node.feature, frz.features.MatchFeature) + and isinstance(node.feature, frzf.MatchFeature) # only add subtree on success, # because there won't be results for the other rule on failure. and success @@ -359,14 +360,14 @@ class Match(BaseModel): def parse_parts_id(s: str): - id = "" + id_ = "" parts = s.split("::") if len(parts) > 0: last = parts.pop() - last, _, id = last.rpartition(" ") - id = id.lstrip("[").rstrip("]") + last, _, id_ = last.rpartition(" ") + id_ = id_.lstrip("[").rstrip("]") parts.append(last) - return parts, id + return tuple(parts), id_ class AttackSpec(FrozenModel): @@ -392,7 +393,7 @@ class AttackSpec(FrozenModel): tactic = "" technique = "" subtechnique = "" - parts, id = parse_parts_id(s) + parts, id_ = parse_parts_id(s) if len(parts) > 0: tactic = parts[0] if len(parts) > 1: @@ -405,7 +406,7 @@ class AttackSpec(FrozenModel): tactic=tactic, technique=technique, subtechnique=subtechnique, - id=id, + id=id_, ) @@ -432,7 +433,7 @@ class MBCSpec(FrozenModel): objective = "" behavior = "" method = "" - parts, id = parse_parts_id(s) + parts, id_ = parse_parts_id(s) if len(parts) > 0: objective = parts[0] if len(parts) > 1: @@ -445,7 +446,7 @@ class MBCSpec(FrozenModel): objective=objective, behavior=behavior, method=method, - id=id, + id=id_, ) @@ -532,10 +533,10 @@ class ResultDocument(BaseModel): rule_matches[rule_name] = RuleMatches( meta=RuleMetadata.from_capa(rule), source=rule.definition, - matches=[ + matches=tuple( (frz.Address.from_capa(addr), Match.from_capa(rules, capabilities, match)) for addr, match in matches - ], + ), ) return ResultDocument(meta=Metadata.from_capa(meta), rules=rule_matches) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 6411da22..c59646db 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Dict, List, Iterable +from typing import Dict, Iterable import tabulate From d1aafa37642aee322387c1b3a466d9f9e8a410fb Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 7 Dec 2022 11:52:41 +0000 Subject: [PATCH 06/74] vverbose: render offset closes #1215 --- CHANGELOG.md | 1 + capa/render/vverbose.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 130e34cb..88257b4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,7 @@ - decouple Token dependency / extractor and features #1139 @mr-tz - update pydantic model to guarantee type coercion #1176 @mike-hunhoff - do not overwrite version in version.py during PyInstaller build #1169 @mr-tz +- render: fix vverbose rendering of offsets #1215 @williballenthin ### capa explorer IDA Pro plugin - fix: display instruction items #1154 @mr-tz diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index c59646db..6a794954 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -146,7 +146,7 @@ def render_feature(ostream, match: rd.Match, feature: frzf.Feature, indent=0): if key == "string": value = render_string_value(value) - if key == "number": + if key in ("number", "offset"): assert isinstance(value, int) value = hex(value) From 1f091a4ccde8d3d32f2bd28054f7ef41fbcd195b Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 7 Dec 2022 12:58:10 +0000 Subject: [PATCH 07/74] tests: add tests demonstrating vverbose feature rendering --- tests/test_render.py | 58 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/test_render.py b/tests/test_render.py index fff14a95..9277b9f2 100644 --- a/tests/test_render.py +++ b/tests/test_render.py @@ -1,10 +1,18 @@ import textwrap +import fixtures + import capa.rules import capa.render.utils +import capa.features.file import capa.features.insn import capa.features.common +import capa.features.freeze +import capa.render.vverbose +import capa.features.address +import capa.features.basicblock import capa.render.result_document +import capa.features.freeze.features def test_render_number(): @@ -92,3 +100,53 @@ def test_render_meta_mbc(): assert mbc.method == method assert capa.render.utils.format_parts_id(mbc) == canonical + + +@fixtures.parametrize( + "feature,expected", + [ + (capa.features.common.OS("windows"), "os: windows"), + (capa.features.common.Arch("i386"), "arch: i386"), + (capa.features.common.Format("pe"), "format: pe"), + (capa.features.common.MatchedRule("foo"), "match: foo @ 0x401000"), + (capa.features.common.Characteristic("foo"), "characteristic: foo @ 0x401000"), + (capa.features.file.Export("SvcMain"), "export: SvcMain @ 0x401000"), + (capa.features.file.Import("CreateFileW"), "import: CreateFileW @ 0x401000"), + (capa.features.file.Section(".detours"), "section: .detours @ 0x401000"), + (capa.features.file.FunctionName("memcmp"), "function name: memcmp @ 0x401000"), + (capa.features.common.Substring("foo"), "substring: foo"), + (capa.features.common.Regex("^foo"), "regex: ^foo"), + (capa.features.common.String("foo"), 'string: "foo" @ 0x401000'), + (capa.features.common.Class("BeanFactory"), "class: BeanFactory @ 0x401000"), + (capa.features.common.Namespace("std::enterprise"), "namespace: std::enterprise @ 0x401000"), + (capa.features.insn.API("CreateFileW"), "api: CreateFileW @ 0x401000"), + (capa.features.insn.Property("foo"), "property: foo @ 0x401000"), + (capa.features.insn.Property("foo", "read"), "property/read: foo @ 0x401000"), + (capa.features.insn.Property("foo", "write"), "property/write: foo @ 0x401000"), + (capa.features.insn.Number(12), "number: 0xC @ 0x401000"), + (capa.features.common.Bytes(b"AAAA"), "bytes: 41414141 @ 0x401000"), + (capa.features.insn.Offset(12), "offset: 0xC @ 0x401000"), + (capa.features.insn.Mnemonic("call"), "mnemonic: call @ 0x401000"), + (capa.features.insn.OperandNumber(0, 12), "operand[0].number: 0xC @ 0x401000"), + (capa.features.insn.OperandOffset(0, 12), "operand[0].offset: 0xC @ 0x401000"), + # unsupported + # (capa.features.basicblock.BasicBlock(), "basic block @ 0x401000"), + ], +) +def test_render_vverbose_feature(feature, expected): + ostream = capa.render.utils.StringIO() + + addr = capa.features.freeze.Address.from_capa(capa.features.address.AbsoluteVirtualAddress(0x401000)) + feature = capa.features.freeze.features.feature_from_capa(feature) + + matches = capa.render.result_document.Match( + success=True, + node=capa.render.result_document.FeatureNode(feature=feature), + children=(), + locations=(addr,), + captures={}, + ) + + capa.render.vverbose.render_feature(ostream, matches, feature, indent=0) + + assert ostream.getvalue().strip() == expected From 0ebba2cd15faf76b59128a25b61280ace45b862d Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 7 Dec 2022 12:58:55 +0000 Subject: [PATCH 08/74] vverbose: guard against rendering basic blocks --- capa/render/vverbose.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 6a794954..376bac7b 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -128,7 +128,11 @@ def render_feature(ostream, match: rd.Match, feature: frzf.Feature, indent=0): ostream.write(" " * indent) key = feature.type - if isinstance(feature, frzf.ImportFeature): + if isinstance(feature, frzf.BasicBlockFeature): + # i don't think it makes sense to have standalone basic block features. + # we don't parse them from rules, only things like: `count(basic block) > 1` + raise ValueError("cannot render basic block feature directly") + elif isinstance(feature, frzf.ImportFeature): # fixup access to Python reserved name value = feature.import_ elif isinstance(feature, frzf.ClassFeature): From 659cbedc3c6bb83cae9d234dbf226417aeaeec1c Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 7 Dec 2022 12:59:21 +0000 Subject: [PATCH 09/74] vverbose: dont show offset for format --- capa/render/vverbose.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 376bac7b..e753a12b 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -169,7 +169,7 @@ def render_feature(ostream, match: rd.Match, feature: frzf.Feature, indent=0): ostream.write(capa.rules.DESCRIPTION_SEPARATOR) ostream.write(feature.description) - if key not in ("os", "arch"): + if key not in ("os", "arch", "format"): render_locations(ostream, match.locations) ostream.write("\n") else: From 63e0d9b3f33c4a2b1431f5dc6e318330020f563d Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 7 Dec 2022 12:59:37 +0000 Subject: [PATCH 10/74] vverbose: render offer and operand number/offset features closes #1215 --- capa/render/vverbose.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index e753a12b..b14ac9da 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -150,17 +150,21 @@ def render_feature(ostream, match: rd.Match, feature: frzf.Feature, indent=0): if key == "string": value = render_string_value(value) - if key in ("number", "offset"): + elif key in ("number", "offset", "operand number", "operand offset"): assert isinstance(value, int) - value = hex(value) + value = f"0x{value:X}" - ostream.write(key) - - if isinstance(feature, frzf.PropertyFeature): + if key == "property": if feature.access is not None: - ostream.write("/" + feature.access) + key = f"property/{feature.access}" - ostream.write(": ") + elif key == "operand number": + key = f"operand[{feature.index}].number" + + elif key == "operand offset": + key = f"operand[{feature.index}].offset" + + ostream.write(f"{key}: ") if value: ostream.write(rutils.bold2(value)) From c195afa0b34fe77f8ce3dfb2bfd2236df0f1e71d Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 7 Dec 2022 13:07:24 +0000 Subject: [PATCH 11/74] explorer: improve rendering of operand number/offsets --- capa/ida/plugin/model.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/capa/ida/plugin/model.py b/capa/ida/plugin/model.py index 71b51ca2..d0a6a857 100644 --- a/capa/ida/plugin/model.py +++ b/capa/ida/plugin/model.py @@ -530,6 +530,14 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel): if value: if isinstance(feature, frzf.StringFeature): value = '"%s"' % capa.features.common.escape_string(value) + + if isinstance(feature, frzf.PropertyFeature) and feature.access is not None: + key = f"property/{feature.access}" + elif isinstance(feature, frzf.OperandNumberFeature): + key = f"operand[{feature.index}].number" + elif isinstance(feature, frzf.OperandOffsetFeature): + key = f"operand[{feature.index}].offset" + if feature.description: return "%s(%s = %s)" % (key, value, feature.description) else: From 1d8a3486cd97bef6cf8ea75a2440ff521ff12f81 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 7 Dec 2022 13:14:05 +0000 Subject: [PATCH 12/74] vverbose: prefer isinstance checks over strings which also makes mypy happier --- capa/render/vverbose.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index b14ac9da..bd992495 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -144,24 +144,25 @@ def render_feature(ostream, match: rd.Match, feature: frzf.Feature, indent=0): if value is None: raise ValueError("%s contains None" % key) - if key not in ("regex", "substring"): + if not isinstance(feature, (frzf.RegexFeature, frzf.SubstringFeature)): # like: # number: 10 = SOME_CONSTANT @ 0x401000 - if key == "string": + if isinstance(feature, frzf.StringFeature): value = render_string_value(value) - elif key in ("number", "offset", "operand number", "operand offset"): + elif isinstance( + feature, (frzf.NumberFeature, frzf.OffsetFeature, frzf.OperandNumberFeature, frzf.OperandOffsetFeature) + ): assert isinstance(value, int) value = f"0x{value:X}" - if key == "property": - if feature.access is not None: - key = f"property/{feature.access}" + if isinstance(feature, frzf.PropertyFeature) and feature.access is not None: + key = f"property/{feature.access}" - elif key == "operand number": + elif isinstance(feature, frzf.OperandNumberFeature): key = f"operand[{feature.index}].number" - elif key == "operand offset": + elif isinstance(feature, frzf.OperandOffsetFeature): key = f"operand[{feature.index}].offset" ostream.write(f"{key}: ") @@ -173,7 +174,7 @@ def render_feature(ostream, match: rd.Match, feature: frzf.Feature, indent=0): ostream.write(capa.rules.DESCRIPTION_SEPARATOR) ostream.write(feature.description) - if key not in ("os", "arch", "format"): + if not isinstance(feature, (frzf.OSFeature, frzf.ArchFeature, frzf.FormatFeature)): render_locations(ostream, match.locations) ostream.write("\n") else: From 662ec1103149e6b3fe7e2d0c38b0cd7e752d3f86 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 7 Dec 2022 13:38:50 +0000 Subject: [PATCH 13/74] explorer: accept only plaintext to rule window closes #1194 --- capa/ida/plugin/view.py | 1 + 1 file changed, 1 insertion(+) diff --git a/capa/ida/plugin/view.py b/capa/ida/plugin/view.py index 9fadaff9..86505fb1 100644 --- a/capa/ida/plugin/view.py +++ b/capa/ida/plugin/view.py @@ -184,6 +184,7 @@ class CapaExplorerRulegenPreview(QtWidgets.QTextEdit): self.setFont(QtGui.QFont("Courier", weight=QtGui.QFont.Bold)) self.setLineWrapMode(QtWidgets.QTextEdit.NoWrap) self.setHorizontalScrollBarPolicy(QtCore.Qt.ScrollBarAsNeeded) + self.setAcceptRichText(False) def reset_view(self): """ """ From 45d007fa9a3f88d41fa6bf08485222208746483c Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 7 Dec 2022 13:39:55 +0000 Subject: [PATCH 14/74] explorer: fix UnboundLocalError closes #1217 --- capa/ida/plugin/form.py | 1 + 1 file changed, 1 insertion(+) diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index e3c588bc..9d101429 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -916,6 +916,7 @@ class CapaExplorerForm(idaapi.PluginForm): logger.error("Failed to match function/basic block rule scope (error: %s)", e) return False else: + fh = None func_features = {} except UserCancelledError: logger.info("User cancelled analysis.") From fe2f668306390df723f4554bb472c5a08c559576 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 7 Dec 2022 13:41:10 +0000 Subject: [PATCH 15/74] CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 130e34cb..11ff5737 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,8 @@ ### capa explorer IDA Pro plugin - fix: display instruction items #1154 @mr-tz +- fix: accept only plaintext pasted content #1194 @williballenthin +- fix: UnboundLocalError #1217 @williballenthin ### Development From a6fdb71178764170e80a56519b7e8626c392bdb5 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 7 Dec 2022 14:09:37 +0000 Subject: [PATCH 16/74] utils: use a single hex() implementation --- capa/features/insn.py | 3 ++- capa/helpers.py | 10 ++++++---- capa/render/utils.py | 8 -------- capa/render/verbose.py | 14 ++++++-------- capa/render/vverbose.py | 3 ++- 5 files changed, 16 insertions(+), 22 deletions(-) diff --git a/capa/features/insn.py b/capa/features/insn.py index c21178b7..e5c1a49e 100644 --- a/capa/features/insn.py +++ b/capa/features/insn.py @@ -8,6 +8,7 @@ import abc from typing import Union, Optional +import capa.helpers from capa.features.common import VALID_FEATURE_ACCESS, Feature @@ -56,7 +57,7 @@ class Number(Feature): def get_value_str(self): if isinstance(self.value, int): - return hex(self.value) + return capa.helpers.hex(self.value) elif isinstance(self.value, float): return str(self.value) else: diff --git a/capa/helpers.py b/capa/helpers.py index 9c4c285e..2e44fc6c 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -18,11 +18,13 @@ EXTENSIONS_ELF = "elf_" logger = logging.getLogger("capa") -_hex = hex - -def hex(i): - return _hex(int(i)) +def hex(n: int) -> str: + """render the given number using upper case hex, like: 0x123ABC""" + if n < 0: + return "-0x%X" % (-n) + else: + return "0x%X" % n def get_file_taste(sample_path: str) -> bytes: diff --git a/capa/render/utils.py b/capa/render/utils.py index 97185a66..2cf480c9 100644 --- a/capa/render/utils.py +++ b/capa/render/utils.py @@ -24,14 +24,6 @@ def bold2(s: str) -> str: return termcolor.colored(s, "green") -def hex(n: int) -> str: - """render the given number using upper case hex, like: 0x123ABC""" - if n < 0: - return "-0x%X" % (-n) - else: - return "0x%X" % n - - def format_parts_id(data: Union[rd.AttackSpec, rd.MBCSpec]): """ format canonical representation of ATT&CK/MBC parts and ID diff --git a/capa/render/verbose.py b/capa/render/verbose.py index 6bdeefda..5a225460 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -23,13 +23,11 @@ Unless required by applicable law or agreed to in writing, software distributed See the License for the specific language governing permissions and limitations under the License. """ import tabulate -import dnfile.mdtable -import dncil.clr.token import capa.rules +import capa.helpers import capa.render.utils as rutils import capa.features.freeze as frz -import capa.render.result_document import capa.render.result_document as rd from capa.rules import RuleSet from capa.engine import MatchResults @@ -37,16 +35,16 @@ from capa.engine import MatchResults def format_address(address: frz.Address) -> str: if address.type == frz.AddressType.ABSOLUTE: - return rutils.hex(address.value) + return capa.helpers.hex(address.value) elif address.type == frz.AddressType.RELATIVE: - return f"base address+{rutils.hex(address.value)}" + return f"base address+{capa.helpers.hex(address.value)}" elif address.type == frz.AddressType.FILE: - return f"file+{rutils.hex(address.value)}" + return f"file+{capa.helpers.hex(address.value)}" elif address.type == frz.AddressType.DN_TOKEN: - return f"token({rutils.hex(address.value)})" + return f"token({capa.helpers.hex(address.value)})" elif address.type == frz.AddressType.DN_TOKEN_OFFSET: token, offset = address.value - return f"token({rutils.hex(token)})+{rutils.hex(offset)}" + return f"token({capa.helpers.hex(token)})+{capa.helpers.hex(offset)}" elif address.type == frz.AddressType.NO_ADDRESS: return "global" else: diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index bd992495..5950275a 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -11,6 +11,7 @@ from typing import Dict, Iterable import tabulate import capa.rules +import capa.helpers import capa.render.utils as rutils import capa.render.verbose import capa.features.common @@ -154,7 +155,7 @@ def render_feature(ostream, match: rd.Match, feature: frzf.Feature, indent=0): feature, (frzf.NumberFeature, frzf.OffsetFeature, frzf.OperandNumberFeature, frzf.OperandOffsetFeature) ): assert isinstance(value, int) - value = f"0x{value:X}" + value = capa.helpers.hex(value) if isinstance(feature, frzf.PropertyFeature) and feature.access is not None: key = f"property/{feature.access}" From 97f633312fe6ce038d4176d320cedb18ac09d2d1 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Wed, 7 Dec 2022 16:44:52 +0100 Subject: [PATCH 17/74] skip smda tests until we remove the backend --- tests/test_smda_features.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_smda_features.py b/tests/test_smda_features.py index 6614c24d..91897c59 100644 --- a/tests/test_smda_features.py +++ b/tests/test_smda_features.py @@ -13,6 +13,7 @@ from fixtures import parametrize import capa.features.file +@pytest.mark.skip(reason="SMDA tests fail and we're deprecating this backend in the next major release") @parametrize( "sample,scope,feature,expected", fixtures.FEATURE_PRESENCE_TESTS, @@ -25,6 +26,7 @@ def test_smda_features(sample, scope, feature, expected): fixtures.do_test_feature_presence(fixtures.get_smda_extractor, sample, scope, feature, expected) +@pytest.mark.skip(reason="SMDA tests fail and we're deprecating this backend in the next major release") @parametrize( "sample,scope,feature,expected", fixtures.FEATURE_COUNT_TESTS, From bd84ee83a59420637153943be9780359ada6a802 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 7 Dec 2022 19:10:53 +0000 Subject: [PATCH 18/74] Sync capa rules submodule --- CHANGELOG.md | 3 ++- README.md | 2 +- rules | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e8f3736a..f2bf3693 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ ### Breaking Changes -### New Rules (30) +### New Rules (31) - collection/use-dotnet-library-sharpclipboard @johnk3r - data-manipulation/encryption/aes/use-dotnet-library-encryptdecryptutils @johnk3r @@ -41,6 +41,7 @@ - nursery/execute-wmi-query-in-dotnet michael.hunhoff@mandiant.com - nursery/manipulate-network-credentials-in-dotnet michael.hunhoff@mandiant.com - nursery/encrypt-data-using-aes william.ballenthin@mandiant.com Ivan Kwiatkowski (@JusticeRage) +- host-interaction/uac/bypass/bypass-uac-via-rpc david.cannings@pwc.com david@edeca.net - ### Bug Fixes diff --git a/README.md b/README.md index f00c9411..5d05c821 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-731-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-732-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index 793837a4..5ba70c97 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 793837a406e27dd2754202fd13d7dc6ba8397679 +Subproject commit 5ba70c97d22dd59efcf29a128557e64213f7ace8 From c5a9aa21bff03583723840800f3bbaa560db2d48 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Thu, 8 Dec 2022 21:33:57 +0100 Subject: [PATCH 19/74] wip: elf: better detect linux ELF files --- capa/features/extractors/elf.py | 249 ++++++++++++++++++++++++++++++-- 1 file changed, 235 insertions(+), 14 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 9f4f9b34..3f516f27 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -7,6 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import struct import logging +import collections from enum import Enum from typing import BinaryIO @@ -21,6 +22,12 @@ def align(v, alignment): return v + (alignment - remainder) +def read_cstr(buf, offset): + s = buf[offset:] + s, _, _ = s.partition(b"\x00") + return s.decode("utf-8") + + class CorruptElfFile(ValueError): pass @@ -141,6 +148,112 @@ def detect_elf_os(f) -> str: # subsequent strategies may overwrite this value ret = OSABI[ei_osabi] + (e_machine,) = struct.unpack_from(endian + "H", file_header, 0x12) + MACHINE = { + 0: "None", + 1: "M32", + 2: "SPARC", + 3: "386", + 4: "68K", + 5: "88K", + 6: "486", + 7: "860", + 8: "MIPS", + 9: "S370", + 10: "MIPS_RS3_LE", + 11: "RS6000", + 12: "UNKNOWN12", + 13: "UNKNOWN13", + 14: "UNKNOWN14", + 15: "PA_RISC", + 16: "nCUBE", + 17: "VPP500", + 18: "SPARC32PLUS", + 19: "960", + 20: "PPC", + 21: "PPC64", + 22: "S390", + 23: "SPU", + 24: "UNKNOWN24", + 25: "UNKNOWN25", + 26: "UNKNOWN26", + 27: "UNKNOWN27", + 28: "UNKNOWN28", + 29: "UNKNOWN29", + 30: "UNKNOWN30", + 31: "UNKNOWN31", + 32: "UNKNOWN32", + 33: "UNKNOWN33", + 34: "UNKNOWN34", + 35: "UNKNOWN35", + 36: "V800", + 37: "FR20", + 38: "RH32", + 39: "RCE", + 40: "ARM", + 41: "ALPHA", + 42: "SH", + 43: "SPARCV9", + 44: "TRICORE", + 45: "ARC", + 46: "H8_300", + 47: "H8_300H", + 48: "H8S", + 49: "H8_500", + 50: "IA_64", + 51: "MIPS_X", + 52: "COLDFIRE", + 53: "68HC12", + 54: "MMA", + 55: "PCP", + 56: "NCPU", + 57: "NDR1", + 58: "STARCORE", + 59: "ME16", + 60: "ST100", + 61: "TINYJ", + 62: "X86_64", + 63: "PDSP", + 64: "PDP10", + 65: "PDP11", + 66: "FX66", + 67: "ST9PLUS", + 68: "ST7", + 69: "68HC16", + 70: "68HC11", + 71: "68HC08", + 72: "68HC05", + 73: "SVX", + 74: "ST19", + 75: "VAX", + 76: "CRIS", + 77: "JAVELIN", + 78: "FIREPATH", + 79: "ZSP", + 80: "MMIX", + 81: "HUANY", + 82: "PRISM", + 83: "AVR", + 84: "FR30", + 85: "D10V", + 86: "D30V", + 87: "V850", + 88: "M32R", + 89: "MN10300", + 90: "MN10200", + 91: "PJ", + 92: "OPENRISC", + 93: "ARC_A5", + 94: "XTENSA", + 95: "VIDEOCORE", + 96: "TMM_GPP", + 97: "NS32K", + 98: "TPC", + 99: "SNP1K", + 100: "ST200", + } + logger.debug("emachine: 0x%02x (%s)", e_machine, MACHINE.get(e_machine, "unknown")) + f.seek(e_phoff) program_header_size = e_phnum * e_phentsize program_headers = f.read(program_header_size) @@ -171,18 +284,18 @@ def detect_elf_os(f) -> str: logger.debug("ph:p_offset: 0x%02x p_filesz: 0x%04x", p_offset, p_filesz) f.seek(p_offset) - note = f.read(p_filesz) - if len(note) != p_filesz: + version_r = f.read(p_filesz) + if len(version_r) != p_filesz: logger.warning("failed to read note content") continue - namesz, descsz, type_ = struct.unpack_from(endian + "III", note, 0x0) + namesz, descsz, type_ = struct.unpack_from(endian + "III", version_r, 0x0) name_offset = 0xC desc_offset = name_offset + align(namesz, 0x4) logger.debug("ph:namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, descsz, type_) - name = note[name_offset : name_offset + namesz].partition(b"\x00")[0].decode("ascii") + name = version_r[name_offset : name_offset + namesz].partition(b"\x00")[0].decode("ascii") logger.debug("name: %s", name) if type_ != 1: @@ -192,7 +305,7 @@ def detect_elf_os(f) -> str: if descsz < 16: continue - desc = note[desc_offset : desc_offset + descsz] + desc = version_r[desc_offset : desc_offset + descsz] abi_tag, kmajor, kminor, kpatch = struct.unpack_from(endian + "IIII", desc, 0x0) logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag) @@ -213,6 +326,7 @@ def detect_elf_os(f) -> str: # search for recognizable dynamic linkers (interpreters) # for example, on linux, we see file paths like: /lib64/ld-linux-x86-64.so.2 + linker = None for i in range(e_phnum): offset = i * e_phentsize phent = program_headers[offset : offset + e_phentsize] @@ -257,9 +371,9 @@ def detect_elf_os(f) -> str: shent = section_headers[offset : offset + e_shentsize] if bitness == 32: - sh_name, sh_type, _, sh_addr, sh_offset, sh_size = struct.unpack_from(endian + "IIIIII", shent, 0x0) + sh_name, sh_type, _, sh_addr, linked_sh_offset, linked_sh_size = struct.unpack_from(endian + "IIIIII", shent, 0x0) elif bitness == 64: - sh_name, sh_type, _, sh_addr, sh_offset, sh_size = struct.unpack_from(endian + "IIQQQQ", shent, 0x0) + sh_name, sh_type, _, sh_addr, linked_sh_offset, linked_sh_size = struct.unpack_from(endian + "IIQQQQ", shent, 0x0) else: raise NotImplementedError() @@ -267,21 +381,21 @@ def detect_elf_os(f) -> str: if sh_type != SHT_NOTE: continue - logger.debug("sh:sh_offset: 0x%02x sh_size: 0x%04x", sh_offset, sh_size) + logger.debug("sh:sh_offset: 0x%02x sh_size: 0x%04x", linked_sh_offset, linked_sh_size) - f.seek(sh_offset) - note = f.read(sh_size) - if len(note) != sh_size: + f.seek(linked_sh_offset) + version_r = f.read(linked_sh_size) + if len(version_r) != linked_sh_size: logger.warning("failed to read note content") continue - namesz, descsz, type_ = struct.unpack_from(endian + "III", note, 0x0) + namesz, descsz, type_ = struct.unpack_from(endian + "III", version_r, 0x0) name_offset = 0xC desc_offset = name_offset + align(namesz, 0x4) logger.debug("sh:namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, descsz, type_) - name = note[name_offset : name_offset + namesz].partition(b"\x00")[0].decode("ascii") + name = version_r[name_offset : name_offset + namesz].partition(b"\x00")[0].decode("ascii") logger.debug("name: %s", name) if name == "Linux": @@ -300,7 +414,7 @@ def detect_elf_os(f) -> str: if descsz < 16: continue - desc = note[desc_offset : desc_offset + descsz] + desc = version_r[desc_offset : desc_offset + descsz] abi_tag, kmajor, kminor, kpatch = struct.unpack_from(endian + "IIII", desc, 0x0) logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag) @@ -310,6 +424,113 @@ def detect_elf_os(f) -> str: ret = GNU_ABI_TAG[abi_tag] if not ret else ret logger.debug("abi tag: %s earliest compatible kernel: %d.%d.%d", ret, kmajor, kminor, kpatch) + if not ret: + # if we don't have any guesses yet, + # then lets look for GLIBC symbol versioning requirements. + # this will let us guess about linux/hurd in some cases. + # + # symbol version requirements are stored in the .gnu.version_r section, + # which has type SHT_GNU_verneed (0x6ffffffe). + # + # this contains a linked list of ElfXX_Verneed structs, + # each referencing a linked list of ElfXX_Vernaux structs. + # strings are stored in the section referenced by the sh_link field of the section header. + # each Verneed struct contains a reference to the name of the library, + # each Vernaux struct contains a reference to the name of a symbol. + for i in range(e_shnum): + offset = i * e_shentsize + shent = section_headers[offset : offset + e_shentsize] + + if bitness == 32: + sh_name, sh_type, _, sh_addr, sh_offset, sh_size, sh_link = struct.unpack_from(endian + "IIIIIII", shent, 0x0) + elif bitness == 64: + sh_name, sh_type, _, sh_addr, sh_offset, sh_size, sh_link = struct.unpack_from(endian + "IIQQQQI", shent, 0x0) + else: + raise NotImplementedError() + + SHT_GNU_VERNEED = 0x6ffffffe + if sh_type != SHT_GNU_VERNEED: + continue + + logger.debug("sh:sh_offset: 0x%02x sh_size: 0x%04x", sh_offset, sh_size) + + # read the section containing the verneed structures + f.seek(sh_offset) + version_r = f.read(sh_size) + if len(version_r) != sh_size: + logger.warning("failed to read .gnu.version_r content") + continue + + # read the linked section content + # which contains strings referenced by the verneed structures + linked_shent_offset = sh_link * e_shentsize + linked_shent = section_headers[linked_shent_offset : linked_shent_offset + e_shentsize] + + if bitness == 32: + _, _, _, _, linked_sh_offset, linked_sh_size = struct.unpack_from(endian + "IIIIII", linked_shent, 0x0) + elif bitness == 64: + _, _, _, _, linked_sh_offset, linked_sh_size = struct.unpack_from(endian + "IIQQQQ", linked_shent, 0x0) + else: + raise NotImplementedError() + + f.seek(linked_sh_offset) + linked_sh = f.read(linked_sh_size) + if len(linked_sh) != linked_sh_size: + logger.warning("failed to read linked content") + continue + + so_abis = collections.defaultdict(set) + + # read verneed structures from the start of the section + # until the vn_next link is 0x0. + # each entry describes a shared object that is required by this binary. + vn_offset = 0x0 + while True: + # ElfXX_Verneed layout is the same on 32 and 64 bit + vn_version, vn_cnt, vn_file, vn_aux, vn_next = struct.unpack_from(endian + "HHIII", version_r, vn_offset) + if vn_version != 1: + # unexpected format, don't try to keep parsing + break + + # shared object names, like: "libdl.so.2" + so_name = read_cstr(linked_sh, vn_file) + + # read vernaux structures linked from the verneed structure. + # there should be vn_cnt of these. + # each entry describes an ABI name required by the shared object. + vna_offset = vn_offset + vn_aux + for i in range(vn_cnt): + # ElfXX_Vernaux layout is the same on 32 and 64 bit + _, _, _, vna_name, vna_next = struct.unpack_from(endian + "IHHII", version_r, vna_offset) + + # ABI names, like: "GLIBC_2.2.5" + abi = read_cstr(linked_sh, vna_name) + so_abis[so_name].add(abi) + + vna_offset += vna_next + + vn_offset += vn_next + if vn_next == 0: + break + + has_glibc_verneed = False + for so_name, abis in so_abis.items(): + for abi in abis: + if abi.startswith("GLIBC"): + has_glibc_verneed = True + + if has_glibc_verneed: + if MACHINE.get(e_machine) != "386": + ret = OS.LINUX + + # TODO: check dynamic sections for libmachuser and libhurduser + + if linker and "ld-linux" in linker: + ret = OS.LINUX + + if linker and "/ld.so" in linker: + ret = OS.HURD + return ret.value if ret is not None else "unknown" From 958d5bcc6a50ad3d92da63518ab635ce511aa231 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 9 Dec 2022 12:56:09 +0100 Subject: [PATCH 20/74] elf: refactor OS detection --- capa/features/extractors/elf.py | 716 ++++++++++++++++++-------------- 1 file changed, 405 insertions(+), 311 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 3f516f27..b3b86135 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -7,9 +7,11 @@ # See the License for the specific language governing permissions and limitations under the License. import struct import logging +import itertools import collections from enum import Enum -from typing import BinaryIO +from dataclasses import dataclass +from typing import BinaryIO, Optional, Dict, Set logger = logging.getLogger(__name__) @@ -67,52 +69,94 @@ GNU_ABI_TAG = { } -def detect_elf_os(f) -> str: - """ - f: type Union[BinaryIO, IDAIO] - """ - f.seek(0x0) - file_header = f.read(0x40) +@dataclass +class Phdr: + type: int + offset: int + vaddr: int + paddr: int + filesz: int + buf: bytes - # we'll set this to the detected OS - # prefer the first heuristics, - # but rather than short circuiting, - # we'll still parse out the remainder, for debugging. - ret = None - if not file_header.startswith(b"\x7fELF"): - raise CorruptElfFile("missing magic header") +@dataclass +class Shdr: + name: int + type: int + flags: int + addr: int + offset: int + size: int + link: int + buf: bytes - ei_class, ei_data = struct.unpack_from("BB", file_header, 4) - logger.debug("ei_class: 0x%02x ei_data: 0x%02x", ei_class, ei_data) - if ei_class == 1: - bitness = 32 - elif ei_class == 2: - bitness = 64 - else: - raise CorruptElfFile("invalid ei_class: 0x%02x" % ei_class) - if ei_data == 1: - endian = "<" - elif ei_data == 2: - endian = ">" - else: - raise CorruptElfFile("not an ELF file: invalid ei_data: 0x%02x" % ei_data) +class ELF: + def __init__(self, f): + self.f = f - if bitness == 32: - (e_phoff, e_shoff) = struct.unpack_from(endian + "II", file_header, 0x1C) - e_phentsize, e_phnum = struct.unpack_from(endian + "HH", file_header, 0x2A) - e_shentsize, e_shnum = struct.unpack_from(endian + "HH", file_header, 0x2E) - elif bitness == 64: - (e_phoff, e_shoff) = struct.unpack_from(endian + "QQ", file_header, 0x20) - e_phentsize, e_phnum = struct.unpack_from(endian + "HH", file_header, 0x36) - e_shentsize, e_shnum = struct.unpack_from(endian + "HH", file_header, 0x3A) - else: - raise NotImplementedError() + self.bitness: int = None + self.endian: str = None + self.e_phentsize: int = None + self.e_phnum: int = None + self.e_shentsize: int = None + self.e_shnum: int = None + self.phbuf = None + self.shbuf = None - logger.debug("e_phoff: 0x%02x e_phentsize: 0x%02x e_phnum: %d", e_phoff, e_phentsize, e_phnum) + self._parse() + + def _parse(self): + + self.f.seek(0x0) + self.file_header = self.f.read(0x40) + + if not self.file_header.startswith(b"\x7fELF"): + raise CorruptElfFile("missing magic header") + + ei_class, ei_data = struct.unpack_from("BB", self.file_header, 4) + logger.debug("ei_class: 0x%02x ei_data: 0x%02x", ei_class, ei_data) + if ei_class == 1: + self.bitness = 32 + elif ei_class == 2: + self.bitness = 64 + else: + raise CorruptElfFile("invalid ei_class: 0x%02x" % ei_class) + + if ei_data == 1: + self.endian = "<" + elif ei_data == 2: + self.endian = ">" + else: + raise CorruptElfFile("not an ELF file: invalid ei_data: 0x%02x" % ei_data) + + if self.bitness == 32: + e_phoff, e_shoff = struct.unpack_from(self.endian + "II", self.file_header, 0x1C) + self.e_phentsize, self.e_phnum = struct.unpack_from(self.endian + "HH", self.file_header, 0x2A) + self.e_shentsize, self.e_shnum = struct.unpack_from(self.endian + "HH", self.file_header, 0x2E) + elif self.bitness == 64: + e_phoff, e_shoff = struct.unpack_from(self.endian + "QQ", self.file_header, 0x20) + self.e_phentsize, self.e_phnum = struct.unpack_from(self.endian + "HH", self.file_header, 0x36) + self.e_shentsize, self.e_shnum = struct.unpack_from(self.endian + "HH", self.file_header, 0x3A) + else: + raise NotImplementedError() + + logger.debug("e_phoff: 0x%02x e_phentsize: 0x%02x e_phnum: %d", e_phoff, self.e_phentsize, self.e_phnum) + + self.f.seek(e_phoff) + program_header_size = self.e_phnum * self.e_phentsize + self.phbuf = self.f.read(program_header_size) + if len(self.phbuf) != program_header_size: + logger.warning("failed to read program headers") + self.e_phnum = 0 + + self.f.seek(e_shoff) + section_header_size = self.e_shnum * self.e_shentsize + self.shbuf = self.f.read(section_header_size) + if len(self.shbuf) != section_header_size: + logger.warning("failed to read section headers") + self.e_shnum = 0 - (ei_osabi,) = struct.unpack_from(endian + "B", file_header, 7) OSABI = { # via pyelftools: https://github.com/eliben/pyelftools/blob/0664de05ed2db3d39041e2d51d19622a8ef4fb0f/elftools/elf/enums.py#L35-L58 # some candidates are commented out because the are not useful values, @@ -140,17 +184,14 @@ def detect_elf_os(f) -> str: # 97: "ARM", # not an OS # 255: "STANDALONE", # not an OS } - logger.debug("ei_osabi: 0x%02x (%s)", ei_osabi, OSABI.get(ei_osabi, "unknown")) - # os_osabi == 0 is commonly set even when the OS is not SYSV. - # other values are unused or unknown. - if ei_osabi in OSABI and ei_osabi != 0x0: - # subsequent strategies may overwrite this value - ret = OSABI[ei_osabi] + @property + def ei_osabi(self) -> Optional[OS]: + (ei_osabi,) = struct.unpack_from(self.endian + "B", self.file_header, 7) + return ELF.OSABI.get(ei_osabi) - (e_machine,) = struct.unpack_from(endian + "H", file_header, 0x12) MACHINE = { - 0: "None", + # via https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html 1: "M32", 2: "SPARC", 3: "386", @@ -162,9 +203,6 @@ def detect_elf_os(f) -> str: 9: "S370", 10: "MIPS_RS3_LE", 11: "RS6000", - 12: "UNKNOWN12", - 13: "UNKNOWN13", - 14: "UNKNOWN14", 15: "PA_RISC", 16: "nCUBE", 17: "VPP500", @@ -174,18 +212,6 @@ def detect_elf_os(f) -> str: 21: "PPC64", 22: "S390", 23: "SPU", - 24: "UNKNOWN24", - 25: "UNKNOWN25", - 26: "UNKNOWN26", - 27: "UNKNOWN27", - 28: "UNKNOWN28", - 29: "UNKNOWN29", - 30: "UNKNOWN30", - 31: "UNKNOWN31", - 32: "UNKNOWN32", - 33: "UNKNOWN33", - 34: "UNKNOWN34", - 35: "UNKNOWN35", 36: "V800", 37: "FR20", 38: "RH32", @@ -252,183 +278,82 @@ def detect_elf_os(f) -> str: 99: "SNP1K", 100: "ST200", } - logger.debug("emachine: 0x%02x (%s)", e_machine, MACHINE.get(e_machine, "unknown")) - - f.seek(e_phoff) - program_header_size = e_phnum * e_phentsize - program_headers = f.read(program_header_size) - if len(program_headers) != program_header_size: - logger.warning("failed to read program headers") - e_phnum = 0 - # search for PT_NOTE sections that specify an OS - # for example, on Linux there is a GNU section with minimum kernel version - for i in range(e_phnum): - offset = i * e_phentsize - phent = program_headers[offset : offset + e_phentsize] + @property + def e_machine(self) -> Optional[str]: + (e_machine,) = struct.unpack_from(self.endian + "H", self.file_header, 0x12) + return ELF.MACHINE.get(e_machine) - PT_NOTE = 0x4 + def parse_program_header(self, i) -> Phdr: + phent_offset = i * self.e_phentsize + phent = self.phbuf[phent_offset : phent_offset + self.e_phentsize] - (p_type,) = struct.unpack_from(endian + "I", phent, 0x0) + (p_type,) = struct.unpack_from(self.endian + "I", phent, 0x0) logger.debug("ph:p_type: 0x%04x", p_type) - if p_type != PT_NOTE: - continue - if bitness == 32: - p_offset, _, _, p_filesz = struct.unpack_from(endian + "IIII", phent, 0x4) - elif bitness == 64: - p_offset, _, _, p_filesz = struct.unpack_from(endian + "QQQQ", phent, 0x8) + if self.bitness == 32: + p_offset, p_vaddr, p_paddr, p_filesz = struct.unpack_from(self.endian + "IIII", phent, 0x4) + elif self.bitness == 64: + p_offset, p_vaddr, p_paddr, p_filesz = struct.unpack_from(self.endian + "QQQQ", phent, 0x8) else: raise NotImplementedError() logger.debug("ph:p_offset: 0x%02x p_filesz: 0x%04x", p_offset, p_filesz) - f.seek(p_offset) - version_r = f.read(p_filesz) - if len(version_r) != p_filesz: - logger.warning("failed to read note content") - continue + self.f.seek(p_offset) + buf = self.f.read(p_filesz) + if len(buf) != p_filesz: + raise ValueError("failed to read program header content") - namesz, descsz, type_ = struct.unpack_from(endian + "III", version_r, 0x0) - name_offset = 0xC - desc_offset = name_offset + align(namesz, 0x4) + return Phdr(p_type, p_offset, p_vaddr, p_paddr, p_filesz, buf) - logger.debug("ph:namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, descsz, type_) - - name = version_r[name_offset : name_offset + namesz].partition(b"\x00")[0].decode("ascii") - logger.debug("name: %s", name) - - if type_ != 1: - continue - - if name == "GNU": - if descsz < 16: + @property + def program_headers(self): + for i in range(self.e_phnum): + try: + yield self.parse_program_header(i) + except ValueError: continue - desc = version_r[desc_offset : desc_offset + descsz] - abi_tag, kmajor, kminor, kpatch = struct.unpack_from(endian + "IIII", desc, 0x0) - logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag) + def parse_section_header(self, i) -> Shdr: + shent_offset = i * self.e_shentsize + shent = self.shbuf[shent_offset : shent_offset + self.e_shentsize] - if abi_tag in GNU_ABI_TAG: - # update only if not set - # so we can get the debugging output of subsequent strategies - ret = GNU_ABI_TAG[abi_tag] if not ret else ret - logger.debug("abi tag: %s earliest compatible kernel: %d.%d.%d", ret, kmajor, kminor, kpatch) - elif name == "OpenBSD": - logger.debug("note owner: %s", "OPENBSD") - ret = OS.OPENBSD if not ret else ret - elif name == "NetBSD": - logger.debug("note owner: %s", "NETBSD") - ret = OS.NETBSD if not ret else ret - elif name == "FreeBSD": - logger.debug("note owner: %s", "FREEBSD") - ret = OS.FREEBSD if not ret else ret + if self.bitness == 32: + sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link = struct.unpack_from(self.endian + "IIIIIII", shent, 0x0) + elif self.bitness == 64: + sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link = struct.unpack_from(self.endian + "IIQQQQI", shent, 0x0) + else: + raise NotImplementedError() - # search for recognizable dynamic linkers (interpreters) - # for example, on linux, we see file paths like: /lib64/ld-linux-x86-64.so.2 - linker = None - for i in range(e_phnum): - offset = i * e_phentsize - phent = program_headers[offset : offset + e_phentsize] + logger.debug("sh:sh_offset: 0x%02x sh_size: 0x%04x", sh_offset, sh_size) + self.f.seek(sh_offset) + buf = self.f.read(sh_size) + if len(buf) != sh_size: + raise ValueError("failed to read section header content") + + return Shdr(sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link, buf) + + @property + def section_headers(self): + for i in range(self.e_shnum): + try: + yield self.parse_section_header(i) + except ValueError: + continue + + @property + def linker(self): PT_INTERP = 0x3 - - (p_type,) = struct.unpack_from(endian + "I", phent, 0x0) - if p_type != PT_INTERP: - continue - - if bitness == 32: - p_offset, _, _, p_filesz = struct.unpack_from(endian + "IIII", phent, 0x4) - elif bitness == 64: - p_offset, _, _, p_filesz = struct.unpack_from(endian + "QQQQ", phent, 0x8) - else: - raise NotImplementedError() - - f.seek(p_offset) - interp = f.read(p_filesz) - if len(interp) != p_filesz: - logger.warning("failed to read interp content") - continue - - linker = interp.partition(b"\x00")[0].decode("ascii") - logger.debug("linker: %s", linker) - if "ld-linux" in linker: - # update only if not set - # so we can get the debugging output of subsequent strategies - ret = OS.LINUX if ret is None else ret - - f.seek(e_shoff) - section_header_size = e_shnum * e_shentsize - section_headers = f.read(section_header_size) - if len(section_headers) != section_header_size: - logger.warning("failed to read section headers") - e_shnum = 0 - - # search for notes stored in sections that aren't visible in program headers. - # e.g. .note.Linux in Linux kernel modules. - for i in range(e_shnum): - offset = i * e_shentsize - shent = section_headers[offset : offset + e_shentsize] - - if bitness == 32: - sh_name, sh_type, _, sh_addr, linked_sh_offset, linked_sh_size = struct.unpack_from(endian + "IIIIII", shent, 0x0) - elif bitness == 64: - sh_name, sh_type, _, sh_addr, linked_sh_offset, linked_sh_size = struct.unpack_from(endian + "IIQQQQ", shent, 0x0) - else: - raise NotImplementedError() - - SHT_NOTE = 0x7 - if sh_type != SHT_NOTE: - continue - - logger.debug("sh:sh_offset: 0x%02x sh_size: 0x%04x", linked_sh_offset, linked_sh_size) - - f.seek(linked_sh_offset) - version_r = f.read(linked_sh_size) - if len(version_r) != linked_sh_size: - logger.warning("failed to read note content") - continue - - namesz, descsz, type_ = struct.unpack_from(endian + "III", version_r, 0x0) - name_offset = 0xC - desc_offset = name_offset + align(namesz, 0x4) - - logger.debug("sh:namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, descsz, type_) - - name = version_r[name_offset : name_offset + namesz].partition(b"\x00")[0].decode("ascii") - logger.debug("name: %s", name) - - if name == "Linux": - logger.debug("note owner: %s", "LINUX") - ret = OS.LINUX if not ret else ret - elif name == "OpenBSD": - logger.debug("note owner: %s", "OPENBSD") - ret = OS.OPENBSD if not ret else ret - elif name == "NetBSD": - logger.debug("note owner: %s", "NETBSD") - ret = OS.NETBSD if not ret else ret - elif name == "FreeBSD": - logger.debug("note owner: %s", "FREEBSD") - ret = OS.FREEBSD if not ret else ret - elif name == "GNU": - if descsz < 16: + for phdr in self.program_headers: + if phdr.type != PT_INTERP: continue - desc = version_r[desc_offset : desc_offset + descsz] - abi_tag, kmajor, kminor, kpatch = struct.unpack_from(endian + "IIII", desc, 0x0) - logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag) + return read_cstr(phdr.buf, 0) - if abi_tag in GNU_ABI_TAG: - # update only if not set - # so we can get the debugging output of subsequent strategies - ret = GNU_ABI_TAG[abi_tag] if not ret else ret - logger.debug("abi tag: %s earliest compatible kernel: %d.%d.%d", ret, kmajor, kminor, kpatch) - - if not ret: - # if we don't have any guesses yet, - # then lets look for GLIBC symbol versioning requirements. - # this will let us guess about linux/hurd in some cases. - # + @property + def versions_needed(self) -> Dict[str, Set[str]]: # symbol version requirements are stored in the .gnu.version_r section, # which has type SHT_GNU_verneed (0x6ffffffe). # @@ -437,49 +362,15 @@ def detect_elf_os(f) -> str: # strings are stored in the section referenced by the sh_link field of the section header. # each Verneed struct contains a reference to the name of the library, # each Vernaux struct contains a reference to the name of a symbol. - for i in range(e_shnum): - offset = i * e_shentsize - shent = section_headers[offset : offset + e_shentsize] - - if bitness == 32: - sh_name, sh_type, _, sh_addr, sh_offset, sh_size, sh_link = struct.unpack_from(endian + "IIIIIII", shent, 0x0) - elif bitness == 64: - sh_name, sh_type, _, sh_addr, sh_offset, sh_size, sh_link = struct.unpack_from(endian + "IIQQQQI", shent, 0x0) - else: - raise NotImplementedError() - - SHT_GNU_VERNEED = 0x6ffffffe - if sh_type != SHT_GNU_VERNEED: + SHT_GNU_VERNEED = 0x6ffffffe + for shdr in self.section_headers: + if shdr.type != SHT_GNU_VERNEED: continue - logger.debug("sh:sh_offset: 0x%02x sh_size: 0x%04x", sh_offset, sh_size) + # the linked section contains strings referenced by the verneed structures. + linked_shdr = self.parse_section_header(shdr.link) - # read the section containing the verneed structures - f.seek(sh_offset) - version_r = f.read(sh_size) - if len(version_r) != sh_size: - logger.warning("failed to read .gnu.version_r content") - continue - - # read the linked section content - # which contains strings referenced by the verneed structures - linked_shent_offset = sh_link * e_shentsize - linked_shent = section_headers[linked_shent_offset : linked_shent_offset + e_shentsize] - - if bitness == 32: - _, _, _, _, linked_sh_offset, linked_sh_size = struct.unpack_from(endian + "IIIIII", linked_shent, 0x0) - elif bitness == 64: - _, _, _, _, linked_sh_offset, linked_sh_size = struct.unpack_from(endian + "IIQQQQ", linked_shent, 0x0) - else: - raise NotImplementedError() - - f.seek(linked_sh_offset) - linked_sh = f.read(linked_sh_size) - if len(linked_sh) != linked_sh_size: - logger.warning("failed to read linked content") - continue - - so_abis = collections.defaultdict(set) + versions_needed = collections.defaultdict(set) # read verneed structures from the start of the section # until the vn_next link is 0x0. @@ -487,13 +378,13 @@ def detect_elf_os(f) -> str: vn_offset = 0x0 while True: # ElfXX_Verneed layout is the same on 32 and 64 bit - vn_version, vn_cnt, vn_file, vn_aux, vn_next = struct.unpack_from(endian + "HHIII", version_r, vn_offset) + vn_version, vn_cnt, vn_file, vn_aux, vn_next = struct.unpack_from(self.endian + "HHIII", shdr.buf, vn_offset) if vn_version != 1: # unexpected format, don't try to keep parsing break # shared object names, like: "libdl.so.2" - so_name = read_cstr(linked_sh, vn_file) + so_name = read_cstr(linked_shdr.buf, vn_file) # read vernaux structures linked from the verneed structure. # there should be vn_cnt of these. @@ -501,11 +392,11 @@ def detect_elf_os(f) -> str: vna_offset = vn_offset + vn_aux for i in range(vn_cnt): # ElfXX_Vernaux layout is the same on 32 and 64 bit - _, _, _, vna_name, vna_next = struct.unpack_from(endian + "IHHII", version_r, vna_offset) + _, _, _, vna_name, vna_next = struct.unpack_from(self.endian + "IHHII", shdr.buf, vna_offset) # ABI names, like: "GLIBC_2.2.5" - abi = read_cstr(linked_sh, vna_name) - so_abis[so_name].add(abi) + abi = read_cstr(linked_shdr.buf, vna_name) + versions_needed[so_name].add(abi) vna_offset += vna_next @@ -513,59 +404,262 @@ def detect_elf_os(f) -> str: if vn_next == 0: break - has_glibc_verneed = False - for so_name, abis in so_abis.items(): - for abi in abis: - if abi.startswith("GLIBC"): - has_glibc_verneed = True + return dict(versions_needed) - if has_glibc_verneed: - if MACHINE.get(e_machine) != "386": - ret = OS.LINUX - # TODO: check dynamic sections for libmachuser and libhurduser +@dataclass +class ABITag: + os: OS + kmajor: int + kminor: int + kpatch: int - if linker and "ld-linux" in linker: - ret = OS.LINUX - if linker and "/ld.so" in linker: - ret = OS.HURD +class PHNote: + def __init__(self, endian, buf): + self.endian = endian + self.buf = buf + + self.type_: int = None + self.descsz: int = None + self.name: str = None + + self._parse() + + def _parse(self): + namesz, self.descsz, self.type_ = struct.unpack_from(self.endian + "III", self.buf, 0x0) + name_offset = 0xC + self.desc_offset = name_offset + align(namesz, 0x4) + + logger.debug("ph:namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, self.descsz, self.type_) + + name = self.buf[name_offset : name_offset + namesz].partition(b"\x00")[0].decode("ascii") + logger.debug("name: %s", name) + + @property + def abi_tag(self) -> Optional[ABITag]: + if self.type_ != 1: + # TODO: what is this constant name? + return None + + if self.name != "GNU": + return None + + if self.descsz < 16: + return None + + desc = self.buf[self.desc_offset : self.desc_offset + self.descsz] + abi_tag, kmajor, kminor, kpatch = struct.unpack_from(self.endian + "IIII", desc, 0x0) + logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag) + + os = GNU_ABI_TAG.get(abi_tag) + if not os: + return None + + logger.debug("abi tag: %s earliest compatible kernel: %d.%d.%d", os, kmajor, kminor, kpatch) + + return ABITag(os, kmajor, kminor, kpatch) + + +class SHNote: + def __init__(self, endian, buf): + self.endian = endian + self.buf = buf + + self.type_: int = None + self.descsz: int = None + self.name: str = None + + self._parse() + + def _parse(self): + namesz, self.descsz, self.type_ = struct.unpack_from(self.endian + "III", self.buf, 0x0) + name_offset = 0xC + self.desc_offset = name_offset + align(namesz, 0x4) + + logger.debug("sh:namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, self.descsz, self.type_) + + name_buf = self.buf[name_offset : name_offset + namesz] + self.name = read_cstr(name_buf, 0x0) + logger.debug("sh:name: %s", self.name) + + @property + def abi_tag(self) -> Optional[ABITag]: + if self.name != "GNU": + return None + + if self.descsz < 16: + return None + + desc = self.buf[self.desc_offset : self.desc_offset + self.descsz] + abi_tag, kmajor, kminor, kpatch = struct.unpack_from(self.endian + "IIII", desc, 0x0) + logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag) + + os = GNU_ABI_TAG.get(abi_tag) + if not os: + return None + + logger.debug("abi tag: %s earliest compatible kernel: %d.%d.%d", os, kmajor, kminor, kpatch) + return ABITag(os, kmajor, kminor, kpatch) + + +def guess_os_from_osabi(elf) -> Optional[OS]: + return elf.ei_osabi + + +def guess_os_from_ph_notes(elf) -> Optional[OS]: + # search for PT_NOTE sections that specify an OS + # for example, on Linux there is a GNU section with minimum kernel version + PT_NOTE = 0x4 + for phdr in elf.program_headers: + if phdr.type != PT_NOTE: + continue + + note = PHNote(elf.endian, phdr.buf) + + if note.type_ != 1: + # TODO: what is this constant name? + continue + + if note.name == "Linux": + logger.debug("note owner: %s", "LINUX") + return OS.LINUX + elif note.name == "OpenBSD": + logger.debug("note owner: %s", "OPENBSD") + return OS.OPENBSD + elif note.name == "NetBSD": + logger.debug("note owner: %s", "NETBSD") + return OS.NETBSD + elif note.name == "FreeBSD": + logger.debug("note owner: %s", "FREEBSD") + return OS.FREEBSD + elif note.name == "GNU": + abi_tag = note.abi_tag + if abi_tag: + return abi_tag.os + else: + # cannot make a guess about the OS, but probably linux or hurd + pass + + return None + + +def guess_os_from_sh_notes(elf) -> Optional[OS]: + # search for notes stored in sections that aren't visible in program headers. + # e.g. .note.Linux in Linux kernel modules. + SHT_NOTE = 0x7 + for shdr in elf.section_headers: + if shdr.type != SHT_NOTE: + continue + + note = SHNote(elf.endian, shdr.buf) + + if note.name == "Linux": + logger.debug("note owner: %s", "LINUX") + return OS.LINUX + elif note.name == "OpenBSD": + logger.debug("note owner: %s", "OPENBSD") + return OS.OPENBSD + elif note.name == "NetBSD": + logger.debug("note owner: %s", "NETBSD") + return OS.NETBSD + elif note.name == "FreeBSD": + logger.debug("note owner: %s", "FREEBSD") + return OS.FREEBSD + elif note.name == "GNU": + abi_tag = note.abi_tag + if abi_tag: + ret = abi_tag.os if not ret else ret + else: + # cannot make a guess about the OS, but probably linux or hurd + pass + + return None + + +def guess_os_from_linker(elf) -> Optional[OS]: + # search for recognizable dynamic linkers (interpreters) + # for example, on linux, we see file paths like: /lib64/ld-linux-x86-64.so.2 + linker = elf.linker + if linker and "ld-linux" in elf.linker: + return OS.LINUX + + return None + + +def guess_os_from_abi_versions_needed(elf) -> Optional[OS]: + # then lets look for GLIBC symbol versioning requirements. + # this will let us guess about linux/hurd in some cases. + + versions_needed = elf.versions_needed + if any(map(lambda abi: abi.startswith("GLIBC"), itertools.chain(*versions_needed.values()))): + # there are any GLIBC versions needed + + if elf.e_machine != "386": + # GLIBC runs on Linux and Hurd. + # for Hurd, its *only* on i386. + # so if we're not on i386, then we're on Linux. + return OS.LINUX + + else: + # we're on i386, so we could be on either Linux or Hurd. + linker = elf.linker + + if linker and "ld-linux" in linker: + return OS.LINUX + + elif linker and "/ld.so" in linker: + return OS.HURD + + else: + # we don't have any good guesses based on versions needed + pass + + return None + + +def detect_elf_os(f) -> str: + """ + f: type Union[BinaryIO, IDAIO] + """ + elf = ELF(f) + + osabi_guess = guess_os_from_osabi(elf) + logger.info("guess: osabi: %s", osabi_guess) + + ph_notes_guess = guess_os_from_ph_notes(elf) + logger.info("guess: ph notes: %s", ph_notes_guess) + + sh_notes_guess = guess_os_from_sh_notes(elf) + logger.info("guess: sh notes: %s", sh_notes_guess) + + linker_guess = guess_os_from_linker(elf) + logger.info("guess: linker: %s", linker_guess) + + abi_versions_needed_guess = guess_os_from_abi_versions_needed(elf) + logger.info("guess: ABI versions needed: %s", abi_versions_needed_guess) + + ret = None + + if osabi_guess: + ret = osabi_guess + + elif ph_notes_guess: + ret = ph_notes_guess + + elif sh_notes_guess: + ret = sh_notes_guess + + elif linker_guess: + ret = linker_guess + + elif abi_versions_needed_guess: + ret = abi_versions_needed_guess + + # TODO: guess by dynamic sections return ret.value if ret is not None else "unknown" -class Arch(str, Enum): - I386 = "i386" - AMD64 = "amd64" - - def detect_elf_arch(f: BinaryIO) -> str: - f.seek(0x0) - file_header = f.read(0x40) - - if not file_header.startswith(b"\x7fELF"): - raise CorruptElfFile("missing magic header") - - (ei_data,) = struct.unpack_from("B", file_header, 5) - logger.debug("ei_data: 0x%02x", ei_data) - - if ei_data == 1: - endian = "<" - elif ei_data == 2: - endian = ">" - else: - raise CorruptElfFile("not an ELF file: invalid ei_data: 0x%02x" % ei_data) - - (ei_machine,) = struct.unpack_from(endian + "H", file_header, 0x12) - logger.debug("ei_machine: 0x%02x", ei_machine) - - EM_386 = 0x3 - EM_X86_64 = 0x3E - if ei_machine == EM_386: - return Arch.I386 - elif ei_machine == EM_X86_64: - return Arch.AMD64 - else: - # not really unknown, but unsupport at the moment: - # https://github.com/eliben/pyelftools/blob/ab444d982d1849191e910299a985989857466620/elftools/elf/enums.py#L73 - return "unknown" + return ELF(f).e_machine or "unknown" From 307a6fad4f4c89917436b2df430582874197e002 Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 9 Dec 2022 14:31:03 +0100 Subject: [PATCH 21/74] elf: os: detect via so dependencies --- capa/features/extractors/elf.py | 108 +++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 3 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index b3b86135..af0133e1 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -11,7 +11,7 @@ import itertools import collections from enum import Enum from dataclasses import dataclass -from typing import BinaryIO, Optional, Dict, Set +from typing import BinaryIO, Optional, Dict, Set, Iterator, Tuple, List logger = logging.getLogger(__name__) @@ -406,6 +406,94 @@ class ELF: return dict(versions_needed) + @property + def dynamic_entries(self) -> Iterator[Tuple[int, int]]: + """ + read the entries from the dynamic section, + yielding the tag and value for each entry. + """ + DT_NULL = 0x0 + PT_DYNAMIC = 0x2 + for phdr in self.program_headers: + if phdr.type != PT_DYNAMIC: + continue + + offset = 0x0 + while True: + if self.bitness == 32: + d_tag, d_val = struct.unpack_from(self.endian + "II", phdr.buf, offset) + offset += 8 + elif self.bitness == 64: + d_tag, d_val = struct.unpack_from(self.endian + "QQ", phdr.buf, offset) + offset += 16 + else: + raise NotImplementedError() + + if d_tag == DT_NULL: + break + + yield d_tag, d_val + + @property + def strtab(self) -> Optional[bytes]: + """ + fetch the bytes of the string table + referenced by the dynamic section. + """ + DT_STRTAB = 0x5 + DT_STRSZ = 0xA + + strtab_addr = None + strtab_size = None + + for d_tag, d_val in self.dynamic_entries: + if d_tag == DT_STRTAB: + strtab_addr = d_val + + for d_tag, d_val in self.dynamic_entries: + if d_tag == DT_STRSZ: + strtab_size = d_val + + if strtab_addr is None: + return None + + if strtab_size is None: + return None + + strtab_offset = None + for shdr in self.section_headers: + if shdr.addr <= strtab_addr < shdr.addr + shdr.size: + strtab_offset = shdr.offset + (strtab_addr - shdr.addr) + + if strtab_offset is None: + return None + + self.f.seek(strtab_offset) + strtab_buf = self.f.read(strtab_size) + + if len(strtab_buf) != strtab_size: + return None + + return strtab_buf + + @property + def needed(self) -> Iterator[str]: + """ + read the names of DT_NEEDED entries from the dynamic section, + which correspond to dependencies on other shared objects, + like: `libpthread.so.0` + """ + DT_NEEDED = 0x1 + strtab = self.strtab + if not strtab: + return + + for d_tag, d_val in self.dynamic_entries: + if d_tag != DT_NEEDED: + continue + + yield read_cstr(strtab, d_val) + @dataclass class ABITag: @@ -569,7 +657,7 @@ def guess_os_from_sh_notes(elf) -> Optional[OS]: elif note.name == "GNU": abi_tag = note.abi_tag if abi_tag: - ret = abi_tag.os if not ret else ret + return abi_tag.os else: # cannot make a guess about the OS, but probably linux or hurd pass @@ -618,6 +706,16 @@ def guess_os_from_abi_versions_needed(elf) -> Optional[OS]: return None +def guess_os_from_needed_dependencies(elf) -> Optional[OS]: + for needed in elf.needed: + if needed.startswith("libmachuser.so"): + return OS.HURD + if needed.startswith("libhurduser.so"): + return OS.HURD + + return None + + def detect_elf_os(f) -> str: """ f: type Union[BinaryIO, IDAIO] @@ -639,6 +737,9 @@ def detect_elf_os(f) -> str: abi_versions_needed_guess = guess_os_from_abi_versions_needed(elf) logger.info("guess: ABI versions needed: %s", abi_versions_needed_guess) + needed_dependencies_guess = guess_os_from_needed_dependencies(elf) + logger.info("guess: needed dependencies: %s", needed_dependencies_guess) + ret = None if osabi_guess: @@ -656,7 +757,8 @@ def detect_elf_os(f) -> str: elif abi_versions_needed_guess: ret = abi_versions_needed_guess - # TODO: guess by dynamic sections + elif needed_dependencies_guess: + ret = needed_dependencies_guess return ret.value if ret is not None else "unknown" From 1583fedba2b993995ebeb1c3e3a6524d5f18f52c Mon Sep 17 00:00:00 2001 From: William Ballenthin Date: Fri, 9 Dec 2022 17:34:44 +0100 Subject: [PATCH 22/74] mypy --- capa/features/extractors/elf.py | 4 ++-- capa/features/extractors/elffile.py | 18 ++++++++---------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index af0133e1..ad78c945 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -194,7 +194,7 @@ class ELF: # via https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html 1: "M32", 2: "SPARC", - 3: "386", + 3: "i386", 4: "68K", 5: "88K", 6: "486", @@ -238,7 +238,7 @@ class ELF: 59: "ME16", 60: "ST100", 61: "TINYJ", - 62: "X86_64", + 62: "amd64", 63: "PDSP", 64: "PDP10", 65: "PDP11", diff --git a/capa/features/extractors/elffile.py b/capa/features/extractors/elffile.py index 4810bb5f..d4f61a06 100644 --- a/capa/features/extractors/elffile.py +++ b/capa/features/extractors/elffile.py @@ -7,7 +7,6 @@ # See the License for the specific language governing permissions and limitations under the License. import io import logging -import contextlib from typing import Tuple, Iterator from elftools.elf.elffile import ELFFile, SymbolTableSection @@ -16,7 +15,6 @@ import capa.features.extractors.common from capa.features.file import Import, Section from capa.features.common import OS, FORMAT_ELF, Arch, Format, Feature from capa.features.address import NO_ADDRESS, FileOffsetAddress, AbsoluteVirtualAddress -from capa.features.extractors.elf import Arch as ElfArch from capa.features.extractors.base_extractor import FeatureExtractor logger = logging.getLogger(__name__) @@ -26,17 +24,17 @@ def extract_file_import_names(elf, **kwargs): # see https://github.com/eliben/pyelftools/blob/0664de05ed2db3d39041e2d51d19622a8ef4fb0f/scripts/readelf.py#L372 symbol_tables = [(idx, s) for idx, s in enumerate(elf.iter_sections()) if isinstance(s, SymbolTableSection)] - for section_index, section in symbol_tables: + for _, section in symbol_tables: if not isinstance(section, SymbolTableSection): continue if section["sh_entsize"] == 0: - logger.debug("Symbol table '%s' has a sh_entsize of zero!" % (section.name)) + logger.debug("Symbol table '%s' has a sh_entsize of zero!", section.name) continue - logger.debug("Symbol table '%s' contains %s entries:" % (section.name, section.num_symbols())) + logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_symbols()) - for nsym, symbol in enumerate(section.iter_symbols()): + for _, symbol in enumerate(section.iter_symbols()): if symbol.name and symbol.entry.st_info.type == "STT_FUNC": # TODO symbol address # TODO symbol version info? @@ -73,9 +71,9 @@ def extract_file_arch(elf, **kwargs): # TODO merge with capa.features.extractors.elf.detect_elf_arch() arch = elf.get_machine_arch() if arch == "x86": - yield Arch(ElfArch.I386), NO_ADDRESS + yield Arch("i386"), NO_ADDRESS elif arch == "x64": - yield Arch(ElfArch.AMD64), NO_ADDRESS + yield Arch("amd64"), NO_ADDRESS else: logger.warning("unsupported architecture: %s", arch) @@ -153,8 +151,8 @@ class ElfFeatureExtractor(FeatureExtractor): def extract_insn_features(self, f, bb, insn): raise NotImplementedError("ElfFeatureExtractor can only be used to extract file features") - def is_library_function(self, va): + def is_library_function(self, addr): raise NotImplementedError("ElfFeatureExtractor can only be used to extract file features") - def get_function_name(self, va): + def get_function_name(self, addr): raise NotImplementedError("ElfFeatureExtractor can only be used to extract file features") From c958a6a286443b21cd301918577c345a839dd2ae Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Fri, 9 Dec 2022 16:07:46 +0100 Subject: [PATCH 23/74] elf: black --- capa/features/extractors/elf.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index ad78c945..6e7a1b75 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -10,8 +10,8 @@ import logging import itertools import collections from enum import Enum +from typing import Set, Dict, List, Tuple, BinaryIO, Iterator, Optional from dataclasses import dataclass -from typing import BinaryIO, Optional, Dict, Set, Iterator, Tuple, List logger = logging.getLogger(__name__) @@ -320,9 +320,13 @@ class ELF: shent = self.shbuf[shent_offset : shent_offset + self.e_shentsize] if self.bitness == 32: - sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link = struct.unpack_from(self.endian + "IIIIIII", shent, 0x0) + sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link = struct.unpack_from( + self.endian + "IIIIIII", shent, 0x0 + ) elif self.bitness == 64: - sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link = struct.unpack_from(self.endian + "IIQQQQI", shent, 0x0) + sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link = struct.unpack_from( + self.endian + "IIQQQQI", shent, 0x0 + ) else: raise NotImplementedError() @@ -362,7 +366,7 @@ class ELF: # strings are stored in the section referenced by the sh_link field of the section header. # each Verneed struct contains a reference to the name of the library, # each Vernaux struct contains a reference to the name of a symbol. - SHT_GNU_VERNEED = 0x6ffffffe + SHT_GNU_VERNEED = 0x6FFFFFFE for shdr in self.section_headers: if shdr.type != SHT_GNU_VERNEED: continue @@ -378,7 +382,9 @@ class ELF: vn_offset = 0x0 while True: # ElfXX_Verneed layout is the same on 32 and 64 bit - vn_version, vn_cnt, vn_file, vn_aux, vn_next = struct.unpack_from(self.endian + "HHIII", shdr.buf, vn_offset) + vn_version, vn_cnt, vn_file, vn_aux, vn_next = struct.unpack_from( + self.endian + "HHIII", shdr.buf, vn_offset + ) if vn_version != 1: # unexpected format, don't try to keep parsing break @@ -437,7 +443,7 @@ class ELF: @property def strtab(self) -> Optional[bytes]: """ - fetch the bytes of the string table + fetch the bytes of the string table referenced by the dynamic section. """ DT_STRTAB = 0x5 From 7ba08edffa402da1f373b4a0303e5c6d08722b39 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Fri, 9 Dec 2022 16:09:41 +0100 Subject: [PATCH 24/74] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2bf3693..1ba2cab5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ - update pydantic model to guarantee type coercion #1176 @mike-hunhoff - do not overwrite version in version.py during PyInstaller build #1169 @mr-tz - render: fix vverbose rendering of offsets #1215 @williballenthin +- elf: better detect OS via GLIBC ABI version needed and dependencies #1221 @williballenthin ### capa explorer IDA Pro plugin - fix: display instruction items #1154 @mr-tz From b26ed47ab8f386e8e16981725c0002672fd3411e Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Mon, 12 Dec 2022 11:40:32 +0100 Subject: [PATCH 25/74] tests: add OS detection tests --- tests/test_os_detection.py | 40 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/tests/test_os_detection.py b/tests/test_os_detection.py index e2f850d7..25f30475 100644 --- a/tests/test_os_detection.py +++ b/tests/test_os_detection.py @@ -14,13 +14,49 @@ from fixtures import * import capa.features.extractors.elf -def test_elf_section_gnu_abi_tag(): +def test_elf_sh_notes(): + # guess: osabi: None + # guess: ph notes: None + # guess: sh notes: OS.LINUX + # guess: linker: None + # guess: ABI versions needed: None + # guess: needed dependencies: None path = get_data_path_by_name("2f7f5f") with open(path, "rb") as f: assert capa.features.extractors.elf.detect_elf_os(f) == "linux" -def test_elf_program_header_gnu_abi_tag(): +def test_elf_pt_notes(): + # guess: osabi: None + # guess: ph notes: None + # guess: sh notes: OS.LINUX + # guess: linker: OS.LINUX + # guess: ABI versions needed: OS.LINUX + # guess: needed dependencies: None path = get_data_path_by_name("7351f.elf") with open(path, "rb") as f: assert capa.features.extractors.elf.detect_elf_os(f) == "linux" + + +def test_elf_so_needed(): + # guess: osabi: None + # guess: ph notes: None + # guess: sh notes: OS.HURD + # guess: linker: None + # guess: ABI versions needed: OS.HURD + # guess: needed dependencies: OS.HURD + path = get_data_path_by_name("b5f052") + with open(path, "rb") as f: + assert capa.features.extractors.elf.detect_elf_os(f) == "hurd" + + +def test_elf_abi_version_hurd(): + # guess: osabi: None + # guess: ph notes: None + # guess: sh notes: OS.HURD + # guess: linker: None + # guess: ABI versions needed: OS.HURD + # guess: needed dependencies: None + path = get_data_path_by_name("bf7a9c") + with open(path, "rb") as f: + assert capa.features.extractors.elf.detect_elf_os(f) == "unknown" From 22bef146f83174954110a83ccbc778f560831ce2 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Mon, 12 Dec 2022 11:40:43 +0100 Subject: [PATCH 26/74] tests: add OS detection tests --- tests/fixtures.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/fixtures.py b/tests/fixtures.py index 8df1153f..1d0ba0fa 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -284,6 +284,10 @@ def get_data_path_by_name(name): return os.path.join(CD, "data", "0953cc3b77ed2974b09e3a00708f88de931d681e2d0cb64afbaf714610beabe6.exe_") elif name.startswith("_039a6"): return os.path.join(CD, "data", "039a6336d0802a2255669e6867a5679c7eb83313dbc61fb1c7232147379bd304.exe_") + elif name.startswith("b5f052"): + return os.path.join(CD, "data", "b5f0524e69b3a3cf636c7ac366ca57bf5e3a8fdc8a9f01caf196c611a7918a87.elf_") + elif name.startswith("bf7a9c"): + return os.path.join(CD, "data", "bf7a9c8bdfa6d47e01ad2b056264acc3fd90cf43fe0ed8deec93ab46b47d76cb.elf_") else: raise ValueError("unexpected sample fixture: %s" % name) From d4a218e268b1b3a403cb6ea6f21858d8179d562a Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Mon, 12 Dec 2022 11:41:01 +0100 Subject: [PATCH 27/74] elf: os: bug fixes --- capa/features/extractors/elf.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 6e7a1b75..5ad13456 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -410,7 +410,9 @@ class ELF: if vn_next == 0: break - return dict(versions_needed) + return dict(versions_needed) + + return {} @property def dynamic_entries(self) -> Iterator[Tuple[int, int]]: @@ -533,7 +535,9 @@ class PHNote: @property def abi_tag(self) -> Optional[ABITag]: if self.type_ != 1: - # TODO: what is this constant name? + # > The type field shall be 1. + # Linux Standard Base Specification 1.2 + # ref: https://refspecs.linuxfoundation.org/LSB_1.2.0/gLSB/noteabitag.html return None if self.name != "GNU": @@ -612,7 +616,9 @@ def guess_os_from_ph_notes(elf) -> Optional[OS]: note = PHNote(elf.endian, phdr.buf) if note.type_ != 1: - # TODO: what is this constant name? + # > The type field shall be 1. + # Linux Standard Base Specification 1.2 + # ref: https://refspecs.linuxfoundation.org/LSB_1.2.0/gLSB/noteabitag.html continue if note.name == "Linux": @@ -689,7 +695,7 @@ def guess_os_from_abi_versions_needed(elf) -> Optional[OS]: if any(map(lambda abi: abi.startswith("GLIBC"), itertools.chain(*versions_needed.values()))): # there are any GLIBC versions needed - if elf.e_machine != "386": + if elf.e_machine != "i386": # GLIBC runs on Linux and Hurd. # for Hurd, its *only* on i386. # so if we're not on i386, then we're on Linux. From 0f902124d176ceb933a3f5a91ccd8332a3ba5336 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Mon, 12 Dec 2022 11:43:48 +0100 Subject: [PATCH 28/74] elf: reduce logging verbosity --- capa/features/extractors/elf.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 5ad13456..d5c187dc 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -735,22 +735,22 @@ def detect_elf_os(f) -> str: elf = ELF(f) osabi_guess = guess_os_from_osabi(elf) - logger.info("guess: osabi: %s", osabi_guess) + logger.debug("guess: osabi: %s", osabi_guess) ph_notes_guess = guess_os_from_ph_notes(elf) - logger.info("guess: ph notes: %s", ph_notes_guess) + logger.debug("guess: ph notes: %s", ph_notes_guess) sh_notes_guess = guess_os_from_sh_notes(elf) - logger.info("guess: sh notes: %s", sh_notes_guess) + logger.debug("guess: sh notes: %s", sh_notes_guess) linker_guess = guess_os_from_linker(elf) - logger.info("guess: linker: %s", linker_guess) + logger.debug("guess: linker: %s", linker_guess) abi_versions_needed_guess = guess_os_from_abi_versions_needed(elf) - logger.info("guess: ABI versions needed: %s", abi_versions_needed_guess) + logger.debug("guess: ABI versions needed: %s", abi_versions_needed_guess) needed_dependencies_guess = guess_os_from_needed_dependencies(elf) - logger.info("guess: needed dependencies: %s", needed_dependencies_guess) + logger.debug("guess: needed dependencies: %s", needed_dependencies_guess) ret = None From a46d7b3262bbba6cd84ba4ff0e12f764043de0bf Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Mon, 12 Dec 2022 12:18:01 +0000 Subject: [PATCH 29/74] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index 23f114a2..e219f809 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 23f114a2e2996d33e0bc9caf98f58b0c5bb0ade1 +Subproject commit e219f809bda7c1d8cc8d9d716b51e74d2167ed19 From 5cd7f33d005f22017bdc7adc5021a877c7a63369 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Mon, 12 Dec 2022 12:29:44 +0000 Subject: [PATCH 30/74] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index e219f809..0ffc189e 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit e219f809bda7c1d8cc8d9d716b51e74d2167ed19 +Subproject commit 0ffc189eea6113d2dfc6355dacad8fbd78f9675d From ee72ed4b5309dc1d29b82e6f16dd55d05e6b7fa6 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Mon, 12 Dec 2022 14:06:17 +0100 Subject: [PATCH 31/74] tests: os: fix test --- tests/test_os_detection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_os_detection.py b/tests/test_os_detection.py index 25f30475..82e592a7 100644 --- a/tests/test_os_detection.py +++ b/tests/test_os_detection.py @@ -59,4 +59,4 @@ def test_elf_abi_version_hurd(): # guess: needed dependencies: None path = get_data_path_by_name("bf7a9c") with open(path, "rb") as f: - assert capa.features.extractors.elf.detect_elf_os(f) == "unknown" + assert capa.features.extractors.elf.detect_elf_os(f) == "hurd" From 47f58162c5654fee3939ae40157c26cbb126717d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Dec 2022 14:02:41 +0000 Subject: [PATCH 32/74] build(deps-dev): bump black from 22.10.0 to 22.12.0 Bumps [black](https://github.com/psf/black) from 22.10.0 to 22.12.0. - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/22.10.0...22.12.0) --- updated-dependencies: - dependency-name: black dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 69fcb62d..a799c9a4 100644 --- a/setup.py +++ b/setup.py @@ -74,7 +74,7 @@ setuptools.setup( "pytest-instafail==0.4.2", "pytest-cov==4.0.0", "pycodestyle==2.9.1", - "black==22.10.0", + "black==22.12.0", "isort==5.10.1", "mypy==0.982", "psutil==5.9.2", From d581eefcdfe64fadc361886bc90eed446d73192e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Dec 2022 14:02:53 +0000 Subject: [PATCH 33/74] build(deps-dev): bump mypy from 0.982 to 0.991 Bumps [mypy](https://github.com/python/mypy) from 0.982 to 0.991. - [Release notes](https://github.com/python/mypy/releases) - [Commits](https://github.com/python/mypy/compare/v0.982...v0.991) --- updated-dependencies: - dependency-name: mypy dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 69fcb62d..d71a34e3 100644 --- a/setup.py +++ b/setup.py @@ -76,7 +76,7 @@ setuptools.setup( "pycodestyle==2.9.1", "black==22.10.0", "isort==5.10.1", - "mypy==0.982", + "mypy==0.991", "psutil==5.9.2", "stix2==3.0.1", "requests==2.28.0", From a7d06275c1fa72b2f373220bd4dc5ca418457c91 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Dec 2022 14:02:58 +0000 Subject: [PATCH 34/74] build(deps-dev): bump pyinstaller from 5.5 to 5.7.0 Bumps [pyinstaller](https://github.com/pyinstaller/pyinstaller) from 5.5 to 5.7.0. - [Release notes](https://github.com/pyinstaller/pyinstaller/releases) - [Changelog](https://github.com/pyinstaller/pyinstaller/blob/develop/doc/CHANGES.rst) - [Commits](https://github.com/pyinstaller/pyinstaller/compare/v5.5...v5.7.0) --- updated-dependencies: - dependency-name: pyinstaller dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 69fcb62d..f39fdb56 100644 --- a/setup.py +++ b/setup.py @@ -90,7 +90,7 @@ setuptools.setup( "types_requests==2.28.1", ], "build": [ - "pyinstaller==5.5", + "pyinstaller==5.7.0", ], }, zip_safe=False, From b6911f8ad23326632fd7216870af937bd2fa6de0 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Mon, 12 Dec 2022 14:39:26 +0000 Subject: [PATCH 35/74] Sync capa rules submodule --- CHANGELOG.md | 3 ++- README.md | 2 +- rules | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ba2cab5..ae1e0dc0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ ### Breaking Changes -### New Rules (31) +### New Rules (32) - collection/use-dotnet-library-sharpclipboard @johnk3r - data-manipulation/encryption/aes/use-dotnet-library-encryptdecryptutils @johnk3r @@ -42,6 +42,7 @@ - nursery/manipulate-network-credentials-in-dotnet michael.hunhoff@mandiant.com - nursery/encrypt-data-using-aes william.ballenthin@mandiant.com Ivan Kwiatkowski (@JusticeRage) - host-interaction/uac/bypass/bypass-uac-via-rpc david.cannings@pwc.com david@edeca.net +- nursery/check-for-vm-using-instruction-vpcext richard.weiss@mandiant.com - ### Bug Fixes diff --git a/README.md b/README.md index 5d05c821..d054a7ec 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-732-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-733-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index 5ba70c97..2bc58afb 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 5ba70c97d22dd59efcf29a128557e64213f7ace8 +Subproject commit 2bc58afb5184a914ae13152df4ef09eb18ee3e79 From 522438094779128f36a59eb5a336fe9bdb9a4e9d Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Mon, 12 Dec 2022 18:02:07 +0100 Subject: [PATCH 36/74] setup: viv-utils 0.7.6 closes #1192 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 69fcb62d..94b1c215 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ requirements = [ "termcolor==2.1.1", "wcwidth==0.2.5", "ida-settings==2.1.0", - "viv-utils[flirt]==0.7.5", + "viv-utils[flirt]==0.7.6", "halo==0.0.31", "networkx==2.5.1", "ruamel.yaml==0.17.21", From 447cd95bc5cf5afbbadb222ff0bad73381d52330 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Mon, 12 Dec 2022 16:36:44 -0700 Subject: [PATCH 37/74] ida: add support for COFF and extern functions (#1223) --- .github/mypy/mypy.ini | 3 +++ CHANGELOG.md | 1 + capa/features/extractors/ida/file.py | 7 +++++-- capa/features/extractors/ida/helpers.py | 14 +++++++++++++ capa/features/extractors/ida/insn.py | 26 ++++++++++++++++++------- capa/ida/helpers.py | 1 + 6 files changed, 43 insertions(+), 9 deletions(-) diff --git a/.github/mypy/mypy.ini b/.github/mypy/mypy.ini index e6e0e942..936b8e45 100644 --- a/.github/mypy/mypy.ini +++ b/.github/mypy/mypy.ini @@ -63,6 +63,9 @@ ignore_missing_imports = True [mypy-ida_loader.*] ignore_missing_imports = True +[mypy-ida_segment.*] +ignore_missing_imports = True + [mypy-PyQt5.*] ignore_missing_imports = True diff --git a/CHANGELOG.md b/CHANGELOG.md index ae1e0dc0..934636e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,7 @@ - fix: display instruction items #1154 @mr-tz - fix: accept only plaintext pasted content #1194 @williballenthin - fix: UnboundLocalError #1217 @williballenthin +- extractor: add support for COFF files and extern functions #1223 @mike-hunhoff ### Development diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py index eefef531..31c7fb09 100644 --- a/capa/features/extractors/ida/file.py +++ b/capa/features/extractors/ida/file.py @@ -115,6 +115,9 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]: for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): yield Import(name), addr + for (ea, info) in capa.features.extractors.ida.helpers.get_file_externs().items(): + yield Import(info[1]), AbsoluteVirtualAddress(ea) + def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]: """extract section names @@ -165,7 +168,7 @@ def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]: def extract_file_format() -> Iterator[Tuple[Feature, Address]]: file_info = idaapi.get_inf_structure() - if file_info.filetype == idaapi.f_PE: + if file_info.filetype in (idaapi.f_PE, idaapi.f_COFF): yield Format(FORMAT_PE), NO_ADDRESS elif file_info.filetype == idaapi.f_ELF: yield Format(FORMAT_ELF), NO_ADDRESS @@ -173,7 +176,7 @@ def extract_file_format() -> Iterator[Tuple[Feature, Address]]: # no file type to return when processing a binary file, but we want to continue processing return else: - raise NotImplementedError("file format: %d" % file_info.filetype) + raise NotImplementedError("unexpected file format: %d" % file_info.filetype) def extract_features() -> Iterator[Tuple[Feature, Address]]: diff --git a/capa/features/extractors/ida/helpers.py b/capa/features/extractors/ida/helpers.py index 186723d2..a333f064 100644 --- a/capa/features/extractors/ida/helpers.py +++ b/capa/features/extractors/ida/helpers.py @@ -11,6 +11,7 @@ import idc import idaapi import idautils import ida_bytes +import ida_segment from capa.features.address import AbsoluteVirtualAddress from capa.features.extractors.base_extractor import FunctionHandle @@ -109,6 +110,19 @@ def get_file_imports() -> Dict[int, Tuple[str, str, int]]: return imports +def get_file_externs() -> Dict[int, Tuple[str, str, int]]: + externs = {} + + for seg in get_segments(skip_header_segments=True): + if not (seg.type == ida_segment.SEG_XTRN): + continue + + for ea in idautils.Functions(seg.start_ea, seg.end_ea): + externs[ea] = ("", idaapi.get_func_name(ea), -1) + + return externs + + def get_instructions_in_range(start: int, end: int) -> Iterator[idaapi.insn_t]: """yield instructions in range diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 75ad987c..da9e1387 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -23,13 +23,19 @@ from capa.features.extractors.base_extractor import BBHandle, InsnHandle, Functi SECURITY_COOKIE_BYTES_DELTA = 0x40 -def get_imports(ctx: Dict[str, Any]) -> Dict[str, Any]: +def get_imports(ctx: Dict[str, Any]) -> Dict[int, Any]: if "imports_cache" not in ctx: ctx["imports_cache"] = capa.features.extractors.ida.helpers.get_file_imports() return ctx["imports_cache"] -def check_for_api_call(ctx: Dict[str, Any], insn: idaapi.insn_t) -> Iterator[str]: +def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]: + if "externs_cache" not in ctx: + ctx["externs_cache"] = capa.features.extractors.ida.helpers.get_file_externs() + return ctx["externs_cache"] + + +def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Iterator[Any]: """check instruction for API call""" info = () ref = insn.ea @@ -46,7 +52,7 @@ def check_for_api_call(ctx: Dict[str, Any], insn: idaapi.insn_t) -> Iterator[str except IndexError: break - info = get_imports(ctx).get(ref, ()) + info = funcs.get(ref, ()) if info: break @@ -55,7 +61,7 @@ def check_for_api_call(ctx: Dict[str, Any], insn: idaapi.insn_t) -> Iterator[str break if info: - yield "%s.%s" % (info[0], info[1]) + yield info def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: @@ -70,11 +76,17 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) if not insn.get_canon_mnem() in ("call", "jmp"): return - for api in check_for_api_call(fh.ctx, insn): - dll, _, symbol = api.rpartition(".") - for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + # check calls to imported functions + for api in check_for_api_call(insn, get_imports(fh.ctx)): + # tuple (, , ) + for name in capa.features.extractors.helpers.generate_symbols(api[0], api[1]): yield API(name), ih.address + # check calls to extern functions + for api in check_for_api_call(insn, get_externs(fh.ctx)): + # tuple (, , ) + yield API(api[1]), ih.address + # extract IDA/FLIRT recognized API functions targets = tuple(idautils.CodeRefsFrom(insn.ea, False)) if not targets: diff --git a/capa/ida/helpers.py b/capa/ida/helpers.py index eb3151d9..d1ef3093 100644 --- a/capa/ida/helpers.py +++ b/capa/ida/helpers.py @@ -27,6 +27,7 @@ SUPPORTED_FILE_TYPES = ( idaapi.f_PE, idaapi.f_ELF, idaapi.f_BIN, + idaapi.f_COFF, # idaapi.f_MACHO, ) From 8afebc1f173828079b90cbca4d193c091da5cc42 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 13 Dec 2022 13:20:01 +0100 Subject: [PATCH 38/74] ci: mypy: enable --check-untyped-defs --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ed866547..ed635959 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -40,7 +40,7 @@ jobs: - name: Lint with pycodestyle run: pycodestyle --show-source capa/ scripts/ tests/ - name: Check types with mypy - run: mypy --config-file .github/mypy/mypy.ini capa/ scripts/ tests/ + run: mypy --config-file .github/mypy/mypy.ini --check-untyped-defs capa/ scripts/ tests/ rule_linter: runs-on: ubuntu-20.04 From b1d6fcd6c8b125aec518213b8e65f2b1cd1c1c86 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 13 Dec 2022 13:20:24 +0100 Subject: [PATCH 39/74] mypy --- capa/features/extractors/elf.py | 37 ++++++++++++---------- capa/features/extractors/ida/helpers.py | 6 ++-- capa/features/extractors/viv/basicblock.py | 2 +- capa/features/extractors/viv/function.py | 2 +- capa/features/extractors/viv/insn.py | 2 +- 5 files changed, 26 insertions(+), 23 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index d5c187dc..4fed06dc 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -92,17 +92,18 @@ class Shdr: class ELF: - def __init__(self, f): + def __init__(self, f: BinaryIO): self.f = f - self.bitness: int = None - self.endian: str = None - self.e_phentsize: int = None - self.e_phnum: int = None - self.e_shentsize: int = None - self.e_shnum: int = None - self.phbuf = None - self.shbuf = None + # these will all be initialized in `_parse()` + self.bitness: int = 0 + self.endian: str = "" + self.e_phentsize: int = 0 + self.e_phnum: int = 0 + self.e_shentsize: int = 0 + self.e_shnum: int = 0 + self.phbuf: bytes = b"" + self.shbuf: bytes = b"" self._parse() @@ -512,13 +513,14 @@ class ABITag: class PHNote: - def __init__(self, endian, buf): + def __init__(self, endian: str, buf: bytes): self.endian = endian self.buf = buf - self.type_: int = None - self.descsz: int = None - self.name: str = None + # these will be initialized in `_parse()` + self.type_: int = 0 + self.descsz: int = 0 + self.name: str = "" self._parse() @@ -560,13 +562,14 @@ class PHNote: class SHNote: - def __init__(self, endian, buf): + def __init__(self, endian: str, buf: bytes): self.endian = endian self.buf = buf - self.type_: int = None - self.descsz: int = None - self.name: str = None + # these will be initialized in `_parse()` + self.type_: int = 0 + self.descsz: int = 0 + self.name: str = "" self._parse() diff --git a/capa/features/extractors/ida/helpers.py b/capa/features/extractors/ida/helpers.py index a333f064..5c997f69 100644 --- a/capa/features/extractors/ida/helpers.py +++ b/capa/features/extractors/ida/helpers.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Any, Dict, Tuple, Iterator +from typing import Any, Dict, Tuple, Iterator, Optional import idc import idaapi @@ -36,7 +36,7 @@ def find_byte_sequence(start: int, end: int, seq: bytes) -> Iterator[int]: def get_functions( - start: int = None, end: int = None, skip_thunks: bool = False, skip_libs: bool = False + start: Optional[int] = None, end: Optional[int] = None, skip_thunks: bool = False, skip_libs: bool = False ) -> Iterator[FunctionHandle]: """get functions, range optional @@ -287,7 +287,7 @@ def is_frame_register(reg: int) -> bool: return reg in (idautils.procregs.sp.reg, idautils.procregs.bp.reg) -def get_insn_ops(insn: idaapi.insn_t, target_ops: Tuple[Any] = None) -> idaapi.op_t: +def get_insn_ops(insn: idaapi.insn_t, target_ops: Optional[Tuple[Any]] = None) -> idaapi.op_t: """yield op_t for instruction, filter on type if specified""" for op in insn.ops: if op.type == idaapi.o_void: diff --git a/capa/features/extractors/viv/basicblock.py b/capa/features/extractors/viv/basicblock.py index 6341ec3a..9848bec0 100644 --- a/capa/features/extractors/viv/basicblock.py +++ b/capa/features/extractors/viv/basicblock.py @@ -31,7 +31,7 @@ def interface_extract_basic_block_XXX(f: FunctionHandle, bb: BBHandle) -> Iterat yields: (Feature, Address): the feature and the address at which its found. """ - ... + raise NotImplementedError def _bb_has_tight_loop(f, bb): diff --git a/capa/features/extractors/viv/function.py b/capa/features/extractors/viv/function.py index 64671711..cf1df527 100644 --- a/capa/features/extractors/viv/function.py +++ b/capa/features/extractors/viv/function.py @@ -27,7 +27,7 @@ def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[Tuple[Feature yields: (Feature, Address): the feature and the address at which its found. """ - ... + raise NotImplementedError def extract_function_calls_to(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index ae106c31..738c69a7 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -44,7 +44,7 @@ def interface_extract_instruction_XXX( yields: (Feature, Address): the feature and the address at which its found. """ - ... + raise NotImplementedError def get_imports(vw): From 62700ca5d1c831946ffaef1bc9fb1f0eb8d72cbf Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 13 Dec 2022 14:07:51 +0100 Subject: [PATCH 40/74] setup: bump viv-utils to 0.7.7 for py3.11 support --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 94b1c215..51a4ece1 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ requirements = [ "termcolor==2.1.1", "wcwidth==0.2.5", "ida-settings==2.1.0", - "viv-utils[flirt]==0.7.6", + "viv-utils[flirt]==0.7.7", "halo==0.0.31", "networkx==2.5.1", "ruamel.yaml==0.17.21", From f9b68008319dd19a971869a160263e4e472d06b9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 13 Dec 2022 13:15:33 +0000 Subject: [PATCH 41/74] build(deps-dev): bump pycodestyle from 2.9.1 to 2.10.0 Bumps [pycodestyle](https://github.com/PyCQA/pycodestyle) from 2.9.1 to 2.10.0. - [Release notes](https://github.com/PyCQA/pycodestyle/releases) - [Changelog](https://github.com/PyCQA/pycodestyle/blob/main/CHANGES.txt) - [Commits](https://github.com/PyCQA/pycodestyle/compare/2.9.1...2.10.0) --- updated-dependencies: - dependency-name: pycodestyle dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9ba7abee..77ce7aaf 100644 --- a/setup.py +++ b/setup.py @@ -73,7 +73,7 @@ setuptools.setup( "pytest-sugar==0.9.4", "pytest-instafail==0.4.2", "pytest-cov==4.0.0", - "pycodestyle==2.9.1", + "pycodestyle==2.10.0", "black==22.12.0", "isort==5.10.1", "mypy==0.991", From 35243ef7a6df5a27fc08446b0caf6c0aad766072 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 13 Dec 2022 13:23:46 +0000 Subject: [PATCH 42/74] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae1e0dc0..cee6ce3e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - extract property features from .NET PE files #1168 @anushkavirgaonkar - emit features for .NET newobj instruction #1186 @mike-hunhoff - fix import-to-ida script formatting #1208 @williballenthin +- Python 3.11 support #1192 @williballenthin ### Breaking Changes From b819033da08b5878be60c24f8598dcb0beebb256 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 14 Dec 2022 10:37:39 +0100 Subject: [PATCH 43/74] lots of mypy --- capa/engine.py | 15 ++- capa/features/common.py | 41 ++++--- capa/features/extractors/dnfile/helpers.py | 37 +++++++ capa/features/extractors/dnfile_.py | 37 ++++++- capa/features/extractors/dotnetfile.py | 37 ++++++- capa/features/extractors/null.py | 14 +-- capa/features/extractors/pefile.py | 6 +- capa/features/extractors/smda/file.py | 3 +- capa/ida/plugin/form.py | 99 +++++++++-------- capa/ida/plugin/item.py | 4 +- capa/ida/plugin/proxy.py | 2 +- capa/ida/plugin/view.py | 8 +- capa/main.py | 12 +-- capa/perf.py | 2 +- capa/rules.py | 37 ++++--- rules | 2 +- scripts/bulk-process.py | 3 +- scripts/capa2yara.py | 20 ++-- scripts/capa_as_library.py | 2 +- scripts/detect-elf-os.py | 2 +- scripts/lint.py | 16 +-- scripts/show-capabilities-by-function.py | 3 +- scripts/show-features.py | 4 +- tests/data | 2 +- tests/test_engine.py | 119 +++++++++++---------- tests/test_fmt.py | 4 +- tests/test_match.py | 2 +- tests/test_result_document.py | 29 +++++ tests/test_rules.py | 81 +++++++------- 29 files changed, 410 insertions(+), 233 deletions(-) diff --git a/capa/engine.py b/capa/engine.py index bd26f454..b5fbb412 100644 --- a/capa/engine.py +++ b/capa/engine.py @@ -8,7 +8,7 @@ import copy import collections -from typing import TYPE_CHECKING, Set, Dict, List, Tuple, Mapping, Iterable +from typing import TYPE_CHECKING, Set, Dict, List, Tuple, Mapping, Iterable, Iterator, Union, cast import capa.perf import capa.features.common @@ -60,17 +60,24 @@ class Statement: """ raise NotImplementedError() - def get_children(self): + def get_children(self) -> Iterator[Union["Statement", Feature]]: if hasattr(self, "child"): - yield self.child + # this really confuses mypy because the property may not exist + # since its defined in the subclasses. + child = self.child # type: ignore + assert isinstance(child, (Statement, Feature)) + yield child if hasattr(self, "children"): for child in getattr(self, "children"): + assert isinstance(child, (Statement, Feature)) yield child def replace_child(self, existing, new): if hasattr(self, "child"): - if self.child is existing: + # this really confuses mypy because the property may not exist + # since its defined in the subclasses. + if self.child is existing: # type: ignore self.child = new if hasattr(self, "children"): diff --git a/capa/features/common.py b/capa/features/common.py index a8dca781..dca0d03f 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -200,8 +200,9 @@ class Substring(String): # mapping from string value to list of locations. # will unique the locations later on. - matches = collections.defaultdict(list) + matches: collections.defaultdict[str, Set[Address]] = collections.defaultdict(set) + assert isinstance(self.value, str) for feature, locations in ctx.items(): if not isinstance(feature, (String,)): continue @@ -211,32 +212,29 @@ class Substring(String): raise ValueError("unexpected feature value type") if self.value in feature.value: - matches[feature.value].extend(locations) + matches[feature.value].update(locations) if short_circuit: # we found one matching string, thats sufficient to match. # don't collect other matching strings in this mode. break if matches: - # finalize: defaultdict -> dict - # which makes json serialization easier - matches = dict(matches) - # collect all locations locations = set() - for s in matches.keys(): - matches[s] = list(set(matches[s])) - locations.update(matches[s]) + for locs in matches.values(): + locations.update(locs) # unlike other features, we cannot return put a reference to `self` directly in a `Result`. # this is because `self` may match on many strings, so we can't stuff the matched value into it. # instead, return a new instance that has a reference to both the substring and the matched values. - return Result(True, _MatchedSubstring(self, matches), [], locations=locations) + return Result(True, _MatchedSubstring(self, dict(matches)), [], locations=locations) else: return Result(False, _MatchedSubstring(self, {}), []) def __str__(self): - return "substring(%s)" % self.value + v = self.value + assert isinstance(v, str) + return "substring(%s)" % v class _MatchedSubstring(Substring): @@ -261,6 +259,7 @@ class _MatchedSubstring(Substring): self.matches = matches def __str__(self): + assert isinstance(self.value, str) return 'substring("%s", matches = %s)' % ( self.value, ", ".join(map(lambda s: '"' + s + '"', (self.matches or {}).keys())), @@ -292,7 +291,7 @@ class Regex(String): # mapping from string value to list of locations. # will unique the locations later on. - matches = collections.defaultdict(list) + matches: collections.defaultdict[str, Set[Address]] = collections.defaultdict(set) for feature, locations in ctx.items(): if not isinstance(feature, (String,)): @@ -307,32 +306,28 @@ class Regex(String): # using this mode cleans is more convenient for rule authors, # so that they don't have to prefix/suffix their terms like: /.*foo.*/. if self.re.search(feature.value): - matches[feature.value].extend(locations) + matches[feature.value].update(locations) if short_circuit: # we found one matching string, thats sufficient to match. # don't collect other matching strings in this mode. break if matches: - # finalize: defaultdict -> dict - # which makes json serialization easier - matches = dict(matches) - # collect all locations locations = set() - for s in matches.keys(): - matches[s] = list(set(matches[s])) - locations.update(matches[s]) + for locs in matches.values(): + locations.update(locs) # unlike other features, we cannot return put a reference to `self` directly in a `Result`. # this is because `self` may match on many strings, so we can't stuff the matched value into it. # instead, return a new instance that has a reference to both the regex and the matched values. # see #262. - return Result(True, _MatchedRegex(self, matches), [], locations=locations) + return Result(True, _MatchedRegex(self, dict(matches)), [], locations=locations) else: return Result(False, _MatchedRegex(self, {}), []) def __str__(self): + assert isinstance(self.value, str) return "regex(string =~ %s)" % self.value @@ -358,6 +353,7 @@ class _MatchedRegex(Regex): self.matches = matches def __str__(self): + assert isinstance(self.value, str) return "regex(string =~ %s, matches = %s)" % ( self.value, ", ".join(map(lambda s: '"' + s + '"', (self.matches or {}).keys())), @@ -380,16 +376,19 @@ class Bytes(Feature): capa.perf.counters["evaluate.feature"] += 1 capa.perf.counters["evaluate.feature.bytes"] += 1 + assert isinstance(self.value, bytes) for feature, locations in ctx.items(): if not isinstance(feature, (Bytes,)): continue + assert isinstance(feature.value, bytes) if feature.value.startswith(self.value): return Result(True, self, [], locations=locations) return Result(False, self, []) def get_value_str(self): + assert isinstance(self.value, bytes) return hex_string(bytes_to_str(self.value)) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 3fef794d..2c489c22 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -107,8 +107,18 @@ class DnUnmanagedMethod: return f"{module}.{method}" +def validate_has_dotnet(pe: dnfile.dnPE): + assert pe.net is not None + assert pe.net.mdtables is not None + assert pe.net.Flags is not None + + def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Any: """map generic token to string or table row""" + validate_has_dotnet(pe) + assert pe.net is not None + assert pe.net.mdtables is not None + if isinstance(token, StringToken): user_string: Optional[str] = read_dotnet_user_string(pe, token) if user_string is None: @@ -143,6 +153,10 @@ def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) - def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str]: """read user string from #US stream""" + validate_has_dotnet(pe) + assert pe.net is not None + assert pe.net.user_strings is not None + try: user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid) except UnicodeDecodeError as e: @@ -169,6 +183,11 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: TypeName (index into String heap) TypeNamespace (index into String heap) """ + validate_has_dotnet(pe) + assert pe.net is not None + assert pe.net.mdtables is not None + assert pe.net.mdtables.MemberRef is not None + for (rid, row) in enumerate(iter_dotnet_table(pe, "MemberRef")): if not isinstance(row.Class.row, dnfile.mdtable.TypeRefRow): continue @@ -258,6 +277,11 @@ def get_dotnet_properties(pe: dnfile.dnPE) -> Iterator[DnType]: def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]: """get managed methods from MethodDef table""" + validate_has_dotnet(pe) + assert pe.net is not None + assert pe.net.mdtables is not None + assert pe.net.mdtables.MethodDef is not None + if not hasattr(pe.net.mdtables, "MethodDef"): return @@ -307,15 +331,28 @@ def calculate_dotnet_token_value(table: int, rid: int) -> int: def is_dotnet_table_valid(pe: dnfile.dnPE, table_name: str) -> bool: + validate_has_dotnet(pe) + assert pe.net is not None + assert pe.net.mdtables is not None + return bool(getattr(pe.net.mdtables, table_name, None)) def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool: + validate_has_dotnet(pe) + assert pe.net is not None + assert pe.net.Flags is not None + return not bool(pe.net.Flags.CLR_ILONLY) def iter_dotnet_table(pe: dnfile.dnPE, name: str) -> Iterator[Any]: + validate_has_dotnet(pe) + assert pe.net is not None + assert pe.net.mdtables is not None + if not is_dotnet_table_valid(pe, name): return + for row in getattr(pe.net.mdtables, name): yield row diff --git a/capa/features/extractors/dnfile_.py b/capa/features/extractors/dnfile_.py index 998ea209..cf82bbce 100644 --- a/capa/features/extractors/dnfile_.py +++ b/capa/features/extractors/dnfile_.py @@ -19,9 +19,19 @@ def extract_file_os(**kwargs) -> Iterator[Tuple[Feature, Address]]: yield OS(OS_ANY), NO_ADDRESS -def extract_file_arch(pe, **kwargs) -> Iterator[Tuple[Feature, Address]]: +def validate_has_dotnet(pe: dnfile.dnPE): + assert pe.net is not None + assert pe.net.mdtables is not None + assert pe.net.Flags is not None + + +def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Feature, Address]]: # to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020 # .NET 4.5 added option: any CPU, 32-bit preferred + validate_has_dotnet(pe) + assert pe.net is not None + assert pe.net.Flags is not None + if pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE: yield Arch(ARCH_I386), NO_ADDRESS elif not pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE_PLUS: @@ -71,6 +81,10 @@ class DnfileFeatureExtractor(FeatureExtractor): # self.pe.net.Flags.CLT_NATIVE_ENTRYPOINT # True: native EP: Token # False: managed EP: RVA + validate_has_dotnet(self.pe) + assert self.pe.net is not None + assert self.pe.net.struct is not None + return self.pe.net.struct.EntryPointTokenOrRva def extract_global_features(self): @@ -83,13 +97,32 @@ class DnfileFeatureExtractor(FeatureExtractor): return bool(self.pe.net) def is_mixed_mode(self) -> bool: + validate_has_dotnet(self.pe) + assert self.pe is not None + assert self.pe.net is not None + assert self.pe.net.Flags is not None + return not bool(self.pe.net.Flags.CLR_ILONLY) def get_runtime_version(self) -> Tuple[int, int]: + validate_has_dotnet(self.pe) + assert self.pe is not None + assert self.pe.net is not None + assert self.pe.net.struct is not None + return self.pe.net.struct.MajorRuntimeVersion, self.pe.net.struct.MinorRuntimeVersion def get_meta_version_string(self) -> str: - return self.pe.net.metadata.struct.Version.rstrip(b"\x00").decode("utf-8") + validate_has_dotnet(self.pe) + assert self.pe.net is not None + assert self.pe.net.metadata is not None + assert self.pe.net.metadata.struct is not None + assert self.pe.net.metadata.struct.Version is not None + + vbuf = self.pe.net.metadata.struct.Version + assert isinstance(vbuf, bytes) + + return vbuf.rstrip(b"\x00").decode("utf-8") def get_functions(self): raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index ef6f9f07..e7bb67fc 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -40,6 +40,12 @@ def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]: yield Format(FORMAT_DOTNET), NO_ADDRESS +def validate_has_dotnet(pe: dnfile.dnPE): + assert pe.net is not None + assert pe.net.mdtables is not None + assert pe.net.Flags is not None + + def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, Address]]: for method in get_dotnet_managed_imports(pe): # like System.IO.File::OpenRead @@ -78,6 +84,12 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]: """emit class features from TypeRef and TypeDef tables""" + validate_has_dotnet(pe) + assert pe.net is not None + assert pe.net.mdtables is not None + assert pe.net.mdtables.TypeDef is not None + assert pe.net.mdtables.TypeRef is not None + for (rid, row) in enumerate(iter_dotnet_table(pe, "TypeDef")): token = calculate_dotnet_token_value(pe.net.mdtables.TypeDef.number, rid + 1) yield Class(DnType.format_name(row.TypeName, namespace=row.TypeNamespace)), DNTokenAddress(token) @@ -94,6 +106,10 @@ def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]: def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address]]: # to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020 # .NET 4.5 added option: any CPU, 32-bit preferred + validate_has_dotnet(pe) + assert pe.net is not None + assert pe.net.Flags is not None + if pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE: yield Arch(ARCH_I386), NO_ADDRESS elif not pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE_PLUS: @@ -155,6 +171,10 @@ class DotnetFileFeatureExtractor(FeatureExtractor): # self.pe.net.Flags.CLT_NATIVE_ENTRYPOINT # True: native EP: Token # False: managed EP: RVA + validate_has_dotnet(self.pe) + assert self.pe.net is not None + assert self.pe.net.struct is not None + return self.pe.net.struct.EntryPointTokenOrRva def extract_global_features(self): @@ -170,10 +190,25 @@ class DotnetFileFeatureExtractor(FeatureExtractor): return is_dotnet_mixed_mode(self.pe) def get_runtime_version(self) -> Tuple[int, int]: + validate_has_dotnet(self.pe) + assert self.pe.net is not None + assert self.pe.net.struct is not None + assert self.pe.net.struct.MajorRuntimeVersion is not None + assert self.pe.net.struct.MinorRuntimeVersion is not None + return self.pe.net.struct.MajorRuntimeVersion, self.pe.net.struct.MinorRuntimeVersion def get_meta_version_string(self) -> str: - return self.pe.net.metadata.struct.Version.rstrip(b"\x00").decode("utf-8") + validate_has_dotnet(self.pe) + assert self.pe.net is not None + assert self.pe.net.metadata is not None + assert self.pe.net.metadata.struct is not None + assert self.pe.net.metadata.struct.Version is not None + + vbuf = self.pe.net.metadata.struct.Version + assert isinstance(vbuf, bytes) + + return vbuf.rstrip(b"\x00").decode("utf-8") def get_functions(self): raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features") diff --git a/capa/features/extractors/null.py b/capa/features/extractors/null.py index d5cf72ab..f8d6d077 100644 --- a/capa/features/extractors/null.py +++ b/capa/features/extractors/null.py @@ -52,26 +52,26 @@ class NullFeatureExtractor(FeatureExtractor): yield FunctionHandle(address, None) def extract_function_features(self, f): - for address, feature in self.functions.get(f.address, {}).features: + for address, feature in self.functions[f.address].features: yield feature, address def get_basic_blocks(self, f): - for address in sorted(self.functions.get(f.address, {}).basic_blocks.keys()): + for address in sorted(self.functions[f.address].basic_blocks.keys()): yield BBHandle(address, None) def extract_basic_block_features(self, f, bb): - for address, feature in self.functions.get(f.address, {}).basic_blocks.get(bb.address, {}).features: + for address, feature in self.functions[f.address].basic_blocks[bb.address].features: yield feature, address def get_instructions(self, f, bb): - for address in sorted(self.functions.get(f.address, {}).basic_blocks.get(bb.address, {}).instructions.keys()): + for address in sorted(self.functions[f.address].basic_blocks[bb.address].instructions.keys()): yield InsnHandle(address, None) def extract_insn_features(self, f, bb, insn): for address, feature in ( - self.functions.get(f.address, {}) - .basic_blocks.get(bb.address, {}) - .instructions.get(insn.address, {}) + self.functions[f.address] + .basic_blocks[bb.address] + .instructions[insn.address] .features ): yield feature, address diff --git a/capa/features/extractors/pefile.py b/capa/features/extractors/pefile.py index dbdf72ac..038200b8 100644 --- a/capa/features/extractors/pefile.py +++ b/capa/features/extractors/pefile.py @@ -133,7 +133,8 @@ def extract_file_features(pe, buf): """ for file_handler in FILE_HANDLERS: - for feature, va in file_handler(pe=pe, buf=buf): + # file_handler: type: (pe, bytes) -> Iterable[Tuple[Feature, Address]] + for feature, va in file_handler(pe=pe, buf=buf): # type: ignore yield feature, va @@ -160,7 +161,8 @@ def extract_global_features(pe, buf): Tuple[Feature, VA]: a feature and its location. """ for handler in GLOBAL_HANDLERS: - for feature, va in handler(pe=pe, buf=buf): + # file_handler: type: (pe, bytes) -> Iterable[Tuple[Feature, Address]] + for feature, va in handler(pe=pe, buf=buf): # type: ignore yield feature, va diff --git a/capa/features/extractors/smda/file.py b/capa/features/extractors/smda/file.py index f4bae925..fa2692ce 100644 --- a/capa/features/extractors/smda/file.py +++ b/capa/features/extractors/smda/file.py @@ -88,7 +88,8 @@ def extract_features(smda_report, buf): """ for file_handler in FILE_HANDLERS: - for feature, addr in file_handler(smda_report=smda_report, buf=buf): + # file_handler: type: (smda_report, bytes) -> Iterable[Tuple[Feature, Address]] + for feature, addr in file_handler(smda_report=smda_report, buf=buf): # type: ignore yield feature, addr diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index 9d101429..e6deecb7 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -11,7 +11,7 @@ import copy import logging import itertools import collections -from typing import Set, Dict, Optional +from typing import Set, Dict, Optional, List, Any import idaapi import ida_kernwin @@ -72,14 +72,14 @@ def trim_function_name(f, max_length=25): def find_func_features(fh: FunctionHandle, extractor): """ """ - func_features: Dict[Feature, Set] = collections.defaultdict(set) - bb_features: Dict[Address, Dict] = collections.defaultdict(dict) + func_features: Dict[Feature, Set[Address]] = collections.defaultdict(set) + bb_features: Dict[Address, Dict[Feature, Set[Address]]] = collections.defaultdict(dict) for (feature, addr) in extractor.extract_function_features(fh): func_features[feature].add(addr) for bbh in extractor.get_basic_blocks(fh): - _bb_features = collections.defaultdict(set) + _bb_features: Dict[Feature, Set[Address]] = collections.defaultdict(set) for (feature, addr) in extractor.extract_basic_block_features(fh, bbh): _bb_features[feature].add(addr) @@ -239,53 +239,52 @@ class CapaSettingsInputDialog(QtWidgets.QDialog): class CapaExplorerForm(idaapi.PluginForm): """form element for plugin interface""" - def __init__(self, name, option=Options.DEFAULT): + def __init__(self, name: str, option=Options.DEFAULT): """initialize form elements""" super().__init__() - self.form_title = name - self.process_total = 0 - self.process_count = 0 + self.form_title: str = name + self.process_total: int = 0 + self.process_count: int = 0 - self.parent = None - self.ida_hooks = None + self.parent: Any # QtWidget + self.ida_hooks: CapaExplorerIdaHooks self.doc: Optional[capa.render.result_document.ResultDocument] = None - self.rule_paths = None - self.rules_cache = None - self.ruleset_cache = None + self.rule_paths: Optional[List[str]] + self.rules_cache: Optional[List[capa.rules.Rule]] + self.ruleset_cache: Optional[capa.rules.RuleSet] # models - self.model_data = None - self.range_model_proxy = None - self.search_model_proxy = None + self.model_data: CapaExplorerDataModel + self.range_model_proxy: CapaExplorerRangeProxyModel + self.search_model_proxy: CapaExplorerSearchProxyModel # UI controls - self.view_limit_results_by_function = None - self.view_show_results_by_function = None - self.view_search_bar = None - self.view_tree = None - self.view_rulegen = None - self.view_tabs = None + self.view_limit_results_by_function: QtWidgets.QCheckBox + self.view_show_results_by_function: QtWidgets.QCheckBox + self.view_search_bar: QtWidgets.QLineEdit + self.view_tree: CapaExplorerQtreeView + self.view_tabs: QtWidgets.QTabWidget self.view_tab_rulegen = None - self.view_status_label = None - self.view_buttons = None - self.view_analyze_button = None - self.view_reset_button = None - self.view_settings_button = None - self.view_save_button = None + self.view_status_label: QtWidgets.QLabel + self.view_buttons: QtWidgets.QHBoxLayout + self.view_analyze_button: QtWidgets.QPushButton + self.view_reset_button: QtWidgets.QPushButton + self.view_settings_button: QtWidgets.QPushButton + self.view_save_button: QtWidgets.QPushButton - self.view_rulegen_preview = None - self.view_rulegen_features = None - self.view_rulegen_editor = None - self.view_rulegen_header_label = None - self.view_rulegen_search = None - self.view_rulegen_limit_features_by_ea = None - self.rulegen_current_function = None - self.rulegen_bb_features_cache = {} - self.rulegen_func_features_cache = {} - self.rulegen_file_features_cache = {} - self.view_rulegen_status_label = None + self.view_rulegen_preview: CapaExplorerRulegenPreview + self.view_rulegen_features: CapaExplorerRulegenFeatures + self.view_rulegen_editor: CapaExplorerRulegenEditor + self.view_rulegen_header_label: QtWidgets.QLabel + self.view_rulegen_search: QtWidgets.QLineEdit + self.view_rulegen_limit_features_by_ea: QtWidgets.QCheckBox + self.rulegen_current_function: Optional[FunctionHandle] + self.rulegen_bb_features_cache: Dict[Address, Dict[Feature, Set[Address]]] = {} + self.rulegen_func_features_cache: Dict[Feature, Set[Address]] = {} + self.rulegen_file_features_cache: Dict[Feature, Set[Address]] = {} + self.view_rulegen_status_label: QtWidgets.QLabel self.Show() @@ -762,6 +761,9 @@ class CapaExplorerForm(idaapi.PluginForm): if not self.load_capa_rules(): return False + assert self.rules_cache is not None + assert self.ruleset_cache is not None + if ida_kernwin.user_cancelled(): logger.info("User cancelled analysis.") return False @@ -822,6 +824,13 @@ class CapaExplorerForm(idaapi.PluginForm): return False try: + # either the results are cached and the doc already exists, + # or the doc was just created above + assert self.doc is not None + # same with rules cache, either it's cached or it was just loaded + assert self.rules_cache is not None + assert self.ruleset_cache is not None + self.model_data.render_capa_doc(self.doc, self.view_show_results_by_function.isChecked()) self.set_view_status_label( "capa rules directory: %s (%d rules)" % (settings.user[CAPA_SETTINGS_RULE_PATH], len(self.rules_cache)) @@ -871,6 +880,9 @@ class CapaExplorerForm(idaapi.PluginForm): else: logger.info('Using cached ruleset, click "Reset" to reload rules from disk.') + assert self.rules_cache is not None + assert self.ruleset_cache is not None + if ida_kernwin.user_cancelled(): logger.info("User cancelled analysis.") return False @@ -891,7 +903,8 @@ class CapaExplorerForm(idaapi.PluginForm): try: f = idaapi.get_func(idaapi.get_screen_ea()) if f: - fh: FunctionHandle = extractor.get_function(f.start_ea) + fh: Optional[FunctionHandle] = extractor.get_function(f.start_ea) + assert fh is not None self.rulegen_current_function = fh func_features, bb_features = find_func_features(fh, extractor) @@ -1053,6 +1066,8 @@ class CapaExplorerForm(idaapi.PluginForm): def update_rule_status(self, rule_text): """ """ + assert self.rules_cache is not None + if not self.view_rulegen_editor.invisibleRootItem().childCount(): self.set_rulegen_preview_border_neutral() self.view_rulegen_status_label.clear() @@ -1077,7 +1092,7 @@ class CapaExplorerForm(idaapi.PluginForm): rules.append(rule) try: - file_features = copy.copy(self.rulegen_file_features_cache) + file_features = copy.copy(dict(self.rulegen_file_features_cache)) if self.rulegen_current_function: func_matches, bb_matches = find_func_matches( self.rulegen_current_function, @@ -1093,7 +1108,7 @@ class CapaExplorerForm(idaapi.PluginForm): _, file_matches = capa.engine.match( capa.rules.RuleSet(list(capa.rules.get_rules_and_dependencies(rules, rule.name))).file_rules, file_features, - 0x0, + NO_ADDRESS ) except Exception as e: self.set_rulegen_status("Failed to match rule (%s)" % e) diff --git a/capa/ida/plugin/item.py b/capa/ida/plugin/item.py index 159333a4..ac349424 100644 --- a/capa/ida/plugin/item.py +++ b/capa/ida/plugin/item.py @@ -36,7 +36,7 @@ def ea_to_hex(ea): class CapaExplorerDataItem: """store data for CapaExplorerDataModel""" - def __init__(self, parent: "CapaExplorerDataItem", data: List[str], can_check=True): + def __init__(self, parent: Optional["CapaExplorerDataItem"], data: List[str], can_check=True): """initialize item""" self.pred = parent self._data = data @@ -110,7 +110,7 @@ class CapaExplorerDataItem: except IndexError: return None - def parent(self) -> "CapaExplorerDataItem": + def parent(self) -> Optional["CapaExplorerDataItem"]: """get parent""" return self.pred diff --git a/capa/ida/plugin/proxy.py b/capa/ida/plugin/proxy.py index ae490d87..e67147bd 100644 --- a/capa/ida/plugin/proxy.py +++ b/capa/ida/plugin/proxy.py @@ -92,7 +92,7 @@ class CapaExplorerRangeProxyModel(QtCore.QSortFilterProxyModel): @param parent: QModelIndex of parent """ # filter not set - if self.min_ea is None and self.max_ea is None: + if self.min_ea is None or self.max_ea is None: return True index = self.sourceModel().index(row, 0, parent) diff --git a/capa/ida/plugin/view.py b/capa/ida/plugin/view.py index 86505fb1..75abf59c 100644 --- a/capa/ida/plugin/view.py +++ b/capa/ida/plugin/view.py @@ -18,7 +18,7 @@ import capa.ida.helpers import capa.features.common import capa.features.basicblock from capa.ida.plugin.item import CapaExplorerFunctionItem -from capa.features.address import Address, _NoAddress +from capa.features.address import _NoAddress, AbsoluteVirtualAddress from capa.ida.plugin.model import CapaExplorerDataModel MAX_SECTION_SIZE = 750 @@ -1013,8 +1013,10 @@ class CapaExplorerRulegenFeatures(QtWidgets.QTreeWidget): self.parent_items = {} def format_address(e): - assert isinstance(e, Address) - return "%X" % e if not isinstance(e, _NoAddress) else "" + if isinstance(e, AbsoluteVirtualAddress): + return "%X" % int(e) + else: + return "" def format_feature(feature): """ """ diff --git a/capa/main.py b/capa/main.py index 1157a474..c973b61d 100644 --- a/capa/main.py +++ b/capa/main.py @@ -66,7 +66,7 @@ from capa.features.common import ( FORMAT_DOTNET, FORMAT_FREEZE, ) -from capa.features.address import NO_ADDRESS +from capa.features.address import NO_ADDRESS, Address from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor RULES_PATH_DEFAULT_STRING = "(embedded rules)" @@ -718,8 +718,8 @@ def compute_layout(rules, extractor, capabilities): otherwise, we may pollute the json document with a large amount of un-referenced data. """ - functions_by_bb = {} - bbs_by_function = {} + functions_by_bb: Dict[Address, Address] = {} + bbs_by_function: Dict[Address, List[Address]] = {} for f in extractor.get_functions(): bbs_by_function[f.address] = [] for bb in extractor.get_basic_blocks(f): @@ -1016,8 +1016,7 @@ def main(argv=None): return E_INVALID_FILE_TYPE try: - rules = get_rules(args.rules, disable_progress=args.quiet) - rules = capa.rules.RuleSet(rules) + rules = capa.rules.RuleSet(get_rules(args.rules, disable_progress=args.quiet)) logger.debug( "successfully loaded %s rules", @@ -1167,8 +1166,7 @@ def ida_main(): rules_path = os.path.join(get_default_root(), "rules") logger.debug("rule path: %s", rules_path) - rules = get_rules([rules_path]) - rules = capa.rules.RuleSet(rules) + rules = capa.rules.RuleSet(get_rules([rules_path])) meta = capa.ida.helpers.collect_metadata([rules_path]) diff --git a/capa/perf.py b/capa/perf.py index cb0e89ec..1d98f6c2 100644 --- a/capa/perf.py +++ b/capa/perf.py @@ -2,7 +2,7 @@ import collections from typing import Dict # this structure is unstable and may change before the next major release. -counters: Dict[str, int] = collections.Counter() +counters: collections.Counter[str] = collections.Counter() def reset(): diff --git a/capa/rules.py b/capa/rules.py index 8287eba0..c4d2ad77 100644 --- a/capa/rules.py +++ b/capa/rules.py @@ -634,7 +634,7 @@ class Rule: Returns: List[str]: names of rules upon which this rule depends. """ - deps = set([]) + deps: Set[str] = set([]) def rec(statement): if isinstance(statement, capa.features.common.MatchedRule): @@ -651,6 +651,7 @@ class Rule: deps.update(map(lambda r: r.name, namespaces[statement.value])) else: # not a namespace, assume its a rule name. + assert isinstance(statement.value, str) deps.add(statement.value) elif isinstance(statement, ceng.Statement): @@ -666,7 +667,11 @@ class Rule: def _extract_subscope_rules_rec(self, statement): if isinstance(statement, ceng.Statement): # for each child that is a subscope, - for subscope in filter(lambda statement: isinstance(statement, ceng.Subscope), statement.get_children()): + for child in statement.get_children(): + if not isinstance(child, ceng.Subscope): + continue + + subscope = child # create a new rule from it. # the name is a randomly generated, hopefully unique value. @@ -737,7 +742,7 @@ class Rule: return self.statement.evaluate(features, short_circuit=short_circuit) @classmethod - def from_dict(cls, d, definition): + def from_dict(cls, d, definition) -> "Rule": meta = d["rule"]["meta"] name = meta["name"] # if scope is not specified, default to function scope. @@ -771,14 +776,12 @@ class Rule: # prefer to use CLoader to be fast, see #306 # on Linux, make sure you install libyaml-dev or similar # on Windows, get WHLs from pyyaml.org/pypi - loader = yaml.CLoader logger.debug("using libyaml CLoader.") + return yaml.CLoader except: - loader = yaml.Loader logger.debug("unable to import libyaml CLoader, falling back to Python yaml parser.") logger.debug("this will be slower to load rules.") - - return loader + return yaml.Loader @staticmethod def _get_ruamel_yaml_parser(): @@ -790,8 +793,9 @@ class Rule: # use block mode, not inline json-like mode y.default_flow_style = False - # leave quotes unchanged - y.preserve_quotes = True + # leave quotes unchanged. + # manually verified this property exists, even if mypy complains. + y.preserve_quotes = True # type: ignore # indent lists by two spaces below their parent # @@ -802,12 +806,13 @@ class Rule: y.indent(sequence=2, offset=2) # avoid word wrapping - y.width = 4096 + # manually verified this property exists, even if mypy complains. + y.width = 4096 # type: ignore return y @classmethod - def from_yaml(cls, s, use_ruamel=False): + def from_yaml(cls, s, use_ruamel=False) -> "Rule": if use_ruamel: # ruamel enables nice formatting and doc roundtripping with comments doc = cls._get_ruamel_yaml_parser().load(s) @@ -817,7 +822,7 @@ class Rule: return cls.from_dict(doc, s) @classmethod - def from_yaml_file(cls, path, use_ruamel=False): + def from_yaml_file(cls, path, use_ruamel=False) -> "Rule": with open(path, "rb") as f: try: rule = cls.from_yaml(f.read().decode("utf-8"), use_ruamel=use_ruamel) @@ -832,7 +837,7 @@ class Rule: except pydantic.ValidationError as e: raise InvalidRuleWithPath(path, str(e)) from e - def to_yaml(self): + def to_yaml(self) -> str: # reformat the yaml document with a common style. # this includes: # - ordering the meta elements @@ -1261,7 +1266,7 @@ class RuleSet: return (easy_rules_by_feature, hard_rules) @staticmethod - def _get_rules_for_scope(rules, scope): + def _get_rules_for_scope(rules, scope) -> List[Rule]: """ given a collection of rules, collect the rules that are needed at the given scope. these rules are ordered topologically. @@ -1269,7 +1274,7 @@ class RuleSet: don't include auto-generated "subscope" rules. we want to include general "lib" rules here - even if they are not dependencies of other rules, see #398 """ - scope_rules = set([]) + scope_rules: Set[Rule] = set([]) # we need to process all rules, not just rules with the given scope. # this is because rules with a higher scope, e.g. file scope, may have subscope rules @@ -1283,7 +1288,7 @@ class RuleSet: return get_rules_with_scope(topologically_order_rules(list(scope_rules)), scope) @staticmethod - def _extract_subscope_rules(rules): + def _extract_subscope_rules(rules) -> List[Rule]: """ process the given sequence of rules. for each one, extract any embedded subscope rules into their own rule. diff --git a/rules b/rules index 2bc58afb..5ba70c97 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 2bc58afb5184a914ae13152df4ef09eb18ee3e79 +Subproject commit 5ba70c97d22dd59efcf29a128557e64213f7ace8 diff --git a/scripts/bulk-process.py b/scripts/bulk-process.py index 8ec23903..b57928c6 100644 --- a/scripts/bulk-process.py +++ b/scripts/bulk-process.py @@ -152,8 +152,7 @@ def main(argv=None): capa.main.handle_common_args(args) try: - rules = capa.main.get_rules(args.rules) - rules = capa.rules.RuleSet(rules) + rules = capa.rules.RuleSet(capa.main.get_rules(args.rules)) logger.info("successfully loaded %s rules", len(rules)) except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e: logger.error("%s", str(e)) diff --git a/scripts/capa2yara.py b/scripts/capa2yara.py index 06a1d031..9474347b 100644 --- a/scripts/capa2yara.py +++ b/scripts/capa2yara.py @@ -64,7 +64,6 @@ unsupported = ["characteristic", "mnemonic", "offset", "subscope", "Range"] # collect all converted rules to be able to check if we have needed sub rules for match: converted_rules = [] -count_incomplete = 0 default_tags = "CAPA " @@ -537,7 +536,8 @@ def output_unsupported_capa_rules(yaml, capa_rulename, url, reason): unsupported_capa_rules_names.write(url.encode("utf-8") + b"\n") -def convert_rules(rules, namespaces, cround): +def convert_rules(rules, namespaces, cround, make_priv): + count_incomplete = 0 for rule in rules.rules.values(): rule_name = convert_rule_name(rule.name) @@ -652,7 +652,6 @@ def convert_rules(rules, namespaces, cround): if meta_name and meta_value: yara_meta += "\t" + meta_name + ' = "' + meta_value + '"\n' - rule_name_bonus = "" if rule_comment: yara_meta += '\tcomment = "' + rule_comment + '"\n' yara_meta += '\tdate = "' + today + '"\n' @@ -679,12 +678,13 @@ def convert_rules(rules, namespaces, cround): # TODO: now the rule is finished and could be automatically checked with the capa-testfile(s) named in meta (doing it for all of them using yara-ci upload at the moment) output_yar(yara) converted_rules.append(rule_name) - global count_incomplete count_incomplete += incomplete else: output_unsupported_capa_rules(rule.to_yaml(), rule.name, url, yara_condition) pass + return count_incomplete + def main(argv=None): if argv is None: @@ -696,7 +696,6 @@ def main(argv=None): capa.main.install_common_args(parser, wanted={"tag"}) args = parser.parse_args(args=argv) - global make_priv make_priv = args.private if args.verbose: @@ -710,9 +709,9 @@ def main(argv=None): logging.getLogger("capa2yara").setLevel(level) try: - rules = capa.main.get_rules([args.rules], disable_progress=True) - namespaces = capa.rules.index_rules_by_namespace(list(rules)) - rules = capa.rules.RuleSet(rules) + rules_ = capa.main.get_rules([args.rules], disable_progress=True) + namespaces = capa.rules.index_rules_by_namespace(rules_) + rules = capa.rules.RuleSet(rules_) logger.info("successfully loaded %s rules (including subscope rules which will be ignored)", len(rules)) if args.tag: rules = rules.filter_rules_by_meta(args.tag) @@ -745,14 +744,15 @@ def main(argv=None): # do several rounds of converting rules because some rules for match: might not be converted in the 1st run num_rules = 9999999 cround = 0 + count_incomplete = 0 while num_rules != len(converted_rules) or cround < min_rounds: cround += 1 logger.info("doing convert_rules(), round: " + str(cround)) num_rules = len(converted_rules) - convert_rules(rules, namespaces, cround) + count_incomplete += convert_rules(rules, namespaces, cround, make_priv) # one last round to collect all unconverted rules - convert_rules(rules, namespaces, 9000) + count_incomplete += convert_rules(rules, namespaces, 9000, make_priv) stats = "\n// converted rules : " + str(len(converted_rules)) stats += "\n// among those are incomplete : " + str(count_incomplete) diff --git a/scripts/capa_as_library.py b/scripts/capa_as_library.py index 682d4dc6..2db6a644 100644 --- a/scripts/capa_as_library.py +++ b/scripts/capa_as_library.py @@ -172,7 +172,7 @@ def capa_details(rules_path, file_path, output_format="dictionary"): meta["analysis"].update(counts) meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities) - capa_output = False + capa_output: Any = False if output_format == "dictionary": # ...as python dictionary, simplified as textable but in dictionary doc = rd.ResultDocument.from_capa(meta, rules, capabilities) diff --git a/scripts/detect-elf-os.py b/scripts/detect-elf-os.py index 63186ed8..078b80dd 100644 --- a/scripts/detect-elf-os.py +++ b/scripts/detect-elf-os.py @@ -28,7 +28,7 @@ def main(argv=None): if capa.helpers.is_runtime_ida(): from capa.ida.helpers import IDAIO - f: BinaryIO = IDAIO() + f: BinaryIO = IDAIO() # type: ignore else: if argv is None: diff --git a/scripts/lint.py b/scripts/lint.py index b3593f80..cd6e32cb 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -902,11 +902,15 @@ def redirecting_print_to_tqdm(): old_print(*args, **kwargs) try: - # Globaly replace print with new_print - inspect.builtins.print = new_print + # Globaly replace print with new_print. + # Verified this works manually on Python 3.11: + # >>> import inspect + # >>> inspect.builtins + # + inspect.builtins.print = new_print # type: ignore yield finally: - inspect.builtins.print = old_print + inspect.builtins.print = old_print # type: ignore def lint(ctx: Context): @@ -998,10 +1002,8 @@ def main(argv=None): time0 = time.time() try: - rules = capa.main.get_rules(args.rules, disable_progress=True) - rule_count = len(rules) - rules = capa.rules.RuleSet(rules) - logger.info("successfully loaded %s rules", rule_count) + rules = capa.rules.RuleSet(capa.main.get_rules(args.rules, disable_progress=True)) + logger.info("successfully loaded %s rules", len(rules)) if args.tag: rules = rules.filter_rules_by_meta(args.tag) logger.debug("selected %s rules", len(rules)) diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py index 0c5ff361..d1773021 100644 --- a/scripts/show-capabilities-by-function.py +++ b/scripts/show-capabilities-by-function.py @@ -141,8 +141,7 @@ def main(argv=None): return -1 try: - rules = capa.main.get_rules(args.rules) - rules = capa.rules.RuleSet(rules) + rules = capa.rules.RuleSet(capa.main.get_rules(args.rules)) logger.info("successfully loaded %s rules", len(rules)) if args.tag: rules = rules.filter_rules_by_meta(args.tag) diff --git a/scripts/show-features.py b/scripts/show-features.py index 00c1eb05..d23a9a0a 100644 --- a/scripts/show-features.py +++ b/scripts/show-features.py @@ -136,7 +136,7 @@ def main(argv=None): for feature, addr in extractor.extract_file_features(): print("file: %s: %s" % (format_address(addr), feature)) - function_handles = extractor.get_functions() + function_handles = tuple(extractor.get_functions()) if args.function: if args.format == "freeze": @@ -173,7 +173,7 @@ def ida_main(): print("file: %s: %s" % (format_address(addr), feature)) return - function_handles = extractor.get_functions() + function_handles = tuple(extractor.get_functions()) if function: function_handles = tuple(filter(lambda fh: fh.inner.start_ea == function, function_handles)) diff --git a/tests/data b/tests/data index 0ffc189e..da6fed53 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 0ffc189eea6113d2dfc6355dacad8fbd78f9675d +Subproject commit da6fed53395be292ffec57a2732f0f6105c03487 diff --git a/tests/test_engine.py b/tests/test_engine.py index 26bb59ce..8fee9b92 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -8,58 +8,63 @@ from capa.engine import * from capa.features import * from capa.features.insn import * +import capa.features.address +ADDR1 = capa.features.address.AbsoluteVirtualAddress(0x401001) +ADDR2 = capa.features.address.AbsoluteVirtualAddress(0x401002) +ADDR3 = capa.features.address.AbsoluteVirtualAddress(0x401003) +ADDR4 = capa.features.address.AbsoluteVirtualAddress(0x401004) def test_number(): - assert Number(1).evaluate({Number(0): {1}}) == False - assert Number(1).evaluate({Number(1): {1}}) == True - assert Number(1).evaluate({Number(2): {1, 2}}) == False + assert Number(1).evaluate({Number(0): {ADDR1}}) == False + assert Number(1).evaluate({Number(1): {ADDR1}}) == True + assert Number(1).evaluate({Number(2): {ADDR1, ADDR2}}) == False def test_and(): - assert And([Number(1)]).evaluate({Number(0): {1}}) == False - assert And([Number(1)]).evaluate({Number(1): {1}}) == True - assert And([Number(1), Number(2)]).evaluate({Number(0): {1}}) == False - assert And([Number(1), Number(2)]).evaluate({Number(1): {1}}) == False - assert And([Number(1), Number(2)]).evaluate({Number(2): {1}}) == False - assert And([Number(1), Number(2)]).evaluate({Number(1): {1}, Number(2): {2}}) == True + assert And([Number(1)]).evaluate({Number(0): {ADDR1}}) == False + assert And([Number(1)]).evaluate({Number(1): {ADDR1}}) == True + assert And([Number(1), Number(2)]).evaluate({Number(0): {ADDR1}}) == False + assert And([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}) == False + assert And([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}}) == False + assert And([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}, Number(2): {ADDR2}}) == True def test_or(): - assert Or([Number(1)]).evaluate({Number(0): {1}}) == False - assert Or([Number(1)]).evaluate({Number(1): {1}}) == True - assert Or([Number(1), Number(2)]).evaluate({Number(0): {1}}) == False - assert Or([Number(1), Number(2)]).evaluate({Number(1): {1}}) == True - assert Or([Number(1), Number(2)]).evaluate({Number(2): {1}}) == True - assert Or([Number(1), Number(2)]).evaluate({Number(1): {1}, Number(2): {2}}) == True + assert Or([Number(1)]).evaluate({Number(0): {ADDR1}}) == False + assert Or([Number(1)]).evaluate({Number(1): {ADDR1}}) == True + assert Or([Number(1), Number(2)]).evaluate({Number(0): {ADDR1}}) == False + assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}) == True + assert Or([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}}) == True + assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}, Number(2): {ADDR2}}) == True def test_not(): - assert Not(Number(1)).evaluate({Number(0): {1}}) == True - assert Not(Number(1)).evaluate({Number(1): {1}}) == False + assert Not(Number(1)).evaluate({Number(0): {ADDR1}}) == True + assert Not(Number(1)).evaluate({Number(1): {ADDR1}}) == False def test_some(): - assert Some(0, [Number(1)]).evaluate({Number(0): {1}}) == True - assert Some(1, [Number(1)]).evaluate({Number(0): {1}}) == False + assert Some(0, [Number(1)]).evaluate({Number(0): {ADDR1}}) == True + assert Some(1, [Number(1)]).evaluate({Number(0): {ADDR1}}) == False - assert Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {1}}) == False - assert Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {1}, Number(1): {1}}) == False - assert Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {1}, Number(1): {1}, Number(2): {1}}) == True + assert Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}}) == False + assert Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}}) == False + assert Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}}) == True assert ( Some(2, [Number(1), Number(2), Number(3)]).evaluate( - {Number(0): {1}, Number(1): {1}, Number(2): {1}, Number(3): {1}} + {Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}} ) == True ) assert ( Some(2, [Number(1), Number(2), Number(3)]).evaluate( { - Number(0): {1}, - Number(1): {1}, - Number(2): {1}, - Number(3): {1}, - Number(4): {1}, + Number(0): {ADDR1}, + Number(1): {ADDR1}, + Number(2): {ADDR1}, + Number(3): {ADDR1}, + Number(4): {ADDR1}, } ) == True @@ -69,10 +74,10 @@ def test_some(): def test_complex(): assert True == Or( [And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5), Number(6)])])] - ).evaluate({Number(5): {1}, Number(6): {1}, Number(7): {1}, Number(8): {1}}) + ).evaluate({Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}) assert False == Or([And([Number(1), Number(2)]), Or([Number(3), Some(2, [Number(4), Number(5)])])]).evaluate( - {Number(5): {1}, Number(6): {1}, Number(7): {1}, Number(8): {1}} + {Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}} ) @@ -83,54 +88,54 @@ def test_range(): # unbounded range with matching feature should always match assert Range(Number(1)).evaluate({Number(1): {}}) == True - assert Range(Number(1)).evaluate({Number(1): {0}}) == True + assert Range(Number(1)).evaluate({Number(1): {ADDR1}}) == True # unbounded max - assert Range(Number(1), min=1).evaluate({Number(1): {0}}) == True - assert Range(Number(1), min=2).evaluate({Number(1): {0}}) == False - assert Range(Number(1), min=2).evaluate({Number(1): {0, 1}}) == True + assert Range(Number(1), min=1).evaluate({Number(1): {ADDR1}}) == True + assert Range(Number(1), min=2).evaluate({Number(1): {ADDR1}}) == False + assert Range(Number(1), min=2).evaluate({Number(1): {ADDR1, ADDR2}}) == True # unbounded min - assert Range(Number(1), max=0).evaluate({Number(1): {0}}) == False - assert Range(Number(1), max=1).evaluate({Number(1): {0}}) == True - assert Range(Number(1), max=2).evaluate({Number(1): {0}}) == True - assert Range(Number(1), max=2).evaluate({Number(1): {0, 1}}) == True - assert Range(Number(1), max=2).evaluate({Number(1): {0, 1, 3}}) == False + assert Range(Number(1), max=0).evaluate({Number(1): {ADDR1}}) == False + assert Range(Number(1), max=1).evaluate({Number(1): {ADDR1}}) == True + assert Range(Number(1), max=2).evaluate({Number(1): {ADDR1}}) == True + assert Range(Number(1), max=2).evaluate({Number(1): {ADDR1, ADDR2}}) == True + assert Range(Number(1), max=2).evaluate({Number(1): {ADDR1, ADDR2, ADDR3}}) == False # we can do an exact match by setting min==max assert Range(Number(1), min=1, max=1).evaluate({Number(1): {}}) == False - assert Range(Number(1), min=1, max=1).evaluate({Number(1): {1}}) == True - assert Range(Number(1), min=1, max=1).evaluate({Number(1): {1, 2}}) == False + assert Range(Number(1), min=1, max=1).evaluate({Number(1): {ADDR1}}) == True + assert Range(Number(1), min=1, max=1).evaluate({Number(1): {ADDR1, ADDR2}}) == False # bounded range assert Range(Number(1), min=1, max=3).evaluate({Number(1): {}}) == False - assert Range(Number(1), min=1, max=3).evaluate({Number(1): {1}}) == True - assert Range(Number(1), min=1, max=3).evaluate({Number(1): {1, 2}}) == True - assert Range(Number(1), min=1, max=3).evaluate({Number(1): {1, 2, 3}}) == True - assert Range(Number(1), min=1, max=3).evaluate({Number(1): {1, 2, 3, 4}}) == False + assert Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1}}) == True + assert Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2}}) == True + assert Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2, ADDR3}}) == True + assert Range(Number(1), min=1, max=3).evaluate({Number(1): {ADDR1, ADDR2, ADDR3, ADDR4}}) == False def test_short_circuit(): - assert Or([Number(1), Number(2)]).evaluate({Number(1): {1}}) == True + assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}) == True # with short circuiting, only the children up until the first satisfied child are captured. - assert len(Or([Number(1), Number(2)]).evaluate({Number(1): {1}}, short_circuit=True).children) == 1 - assert len(Or([Number(1), Number(2)]).evaluate({Number(1): {1}}, short_circuit=False).children) == 2 + assert len(Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}, short_circuit=True).children) == 1 + assert len(Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}, short_circuit=False).children) == 2 def test_eval_order(): # base cases. - assert Or([Number(1), Number(2)]).evaluate({Number(1): {1}}) == True - assert Or([Number(1), Number(2)]).evaluate({Number(2): {1}}) == True + assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}) == True + assert Or([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}}) == True # with short circuiting, only the children up until the first satisfied child are captured. - assert len(Or([Number(1), Number(2)]).evaluate({Number(1): {1}}).children) == 1 - assert len(Or([Number(1), Number(2)]).evaluate({Number(2): {1}}).children) == 2 - assert len(Or([Number(1), Number(2)]).evaluate({Number(1): {1}, Number(2): {1}}).children) == 1 + assert len(Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}).children) == 1 + assert len(Or([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}}).children) == 2 + assert len(Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}, Number(2): {ADDR1}}).children) == 1 # and its guaranteed that children are evaluated in order. - assert Or([Number(1), Number(2)]).evaluate({Number(1): {1}}).children[0].statement == Number(1) - assert Or([Number(1), Number(2)]).evaluate({Number(1): {1}}).children[0].statement != Number(2) + assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}).children[0].statement == Number(1) + assert Or([Number(1), Number(2)]).evaluate({Number(1): {ADDR1}}).children[0].statement != Number(2) - assert Or([Number(1), Number(2)]).evaluate({Number(2): {1}}).children[1].statement == Number(2) - assert Or([Number(1), Number(2)]).evaluate({Number(2): {1}}).children[1].statement != Number(1) + assert Or([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}}).children[1].statement == Number(2) + assert Or([Number(1), Number(2)]).evaluate({Number(2): {ADDR1}}).children[1].statement != Number(1) diff --git a/tests/test_fmt.py b/tests/test_fmt.py index de96a1f4..1f37886c 100644 --- a/tests/test_fmt.py +++ b/tests/test_fmt.py @@ -98,7 +98,7 @@ def test_rule_reformat_order(): def test_rule_reformat_meta_update(): # test updating the rule content after parsing - rule = textwrap.dedent( + src = textwrap.dedent( """ rule: meta: @@ -116,7 +116,7 @@ def test_rule_reformat_meta_update(): """ ) - rule = capa.rules.Rule.from_yaml(rule) + rule = capa.rules.Rule.from_yaml(src) rule.name = "test rule" assert rule.to_yaml() == EXPECTED diff --git a/tests/test_match.py b/tests/test_match.py index 6fb319cd..2d8b9f2a 100644 --- a/tests/test_match.py +++ b/tests/test_match.py @@ -218,7 +218,7 @@ def test_match_matched_rules(): # the ordering of the rules must not matter, # the engine should match rules in an appropriate order. features, _ = match( - capa.rules.topologically_order_rules(reversed(rules)), + capa.rules.topologically_order_rules(list(reversed(rules))), {capa.features.insn.Number(100): {1}}, 0x0, ) diff --git a/tests/test_result_document.py b/tests/test_result_document.py index 8074e1cd..b98fadff 100644 --- a/tests/test_result_document.py +++ b/tests/test_result_document.py @@ -19,6 +19,7 @@ def test_optional_node_from_capa(): [], ) ) + assert isinstance(node, rdoc.StatementNode) assert isinstance(node.statement, rdoc.CompoundStatement) assert node.statement.type == rdoc.CompoundStatementType.OPTIONAL @@ -32,6 +33,7 @@ def test_some_node_from_capa(): ], ) ) + assert isinstance(node, rdoc.StatementNode) assert isinstance(node.statement, rdoc.SomeStatement) @@ -41,6 +43,7 @@ def test_range_node_from_capa(): capa.features.insn.Number(0), ) ) + assert isinstance(node, rdoc.StatementNode) assert isinstance(node.statement, rdoc.RangeStatement) @@ -51,6 +54,7 @@ def test_subscope_node_from_capa(): capa.features.insn.Number(0), ) ) + assert isinstance(node, rdoc.StatementNode) assert isinstance(node.statement, rdoc.SubscopeStatement) @@ -62,6 +66,7 @@ def test_and_node_from_capa(): ], ) ) + assert isinstance(node, rdoc.StatementNode) assert isinstance(node.statement, rdoc.CompoundStatement) assert node.statement.type == rdoc.CompoundStatementType.AND @@ -74,6 +79,7 @@ def test_or_node_from_capa(): ], ) ) + assert isinstance(node, rdoc.StatementNode) assert isinstance(node.statement, rdoc.CompoundStatement) assert node.statement.type == rdoc.CompoundStatementType.OR @@ -86,115 +92,138 @@ def test_not_node_from_capa(): ], ) ) + assert isinstance(node, rdoc.StatementNode) assert isinstance(node.statement, rdoc.CompoundStatement) assert node.statement.type == rdoc.CompoundStatementType.NOT def test_os_node_from_capa(): node = rdoc.node_from_capa(capa.features.common.OS("")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.OSFeature) def test_arch_node_from_capa(): node = rdoc.node_from_capa(capa.features.common.Arch("")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.ArchFeature) def test_format_node_from_capa(): node = rdoc.node_from_capa(capa.features.common.Format("")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.FormatFeature) def test_match_node_from_capa(): node = rdoc.node_from_capa(capa.features.common.MatchedRule("")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.MatchFeature) def test_characteristic_node_from_capa(): node = rdoc.node_from_capa(capa.features.common.Characteristic("")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.CharacteristicFeature) def test_substring_node_from_capa(): node = rdoc.node_from_capa(capa.features.common.Substring("")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.SubstringFeature) def test_regex_node_from_capa(): node = rdoc.node_from_capa(capa.features.common.Regex("")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.RegexFeature) def test_class_node_from_capa(): node = rdoc.node_from_capa(capa.features.common.Class("")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.ClassFeature) def test_namespace_node_from_capa(): node = rdoc.node_from_capa(capa.features.common.Namespace("")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.NamespaceFeature) def test_bytes_node_from_capa(): node = rdoc.node_from_capa(capa.features.common.Bytes(b"")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.BytesFeature) def test_export_node_from_capa(): node = rdoc.node_from_capa(capa.features.file.Export("")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.ExportFeature) def test_import_node_from_capa(): node = rdoc.node_from_capa(capa.features.file.Import("")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.ImportFeature) def test_section_node_from_capa(): node = rdoc.node_from_capa(capa.features.file.Section("")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.SectionFeature) def test_function_name_node_from_capa(): node = rdoc.node_from_capa(capa.features.file.FunctionName("")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.FunctionNameFeature) def test_api_node_from_capa(): node = rdoc.node_from_capa(capa.features.insn.API("")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.APIFeature) def test_property_node_from_capa(): node = rdoc.node_from_capa(capa.features.insn.Property("")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.PropertyFeature) def test_number_node_from_capa(): node = rdoc.node_from_capa(capa.features.insn.Number(0)) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.NumberFeature) def test_offset_node_from_capa(): node = rdoc.node_from_capa(capa.features.insn.Offset(0)) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.OffsetFeature) def test_mnemonic_node_from_capa(): node = rdoc.node_from_capa(capa.features.insn.Mnemonic("")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.MnemonicFeature) def test_operand_number_node_from_capa(): node = rdoc.node_from_capa(capa.features.insn.OperandNumber(0, 0)) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.OperandNumberFeature) def test_operand_offset_node_from_capa(): node = rdoc.node_from_capa(capa.features.insn.OperandOffset(0, 0)) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.OperandOffsetFeature) def test_basic_block_node_from_capa(): node = rdoc.node_from_capa(capa.features.basicblock.BasicBlock("")) + assert isinstance(node, rdoc.FeatureNode) assert isinstance(node.feature, frzf.BasicBlockFeature) diff --git a/tests/test_rules.py b/tests/test_rules.py index 61bef111..d5aea406 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -13,8 +13,10 @@ import pytest import capa.rules import capa.engine import capa.features.common +from capa.features.address import AbsoluteVirtualAddress from capa.features.file import FunctionName from capa.features.insn import Number, Offset, Property +from capa.engine import Or from capa.features.common import ( OS, OS_LINUX, @@ -29,12 +31,19 @@ from capa.features.common import ( Substring, FeatureAccess, ) +import capa.features.address + + +ADDR1 = capa.features.address.AbsoluteVirtualAddress(0x401001) +ADDR2 = capa.features.address.AbsoluteVirtualAddress(0x401002) +ADDR3 = capa.features.address.AbsoluteVirtualAddress(0x401003) +ADDR4 = capa.features.address.AbsoluteVirtualAddress(0x401004) def test_rule_ctor(): - r = capa.rules.Rule("test rule", capa.rules.FUNCTION_SCOPE, Number(1), {}) - assert r.evaluate({Number(0): {1}}) == False - assert r.evaluate({Number(1): {1}}) == True + r = capa.rules.Rule("test rule", capa.rules.FUNCTION_SCOPE, Or(Number(1)), {}) + assert r.evaluate({Number(0): {ADDR1}}) == False + assert r.evaluate({Number(1): {ADDR2}}) == True def test_rule_yaml(): @@ -56,10 +65,10 @@ def test_rule_yaml(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(0): {1}}) == False - assert r.evaluate({Number(0): {1}, Number(1): {1}}) == False - assert r.evaluate({Number(0): {1}, Number(1): {1}, Number(2): {1}}) == True - assert r.evaluate({Number(0): {1}, Number(1): {1}, Number(2): {1}, Number(3): {1}}) == True + assert r.evaluate({Number(0): {ADDR1}}) == False + assert r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}}) == False + assert r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}}) == True + assert r.evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}}) == True def test_rule_yaml_complex(): @@ -82,8 +91,8 @@ def test_rule_yaml_complex(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(5): {1}, Number(6): {1}, Number(7): {1}, Number(8): {1}}) == True - assert r.evaluate({Number(6): {1}, Number(7): {1}, Number(8): {1}}) == False + assert r.evaluate({Number(5): {ADDR1}, Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}) == True + assert r.evaluate({Number(6): {ADDR1}, Number(7): {ADDR1}, Number(8): {ADDR1}}) == False def test_rule_descriptions(): @@ -160,8 +169,8 @@ def test_rule_yaml_not(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(1): {1}}) == True - assert r.evaluate({Number(1): {1}, Number(2): {1}}) == False + assert r.evaluate({Number(1): {ADDR1}}) == True + assert r.evaluate({Number(1): {ADDR1}, Number(2): {ADDR1}}) == False def test_rule_yaml_count(): @@ -175,9 +184,9 @@ def test_rule_yaml_count(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(100): {}}) == False - assert r.evaluate({Number(100): {1}}) == True - assert r.evaluate({Number(100): {1, 2}}) == False + assert r.evaluate({Number(100): set()}) == False + assert r.evaluate({Number(100): {ADDR1}}) == True + assert r.evaluate({Number(100): {ADDR1, ADDR2}}) == False def test_rule_yaml_count_range(): @@ -191,10 +200,10 @@ def test_rule_yaml_count_range(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(100): {}}) == False - assert r.evaluate({Number(100): {1}}) == True - assert r.evaluate({Number(100): {1, 2}}) == True - assert r.evaluate({Number(100): {1, 2, 3}}) == False + assert r.evaluate({Number(100): set()}) == False + assert r.evaluate({Number(100): {ADDR1}}) == True + assert r.evaluate({Number(100): {ADDR1, ADDR2}}) == True + assert r.evaluate({Number(100): {ADDR1, ADDR2, ADDR3}}) == False def test_rule_yaml_count_string(): @@ -208,10 +217,10 @@ def test_rule_yaml_count_string(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({String("foo"): {}}) == False - assert r.evaluate({String("foo"): {1}}) == False - assert r.evaluate({String("foo"): {1, 2}}) == True - assert r.evaluate({String("foo"): {1, 2, 3}}) == False + assert r.evaluate({String("foo"): set()}) == False + assert r.evaluate({String("foo"): {ADDR1}}) == False + assert r.evaluate({String("foo"): {ADDR1, ADDR2}}) == True + assert r.evaluate({String("foo"): {ADDR1, ADDR2, ADDR3}}) == False def test_invalid_rule_feature(): @@ -481,11 +490,11 @@ def test_count_number_symbol(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Number(2): {}}) == False - assert r.evaluate({Number(2): {1}}) == True - assert r.evaluate({Number(2): {1, 2}}) == False - assert r.evaluate({Number(0x100, description="symbol name"): {1}}) == False - assert r.evaluate({Number(0x100, description="symbol name"): {1, 2, 3}}) == True + assert r.evaluate({Number(2): set()}) == False + assert r.evaluate({Number(2): {ADDR1}}) == True + assert r.evaluate({Number(2): {ADDR1, ADDR2}}) == False + assert r.evaluate({Number(0x100, description="symbol name"): {ADDR1}}) == False + assert r.evaluate({Number(0x100, description="symbol name"): {ADDR1, ADDR2, ADDR3}}) == True def test_invalid_number(): @@ -567,11 +576,11 @@ def test_count_offset_symbol(): """ ) r = capa.rules.Rule.from_yaml(rule) - assert r.evaluate({Offset(2): {}}) == False - assert r.evaluate({Offset(2): {1}}) == True - assert r.evaluate({Offset(2): {1, 2}}) == False - assert r.evaluate({Offset(0x100, description="symbol name"): {1}}) == False - assert r.evaluate({Offset(0x100, description="symbol name"): {1, 2, 3}}) == True + assert r.evaluate({Offset(2): set()}) == False + assert r.evaluate({Offset(2): {ADDR1}}) == True + assert r.evaluate({Offset(2): {ADDR1, ADDR2}}) == False + assert r.evaluate({Offset(0x100, description="symbol name"): {ADDR1}}) == False + assert r.evaluate({Offset(0x100, description="symbol name"): {ADDR1, ADDR2, ADDR3}}) == True def test_invalid_offset(): @@ -966,10 +975,10 @@ def test_property_access(): """ ) ) - assert r.evaluate({Property("System.IO.FileInfo::Length", access=FeatureAccess.READ): {1}}) == True + assert r.evaluate({Property("System.IO.FileInfo::Length", access=FeatureAccess.READ): {ADDR1}}) == True - assert r.evaluate({Property("System.IO.FileInfo::Length"): {1}}) == False - assert r.evaluate({Property("System.IO.FileInfo::Length", access=FeatureAccess.WRITE): {1}}) == False + assert r.evaluate({Property("System.IO.FileInfo::Length"): {ADDR1}}) == False + assert r.evaluate({Property("System.IO.FileInfo::Length", access=FeatureAccess.WRITE): {ADDR1}}) == False def test_property_access_symbol(): @@ -986,7 +995,7 @@ def test_property_access_symbol(): ) assert ( r.evaluate( - {Property("System.IO.FileInfo::Length", access=FeatureAccess.READ, description="some property"): {1}} + {Property("System.IO.FileInfo::Length", access=FeatureAccess.READ, description="some property"): {ADDR1}} ) == True ) From 81500a4d1dae34de61791c2c79729b152a543c28 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 14 Dec 2022 10:48:00 +0100 Subject: [PATCH 44/74] black --- capa/engine.py | 2 +- capa/features/extractors/dnfile_.py | 2 +- capa/features/extractors/null.py | 7 +------ capa/ida/plugin/form.py | 4 ++-- capa/ida/plugin/view.py | 2 +- tests/test_engine.py | 10 ++++++++-- tests/test_rules.py | 7 +++---- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/capa/engine.py b/capa/engine.py index b5fbb412..dde1e7c8 100644 --- a/capa/engine.py +++ b/capa/engine.py @@ -8,7 +8,7 @@ import copy import collections -from typing import TYPE_CHECKING, Set, Dict, List, Tuple, Mapping, Iterable, Iterator, Union, cast +from typing import TYPE_CHECKING, Set, Dict, List, Tuple, Union, Mapping, Iterable, Iterator, cast import capa.perf import capa.features.common diff --git a/capa/features/extractors/dnfile_.py b/capa/features/extractors/dnfile_.py index cf82bbce..03859db7 100644 --- a/capa/features/extractors/dnfile_.py +++ b/capa/features/extractors/dnfile_.py @@ -109,7 +109,7 @@ class DnfileFeatureExtractor(FeatureExtractor): assert self.pe is not None assert self.pe.net is not None assert self.pe.net.struct is not None - + return self.pe.net.struct.MajorRuntimeVersion, self.pe.net.struct.MinorRuntimeVersion def get_meta_version_string(self) -> str: diff --git a/capa/features/extractors/null.py b/capa/features/extractors/null.py index f8d6d077..892eadc8 100644 --- a/capa/features/extractors/null.py +++ b/capa/features/extractors/null.py @@ -68,10 +68,5 @@ class NullFeatureExtractor(FeatureExtractor): yield InsnHandle(address, None) def extract_insn_features(self, f, bb, insn): - for address, feature in ( - self.functions[f.address] - .basic_blocks[bb.address] - .instructions[insn.address] - .features - ): + for address, feature in self.functions[f.address].basic_blocks[bb.address].instructions[insn.address].features: yield feature, address diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index e6deecb7..eb205ae6 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -11,7 +11,7 @@ import copy import logging import itertools import collections -from typing import Set, Dict, Optional, List, Any +from typing import Any, Set, Dict, List, Optional import idaapi import ida_kernwin @@ -1108,7 +1108,7 @@ class CapaExplorerForm(idaapi.PluginForm): _, file_matches = capa.engine.match( capa.rules.RuleSet(list(capa.rules.get_rules_and_dependencies(rules, rule.name))).file_rules, file_features, - NO_ADDRESS + NO_ADDRESS, ) except Exception as e: self.set_rulegen_status("Failed to match rule (%s)" % e) diff --git a/capa/ida/plugin/view.py b/capa/ida/plugin/view.py index 75abf59c..0f577c7d 100644 --- a/capa/ida/plugin/view.py +++ b/capa/ida/plugin/view.py @@ -18,7 +18,7 @@ import capa.ida.helpers import capa.features.common import capa.features.basicblock from capa.ida.plugin.item import CapaExplorerFunctionItem -from capa.features.address import _NoAddress, AbsoluteVirtualAddress +from capa.features.address import AbsoluteVirtualAddress, _NoAddress from capa.ida.plugin.model import CapaExplorerDataModel MAX_SECTION_SIZE = 750 diff --git a/tests/test_engine.py b/tests/test_engine.py index 8fee9b92..89c3b739 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -5,16 +5,17 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. +import capa.features.address from capa.engine import * from capa.features import * from capa.features.insn import * -import capa.features.address ADDR1 = capa.features.address.AbsoluteVirtualAddress(0x401001) ADDR2 = capa.features.address.AbsoluteVirtualAddress(0x401002) ADDR3 = capa.features.address.AbsoluteVirtualAddress(0x401003) ADDR4 = capa.features.address.AbsoluteVirtualAddress(0x401004) + def test_number(): assert Number(1).evaluate({Number(0): {ADDR1}}) == False assert Number(1).evaluate({Number(1): {ADDR1}}) == True @@ -50,7 +51,12 @@ def test_some(): assert Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}}) == False assert Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}}) == False - assert Some(2, [Number(1), Number(2), Number(3)]).evaluate({Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}}) == True + assert ( + Some(2, [Number(1), Number(2), Number(3)]).evaluate( + {Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}} + ) + == True + ) assert ( Some(2, [Number(1), Number(2), Number(3)]).evaluate( {Number(0): {ADDR1}, Number(1): {ADDR1}, Number(2): {ADDR1}, Number(3): {ADDR1}} diff --git a/tests/test_rules.py b/tests/test_rules.py index d5aea406..43d7002d 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -13,10 +13,10 @@ import pytest import capa.rules import capa.engine import capa.features.common -from capa.features.address import AbsoluteVirtualAddress +import capa.features.address +from capa.engine import Or from capa.features.file import FunctionName from capa.features.insn import Number, Offset, Property -from capa.engine import Or from capa.features.common import ( OS, OS_LINUX, @@ -31,8 +31,7 @@ from capa.features.common import ( Substring, FeatureAccess, ) -import capa.features.address - +from capa.features.address import AbsoluteVirtualAddress ADDR1 = capa.features.address.AbsoluteVirtualAddress(0x401001) ADDR2 = capa.features.address.AbsoluteVirtualAddress(0x401002) From 9ae908c741c429cdaba5abd6e714da57a1d80117 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 14 Dec 2022 10:57:27 +0100 Subject: [PATCH 45/74] elf: better format attribution declarations --- capa/features/extractors/elf.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 4fed06dc..974b2c8d 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -96,14 +96,14 @@ class ELF: self.f = f # these will all be initialized in `_parse()` - self.bitness: int = 0 - self.endian: str = "" - self.e_phentsize: int = 0 - self.e_phnum: int = 0 - self.e_shentsize: int = 0 - self.e_shnum: int = 0 - self.phbuf: bytes = b"" - self.shbuf: bytes = b"" + self.bitness: int + self.endian: str + self.e_phentsize: int + self.e_phnum: int + self.e_shentsize: int + self.e_shnum: int + self.phbuf: bytes + self.shbuf: bytes self._parse() @@ -518,9 +518,9 @@ class PHNote: self.buf = buf # these will be initialized in `_parse()` - self.type_: int = 0 - self.descsz: int = 0 - self.name: str = "" + self.type_: int + self.descsz: int + self.name: str self._parse() @@ -567,9 +567,9 @@ class SHNote: self.buf = buf # these will be initialized in `_parse()` - self.type_: int = 0 - self.descsz: int = 0 - self.name: str = "" + self.type_: int + self.descsz: int + self.name: str self._parse() From 56d075fd323561a187d5dc7a8d5becf14d410c62 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 14 Dec 2022 11:08:46 +0100 Subject: [PATCH 46/74] typing --- capa/features/common.py | 5 +++-- capa/perf.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/capa/features/common.py b/capa/features/common.py index dca0d03f..0192a945 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -9,6 +9,7 @@ import re import abc import codecs +import typing import logging import collections from typing import TYPE_CHECKING, Set, Dict, List, Union, Optional @@ -200,7 +201,7 @@ class Substring(String): # mapping from string value to list of locations. # will unique the locations later on. - matches: collections.defaultdict[str, Set[Address]] = collections.defaultdict(set) + matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set) assert isinstance(self.value, str) for feature, locations in ctx.items(): @@ -291,7 +292,7 @@ class Regex(String): # mapping from string value to list of locations. # will unique the locations later on. - matches: collections.defaultdict[str, Set[Address]] = collections.defaultdict(set) + matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set) for feature, locations in ctx.items(): if not isinstance(feature, (String,)): diff --git a/capa/perf.py b/capa/perf.py index 1d98f6c2..54575e99 100644 --- a/capa/perf.py +++ b/capa/perf.py @@ -1,8 +1,8 @@ +import typing import collections -from typing import Dict # this structure is unstable and may change before the next major release. -counters: collections.Counter[str] = collections.Counter() +counters: typing.Counter[str] = collections.Counter() def reset(): From 501227f23fe17761df61444d58d3dc4dcd6a85ce Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 14 Dec 2022 11:14:01 +0100 Subject: [PATCH 47/74] elf: fix missing attribute --- capa/features/extractors/elf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 974b2c8d..6c88d46e 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -531,8 +531,8 @@ class PHNote: logger.debug("ph:namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, self.descsz, self.type_) - name = self.buf[name_offset : name_offset + namesz].partition(b"\x00")[0].decode("ascii") - logger.debug("name: %s", name) + self.name = self.buf[name_offset : name_offset + namesz].partition(b"\x00")[0].decode("ascii") + logger.debug("name: %s", self.name) @property def abi_tag(self) -> Optional[ABITag]: From 613c18542848ec531f209b53719522328527bd89 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 14 Dec 2022 11:51:25 +0100 Subject: [PATCH 48/74] tests: fix broken test --- tests/test_rules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_rules.py b/tests/test_rules.py index 43d7002d..466ac306 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -40,7 +40,7 @@ ADDR4 = capa.features.address.AbsoluteVirtualAddress(0x401004) def test_rule_ctor(): - r = capa.rules.Rule("test rule", capa.rules.FUNCTION_SCOPE, Or(Number(1)), {}) + r = capa.rules.Rule("test rule", capa.rules.FUNCTION_SCOPE, Or([Number(1)]), {}) assert r.evaluate({Number(0): {ADDR1}}) == False assert r.evaluate({Number(1): {ADDR2}}) == True From aee0ec8016fd23850e3fe3ce777b84239ca6c616 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 14 Dec 2022 21:22:52 +0100 Subject: [PATCH 49/74] features: cleanup mypy checking --- capa/features/common.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/capa/features/common.py b/capa/features/common.py index 0192a945..5d30f10b 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -233,9 +233,8 @@ class Substring(String): return Result(False, _MatchedSubstring(self, {}), []) def __str__(self): - v = self.value - assert isinstance(v, str) - return "substring(%s)" % v + assert isinstance(self.value, str) + return "substring(%s)" % self.value class _MatchedSubstring(Substring): From 505910edb5a46215fd936d844adfd3fa74761a3b Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Wed, 14 Dec 2022 21:28:32 +0100 Subject: [PATCH 50/74] dotnet: remove duplicative validate_has_dotnet helper --- capa/features/extractors/dnfile/helpers.py | 13 ------------- capa/features/extractors/dnfile_.py | 11 ----------- capa/features/extractors/dotnetfile.py | 11 ----------- 3 files changed, 35 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 2c489c22..37c8e42b 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -107,15 +107,8 @@ class DnUnmanagedMethod: return f"{module}.{method}" -def validate_has_dotnet(pe: dnfile.dnPE): - assert pe.net is not None - assert pe.net.mdtables is not None - assert pe.net.Flags is not None - - def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Any: """map generic token to string or table row""" - validate_has_dotnet(pe) assert pe.net is not None assert pe.net.mdtables is not None @@ -153,7 +146,6 @@ def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) - def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str]: """read user string from #US stream""" - validate_has_dotnet(pe) assert pe.net is not None assert pe.net.user_strings is not None @@ -183,7 +175,6 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: TypeName (index into String heap) TypeNamespace (index into String heap) """ - validate_has_dotnet(pe) assert pe.net is not None assert pe.net.mdtables is not None assert pe.net.mdtables.MemberRef is not None @@ -277,7 +268,6 @@ def get_dotnet_properties(pe: dnfile.dnPE) -> Iterator[DnType]: def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]: """get managed methods from MethodDef table""" - validate_has_dotnet(pe) assert pe.net is not None assert pe.net.mdtables is not None assert pe.net.mdtables.MethodDef is not None @@ -331,7 +321,6 @@ def calculate_dotnet_token_value(table: int, rid: int) -> int: def is_dotnet_table_valid(pe: dnfile.dnPE, table_name: str) -> bool: - validate_has_dotnet(pe) assert pe.net is not None assert pe.net.mdtables is not None @@ -339,7 +328,6 @@ def is_dotnet_table_valid(pe: dnfile.dnPE, table_name: str) -> bool: def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool: - validate_has_dotnet(pe) assert pe.net is not None assert pe.net.Flags is not None @@ -347,7 +335,6 @@ def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool: def iter_dotnet_table(pe: dnfile.dnPE, name: str) -> Iterator[Any]: - validate_has_dotnet(pe) assert pe.net is not None assert pe.net.mdtables is not None diff --git a/capa/features/extractors/dnfile_.py b/capa/features/extractors/dnfile_.py index 03859db7..7286001b 100644 --- a/capa/features/extractors/dnfile_.py +++ b/capa/features/extractors/dnfile_.py @@ -19,16 +19,9 @@ def extract_file_os(**kwargs) -> Iterator[Tuple[Feature, Address]]: yield OS(OS_ANY), NO_ADDRESS -def validate_has_dotnet(pe: dnfile.dnPE): - assert pe.net is not None - assert pe.net.mdtables is not None - assert pe.net.Flags is not None - - def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Feature, Address]]: # to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020 # .NET 4.5 added option: any CPU, 32-bit preferred - validate_has_dotnet(pe) assert pe.net is not None assert pe.net.Flags is not None @@ -81,7 +74,6 @@ class DnfileFeatureExtractor(FeatureExtractor): # self.pe.net.Flags.CLT_NATIVE_ENTRYPOINT # True: native EP: Token # False: managed EP: RVA - validate_has_dotnet(self.pe) assert self.pe.net is not None assert self.pe.net.struct is not None @@ -97,7 +89,6 @@ class DnfileFeatureExtractor(FeatureExtractor): return bool(self.pe.net) def is_mixed_mode(self) -> bool: - validate_has_dotnet(self.pe) assert self.pe is not None assert self.pe.net is not None assert self.pe.net.Flags is not None @@ -105,7 +96,6 @@ class DnfileFeatureExtractor(FeatureExtractor): return not bool(self.pe.net.Flags.CLR_ILONLY) def get_runtime_version(self) -> Tuple[int, int]: - validate_has_dotnet(self.pe) assert self.pe is not None assert self.pe.net is not None assert self.pe.net.struct is not None @@ -113,7 +103,6 @@ class DnfileFeatureExtractor(FeatureExtractor): return self.pe.net.struct.MajorRuntimeVersion, self.pe.net.struct.MinorRuntimeVersion def get_meta_version_string(self) -> str: - validate_has_dotnet(self.pe) assert self.pe.net is not None assert self.pe.net.metadata is not None assert self.pe.net.metadata.struct is not None diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index e7bb67fc..08dfd45c 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -40,12 +40,6 @@ def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]: yield Format(FORMAT_DOTNET), NO_ADDRESS -def validate_has_dotnet(pe: dnfile.dnPE): - assert pe.net is not None - assert pe.net.mdtables is not None - assert pe.net.Flags is not None - - def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, Address]]: for method in get_dotnet_managed_imports(pe): # like System.IO.File::OpenRead @@ -84,7 +78,6 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]: """emit class features from TypeRef and TypeDef tables""" - validate_has_dotnet(pe) assert pe.net is not None assert pe.net.mdtables is not None assert pe.net.mdtables.TypeDef is not None @@ -106,7 +99,6 @@ def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]: def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address]]: # to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020 # .NET 4.5 added option: any CPU, 32-bit preferred - validate_has_dotnet(pe) assert pe.net is not None assert pe.net.Flags is not None @@ -171,7 +163,6 @@ class DotnetFileFeatureExtractor(FeatureExtractor): # self.pe.net.Flags.CLT_NATIVE_ENTRYPOINT # True: native EP: Token # False: managed EP: RVA - validate_has_dotnet(self.pe) assert self.pe.net is not None assert self.pe.net.struct is not None @@ -190,7 +181,6 @@ class DotnetFileFeatureExtractor(FeatureExtractor): return is_dotnet_mixed_mode(self.pe) def get_runtime_version(self) -> Tuple[int, int]: - validate_has_dotnet(self.pe) assert self.pe.net is not None assert self.pe.net.struct is not None assert self.pe.net.struct.MajorRuntimeVersion is not None @@ -199,7 +189,6 @@ class DotnetFileFeatureExtractor(FeatureExtractor): return self.pe.net.struct.MajorRuntimeVersion, self.pe.net.struct.MinorRuntimeVersion def get_meta_version_string(self) -> str: - validate_has_dotnet(self.pe) assert self.pe.net is not None assert self.pe.net.metadata is not None assert self.pe.net.metadata.struct is not None From 3af7fe0b084f4f62c0a4362a69be1368fbe783cd Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 15 Dec 2022 12:55:57 -0700 Subject: [PATCH 51/74] dotnet: address unhandled exceptions through improved type checking (#1230) * dotnet: bump dncil version * dotnet: check #US stream valid before access * dotnet: use assert statements to guard types --- CHANGELOG.md | 1 + capa/features/extractors/dnfile/helpers.py | 173 +++++++++++++-------- capa/features/extractors/dotnetfile.py | 35 +++-- setup.py | 2 +- 4 files changed, 134 insertions(+), 77 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0bb0ee71..27ce7c67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,6 +53,7 @@ - do not overwrite version in version.py during PyInstaller build #1169 @mr-tz - render: fix vverbose rendering of offsets #1215 @williballenthin - elf: better detect OS via GLIBC ABI version needed and dependencies #1221 @williballenthin +- dotnet: address unhandled exceptions with improved type checking #1230 @mike-hunhoff ### capa explorer IDA Pro plugin - fix: display instruction items #1154 @mr-tz diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 37c8e42b..27b0c91a 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -10,7 +10,7 @@ from __future__ import annotations import logging from enum import Enum -from typing import Any, Tuple, Iterator, Optional +from typing import Any, Tuple, Union, Iterator, Optional import dnfile from dncil.cil.body import CilMethodBody @@ -140,19 +140,23 @@ def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) - try: return CilMethodBody(DnfileMethodBodyReader(pe, row)) except MethodBodyFormatError as e: - logger.warning("failed to parse managed method body @ 0x%08x (%s)" % (row.Rva, e)) + logger.debug("failed to parse managed method body @ 0x%08x (%s)", row.Rva, e) return None def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str]: """read user string from #US stream""" assert pe.net is not None - assert pe.net.user_strings is not None + + if pe.net.user_strings is None: + # stream may not exist (seen in obfuscated .NET) + logger.debug("#US stream does not exist for stream index 0x%08x", token.rid) + return None try: user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid) except UnicodeDecodeError as e: - logger.warning("failed to decode #US stream index 0x%08x (%s)" % (token.rid, e)) + logger.debug("failed to decode #US stream index 0x%08x (%s)", token.rid, e) return None if user_string is None: @@ -175,15 +179,17 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: TypeName (index into String heap) TypeNamespace (index into String heap) """ - assert pe.net is not None - assert pe.net.mdtables is not None - assert pe.net.mdtables.MemberRef is not None + for (rid, member_ref) in iter_dotnet_table(pe, dnfile.mdtable.MemberRef.number): + assert isinstance(member_ref, dnfile.mdtable.MemberRefRow) - for (rid, row) in enumerate(iter_dotnet_table(pe, "MemberRef")): - if not isinstance(row.Class.row, dnfile.mdtable.TypeRefRow): + if not isinstance(member_ref.Class.row, dnfile.mdtable.TypeRefRow): + # only process class imports from TypeRef table continue - token: int = calculate_dotnet_token_value(pe.net.mdtables.MemberRef.number, rid + 1) - yield DnType(token, row.Class.row.TypeName, namespace=row.Class.row.TypeNamespace, member=row.Name) + + token: int = calculate_dotnet_token_value(dnfile.mdtable.MemberRef.number, rid) + yield DnType( + token, member_ref.Class.row.TypeName, namespace=member_ref.Class.row.TypeNamespace, member=member_ref.Name + ) def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: @@ -197,22 +203,47 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: TypeNamespace (index into String heap) MethodList (index into MethodDef table; it marks the first of a continguous run of Methods owned by this Type) """ - for row in iter_dotnet_table(pe, "TypeDef"): - for index in row.MethodList: - token = calculate_dotnet_token_value(index.table.number, index.row_index) - yield DnType(token, row.TypeName, namespace=row.TypeNamespace, member=index.row.Name) + for (rid, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): + assert isinstance(typedef, dnfile.mdtable.TypeDefRow) + + for (idx, method) in enumerate(typedef.MethodList): + if method.table is None: + logger.debug("TypeDef[0x%X] MethodList[0x%X] table is None", rid, idx) + continue + if method.row is None: + logger.debug("TypeDef[0x%X] MethodList[0x%X] row is None", rid, idx) + continue + token = calculate_dotnet_token_value(method.table.number, method.row_index) + yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=method.row.Name) def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: - """get fields from TypeDef table""" - for row in iter_dotnet_table(pe, "TypeDef"): - for index in row.FieldList: - token = calculate_dotnet_token_value(index.table.number, index.row_index) - yield DnType(token, row.TypeName, namespace=row.TypeNamespace, member=index.row.Name) + """get fields from TypeDef table + + see https://www.ntcore.com/files/dotnetformat.htm + + 02 - TypeDef Table + Each row represents a class in the current assembly. + TypeName (index into String heap) + TypeNamespace (index into String heap) + FieldList (index into Field table; it marks the first of a continguous run of Fields owned by this Type) + """ + for (rid, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): + assert isinstance(typedef, dnfile.mdtable.TypeDefRow) + + for (idx, field) in enumerate(typedef.FieldList): + if field.table is None: + logger.debug("TypeDef[0x%X] FieldList[0x%X] table is None", rid, idx) + continue + if field.row is None: + logger.debug("TypeDef[0x%X] FieldList[0x%X] row is None", rid, idx) + continue + token: int = calculate_dotnet_token_value(field.table.number, field.row_index) + yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=field.row.Name) def get_dotnet_property_map( - pe: dnfile.dnPE, property_row: dnfile.mdtable.PropertyRow + pe: dnfile.dnPE, mapped_property: Union[dnfile.mdtable.PropertyRow, dnfile.mdtable.EventRow] ) -> Optional[dnfile.mdtable.TypeDefRow]: """get property map from PropertyMap table @@ -225,10 +256,15 @@ def get_dotnet_property_map( the last row of the Property table the next run of Properties, found by inspecting the PropertyList of the next row in this PropertyMap table """ - for row in iter_dotnet_table(pe, "PropertyMap"): - for index in row.PropertyList: - if index.row.Name == property_row.Name: - return row.Parent.row + for (rid, property_map) in iter_dotnet_table(pe, dnfile.mdtable.PropertyMap.number): + assert isinstance(property_map, dnfile.mdtable.PropertyMapRow) + + for (idx, property_) in enumerate(property_map.PropertyList): + if property_.row is None: + logger.debug("PropertyMap[0x%X] PropertyList[0x%x] row is None", rid, idx) + continue + if property_.row.Name == mapped_property.Name: + return property_map.Parent.row return None @@ -243,48 +279,57 @@ def get_dotnet_properties(pe: dnfile.dnPE) -> Iterator[DnType]: Method (index into the MethodDef table) Association (index into the Event or Property table; more precisely, a HasSemantics coded index) """ - for row in iter_dotnet_table(pe, "MethodSemantics"): - typedef_row = get_dotnet_property_map(pe, row.Association.row) - if typedef_row is None: + for (rid, method_semantics) in iter_dotnet_table(pe, dnfile.mdtable.MethodSemantics.number): + assert isinstance(method_semantics, dnfile.mdtable.MethodSemanticsRow) + + if method_semantics.Association.row is None: + logger.debug("MethodSemantics[0x%X] Association row is None", rid) + continue + if method_semantics.Method.table is None: + logger.debug("MethodSemantics[0x%X] Method table is None", rid) continue - token = calculate_dotnet_token_value(row.Method.table.number, row.Method.row_index) + typedef: Optional[dnfile.mdtable.TypeDefRow] = get_dotnet_property_map(pe, method_semantics.Association.row) + if typedef is None: + logger.debug("MethodSemantics[0x%X] TypeDef is None", rid) + continue - if row.Semantics.msSetter: + token: int = calculate_dotnet_token_value( + method_semantics.Method.table.number, method_semantics.Method.row_index + ) + + access: Optional[str] + if method_semantics.Semantics.msSetter: access = FeatureAccess.WRITE - elif row.Semantics.msGetter: + elif method_semantics.Semantics.msGetter: access = FeatureAccess.READ else: access = None yield DnType( token, - typedef_row.TypeName, + typedef.TypeName, access=access, - namespace=typedef_row.TypeNamespace, - member=row.Association.row.Name, + namespace=typedef.TypeNamespace, + member=method_semantics.Association.row.Name, ) def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]: """get managed methods from MethodDef table""" - assert pe.net is not None - assert pe.net.mdtables is not None - assert pe.net.mdtables.MethodDef is not None + for (rid, method_def) in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number): + assert isinstance(method_def, dnfile.mdtable.MethodDefRow) - if not hasattr(pe.net.mdtables, "MethodDef"): - return - - for (rid, row) in enumerate(pe.net.mdtables.MethodDef): - if not row.ImplFlags.miIL or any((row.Flags.mdAbstract, row.Flags.mdPinvokeImpl)): + if not method_def.ImplFlags.miIL or any((method_def.Flags.mdAbstract, method_def.Flags.mdPinvokeImpl)): # skip methods that do not have a method body continue - body: Optional[CilMethodBody] = read_dotnet_method_body(pe, row) + body: Optional[CilMethodBody] = read_dotnet_method_body(pe, method_def) if body is None: + logger.debug("MethodDef[0x%X] method body is None", rid) continue - token: int = calculate_dotnet_token_value(dnfile.enums.MetadataTables.MethodDef.value, rid + 1) + token: int = calculate_dotnet_token_value(dnfile.mdtable.MethodDef.number, rid) yield token, body @@ -299,14 +344,29 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod] ImportName (index into the String heap) ImportScope (index into the ModuleRef table) """ - for row in iter_dotnet_table(pe, "ImplMap"): - module: str = row.ImportScope.row.Name - method: str = row.ImportName + for (rid, impl_map) in iter_dotnet_table(pe, dnfile.mdtable.ImplMap.number): + assert isinstance(impl_map, dnfile.mdtable.ImplMapRow) + + module: str + if impl_map.ImportScope.row is None: + logger.debug("ImplMap[0x%X] ImportScope row is None", rid) + module = "" + else: + module = impl_map.ImportScope.row.Name + method: str = impl_map.ImportName + + member_forward_table: int + if impl_map.MemberForwarded.table is None: + logger.debug("ImplMap[0x%X] MemberForwarded table is None", rid) + continue + else: + member_forward_table = impl_map.MemberForwarded.table.number + member_forward_row: int = impl_map.MemberForwarded.row_index # ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the # name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded # MethodDef table token to help us later record native import method calls made from CIL - token: int = calculate_dotnet_token_value(row.MemberForwarded.table.number, row.MemberForwarded.row_index) + token: int = calculate_dotnet_token_value(member_forward_table, member_forward_row) # like Kernel32.dll if module and "." in module: @@ -320,13 +380,6 @@ def calculate_dotnet_token_value(table: int, rid: int) -> int: return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK) -def is_dotnet_table_valid(pe: dnfile.dnPE, table_name: str) -> bool: - assert pe.net is not None - assert pe.net.mdtables is not None - - return bool(getattr(pe.net.mdtables, table_name, None)) - - def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool: assert pe.net is not None assert pe.net.Flags is not None @@ -334,12 +387,10 @@ def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool: return not bool(pe.net.Flags.CLR_ILONLY) -def iter_dotnet_table(pe: dnfile.dnPE, name: str) -> Iterator[Any]: +def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[Tuple[int, dnfile.base.MDTableRow]]: assert pe.net is not None assert pe.net.mdtables is not None - if not is_dotnet_table_valid(pe, name): - return - - for row in getattr(pe.net.mdtables, name): - yield row + for (rid, row) in enumerate(pe.net.mdtables.tables.get(table_index, [])): + # .NET tables are 1-indexed + yield rid + 1, row diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index 08dfd45c..1b5aa1f3 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -1,5 +1,5 @@ import logging -from typing import Tuple, Iterator +from typing import Tuple, Iterator, cast import dnfile import pefile @@ -62,11 +62,15 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple # namespaces may be referenced multiple times, so we need to filter namespaces = set() - for row in iter_dotnet_table(pe, "TypeDef"): - namespaces.add(row.TypeNamespace) + for (_, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): + # emit internal .NET namespaces + assert isinstance(typedef, dnfile.mdtable.TypeDefRow) + namespaces.add(typedef.TypeNamespace) - for row in iter_dotnet_table(pe, "TypeRef"): - namespaces.add(row.TypeNamespace) + for (_, typeref) in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): + # emit external .NET namespaces + assert isinstance(typeref, dnfile.mdtable.TypeRefRow) + namespaces.add(typeref.TypeNamespace) # namespaces may be empty, discard namespaces.discard("") @@ -78,18 +82,19 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]: """emit class features from TypeRef and TypeDef tables""" - assert pe.net is not None - assert pe.net.mdtables is not None - assert pe.net.mdtables.TypeDef is not None - assert pe.net.mdtables.TypeRef is not None + for (rid, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): + # emit internal .NET classes + assert isinstance(typedef, dnfile.mdtable.TypeDefRow) - for (rid, row) in enumerate(iter_dotnet_table(pe, "TypeDef")): - token = calculate_dotnet_token_value(pe.net.mdtables.TypeDef.number, rid + 1) - yield Class(DnType.format_name(row.TypeName, namespace=row.TypeNamespace)), DNTokenAddress(token) + token = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) + yield Class(DnType.format_name(typedef.TypeName, namespace=typedef.TypeNamespace)), DNTokenAddress(token) - for (rid, row) in enumerate(iter_dotnet_table(pe, "TypeRef")): - token = calculate_dotnet_token_value(pe.net.mdtables.TypeRef.number, rid + 1) - yield Class(DnType.format_name(row.TypeName, namespace=row.TypeNamespace)), DNTokenAddress(token) + for (rid, typeref) in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): + # emit external .NET classes + assert isinstance(typeref, dnfile.mdtable.TypeRefRow) + + token = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) + yield Class(DnType.format_name(typeref.TypeName, namespace=typeref.TypeNamespace)), DNTokenAddress(token) def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]: diff --git a/setup.py b/setup.py index b41c395f..95890d03 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ requirements = [ "pefile==2022.5.30", "pyelftools==0.29", "dnfile==0.12.0", - "dncil==1.0.1", + "dncil==1.0.2", "pydantic==1.10.2", ] From b12d526a600c1b0b6b6096b7f45c3aedd09113e0 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Mon, 19 Dec 2022 11:12:42 +0100 Subject: [PATCH 52/74] tests: use python 3.11 (#1191) --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ed635959..81d8f504 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -67,7 +67,7 @@ jobs: matrix: os: [ubuntu-20.04, windows-2019, macos-11] # across all operating systems - python-version: ["3.7", "3.10"] + python-version: ["3.7", "3.11"] include: # on Ubuntu run these as well - os: ubuntu-20.04 From 88cffee9028618ed36914156a181efc4e30493fb Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Mon, 19 Dec 2022 12:34:18 +0100 Subject: [PATCH 53/74] ci: bump action versions (#1233) * ci: bump action versions Co-authored-by: Moritz --- .github/workflows/build.yml | 10 +++++----- .github/workflows/changelog.yml | 2 +- .github/workflows/publish.yml | 2 +- .github/workflows/tag.yml | 2 +- .github/workflows/tests.yml | 12 ++++++------ 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index df80d708..e793376c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,12 +28,12 @@ jobs: asset_name: macos steps: - name: Checkout capa - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: submodules: true # using Python 3.8 to support running across multiple operating systems including Windows 7 - name: Set up Python 3.8 - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: 3.8 - if: matrix.os == 'ubuntu-18.04' @@ -50,7 +50,7 @@ jobs: run: dist/capa "tests/data/499c2a85f6e8142c3f48d4251c9c7cd6.raw32" - name: Does it run (ELF)? run: dist/capa "tests/data/7351f8a40c5450557b24622417fc478d.elf_" - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: ${{ matrix.asset_name }} path: dist/${{ matrix.artifact_name }} @@ -74,7 +74,7 @@ jobs: asset_name: windows steps: - name: Download ${{ matrix.asset_name }} - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: ${{ matrix.asset_name }} - name: Set executable flag @@ -100,7 +100,7 @@ jobs: artifact_name: capa steps: - name: Download ${{ matrix.asset_name }} - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: ${{ matrix.asset_name }} - name: Set executable flag diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml index c028ce82..b68845f7 100644 --- a/.github/workflows/changelog.yml +++ b/.github/workflows/changelog.yml @@ -17,7 +17,7 @@ jobs: steps: - name: Get changed files id: files - uses: Ana06/get-changed-files@v1.2 + uses: Ana06/get-changed-files@v2.2.0 - name: check changelog updated id: changelog_updated env: diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 42525df9..338fc0a6 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -11,7 +11,7 @@ jobs: deploy: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v2 with: diff --git a/.github/workflows/tag.yml b/.github/workflows/tag.yml index bed2512d..744ea207 100644 --- a/.github/workflows/tag.yml +++ b/.github/workflows/tag.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Checkout capa-rules - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: repository: mandiant/capa-rules token: ${{ secrets.CAPA_TOKEN }} diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 81d8f504..4c25a31c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Checkout capa - uses: actions/checkout@v2 + uses: actions/checkout@v3 # The sync GH action in capa-rules relies on a single '- *$' in the CHANGELOG file - name: Ensure CHANGELOG has '- *$' run: | @@ -26,9 +26,9 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Checkout capa - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up Python 3.8 - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: "3.8" - name: Install dependencies @@ -46,11 +46,11 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Checkout capa with submodules - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: submodules: recursive - name: Set up Python 3.8 - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: "3.8" - name: Install capa @@ -76,7 +76,7 @@ jobs: python-version: "3.9" steps: - name: Checkout capa with submodules - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: submodules: recursive - name: Set up Python ${{ matrix.python-version }} From 6dcd115765aad0e4710d103839a95d42445c4b95 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Dec 2022 14:02:58 +0000 Subject: [PATCH 54/74] build(deps-dev): bump isort from 5.10.1 to 5.11.3 Bumps [isort](https://github.com/pycqa/isort) from 5.10.1 to 5.11.3. - [Release notes](https://github.com/pycqa/isort/releases) - [Changelog](https://github.com/PyCQA/isort/blob/main/CHANGELOG.md) - [Commits](https://github.com/pycqa/isort/compare/5.10.1...5.11.3) --- updated-dependencies: - dependency-name: isort dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 95890d03..b79f3e8d 100644 --- a/setup.py +++ b/setup.py @@ -75,7 +75,7 @@ setuptools.setup( "pytest-cov==4.0.0", "pycodestyle==2.10.0", "black==22.12.0", - "isort==5.10.1", + "isort==5.11.3", "mypy==0.991", "psutil==5.9.2", "stix2==3.0.1", From fa3d658f3362a925528d748463fe62ad2d602e1b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Dec 2022 10:43:54 -0700 Subject: [PATCH 55/74] build(deps): bump dnfile from 0.12.0 to 0.13.0 (#1240) Bumps [dnfile](https://github.com/malwarefrank/dnfile) from 0.12.0 to 0.13.0. - [Release notes](https://github.com/malwarefrank/dnfile/releases) - [Changelog](https://github.com/malwarefrank/dnfile/blob/master/HISTORY.rst) - [Commits](https://github.com/malwarefrank/dnfile/compare/v0.12.0...v0.13.0) --- updated-dependencies: - dependency-name: dnfile dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 95890d03..e32501a4 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ requirements = [ "smda==1.8.4", "pefile==2022.5.30", "pyelftools==0.29", - "dnfile==0.12.0", + "dnfile==0.13.0", "dncil==1.0.2", "pydantic==1.10.2", ] From e0491097b029289a2add7fe577bb6b8aac166f5b Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Mon, 19 Dec 2022 14:45:21 -0700 Subject: [PATCH 56/74] dotnet: emit API features for generic methods (#1231) * dotnet: emit API features for generic methods * dotnet: improve type checking * dotnet: emit namespace/class features for generic methods * dotnet: update for dnfile 0.13.0 * dotnet: refactor property extraction --- CHANGELOG.md | 7 +- capa/features/extractors/dnfile/helpers.py | 165 +++++++-------- capa/features/extractors/dnfile/insn.py | 226 ++++++++------------- tests/fixtures.py | 86 ++++++-- 4 files changed, 241 insertions(+), 243 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 27ce7c67..11f2a995 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,9 +4,9 @@ ### New Features - verify rule metadata format on load #1160 @mr-tz -- extract property features from .NET PE files #1168 @anushkavirgaonkar -- emit features for .NET newobj instruction #1186 @mike-hunhoff -- fix import-to-ida script formatting #1208 @williballenthin +- dotnet: emit property features #1168 @anushkavirgaonkar +- dotnet: emit API features for objects created via the newobj instruction #1186 @mike-hunhoff +- dotnet: emit API features for generic methods #1231 @mike-hunhoff - Python 3.11 support #1192 @williballenthin ### Breaking Changes @@ -54,6 +54,7 @@ - render: fix vverbose rendering of offsets #1215 @williballenthin - elf: better detect OS via GLIBC ABI version needed and dependencies #1221 @williballenthin - dotnet: address unhandled exceptions with improved type checking #1230 @mike-hunhoff +- fix import-to-ida script formatting #1208 @williballenthin ### capa explorer IDA Pro plugin - fix: display instruction items #1154 @mr-tz diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 27b0c91a..086ad3f8 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -9,8 +9,7 @@ from __future__ import annotations import logging -from enum import Enum -from typing import Any, Tuple, Union, Iterator, Optional +from typing import Dict, Tuple, Union, Iterator, Optional import dnfile from dncil.cil.body import CilMethodBody @@ -22,9 +21,6 @@ from capa.features.common import FeatureAccess logger = logging.getLogger(__name__) -# key indexes to dotnet metadata tables -DOTNET_META_TABLES_BY_INDEX = {table.value: table.name for table in dnfile.enums.MetadataTables} - class DnfileMethodBodyReader(CilMethodBodyReaderBase): def __init__(self, pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow): @@ -47,6 +43,7 @@ class DnfileMethodBodyReader(CilMethodBodyReaderBase): class DnType(object): def __init__(self, token: int, class_: str, namespace: str = "", member: str = "", access: Optional[str] = None): self.token = token + # property access self.access = access self.namespace = namespace self.class_ = class_ @@ -107,7 +104,7 @@ class DnUnmanagedMethod: return f"{module}.{method}" -def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Any: +def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Union[dnfile.base.MDTableRow, InvalidToken, str]: """map generic token to string or table row""" assert pe.net is not None assert pe.net.mdtables is not None @@ -118,14 +115,9 @@ def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Any: return InvalidToken(token.value) return user_string - table_name: str = DOTNET_META_TABLES_BY_INDEX.get(token.table, "") - if not table_name: - # table_index is not valid - return InvalidToken(token.value) - - table: Any = getattr(pe.net.mdtables, table_name, None) + table: Optional[dnfile.base.ClrMetaDataTable] = pe.net.mdtables.tables.get(token.table, None) if table is None: - # table index is valid but table is not present + # table index is not valid return InvalidToken(token.value) try: @@ -187,11 +179,67 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: continue token: int = calculate_dotnet_token_value(dnfile.mdtable.MemberRef.number, rid) + access: Optional[str] + + # assume .NET imports starting with get_/set_ are used to access a property + if member_ref.Name.startswith("get_"): + access = FeatureAccess.READ + elif member_ref.Name.startswith("set_"): + access = FeatureAccess.WRITE + else: + access = None + + member_ref_name: str = member_ref.Name + if member_ref_name.startswith(("get_", "set_")): + # remove get_/set_ from MemberRef name + member_ref_name = member_ref_name[4:] + yield DnType( - token, member_ref.Class.row.TypeName, namespace=member_ref.Class.row.TypeNamespace, member=member_ref.Name + token, + member_ref.Class.row.TypeName, + namespace=member_ref.Class.row.TypeNamespace, + member=member_ref_name, + access=access, ) +def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: + """get MethodDef methods used to access properties + + see https://www.ntcore.com/files/dotnetformat.htm + + 24 - MethodSemantics Table + Links Events and Properties to specific methods. For example one Event can be associated to more methods. A property uses this table to associate get/set methods. + Semantics (a 2-byte bitmask of type MethodSemanticsAttributes) + Method (index into the MethodDef table) + Association (index into the Event or Property table; more precisely, a HasSemantics coded index) + """ + for (rid, method_semantics) in iter_dotnet_table(pe, dnfile.mdtable.MethodSemantics.number): + assert isinstance(method_semantics, dnfile.mdtable.MethodSemanticsRow) + + if method_semantics.Association.row is None: + logger.debug("MethodSemantics[0x%X] Association row is None", rid) + continue + + if isinstance(method_semantics.Association.row, dnfile.mdtable.EventRow): + # ignore events + logger.debug("MethodSemantics[0x%X] ignoring Event", rid) + continue + + if method_semantics.Method.table is None: + logger.debug("MethodSemantics[0x%X] Method table is None", rid) + continue + + token: int = calculate_dotnet_token_value( + method_semantics.Method.table.number, method_semantics.Method.row_index + ) + + if method_semantics.Semantics.msSetter: + yield token, FeatureAccess.WRITE + elif method_semantics.Semantics.msGetter: + yield token, FeatureAccess.READ + + def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: """get managed method names from TypeDef table @@ -203,6 +251,10 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: TypeNamespace (index into String heap) MethodList (index into MethodDef table; it marks the first of a continguous run of Methods owned by this Type) """ + accessor_map: Dict[int, str] = {} + for (methoddef, methoddef_access) in get_dotnet_methoddef_property_accessors(pe): + accessor_map[methoddef] = methoddef_access + for (rid, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): assert isinstance(typedef, dnfile.mdtable.TypeDefRow) @@ -213,8 +265,16 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: if method.row is None: logger.debug("TypeDef[0x%X] MethodList[0x%X] row is None", rid, idx) continue - token = calculate_dotnet_token_value(method.table.number, method.row_index) - yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=method.row.Name) + + token: int = calculate_dotnet_token_value(method.table.number, method.row_index) + access: Optional[str] = accessor_map.get(token, None) + + method_name: str = method.row.Name + if method_name.startswith(("get_", "set_")): + # remove get_/set_ + method_name = method_name[4:] + + yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=method_name, access=access) def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: @@ -242,79 +302,6 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=field.row.Name) -def get_dotnet_property_map( - pe: dnfile.dnPE, mapped_property: Union[dnfile.mdtable.PropertyRow, dnfile.mdtable.EventRow] -) -> Optional[dnfile.mdtable.TypeDefRow]: - """get property map from PropertyMap table - - see https://www.ntcore.com/files/dotnetformat.htm - - 21 - PropertyMap Table - List of Properties owned by a specific class. - Parent (index into the TypeDef table) - PropertyList (index into Property table). It marks the first of a contiguous run of Properties owned by Parent. The run continues to the smaller of: - the last row of the Property table - the next run of Properties, found by inspecting the PropertyList of the next row in this PropertyMap table - """ - for (rid, property_map) in iter_dotnet_table(pe, dnfile.mdtable.PropertyMap.number): - assert isinstance(property_map, dnfile.mdtable.PropertyMapRow) - - for (idx, property_) in enumerate(property_map.PropertyList): - if property_.row is None: - logger.debug("PropertyMap[0x%X] PropertyList[0x%x] row is None", rid, idx) - continue - if property_.row.Name == mapped_property.Name: - return property_map.Parent.row - return None - - -def get_dotnet_properties(pe: dnfile.dnPE) -> Iterator[DnType]: - """get property from MethodSemantics table - - see https://www.ntcore.com/files/dotnetformat.htm - - 24 - MethodSemantics Table - Links Events and Properties to specific methods. For example one Event can be associated to more methods. A property uses this table to associate get/set methods. - Semantics (a 2-byte bitmask of type MethodSemanticsAttributes) - Method (index into the MethodDef table) - Association (index into the Event or Property table; more precisely, a HasSemantics coded index) - """ - for (rid, method_semantics) in iter_dotnet_table(pe, dnfile.mdtable.MethodSemantics.number): - assert isinstance(method_semantics, dnfile.mdtable.MethodSemanticsRow) - - if method_semantics.Association.row is None: - logger.debug("MethodSemantics[0x%X] Association row is None", rid) - continue - if method_semantics.Method.table is None: - logger.debug("MethodSemantics[0x%X] Method table is None", rid) - continue - - typedef: Optional[dnfile.mdtable.TypeDefRow] = get_dotnet_property_map(pe, method_semantics.Association.row) - if typedef is None: - logger.debug("MethodSemantics[0x%X] TypeDef is None", rid) - continue - - token: int = calculate_dotnet_token_value( - method_semantics.Method.table.number, method_semantics.Method.row_index - ) - - access: Optional[str] - if method_semantics.Semantics.msSetter: - access = FeatureAccess.WRITE - elif method_semantics.Semantics.msGetter: - access = FeatureAccess.READ - else: - access = None - - yield DnType( - token, - typedef.TypeName, - access=access, - namespace=typedef.TypeNamespace, - member=method_semantics.Association.row.Name, - ) - - def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]: """get managed methods from MethodDef table""" for (rid, method_def) in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number): diff --git a/capa/features/extractors/dnfile/insn.py b/capa/features/extractors/dnfile/insn.py index da88464f..341a6505 100644 --- a/capa/features/extractors/dnfile/insn.py +++ b/capa/features/extractors/dnfile/insn.py @@ -8,13 +8,12 @@ from __future__ import annotations -from typing import Any, Dict, Tuple, Union, Iterator, Optional +import logging +from typing import Dict, Tuple, Union, Iterator, Optional import dnfile -from dncil.cil.body import CilMethodBody from dncil.clr.token import Token, StringToken, InvalidToken from dncil.cil.opcode import OpCodes -from dncil.cil.instruction import Instruction import capa.features.extractors.helpers from capa.features.insn import API, Number, Property @@ -26,16 +25,14 @@ from capa.features.extractors.dnfile.helpers import ( DnUnmanagedMethod, get_dotnet_fields, resolve_dotnet_token, - get_dotnet_properties, read_dotnet_user_string, get_dotnet_managed_imports, get_dotnet_managed_methods, + calculate_dotnet_token_value, get_dotnet_unmanaged_imports, ) -METHODDEF_TABLE = dnfile.mdtable.MethodDef.number -MEMBERREF_TABLE = dnfile.mdtable.MemberRef.number -FIELD_TABLE = dnfile.mdtable.Field.number +logger = logging.getLogger(__name__) def get_managed_imports(ctx: Dict) -> Dict: @@ -62,26 +59,6 @@ def get_methods(ctx: Dict) -> Dict: return ctx["methods_cache"] -def get_callee(ctx: Dict, token: int) -> Union[DnType, DnUnmanagedMethod, None]: - """map dotnet token to un/managed method""" - callee: Union[DnType, DnUnmanagedMethod, None] = get_managed_imports(ctx).get(token, None) - if callee is None: - # we must check unmanaged imports before managed methods because we map forwarded managed methods - # to their unmanaged imports; we prefer a forwarded managed method be mapped to its unmanaged import for analysis - callee = get_unmanaged_imports(ctx).get(token, None) - if callee is None: - callee = get_methods(ctx).get(token, None) - return callee - - -def get_properties(ctx: Dict) -> Dict: - if "properties_cache" not in ctx: - ctx["properties_cache"] = {} - for prop in get_dotnet_properties(ctx["pe"]): - ctx["properties_cache"][prop.token] = prop - return ctx["properties_cache"] - - def get_fields(ctx: Dict) -> Dict: if "fields_cache" not in ctx: ctx["fields_cache"] = {} @@ -90,31 +67,45 @@ def get_fields(ctx: Dict) -> Dict: return ctx["fields_cache"] +def get_callee(ctx: Dict, token: Token) -> Union[DnType, DnUnmanagedMethod, None]: + """map .NET token to un/managed (generic) method""" + row: Union[dnfile.base.MDTableRow, InvalidToken, str] = resolve_dotnet_token(ctx["pe"], token) + if not isinstance(row, (dnfile.mdtable.MethodDefRow, dnfile.mdtable.MemberRefRow, dnfile.mdtable.MethodSpecRow)): + # we only handle MethodDef (internal), MemberRef (external), and MethodSpec (generic) + return None + + token_: int + if isinstance(row, dnfile.mdtable.MethodSpecRow): + # map MethodSpec to MethodDef or MemberRef + if row.Method.table is None: + logger.debug("MethodSpec[0x%X] Method table is None", token.rid) + return None + token_ = calculate_dotnet_token_value(row.Method.table.number, row.Method.row_index) + else: + token_ = token.value + + callee: Union[DnType, DnUnmanagedMethod, None] = get_managed_imports(ctx).get(token_, None) + if callee is None: + # we must check unmanaged imports before managed methods because we map forwarded managed methods + # to their unmanaged imports; we prefer a forwarded managed method be mapped to its unmanaged import for analysis + callee = get_unmanaged_imports(ctx).get(token_, None) + if callee is None: + callee = get_methods(ctx).get(token_, None) + return callee + + def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: """parse instruction API features""" - insn: Instruction = ih.inner - - if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli, OpCodes.Newobj): - return - - callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, insn.operand.value) - if callee is None: + if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli, OpCodes.Newobj): return + callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand) if isinstance(callee, DnType): - if callee.member.startswith(("get_", "set_")): - if insn.operand.table == METHODDEF_TABLE: - # check if the method belongs to the MethodDef table and whether it is used to access a property - if get_properties(fh.ctx).get(insn.operand.value, None) is not None: - return - elif insn.operand.table == MEMBERREF_TABLE: - # if the method belongs to the MemberRef table, we assume it is used to access a property - return - - # like System.IO.File::Delete - yield API(str(callee)), ih.address - - else: + # ignore methods used to access properties + if callee.access is None: + # like System.IO.File::Delete + yield API(str(callee)), ih.address + elif isinstance(callee, DnUnmanagedMethod): # like kernel32.CreateFileA for name in capa.features.extractors.helpers.generate_symbols(callee.module, callee.method): yield API(name), ih.address @@ -122,52 +113,30 @@ def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterato def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: """parse instruction property features""" - insn: Instruction = ih.inner - name: Optional[str] = None access: Optional[str] = None - if insn.opcode in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): - if insn.operand.table == METHODDEF_TABLE: - # check if the method belongs to the MethodDef table and whether it is used to access a property - prop = get_properties(fh.ctx).get(insn.operand.value, None) - if prop is not None: - name = str(prop) - access = prop.access + if ih.inner.opcode in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): + # property access via MethodDef or MemberRef + callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand) + if isinstance(callee, DnType): + if callee.access is not None: + name = str(callee) + access = callee.access - elif insn.operand.table == MEMBERREF_TABLE: - # if the method belongs to the MemberRef table, we assume it is used to access a property - row: Any = resolve_dotnet_token(fh.ctx["pe"], insn.operand) - if row is None: - return - if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow, dnfile.mdtable.TypeDefRow)): - return - if not row.Name.startswith(("get_", "set_")): - return + elif ih.inner.opcode in (OpCodes.Ldfld, OpCodes.Ldflda, OpCodes.Ldsfld, OpCodes.Ldsflda): + # property read via Field + read_field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None) + if read_field is not None: + name = str(read_field) + access = FeatureAccess.READ - name = DnType.format_name( - row.Class.row.TypeName, namespace=row.Class.row.TypeNamespace, member=row.Name[4:] - ) - if row.Name.startswith("get_"): - access = FeatureAccess.READ - elif row.Name.startswith("set_"): - access = FeatureAccess.WRITE - - elif insn.opcode in (OpCodes.Ldfld, OpCodes.Ldflda, OpCodes.Ldsfld, OpCodes.Ldsflda): - if insn.operand.table == FIELD_TABLE: - # determine whether the operand is a field by checking if it belongs to the Field table - read_field: Optional[DnType] = get_fields(fh.ctx).get(insn.operand.value, None) - if read_field: - name = str(read_field) - access = FeatureAccess.READ - - elif insn.opcode in (OpCodes.Stfld, OpCodes.Stsfld): - if insn.operand.table == FIELD_TABLE: - # determine whether the operand is a field by checking if it belongs to the Field table - write_field: Optional[DnType] = get_fields(fh.ctx).get(insn.operand.value, None) - if write_field: - name = str(write_field) - access = FeatureAccess.WRITE + elif ih.inner.opcode in (OpCodes.Stfld, OpCodes.Stsfld): + # property write via Field + write_field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None) + if write_field is not None: + name = str(write_field) + access = FeatureAccess.WRITE if name is not None: if access is not None: @@ -177,92 +146,74 @@ def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> It def extract_insn_class_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Class, Address]]: """parse instruction class features""" - if ih.inner.opcode not in ( + if ih.inner.opcode in ( OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli, + OpCodes.Newobj, + ): + # method call - includes managed methods (MethodDef, TypeRef) and properties (MethodSemantics, TypeRef) + callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand) + if isinstance(callee, DnType): + yield Class(DnType.format_name(callee.class_, namespace=callee.namespace)), ih.address + + elif ih.inner.opcode in ( OpCodes.Ldfld, OpCodes.Ldflda, OpCodes.Ldsfld, OpCodes.Ldsflda, OpCodes.Stfld, OpCodes.Stsfld, - OpCodes.Newobj, ): - return - - row: Any = resolve_dotnet_token(fh.ctx["pe"], ih.inner.operand) - if isinstance(row, dnfile.mdtable.MemberRefRow): - if isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow, dnfile.mdtable.TypeDefRow)): - yield Class(DnType.format_name(row.Class.row.TypeName, namespace=row.Class.row.TypeNamespace)), ih.address - - elif isinstance(row, dnfile.mdtable.MethodDefRow): - callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand.value) - if isinstance(callee, DnType): - yield Class(DnType.format_name(callee.class_, namespace=callee.namespace)), ih.address - - elif isinstance(row, dnfile.mdtable.FieldRow): + # field access field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None) - if field is not None: + if isinstance(field, DnType): yield Class(DnType.format_name(field.class_, namespace=field.namespace)), ih.address def extract_insn_namespace_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Namespace, Address]]: """parse instruction namespace features""" - if ih.inner.opcode not in ( + if ih.inner.opcode in ( OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli, + OpCodes.Newobj, + ): + # method call - includes managed methods (MethodDef, TypeRef) and properties (MethodSemantics, TypeRef) + callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand) + if isinstance(callee, DnType) and callee.namespace is not None: + yield Namespace(callee.namespace), ih.address + + elif ih.inner.opcode in ( OpCodes.Ldfld, OpCodes.Ldflda, OpCodes.Ldsfld, OpCodes.Ldsflda, OpCodes.Stfld, OpCodes.Stsfld, - OpCodes.Newobj, ): - return - - row: Any = resolve_dotnet_token(fh.ctx["pe"], Token(ih.inner.operand.value)) - - if isinstance(row, dnfile.mdtable.MemberRefRow): - if isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow, dnfile.mdtable.TypeDefRow)): - if row.Class.row.TypeNamespace: - yield Namespace(row.Class.row.TypeNamespace), ih.address - - elif isinstance(row, dnfile.mdtable.MethodDefRow): - callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand.value) - if isinstance(callee, DnType) and callee.namespace is not None: - yield Namespace(callee.namespace), ih.address - - elif isinstance(row, dnfile.mdtable.FieldRow): field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None) - if field is not None: + if isinstance(field, DnType) and field.namespace is not None: yield Namespace(field.namespace), ih.address def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: """parse instruction number features""" - insn: Instruction = ih.inner - - if insn.is_ldc(): - yield Number(insn.get_ldc()), ih.address + if ih.inner.is_ldc(): + yield Number(ih.inner.get_ldc()), ih.address def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: """parse instruction string features""" - f: CilMethodBody = fh.inner - insn: Instruction = ih.inner - - if not insn.is_ldstr(): + if not ih.inner.is_ldstr(): return - if not isinstance(insn.operand, StringToken): + if not isinstance(ih.inner.operand, StringToken): return - user_string: Optional[str] = read_dotnet_user_string(fh.ctx["pe"], insn.operand) + user_string: Optional[str] = read_dotnet_user_string(fh.ctx["pe"], ih.inner.operand) if user_string is None: return @@ -272,17 +223,14 @@ def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iter def extract_unmanaged_call_characteristic_features( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle ) -> Iterator[Tuple[Characteristic, Address]]: - insn: Instruction = ih.inner - if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): + if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): return - token: Any = resolve_dotnet_token(fh.ctx["pe"], insn.operand) - if isinstance(token, InvalidToken): - return - if not isinstance(token, dnfile.mdtable.MethodDefRow): + row: Union[str, InvalidToken, dnfile.base.MDTableRow] = resolve_dotnet_token(fh.ctx["pe"], ih.inner.operand) + if not isinstance(row, dnfile.mdtable.MethodDefRow): return - if any((token.Flags.mdPinvokeImpl, token.ImplFlags.miUnmanaged, token.ImplFlags.miNative)): + if any((row.Flags.mdPinvokeImpl, row.ImplFlags.miUnmanaged, row.ImplFlags.miNative)): yield Characteristic("unmanaged call"), ih.address diff --git a/tests/fixtures.py b/tests/fixtures.py index 1d0ba0fa..6deb0e24 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -278,6 +278,10 @@ def get_data_path_by_name(name): return os.path.join(DNFILE_TESTFILES, "hello-world", "hello-world.exe") elif name.startswith("_1c444"): return os.path.join(CD, "data", "dotnet", "1c444ebeba24dcba8628b7dfe5fec7c6.exe_") + elif name.startswith("_387f15"): + return os.path.join( + CD, "data", "dotnet", "387f15043f0198fd3a637b0758c2b6dde9ead795c3ed70803426fc355731b173.dll_" + ) elif name.startswith("_692f"): return os.path.join(CD, "data", "dotnet", "692f7fd6d198e804d6af98eb9e390d61.exe_") elif name.startswith("_0953c"): @@ -751,6 +755,9 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted( ("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True), ("_1c444", "function=0x1F68", capa.features.insn.Number(0x0), True), ("_1c444", "function=0x1F68", capa.features.insn.Number(0x1), False), + ("_692f", "token=0x6000004", capa.features.insn.API("System.Linq.Enumerable::First"), True), # generic method + ("_692f", "token=0x6000004", capa.features.common.Namespace("System.Linq"), True), # generic method + ("_692f", "token=0x6000004", capa.features.common.Class("System.Linq.Enumerable"), True), # generic method ( "_1c444", "function=0x1F59, bb=0x1F59, insn=0x1F5B", @@ -772,25 +779,25 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted( "token=0x600002B", capa.features.insn.Property("System.IO.FileInfo::Length", access=FeatureAccess.READ), True, - ), # MemberRef method + ), # MemberRef property access ( "_1c444", "token=0x600002B", capa.features.insn.Property("System.IO.FileInfo::Length"), True, - ), # MemberRef method + ), # MemberRef property access ( "_1c444", "token=0x6000081", capa.features.insn.API("System.Diagnostics.Process::Start"), True, - ), # MemberRef method + ), # MemberRef property access ( "_1c444", "token=0x6000081", capa.features.insn.Property( "System.Diagnostics.ProcessStartInfo::UseShellExecute", access=FeatureAccess.WRITE - ), # MemberRef method + ), # MemberRef property access True, ), ( @@ -798,7 +805,7 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted( "token=0x6000081", capa.features.insn.Property( "System.Diagnostics.ProcessStartInfo::WorkingDirectory", access=FeatureAccess.WRITE - ), # MemberRef method + ), # MemberRef property access True, ), ( @@ -806,41 +813,96 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted( "token=0x6000081", capa.features.insn.Property( "System.Diagnostics.ProcessStartInfo::FileName", access=FeatureAccess.WRITE - ), # MemberRef method + ), # MemberRef property access True, ), ( "_1c444", "token=0x6000087", - capa.features.insn.Property("Sockets.MySocket::reConnectionDelay", access=FeatureAccess.WRITE), # Field + capa.features.insn.Property( + "Sockets.MySocket::reConnectionDelay", access=FeatureAccess.WRITE + ), # Field property access True, ), ( "_1c444", "token=0x600008A", - capa.features.insn.Property("Sockets.MySocket::isConnected", access=FeatureAccess.WRITE), # Field + capa.features.insn.Property( + "Sockets.MySocket::isConnected", access=FeatureAccess.WRITE + ), # Field property access True, ), ( "_1c444", "token=0x600008A", - capa.features.insn.Property("Sockets.MySocket::onConnected", access=FeatureAccess.READ), # Field + capa.features.common.Class("Sockets.MySocket"), # Field property access + True, + ), + ( + "_1c444", + "token=0x600008A", + capa.features.common.Namespace("Sockets"), # Field property access + True, + ), + ( + "_1c444", + "token=0x600008A", + capa.features.insn.Property( + "Sockets.MySocket::onConnected", access=FeatureAccess.READ + ), # Field property access True, ), ( "_0953c", "token=0x6000004", - capa.features.insn.Property("System.Diagnostics.Debugger::IsAttached", access=FeatureAccess.READ), + capa.features.insn.Property( + "System.Diagnostics.Debugger::IsAttached", access=FeatureAccess.READ + ), # MemberRef property access True, - ), # MemberRef method + ), + ( + "_0953c", + "token=0x6000004", + capa.features.common.Class("System.Diagnostics.Debugger"), # MemberRef property access + True, + ), + ( + "_0953c", + "token=0x6000004", + capa.features.common.Namespace("System.Diagnostics"), # MemberRef property access + True, + ), ( "_692f", "token=0x6000006", capa.features.insn.Property( "System.Management.Automation.PowerShell::Streams", access=FeatureAccess.READ - ), # MemberRef method + ), # MemberRef property access False, ), + ( + "_387f15", + "token=0x600009E", + capa.features.insn.Property( + "Modulo.IqQzcRDvSTulAhyLtZHqyeYGgaXGbuLwhxUKXYmhtnOmgpnPJDTSIPhYPpnE::geoplugin_countryCode", + access=FeatureAccess.READ, + ), # MethodDef property access + True, + ), + ( + "_387f15", + "token=0x600009E", + capa.features.common.Class( + "Modulo.IqQzcRDvSTulAhyLtZHqyeYGgaXGbuLwhxUKXYmhtnOmgpnPJDTSIPhYPpnE" + ), # MethodDef property access + True, + ), + ( + "_387f15", + "token=0x600009E", + capa.features.common.Namespace("Modulo"), # MethodDef property access + True, + ), ( "_039a6", "token=0x6000007", From 2b85af0f8820e6b7a3730e58be9a1b5fbe6ceec6 Mon Sep 17 00:00:00 2001 From: Moritz Date: Mon, 19 Dec 2022 22:53:16 +0100 Subject: [PATCH 57/74] explorer: update and remove outdated documentation (#1238) --- capa/ida/plugin/README.md | 74 ++++++++++++++------------------------- 1 file changed, 26 insertions(+), 48 deletions(-) diff --git a/capa/ida/plugin/README.md b/capa/ida/plugin/README.md index 4364d4aa..0a8883ef 100644 --- a/capa/ida/plugin/README.md +++ b/capa/ida/plugin/README.md @@ -32,52 +32,6 @@ For more information on the FLARE team's open-source framework, capa, check out ## Getting Started -### Requirements - -capa explorer supports Python versions >= 3.7.x and the following IDA Pro versions: - -* IDA 7.4 -* IDA 7.5 -* IDA 7.6 (caveat below) -* IDA 7.7 - -capa explorer is however limited to the Python versions supported by your IDA installation (which may not include all Python versions >= 3.7.x). Based on our testing the following matrix shows the Python versions supported -by each supported IDA version: - -| | IDA 7.4 | IDA 7.5 | IDA 7.6 | -| --- | --- | --- | --- | -| Python 3.7.x | Yes | Yes | Yes | -| Python 3.8.x | Partial (see below) | Yes | Yes | -| Python 3.9.x | No | Partial (see below) | Yes | - -To use capa explorer with IDA 7.4 and Python 3.8.x you must follow the instructions provided by hex-rays [here](https://hex-rays.com/blog/ida-7-4-and-python-3-8/). - -To use capa explorer with IDA 7.5 and Python 3.9.x you must follow the instructions provided by hex-rays [here](https://hex-rays.com/blog/python-3-9-support-for-ida-7-5/). - -If you encounter issues with your specific setup, please open a new [Issue](https://github.com/mandiant/capa/issues). - -#### IDA 7.6 caveat: IDA 7.6sp1 or patch required - -As described [here](https://www.hex-rays.com/blog/ida-7-6-empty-qtreeview-qtreewidget/): - -> A rather nasty issue evaded our testing and found its way into IDA 7.6: using the PyQt5 modules that are shipped with IDA, QTreeView (or QTreeWidget) instances will always fail to display contents. - -Therefore, in order to use capa under IDA 7.6 you need the [Service Pack 1 for IDA 7.6](https://www.hex-rays.com/products/ida/news/7_6sp1). Alternatively, you can download and install the fix corresponding to your IDA installation, replacing the original QtWidgets DLL with the one contained in the .zip file (links to Hex-Rays): - - - - Windows: [pyqt5_qtwidgets_win](https://www.hex-rays.com/wp-content/uploads/2021/04/pyqt5_qtwidgets_win.zip) - - Linux: [pyqt5_qtwidgets_linux](https://www.hex-rays.com/wp-content/uploads/2021/04/pyqt5_qtwidgets_linux.zip) - - MacOS (Intel): [pyqt5_qtwidgets_mac_x64](https://www.hex-rays.com/wp-content/uploads/2021/04/pyqt5_qtwidgets_mac_x64.zip) - - MacOS (AppleSilicon): [pyqt5_qtwidgets_mac_arm](https://www.hex-rays.com/wp-content/uploads/2021/04/pyqt5_qtwidgets_mac_arm.zip) - - -### Supported File Types - -capa explorer is limited to the file types supported by capa, which include: - -* Windows x86 (32- and 64-bit) PE and ELF files -* Windows x86 (32- and 64-bit) shellcode - ### Installation You can install capa explorer using the following steps: @@ -86,8 +40,16 @@ You can install capa explorer using the following steps: ``` $ pip install flare-capa ``` -3. Download the [standard collection of capa rules](https://github.com/mandiant/capa-rules) (capa explorer needs capa rules to analyze a database) -4. Copy [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ida/plugin/capa_explorer.py) to your IDA plugins directory +2. Download the [standard collection of capa rules](https://github.com/mandiant/capa-rules) (capa explorer needs capa rules to analyze a database) +3. Copy [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ida/plugin/capa_explorer.py) to your IDA plugins directory + +### Supported File Types + +capa explorer is limited to the file types supported by capa, which include: + +* Windows x86 (32- and 64-bit) PE files +* Windows x86 (32- and 64-bit) shellcode +* ELF files on various operating systems ### Usage @@ -122,6 +84,22 @@ downloading and using the [standard collection of capa rules](https://github.com * Directly edit rule text and metadata fields using the `Preview` pane * Change the default rule author and default rule scope displayed in the `Preview` pane by clicking `Settings` +### Requirements + +capa explorer supports Python versions >= 3.7.x and IDA Pro versions >= 7.4. The following IDA Pro versions have been tested: + +* IDA 7.4 +* IDA 7.5 +* IDA 7.6 Service Pack 1 +* IDA 7.7 +* IDA 8.0 +* IDA 8.1 +* IDA 8.2 + +capa explorer is however limited to the Python versions supported by your IDA installation (which may not include all Python versions >= 3.7.x). + +If you encounter issues with your specific setup, please open a new [Issue](https://github.com/mandiant/capa/issues). + ## Development capa explorer is packaged with capa so you will need to install capa locally for development. You can install capa locally by following the steps outlined in `Method 3: Inspecting the capa source code` of the [capa From 4ece47c64cae012c8aa0f200ea3174d4d31c190e Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Mon, 19 Dec 2022 15:06:16 -0700 Subject: [PATCH 58/74] dotnet: emit calls to/from MethodDef methods (#1236) * dotnet: emit calls to/from MethodDef methods * dotnet: update function.py copyright header --- CHANGELOG.md | 1 + capa/features/extractors/dnfile/extractor.py | 38 ++++++++++++--- capa/features/extractors/dnfile/function.py | 50 ++++++++++++++++++++ tests/fixtures.py | 9 +++- 4 files changed, 91 insertions(+), 7 deletions(-) create mode 100644 capa/features/extractors/dnfile/function.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 11f2a995..0525c0e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ - dotnet: emit API features for objects created via the newobj instruction #1186 @mike-hunhoff - dotnet: emit API features for generic methods #1231 @mike-hunhoff - Python 3.11 support #1192 @williballenthin +- dotnet: emit calls to/from MethodDef methods #1236 @mike-hunhoff ### Breaking Changes diff --git a/capa/features/extractors/dnfile/extractor.py b/capa/features/extractors/dnfile/extractor.py index b5f707c9..036952e0 100644 --- a/capa/features/extractors/dnfile/extractor.py +++ b/capa/features/extractors/dnfile/extractor.py @@ -8,13 +8,15 @@ from __future__ import annotations -from typing import List, Tuple, Iterator +from typing import Dict, List, Tuple, Iterator, Optional import dnfile +from dncil.cil.opcode import OpCodes import capa.features.extractors import capa.features.extractors.dnfile.file import capa.features.extractors.dnfile.insn +import capa.features.extractors.dnfile.function from capa.features.common import Feature from capa.features.address import NO_ADDRESS, Address, DNTokenAddress, DNTokenOffsetAddress from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor @@ -41,12 +43,36 @@ class DnfileFeatureExtractor(FeatureExtractor): yield from capa.features.extractors.dnfile.file.extract_features(self.pe) def get_functions(self) -> Iterator[FunctionHandle]: - for token, f in get_dotnet_managed_method_bodies(self.pe): - yield FunctionHandle(address=DNTokenAddress(token), inner=f, ctx={"pe": self.pe}) + # create a method lookup table + methods: Dict[Address, FunctionHandle] = {} + for (token, method) in get_dotnet_managed_method_bodies(self.pe): + fh: FunctionHandle = FunctionHandle( + address=DNTokenAddress(token), inner=method, ctx={"pe": self.pe, "calls_from": set(), "calls_to": set()} + ) - def extract_function_features(self, f): - # TODO - yield from [] + # method tokens should be unique + assert fh.address not in methods.keys() + methods[fh.address] = fh + + # calculate unique calls to/from each method + for fh in methods.values(): + for insn in fh.inner.instructions: + if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli, OpCodes.Newobj): + continue + + # record call to destination method; note: we only consider MethodDef methods for destinations + dest: Optional[FunctionHandle] = methods.get(DNTokenAddress(insn.operand.value), None) + if dest is not None: + dest.ctx["calls_to"].add(fh.address) + + # record call from source method; note: we record all unique calls from a MethodDef method, not just + # those calls to other MethodDef methods e.g. calls to imported MemberRef methods + fh.ctx["calls_from"].add(DNTokenAddress(insn.operand.value)) + + yield from methods.values() + + def extract_function_features(self, fh) -> Iterator[Tuple[Feature, Address]]: + yield from capa.features.extractors.dnfile.function.extract_features(fh) def get_basic_blocks(self, f) -> Iterator[BBHandle]: # each dotnet method is considered 1 basic block diff --git a/capa/features/extractors/dnfile/function.py b/capa/features/extractors/dnfile/function.py new file mode 100644 index 00000000..0d698719 --- /dev/null +++ b/capa/features/extractors/dnfile/function.py @@ -0,0 +1,50 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +from __future__ import annotations + +import logging +from typing import Tuple, Iterator + +from capa.features.common import Feature, Characteristic +from capa.features.address import Address +from capa.features.extractors.base_extractor import FunctionHandle + +logger = logging.getLogger(__name__) + + +def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]: + """extract callers to a function""" + for dest in fh.ctx["calls_to"]: + yield Characteristic("calls to"), dest + + +def extract_function_calls_from(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]: + """extract callers from a function""" + for src in fh.ctx["calls_from"]: + yield Characteristic("calls from"), src + + +def extract_recursive_call(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]: + """extract recursive function call""" + if fh.address in fh.ctx["calls_to"]: + yield Characteristic("recursive call"), fh.address + + +def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]: + """extract loop indicators from a function""" + raise NotImplementedError() + + +def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: + for func_handler in FUNCTION_HANDLERS: + for (feature, addr) in func_handler(fh): + yield feature, addr + + +FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_calls_from, extract_recursive_call) diff --git a/tests/fixtures.py b/tests/fixtures.py index 6deb0e24..83e63cd5 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -753,6 +753,10 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted( ("_1c444", "function=0x1F68", capa.features.insn.API("GetWindowDC"), True), ("_1c444", "function=0x1F68", capa.features.insn.API("user32.GetWindowDC"), True), ("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True), + ("_1c444", "token=0x600001D", capa.features.common.Characteristic("calls to"), True), + ("_1c444", "token=0x6000018", capa.features.common.Characteristic("calls to"), False), + ("_1c444", "token=0x600001D", capa.features.common.Characteristic("calls from"), True), + ("_1c444", "token=0x600000F", capa.features.common.Characteristic("calls from"), False), ("_1c444", "function=0x1F68", capa.features.insn.Number(0x0), True), ("_1c444", "function=0x1F68", capa.features.insn.Number(0x1), False), ("_692f", "token=0x6000004", capa.features.insn.API("System.Linq.Enumerable::First"), True), # generic method @@ -950,7 +954,10 @@ FEATURE_COUNT_TESTS = [ ] -FEATURE_COUNT_TESTS_DOTNET = [] # type: ignore +FEATURE_COUNT_TESTS_DOTNET = [ + ("_1c444", "token=0x600001D", capa.features.common.Characteristic("calls to"), 1), + ("_1c444", "token=0x600001D", capa.features.common.Characteristic("calls from"), 9), +] def do_test_feature_presence(get_extractor, sample, scope, feature, expected): From 50490e6a93c635f4c1dc2c7e5e3f380444b3e26c Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Tue, 20 Dec 2022 13:29:29 -0700 Subject: [PATCH 59/74] dotnet: emit namespace/class features for ldvirtftn/ldftn instructions (#1241) * dotnet: emit namespace/class features for ldvirtftn/ldftn instructions * dotnet: add unit tests for ldftn/ldvirtftn namespace/class features --- CHANGELOG.md | 1 + capa/features/extractors/dnfile/extractor.py | 13 ++++++++++--- capa/features/extractors/dnfile/insn.py | 15 +++++++++++---- tests/fixtures.py | 2 ++ 4 files changed, 24 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0525c0e4..2985c720 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - dotnet: emit API features for generic methods #1231 @mike-hunhoff - Python 3.11 support #1192 @williballenthin - dotnet: emit calls to/from MethodDef methods #1236 @mike-hunhoff +- dotnet: emit namespace/class features for ldvirtftn/ldftn instructions #1241 @mike-hunhoff ### Breaking Changes diff --git a/capa/features/extractors/dnfile/extractor.py b/capa/features/extractors/dnfile/extractor.py index 036952e0..3adb4947 100644 --- a/capa/features/extractors/dnfile/extractor.py +++ b/capa/features/extractors/dnfile/extractor.py @@ -57,17 +57,24 @@ class DnfileFeatureExtractor(FeatureExtractor): # calculate unique calls to/from each method for fh in methods.values(): for insn in fh.inner.instructions: - if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli, OpCodes.Newobj): + if insn.opcode not in ( + OpCodes.Call, + OpCodes.Callvirt, + OpCodes.Jmp, + OpCodes.Newobj, + ): continue + address: DNTokenAddress = DNTokenAddress(insn.operand.value) + # record call to destination method; note: we only consider MethodDef methods for destinations - dest: Optional[FunctionHandle] = methods.get(DNTokenAddress(insn.operand.value), None) + dest: Optional[FunctionHandle] = methods.get(address, None) if dest is not None: dest.ctx["calls_to"].add(fh.address) # record call from source method; note: we record all unique calls from a MethodDef method, not just # those calls to other MethodDef methods e.g. calls to imported MemberRef methods - fh.ctx["calls_from"].add(DNTokenAddress(insn.operand.value)) + fh.ctx["calls_from"].add(address) yield from methods.values() diff --git a/capa/features/extractors/dnfile/insn.py b/capa/features/extractors/dnfile/insn.py index 341a6505..2e8b7b73 100644 --- a/capa/features/extractors/dnfile/insn.py +++ b/capa/features/extractors/dnfile/insn.py @@ -96,7 +96,12 @@ def get_callee(ctx: Dict, token: Token) -> Union[DnType, DnUnmanagedMethod, None def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: """parse instruction API features""" - if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli, OpCodes.Newobj): + if ih.inner.opcode not in ( + OpCodes.Call, + OpCodes.Callvirt, + OpCodes.Jmp, + OpCodes.Newobj, + ): return callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand) @@ -116,7 +121,7 @@ def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> It name: Optional[str] = None access: Optional[str] = None - if ih.inner.opcode in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): + if ih.inner.opcode in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp): # property access via MethodDef or MemberRef callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand) if isinstance(callee, DnType): @@ -150,7 +155,8 @@ def extract_insn_class_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Itera OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, - OpCodes.Calli, + OpCodes.Ldvirtftn, + OpCodes.Ldftn, OpCodes.Newobj, ): # method call - includes managed methods (MethodDef, TypeRef) and properties (MethodSemantics, TypeRef) @@ -178,7 +184,8 @@ def extract_insn_namespace_features(fh: FunctionHandle, bh, ih: InsnHandle) -> I OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, - OpCodes.Calli, + OpCodes.Ldvirtftn, + OpCodes.Ldftn, OpCodes.Newobj, ): # method call - includes managed methods (MethodDef, TypeRef) and properties (MethodSemantics, TypeRef) diff --git a/tests/fixtures.py b/tests/fixtures.py index 83e63cd5..05373d17 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -762,6 +762,8 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted( ("_692f", "token=0x6000004", capa.features.insn.API("System.Linq.Enumerable::First"), True), # generic method ("_692f", "token=0x6000004", capa.features.common.Namespace("System.Linq"), True), # generic method ("_692f", "token=0x6000004", capa.features.common.Class("System.Linq.Enumerable"), True), # generic method + ("_1c444", "token=0x6000020", capa.features.common.Namespace("Reqss"), True), # ldftn + ("_1c444", "token=0x6000020", capa.features.common.Class("Reqss.Reqss"), True), # ldftn ( "_1c444", "function=0x1F59, bb=0x1F59, insn=0x1F5B", From 3b95ed0b5a445ac455507fb896c853f15c048cae Mon Sep 17 00:00:00 2001 From: mr-tz Date: Wed, 21 Dec 2022 16:03:05 +0100 Subject: [PATCH 60/74] simplified rule release guidance --- doc/installation.md | 52 +++++++++++++++++++++++++-------------------- doc/release.md | 15 ++++++------- doc/rules.md | 41 ++++++----------------------------- 3 files changed, 41 insertions(+), 67 deletions(-) diff --git a/doc/installation.md b/doc/installation.md index 0e455c10..04414062 100644 --- a/doc/installation.md +++ b/doc/installation.md @@ -6,13 +6,11 @@ If you simply want to use capa, use the standalone binaries we host on GitHub: h We use PyInstaller to create these packages. -The capa [README](../README.md#download) also links to nightly builds of standalone binaries from the latest development branch. - ### Linux Standalone installation The Linux Standalone binary has been built using GLIB 2.26. -Consequently it works when using GLIB >= 2.26. -This requirement is satisfied by default in most newer distribution such as Ubuntu >= 18, Debian >= 10, openSUSE >= 15.1 and CentOS >= 8. +Consequently, it works when using GLIB >= 2.26. +This requirement is satisfied by default in newer distribution such as Ubuntu >= 18, Debian >= 10, openSUSE >= 15.1 and CentOS >= 8. But the binary may not work in older distributions. ### MacOS Standalone installation @@ -24,24 +22,27 @@ By default, on MacOS Catalina or greater, Gatekeeper will block execution of the ## Method 2: Using capa as a Python library To install capa as a Python library use `pip` to fetch the `flare-capa` module. -#### *Note*: +### 1. Install capa module +Use `pip` to install the capa module to your local Python environment. This fetches the library code to your computer but does not keep editable source files around for you to hack on. If you'd like to edit the source files, see below. `$ pip install flare-capa` + +#### *Note on capa rules and library identification signatures* This method is appropriate for integrating capa in an existing project. -This technique doesn't pull the default rule set, so you should check it out separately from [capa-rules](https://github.com/mandiant/capa-rules/) and pass the directory to the entrypoint using `-r` or set the rules path in the IDA Pro plugin: +This technique doesn't pull the default rule set. You can obtain rule releases from [capa-rules](https://github.com/mandiant/capa-rules/releases) and pass the directory to the entrypoint using `-r`. In the IDA Pro plugin you need to configure the rules directory path once. ```console -$ git clone https://github.com/mandiant/capa-rules.git -b v3 /local/path/to/rules -$ capa -r /local/path/to/rules suspicious.exe +$ wget https://github.com/mandiant/capa-rules/archive/refs/tags/v4.0.0.zip +$ unzip v4.0.0.zip +$ capa -r /path/to/capa-rules suspicious.exe ``` This technique also doesn't set up the default library identification [signatures](https://github.com/mandiant/capa/tree/master/sigs). You can pass the signature directory using the `-s` argument. For example, to run capa with both a rule path and a signature path: +```console +$ capa -s /path/to/capa-sigs suspicious.exe +``` - capa -r /path/to/capa-rules -s /path/to/capa-sigs suspicious.exe Alternatively, see Method 3 below. -### 1. Install capa module -Use `pip` to install the capa module to your local Python environment. This fetches the library code to your computer but does not keep editable source files around for you to hack on. If you'd like to edit the source files, see below. `$ pip install flare-capa` - ### 2. Use capa You can now import the `capa` module from a Python script or use the IDA Pro plugins from the `capa/ida` directory. For more information please see the [usage](usage.md) documentation. @@ -49,18 +50,20 @@ You can now import the `capa` module from a Python script or use the IDA Pro plu If you'd like to review and modify the capa source code, you'll need to check it out from GitHub and install it locally. By following these instructions, you'll maintain a local directory of source code that you can modify and run easily. ### 1. Check out source code -Next, clone the capa git repository. +Clone the capa git repository. We use submodules to separate [code](https://github.com/mandiant/capa), [rules](https://github.com/mandiant/capa-rules), and [test data](https://github.com/mandiant/capa-testfiles). + To clone everything use the `--recurse-submodules` option: -- CAUTION: The capa testfiles repository contains many malware samples. If you pull down everything using this method, you may want to install to a directory that won't trigger your anti-virus software. +- CAUTION: The capa testfiles repository contains many malware samples. If you pull down everything using this method, you may want to install to a directory that is ignored by your anti-virus software. - `$ git clone --recurse-submodules https://github.com/mandiant/capa.git /local/path/to/src` (HTTPS) - `$ git clone --recurse-submodules git@github.com:mandiant/capa.git /local/path/to/src` (SSH) -To only get the source code and our provided rules (common), follow these steps: +To only get the source code and our provided rules (a more common use-case), follow these steps: - clone repository - `$ git clone https://github.com/mandiant/capa.git /local/path/to/src` (HTTPS) - `$ git clone git@github.com:mandiant/capa.git /local/path/to/src` (SSH) - `$ cd /local/path/to/src` +- initialize the rules submodule and pull rules - `$ git submodule update --init rules` ### 2. Install the local source code @@ -76,8 +79,7 @@ You'll find that the `capa.exe` (Windows) or `capa` (Linux/MacOS) executables in For development, we recommend to use [venv](https://docs.python.org/3/tutorial/venv.html). It allows you to create a virtual environment: a self-contained directory tree that contains a Python installation for a particular version of Python, plus a number of additional packages. This approach avoids conflicts between the requirements of different applications on your computer. It also ensures that you don't overlook to add a new requirement to `setup.up` using a library already installed on your system. -To create an environment (in the parent directory, to avoid commiting it by accident or messing with the linters), run: -`$ python3 -m venv ../capa-env` +To create an environment (in the parent directory, to avoid commiting it by accident or messing with the linters), run: `$ python3 -m venv ../capa-env` To activate `capa-env` in Linux or MacOS, run: `$ source ../capa-env/bin/activate` @@ -90,8 +92,8 @@ For more details about creating and using virtual environments, check out the [v ##### Install development dependencies We use the following tools to ensure consistent code style and formatting: - - [black](https://github.com/psf/black) code formatter, with `-l 120` - - [isort 5](https://pypi.org/project/isort/) code formatter, with `--profile black --length-sort --line-width 120` + - [black](https://github.com/psf/black) code formatter + - [isort 5](https://pypi.org/project/isort/) code formatter - [dos2unix](https://linux.die.net/man/1/dos2unix) for UNIX-style LF newlines - [capafmt](https://github.com/mandiant/capa/blob/master/scripts/capafmt.py) rule formatter @@ -104,7 +106,7 @@ You can run it with the argument `no_tests` to skip the tests and only run the c ##### Setup hooks [optional] -If you plan to contribute to capa, you may want to setup the hooks. +If you plan to contribute to capa, you may want to setup the provided hooks. Run `scripts/setup-hooks.sh` to set the following hooks up: - The `pre-commit` hook runs checks before every `git commit`. It runs `scripts/ci.sh no_tests` aborting the commit if there are code style or rule linter offenses you need to fix. @@ -112,13 +114,17 @@ Run `scripts/setup-hooks.sh` to set the following hooks up: It runs `scripts/ci.sh` aborting the push if there are code style or rule linter offenses or if the tests fail. This way you can ensure everything is alright before sending a pull request. -You can skip the checks by using the `--no-verify` git option. +You can skip the checks by using the `-n`/`--no-verify` git option. ### 3. Compile binary using PyInstaller -We compile capa standalone binaries using PyInstaller. To reproduce the build process check out the source code as described above and follow these steps. +We compile capa standalone binaries using PyInstaller. To reproduce the build process check out the source code as described above and follow the following steps. #### Install PyInstaller: -`$ pip install pyinstaller` (Python 3) +`$ pip install pyinstaller` + +Or install capa with build dependencies: + +`$ pip install -e /local/path/to/src[build]` #### Run Pyinstaller `$ pyinstaller .github/pyinstaller/pyinstaller.spec` diff --git a/doc/release.md b/doc/release.md index cd26fd84..ec950e88 100644 --- a/doc/release.md +++ b/doc/release.md @@ -3,7 +3,7 @@ - [ ] Ensure all [milestoned issues/PRs](https://github.com/mandiant/capa/milestones) are addressed, or reassign to a new milestone. - [ ] Add the `dont merge` label to all PRs that are close to be ready to merge (or merge them if they are ready) in [capa](https://github.com/mandiant/capa/pulls) and [capa-rules](https://github.com/mandiant/capa-rules/pulls). - [ ] Ensure the [CI workflow succeeds in master](https://github.com/mandiant/capa/actions/workflows/tests.yml?query=branch%3Amaster). -- [ ] Ensure that `python scripts/lint.py rules/ --thorough` succeeds (only `missing examples` offenses are allowed in the nursery). +- [ ] Ensure that `python scripts/lint.py rules/ --thorough` succeeds (only `missing examples` offenses are allowed in the nursery). - [ ] Review changes - capa https://github.com/mandiant/capa/compare/\...master - capa-rules https://github.com/mandiant/capa-rules/compare/\\...master @@ -37,13 +37,10 @@ - [ ] Update [capa/version.py](https://github.com/mandiant/capa/blob/master/capa/version.py) - [ ] Create a PR with the updated [CHANGELOG.md](https://github.com/mandiant/capa/blob/master/CHANGELOG.md) and [capa/version.py](https://github.com/mandiant/capa/blob/master/capa/version.py). Copy this checklist in the PR description. - [ ] After PR review, merge the PR and [create the release in GH](https://github.com/mandiant/capa/releases/new) using text from the [CHANGELOG.md](https://github.com/mandiant/capa/blob/master/CHANGELOG.md). -- [ ] Verify GH actions [upload artifacts](https://github.com/mandiant/capa/releases), [publish to PyPI](https://pypi.org/project/flare-capa) and [create a tag in capa rules](https://github.com/mandiant/capa-rules/tags) upon completion. -- [ ] Manually update capa rules major version rule branch - ```commandline - [capa/rules] $ git pull master - [capa/rules] $ git checkout v3 # create if new major version: git checkout -b vX - [capa/rules] $ git merge master - [capa/rules] $ git push origin v3 - ``` +- Verify GH actions + - [ ] [upload artifacts](https://github.com/mandiant/capa/releases) + - [ ] [publish to PyPI](https://pypi.org/project/flare-capa) + - [ ] [create tag in capa rules](https://github.com/mandiant/capa-rules/tags) + - [ ] [create release in capa rules](https://github.com/mandiant/capa-rules/releases) - [ ] [Spread the word](https://twitter.com) - [ ] Update internal service diff --git a/doc/rules.md b/doc/rules.md index be68e00b..170379e0 100644 --- a/doc/rules.md +++ b/doc/rules.md @@ -1,6 +1,5 @@ ### rules - capa uses a collection of rules to identify capabilities within a program. The [github.com/mandiant/capa-rules](https://github.com/mandiant/capa-rules) repository contains hundreds of standard library rules that are distributed with capa. @@ -12,8 +11,8 @@ $ capa suspicious.exe However, you may want to modify the rules for a variety of reasons: - - develop new rules to find behaviors, and/or - - tweak existing rules to reduce false positives, and/or + - develop new rules to find behaviors, + - tweak existing rules to reduce false positives, - collect a private selection of rules not shared publicly. Or, you may want to use capa as a Python library within another application. @@ -21,22 +20,18 @@ Or, you may want to use capa as a Python library within another application. In these scenarios, you must provide the rule set to capa as a directory on your file system. Do this using the `-r`/`--rules` parameter: ```console -$ capa --rules /local/path/to/rules suspicious.exe +$ capa --rules /local/path/to/rules suspicious.exe ``` -You can collect the standard set of rules in two ways: +You can download the standard set of rules as ZIP or TGZ archives from the [capa-rules release page](https://github.com/mandiant/capa-rules/releases). - - [download from the Github releases page](#download-release-archive), or - - [clone from Github](#clone-with-git). - -Note that you must use match the rules major version with the capa major version, -i.e., use `v1` rules with `v1` of capa. +Note that you must use match the rules major version with the capa major version, i.e., use `v1` rules with `v1` of capa. This is so that new versions of capa can update rule syntax, such as by adding new fields and logic. Otherwise, using rules with a mismatched version of capa may lead to errors like: ``` -$ capa --rules /path/to/mismatched/rules suspicious.exe +$ capa --rules /path/to/mismatched/rules suspicious.exe ERROR:lint:invalid rule: injection.yml: invalid rule: unexpected statement: instruction ``` @@ -46,27 +41,3 @@ You can check the version of capa you're currently using like this: $ capa --version capa 3.0.3 ``` - -#### download release archive - -The releases page is [here](https://github.com/mandiant/capa-rules/tags/). -Find the most recent release corresponding to your major version of capa and download the ZIP archive. -Here are some quick links: - - v1: [v1](https://github.com/mandiant/capa-rules/releases/tag/v1) - - v2: [v2](https://github.com/mandiant/capa-rules/releases/tag/v2) - - v3: [v3](https://github.com/mandiant/capa-rules/releases/tag/v3) - -#### clone with git - -To fetch with git, clone the appropriate branch like this: - -```console -$ git clone https://github.com/mandiant/capa-rules.git -b v3 /local/path/to/rules -``` - -Note that the branch name (`v3` in the example above) must match the major version of capa you're using. - - - [v1](https://github.com/mandiant/capa-rules/tree/v1): `v1` - - [v2](https://github.com/mandiant/capa-rules/tree/v2): `v2` - - [v3](https://github.com/mandiant/capa-rules/tree/v3): `v3` - From b68be0c2cec6f45d6fedd118bde85ebe55c04f45 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Wed, 21 Dec 2022 15:59:29 -0700 Subject: [PATCH 61/74] dotnet: emit namespace/class features for type references (#1242) * dotnet: emit namespace/class features for type references * dotnet: pre-compute .NET token caches --- CHANGELOG.md | 1 + capa/features/extractors/dnfile/extractor.py | 56 +++++++- capa/features/extractors/dnfile/helpers.py | 80 +++-------- capa/features/extractors/dnfile/insn.py | 144 +++++++------------ capa/features/extractors/dnfile/types.py | 75 ++++++++++ tests/fixtures.py | 6 + 6 files changed, 206 insertions(+), 156 deletions(-) create mode 100644 capa/features/extractors/dnfile/types.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 2985c720..e8f5f4e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - Python 3.11 support #1192 @williballenthin - dotnet: emit calls to/from MethodDef methods #1236 @mike-hunhoff - dotnet: emit namespace/class features for ldvirtftn/ldftn instructions #1241 @mike-hunhoff +- dotnet: emit namespace/class features for type references #1242 @mike-hunhoff ### Breaking Changes diff --git a/capa/features/extractors/dnfile/extractor.py b/capa/features/extractors/dnfile/extractor.py index 3adb4947..bd4b9c9e 100644 --- a/capa/features/extractors/dnfile/extractor.py +++ b/capa/features/extractors/dnfile/extractor.py @@ -8,7 +8,8 @@ from __future__ import annotations -from typing import Dict, List, Tuple, Iterator, Optional +from enum import Enum +from typing import Dict, List, Tuple, Union, Iterator, Optional import dnfile from dncil.cil.opcode import OpCodes @@ -19,8 +20,51 @@ import capa.features.extractors.dnfile.insn import capa.features.extractors.dnfile.function from capa.features.common import Feature from capa.features.address import NO_ADDRESS, Address, DNTokenAddress, DNTokenOffsetAddress +from capa.features.extractors.dnfile.types import DnType, DnUnmanagedMethod from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor -from capa.features.extractors.dnfile.helpers import get_dotnet_managed_method_bodies +from capa.features.extractors.dnfile.helpers import ( + get_dotnet_types, + get_dotnet_fields, + get_dotnet_managed_imports, + get_dotnet_managed_methods, + get_dotnet_unmanaged_imports, + get_dotnet_managed_method_bodies, +) + + +class DnFileFeatureExtractorCache: + def __init__(self, pe: dnfile.dnPE): + self.imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {} + self.native_imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {} + self.methods: Dict[int, Union[DnType, DnUnmanagedMethod]] = {} + self.fields: Dict[int, Union[DnType, DnUnmanagedMethod]] = {} + self.types: Dict[int, Union[DnType, DnUnmanagedMethod]] = {} + + for import_ in get_dotnet_managed_imports(pe): + self.imports[import_.token] = import_ + for native_import in get_dotnet_unmanaged_imports(pe): + self.native_imports[native_import.token] = native_import + for method in get_dotnet_managed_methods(pe): + self.methods[method.token] = method + for field in get_dotnet_fields(pe): + self.fields[field.token] = field + for type_ in get_dotnet_types(pe): + self.types[type_.token] = type_ + + def get_import(self, token: int) -> Optional[Union[DnType, DnUnmanagedMethod]]: + return self.imports.get(token, None) + + def get_native_import(self, token: int) -> Optional[Union[DnType, DnUnmanagedMethod]]: + return self.native_imports.get(token, None) + + def get_method(self, token: int) -> Optional[Union[DnType, DnUnmanagedMethod]]: + return self.methods.get(token, None) + + def get_field(self, token: int) -> Optional[Union[DnType, DnUnmanagedMethod]]: + return self.fields.get(token, None) + + def get_type(self, token: int) -> Optional[Union[DnType, DnUnmanagedMethod]]: + return self.types.get(token, None) class DnfileFeatureExtractor(FeatureExtractor): @@ -28,6 +72,10 @@ class DnfileFeatureExtractor(FeatureExtractor): super().__init__() self.pe: dnfile.dnPE = dnfile.dnPE(path) + # pre-compute .NET token lookup tables; each .NET method has access to this cache for feature extraction + # most relevant at instruction scope + self.token_cache: DnFileFeatureExtractorCache = DnFileFeatureExtractorCache(self.pe) + # pre-compute these because we'll yield them at *every* scope. self.global_features: List[Tuple[Feature, Address]] = [] self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe)) @@ -47,7 +95,9 @@ class DnfileFeatureExtractor(FeatureExtractor): methods: Dict[Address, FunctionHandle] = {} for (token, method) in get_dotnet_managed_method_bodies(self.pe): fh: FunctionHandle = FunctionHandle( - address=DNTokenAddress(token), inner=method, ctx={"pe": self.pe, "calls_from": set(), "calls_to": set()} + address=DNTokenAddress(token), + inner=method, + ctx={"pe": self.pe, "calls_from": set(), "calls_to": set(), "cache": self.token_cache}, ) # method tokens should be unique diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 086ad3f8..d79d802b 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -18,6 +18,7 @@ from dncil.clr.token import Token, StringToken, InvalidToken from dncil.cil.body.reader import CilMethodBodyReaderBase from capa.features.common import FeatureAccess +from capa.features.extractors.dnfile.types import DnType, DnUnmanagedMethod logger = logging.getLogger(__name__) @@ -40,70 +41,6 @@ class DnfileMethodBodyReader(CilMethodBodyReaderBase): return self.offset -class DnType(object): - def __init__(self, token: int, class_: str, namespace: str = "", member: str = "", access: Optional[str] = None): - self.token = token - # property access - self.access = access - self.namespace = namespace - self.class_ = class_ - if member == ".ctor": - member = "ctor" - if member == ".cctor": - member = "cctor" - self.member = member - - def __hash__(self): - return hash((self.token, self.access, self.namespace, self.class_, self.member)) - - def __eq__(self, other): - return ( - self.token == other.token - and self.access == other.access - and self.namespace == other.namespace - and self.class_ == other.class_ - and self.member == other.member - ) - - def __str__(self): - return DnType.format_name(self.class_, namespace=self.namespace, member=self.member) - - def __repr__(self): - return str(self) - - @staticmethod - def format_name(class_: str, namespace: str = "", member: str = ""): - # like File::OpenRead - name: str = f"{class_}::{member}" if member else class_ - if namespace: - # like System.IO.File::OpenRead - name = f"{namespace}.{name}" - return name - - -class DnUnmanagedMethod: - def __init__(self, token: int, module: str, method: str): - self.token: int = token - self.module: str = module - self.method: str = method - - def __hash__(self): - return hash((self.token, self.module, self.method)) - - def __eq__(self, other): - return self.token == other.token and self.module == other.module and self.method == other.method - - def __str__(self): - return DnUnmanagedMethod.format_name(self.module, self.method) - - def __repr__(self): - return str(self) - - @staticmethod - def format_name(module, method): - return f"{module}.{method}" - - def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Union[dnfile.base.MDTableRow, InvalidToken, str]: """map generic token to string or table row""" assert pe.net is not None @@ -363,6 +300,21 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod] yield DnUnmanagedMethod(token, module, method) +def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: + """get .NET types from TypeDef and TypeRef tables""" + for (rid, typedef) in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): + assert isinstance(typedef, dnfile.mdtable.TypeDefRow) + + typedef_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) + yield DnType(typedef_token, typedef.TypeName, namespace=typedef.TypeNamespace) + + for (rid, typeref) in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): + assert isinstance(typeref, dnfile.mdtable.TypeRefRow) + + typeref_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) + yield DnType(typeref_token, typeref.TypeName, namespace=typeref.TypeNamespace) + + def calculate_dotnet_token_value(table: int, rid: int) -> int: return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK) diff --git a/capa/features/extractors/dnfile/insn.py b/capa/features/extractors/dnfile/insn.py index 2e8b7b73..fb95d5cd 100644 --- a/capa/features/extractors/dnfile/insn.py +++ b/capa/features/extractors/dnfile/insn.py @@ -9,7 +9,10 @@ from __future__ import annotations import logging -from typing import Dict, Tuple, Union, Iterator, Optional +from typing import TYPE_CHECKING, Any, Dict, Tuple, Union, Iterator, Optional + +if TYPE_CHECKING: + from capa.features.extractors.dnfile.extractor import DnFileFeatureExtractorCache import dnfile from dncil.clr.token import Token, StringToken, InvalidToken @@ -19,78 +22,42 @@ import capa.features.extractors.helpers from capa.features.insn import API, Number, Property from capa.features.common import Class, String, Feature, Namespace, FeatureAccess, Characteristic from capa.features.address import Address +from capa.features.extractors.dnfile.types import DnType, DnUnmanagedMethod from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle from capa.features.extractors.dnfile.helpers import ( - DnType, - DnUnmanagedMethod, - get_dotnet_fields, resolve_dotnet_token, read_dotnet_user_string, - get_dotnet_managed_imports, - get_dotnet_managed_methods, calculate_dotnet_token_value, - get_dotnet_unmanaged_imports, ) logger = logging.getLogger(__name__) -def get_managed_imports(ctx: Dict) -> Dict: - if "managed_imports_cache" not in ctx: - ctx["managed_imports_cache"] = {} - for method in get_dotnet_managed_imports(ctx["pe"]): - ctx["managed_imports_cache"][method.token] = method - return ctx["managed_imports_cache"] - - -def get_unmanaged_imports(ctx: Dict) -> Dict: - if "unmanaged_imports_cache" not in ctx: - ctx["unmanaged_imports_cache"] = {} - for imp in get_dotnet_unmanaged_imports(ctx["pe"]): - ctx["unmanaged_imports_cache"][imp.token] = imp - return ctx["unmanaged_imports_cache"] - - -def get_methods(ctx: Dict) -> Dict: - if "methods_cache" not in ctx: - ctx["methods_cache"] = {} - for method in get_dotnet_managed_methods(ctx["pe"]): - ctx["methods_cache"][method.token] = method - return ctx["methods_cache"] - - -def get_fields(ctx: Dict) -> Dict: - if "fields_cache" not in ctx: - ctx["fields_cache"] = {} - for field in get_dotnet_fields(ctx["pe"]): - ctx["fields_cache"][field.token] = field - return ctx["fields_cache"] - - -def get_callee(ctx: Dict, token: Token) -> Union[DnType, DnUnmanagedMethod, None]: +def get_callee( + pe: dnfile.dnPE, cache: DnFileFeatureExtractorCache, token: Token +) -> Optional[Union[DnType, DnUnmanagedMethod]]: """map .NET token to un/managed (generic) method""" - row: Union[dnfile.base.MDTableRow, InvalidToken, str] = resolve_dotnet_token(ctx["pe"], token) - if not isinstance(row, (dnfile.mdtable.MethodDefRow, dnfile.mdtable.MemberRefRow, dnfile.mdtable.MethodSpecRow)): - # we only handle MethodDef (internal), MemberRef (external), and MethodSpec (generic) - return None - token_: int - if isinstance(row, dnfile.mdtable.MethodSpecRow): + if token.table == dnfile.mdtable.MethodSpec.number: # map MethodSpec to MethodDef or MemberRef + row: Union[dnfile.base.MDTableRow, InvalidToken, str] = resolve_dotnet_token(pe, token) + assert isinstance(row, dnfile.mdtable.MethodSpecRow) + if row.Method.table is None: logger.debug("MethodSpec[0x%X] Method table is None", token.rid) return None + token_ = calculate_dotnet_token_value(row.Method.table.number, row.Method.row_index) else: token_ = token.value - callee: Union[DnType, DnUnmanagedMethod, None] = get_managed_imports(ctx).get(token_, None) + callee: Optional[Union[DnType, DnUnmanagedMethod]] = cache.get_import(token_) if callee is None: # we must check unmanaged imports before managed methods because we map forwarded managed methods # to their unmanaged imports; we prefer a forwarded managed method be mapped to its unmanaged import for analysis - callee = get_unmanaged_imports(ctx).get(token_, None) + callee = cache.get_native_import(token_) if callee is None: - callee = get_methods(ctx).get(token_, None) + callee = cache.get_method(token_) return callee @@ -104,7 +71,7 @@ def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterato ): return - callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand) + callee: Optional[Union[DnType, DnUnmanagedMethod]] = get_callee(fh.ctx["pe"], fh.ctx["cache"], ih.inner.operand) if isinstance(callee, DnType): # ignore methods used to access properties if callee.access is None: @@ -123,7 +90,7 @@ def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> It if ih.inner.opcode in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp): # property access via MethodDef or MemberRef - callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand) + callee: Optional[Union[DnType, DnUnmanagedMethod]] = get_callee(fh.ctx["pe"], fh.ctx["cache"], ih.inner.operand) if isinstance(callee, DnType): if callee.access is not None: name = str(callee) @@ -131,14 +98,14 @@ def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> It elif ih.inner.opcode in (OpCodes.Ldfld, OpCodes.Ldflda, OpCodes.Ldsfld, OpCodes.Ldsflda): # property read via Field - read_field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None) + read_field: Optional[Union[DnType, DnUnmanagedMethod]] = fh.ctx["cache"].get_field(ih.inner.operand.value) if read_field is not None: name = str(read_field) access = FeatureAccess.READ elif ih.inner.opcode in (OpCodes.Stfld, OpCodes.Stsfld): # property write via Field - write_field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None) + write_field: Optional[Union[DnType, DnUnmanagedMethod]] = fh.ctx["cache"].get_field(ih.inner.operand.value) if write_field is not None: name = str(write_field) access = FeatureAccess.WRITE @@ -149,8 +116,12 @@ def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> It yield Property(name), ih.address -def extract_insn_class_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Class, Address]]: - """parse instruction class features""" +def extract_insn_namespace_class_features( + fh: FunctionHandle, bh, ih: InsnHandle +) -> Iterator[Tuple[Union[Namespace, Class], Address]]: + """parse instruction namespace and class features""" + type_: Optional[Union[DnType, DnUnmanagedMethod]] = None + if ih.inner.opcode in ( OpCodes.Call, OpCodes.Callvirt, @@ -160,9 +131,7 @@ def extract_insn_class_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Itera OpCodes.Newobj, ): # method call - includes managed methods (MethodDef, TypeRef) and properties (MethodSemantics, TypeRef) - callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand) - if isinstance(callee, DnType): - yield Class(DnType.format_name(callee.class_, namespace=callee.namespace)), ih.address + type_ = get_callee(fh.ctx["pe"], fh.ctx["cache"], ih.inner.operand) elif ih.inner.opcode in ( OpCodes.Ldfld, @@ -173,37 +142,35 @@ def extract_insn_class_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Itera OpCodes.Stsfld, ): # field access - field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None) - if isinstance(field, DnType): - yield Class(DnType.format_name(field.class_, namespace=field.namespace)), ih.address - - -def extract_insn_namespace_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Namespace, Address]]: - """parse instruction namespace features""" - if ih.inner.opcode in ( - OpCodes.Call, - OpCodes.Callvirt, - OpCodes.Jmp, - OpCodes.Ldvirtftn, - OpCodes.Ldftn, - OpCodes.Newobj, - ): - # method call - includes managed methods (MethodDef, TypeRef) and properties (MethodSemantics, TypeRef) - callee: Union[DnType, DnUnmanagedMethod, None] = get_callee(fh.ctx, ih.inner.operand) - if isinstance(callee, DnType) and callee.namespace is not None: - yield Namespace(callee.namespace), ih.address + type_ = fh.ctx["cache"].get_field(ih.inner.operand.value) + # ECMA 335 VI.C.4.10 elif ih.inner.opcode in ( - OpCodes.Ldfld, - OpCodes.Ldflda, - OpCodes.Ldsfld, - OpCodes.Ldsflda, - OpCodes.Stfld, - OpCodes.Stsfld, + OpCodes.Initobj, + OpCodes.Box, + OpCodes.Castclass, + OpCodes.Cpobj, + OpCodes.Isinst, + OpCodes.Ldelem, + OpCodes.Ldelema, + OpCodes.Ldobj, + OpCodes.Mkrefany, + OpCodes.Newarr, + OpCodes.Refanyval, + OpCodes.Sizeof, + OpCodes.Stobj, + OpCodes.Unbox, + OpCodes.Constrained, + OpCodes.Stelem, + OpCodes.Unbox_Any, ): - field: Optional[DnType] = get_fields(fh.ctx).get(ih.inner.operand.value, None) - if isinstance(field, DnType) and field.namespace is not None: - yield Namespace(field.namespace), ih.address + # type access + type_ = fh.ctx["cache"].get_type(ih.inner.operand.value) + + if isinstance(type_, DnType): + yield Class(DnType.format_name(type_.class_, namespace=type_.namespace)), ih.address + if type_.namespace: + yield Namespace(type_.namespace), ih.address def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: @@ -230,7 +197,7 @@ def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iter def extract_unmanaged_call_characteristic_features( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle ) -> Iterator[Tuple[Characteristic, Address]]: - if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli): + if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp): return row: Union[str, InvalidToken, dnfile.base.MDTableRow] = resolve_dotnet_token(fh.ctx["pe"], ih.inner.operand) @@ -254,7 +221,6 @@ INSTRUCTION_HANDLERS = ( extract_insn_property_features, extract_insn_number_features, extract_insn_string_features, - extract_insn_namespace_features, - extract_insn_class_features, + extract_insn_namespace_class_features, extract_unmanaged_call_characteristic_features, ) diff --git a/capa/features/extractors/dnfile/types.py b/capa/features/extractors/dnfile/types.py new file mode 100644 index 00000000..822b5d67 --- /dev/null +++ b/capa/features/extractors/dnfile/types.py @@ -0,0 +1,75 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +from enum import Enum +from typing import Union, Optional + + +class DnType(object): + def __init__(self, token: int, class_: str, namespace: str = "", member: str = "", access: Optional[str] = None): + self.token: int = token + self.access: Optional[str] = access + self.namespace: str = namespace + self.class_: str = class_ + + if member == ".ctor": + member = "ctor" + if member == ".cctor": + member = "cctor" + + self.member: str = member + + def __hash__(self): + return hash((self.token, self.access, self.namespace, self.class_, self.member)) + + def __eq__(self, other): + return ( + self.token == other.token + and self.access == other.access + and self.namespace == other.namespace + and self.class_ == other.class_ + and self.member == other.member + ) + + def __str__(self): + return DnType.format_name(self.class_, namespace=self.namespace, member=self.member) + + def __repr__(self): + return str(self) + + @staticmethod + def format_name(class_: str, namespace: str = "", member: str = ""): + # like File::OpenRead + name: str = f"{class_}::{member}" if member else class_ + if namespace: + # like System.IO.File::OpenRead + name = f"{namespace}.{name}" + return name + + +class DnUnmanagedMethod: + def __init__(self, token: int, module: str, method: str): + self.token: int = token + self.module: str = module + self.method: str = method + + def __hash__(self): + return hash((self.token, self.module, self.method)) + + def __eq__(self, other): + return self.token == other.token and self.module == other.module and self.method == other.method + + def __str__(self): + return DnUnmanagedMethod.format_name(self.module, self.method) + + def __repr__(self): + return str(self) + + @staticmethod + def format_name(module, method): + return f"{module}.{method}" diff --git a/tests/fixtures.py b/tests/fixtures.py index 05373d17..ce0effc4 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -760,6 +760,12 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted( ("_1c444", "function=0x1F68", capa.features.insn.Number(0x0), True), ("_1c444", "function=0x1F68", capa.features.insn.Number(0x1), False), ("_692f", "token=0x6000004", capa.features.insn.API("System.Linq.Enumerable::First"), True), # generic method + ( + "_692f", + "token=0x6000004", + capa.features.insn.Property("System.Linq.Enumerable::First"), + False, + ), # generic method ("_692f", "token=0x6000004", capa.features.common.Namespace("System.Linq"), True), # generic method ("_692f", "token=0x6000004", capa.features.common.Class("System.Linq.Enumerable"), True), # generic method ("_1c444", "token=0x6000020", capa.features.common.Namespace("Reqss"), True), # ldftn From ee90fc876102aba65a3ba82ad3ac6511d21d4519 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Fri, 23 Dec 2022 18:30:25 +0100 Subject: [PATCH 62/74] update rule compatibility doc --- capa/ida/plugin/form.py | 8 +++----- capa/main.py | 8 ++++---- capa/version.py | 8 -------- 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index eb205ae6..e2bde4e8 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -21,6 +21,7 @@ from PyQt5 import QtGui, QtCore, QtWidgets import capa.main import capa.rules import capa.engine +import capa.version import capa.ida.helpers import capa.render.json import capa.features.common @@ -701,16 +702,13 @@ class CapaExplorerForm(idaapi.PluginForm): ) logger.error("Failed to load rules from %s (error: %s).", settings.user[CAPA_SETTINGS_RULE_PATH], e) logger.error( - "Make sure your file directory contains properly formatted capa rules. You can download the standard collection of capa rules from https://github.com/mandiant/capa-rules." + "Make sure your file directory contains properly formatted capa rules. You can download the standard " + "collection of capa rules from https://github.com/mandiant/capa-rules/releases." ) logger.error( "Please ensure you're using the rules that correspond to your major version of capa (%s)", capa.version.get_major_version(), ) - logger.error( - "You can check out these rules with the following command:\n %s", - capa.version.get_rules_checkout_command(), - ) logger.error( "Or, for more details, see the rule set documentation here: %s", "https://github.com/mandiant/capa/blob/master/doc/rules.md", diff --git a/capa/main.py b/capa/main.py index c973b61d..2641e7fa 100644 --- a/capa/main.py +++ b/capa/main.py @@ -1034,12 +1034,12 @@ def main(argv=None): except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e: logger.error("%s", str(e)) logger.error( - "Please ensure you're using the rules that correspond to your major version of capa (%s)", - capa.version.get_major_version(), + "Make sure your file directory contains properly formatted capa rules. You can download the standard " + "collection of capa rules from https://github.com/mandiant/capa-rules/releases." ) logger.error( - "You can check out these rules with the following command:\n %s", - capa.version.get_rules_checkout_command(), + "Please ensure you're using the rules that correspond to your major version of capa (%s)", + capa.version.get_major_version(), ) logger.error( "Or, for more details, see the rule set documentation here: %s", diff --git a/capa/version.py b/capa/version.py index 740d7f28..af021c70 100644 --- a/capa/version.py +++ b/capa/version.py @@ -3,11 +3,3 @@ __version__ = "4.0.1" def get_major_version(): return int(__version__.partition(".")[0]) - - -def get_rules_branch(): - return f"v{get_major_version()}" - - -def get_rules_checkout_command(): - return f"$ git clone https://github.com/mandiant/capa-rules.git -b {get_rules_branch()} /local/path/to/rules" From b12865f1e5f69ac2209ba8512d74494edddafdee Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Dec 2022 14:04:30 +0000 Subject: [PATCH 63/74] build(deps-dev): bump isort from 5.11.3 to 5.11.4 Bumps [isort](https://github.com/pycqa/isort) from 5.11.3 to 5.11.4. - [Release notes](https://github.com/pycqa/isort/releases) - [Changelog](https://github.com/PyCQA/isort/blob/main/CHANGELOG.md) - [Commits](https://github.com/pycqa/isort/compare/5.11.3...5.11.4) --- updated-dependencies: - dependency-name: isort dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5e329451..9095bcdc 100644 --- a/setup.py +++ b/setup.py @@ -75,7 +75,7 @@ setuptools.setup( "pytest-cov==4.0.0", "pycodestyle==2.10.0", "black==22.12.0", - "isort==5.11.3", + "isort==5.11.4", "mypy==0.991", "psutil==5.9.2", "stix2==3.0.1", From 5f772001087b3e3749f48e8b08d9b94bf968a559 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 29 Dec 2022 07:08:10 -0700 Subject: [PATCH 64/74] explorer: assume 32-bit displacement for offsets (#1250) * explorer: assume 32-bit displacement for offsets --- CHANGELOG.md | 1 + capa/features/extractors/ida/helpers.py | 3 ++- capa/features/extractors/ida/insn.py | 6 +++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e8f5f4e2..9a3f20e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -64,6 +64,7 @@ - fix: accept only plaintext pasted content #1194 @williballenthin - fix: UnboundLocalError #1217 @williballenthin - extractor: add support for COFF files and extern functions #1223 @mike-hunhoff +- fix: assume 32-bit displacement for offsets #1250 @mike-hunhoff ### Development diff --git a/capa/features/extractors/ida/helpers.py b/capa/features/extractors/ida/helpers.py index 5c997f69..3b411654 100644 --- a/capa/features/extractors/ida/helpers.py +++ b/capa/features/extractors/ida/helpers.py @@ -221,7 +221,8 @@ def get_op_phrase_info(op: idaapi.op_t) -> Dict: return {} scale = 1 << ((op.specflag2 & 0xC0) >> 6) - offset = op.addr + # IDA ea_t may be 32- or 64-bit; we assume displacement can only be 32-bit + offset = op.addr & 0xFFFFFFFF if op.specflag1 == 0: index = None diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index da9e1387..b160cbc6 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -213,7 +213,11 @@ def extract_insn_offset_features( continue p_info = capa.features.extractors.ida.helpers.get_op_phrase_info(op) - op_off = p_info.get("offset", 0) + + op_off = p_info.get("offset", None) + if op_off is None: + continue + if idaapi.is_mapped(op_off): # Ignore: # mov esi, dword_1005B148[esi] From 1dd5a8dbf2f6293a072f94f33c5db99d1369f49b Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Mon, 2 Jan 2023 17:31:53 +0000 Subject: [PATCH 65/74] Sync capa rules submodule --- CHANGELOG.md | 3 ++- README.md | 2 +- rules | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a3f20e3..e1f7606f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ ### Breaking Changes -### New Rules (32) +### New Rules (33) - collection/use-dotnet-library-sharpclipboard @johnk3r - data-manipulation/encryption/aes/use-dotnet-library-encryptdecryptutils @johnk3r @@ -47,6 +47,7 @@ - nursery/encrypt-data-using-aes william.ballenthin@mandiant.com Ivan Kwiatkowski (@JusticeRage) - host-interaction/uac/bypass/bypass-uac-via-rpc david.cannings@pwc.com david@edeca.net - nursery/check-for-vm-using-instruction-vpcext richard.weiss@mandiant.com +- nursery/get-windows-directory-from-kuser_shared_data david.cannings@pwc.com - ### Bug Fixes diff --git a/README.md b/README.md index d054a7ec..acda0e9c 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-733-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-734-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index 5ba70c97..f8bc46cd 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 5ba70c97d22dd59efcf29a128557e64213f7ace8 +Subproject commit f8bc46cd1b9ff3494caeb7234b05cfc4de50113f From 565b002bfeea91e7feefd9e9d94f97bcbc1de7e8 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Mon, 2 Jan 2023 17:33:19 +0000 Subject: [PATCH 66/74] Sync capa rules submodule --- CHANGELOG.md | 5 ++++- README.md | 2 +- rules | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e1f7606f..70275804 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ ### Breaking Changes -### New Rules (33) +### New Rules (36) - collection/use-dotnet-library-sharpclipboard @johnk3r - data-manipulation/encryption/aes/use-dotnet-library-encryptdecryptutils @johnk3r @@ -48,6 +48,9 @@ - host-interaction/uac/bypass/bypass-uac-via-rpc david.cannings@pwc.com david@edeca.net - nursery/check-for-vm-using-instruction-vpcext richard.weiss@mandiant.com - nursery/get-windows-directory-from-kuser_shared_data david.cannings@pwc.com +- nursery/encrypt-data-using-openssl-dsa Ana06 +- nursery/encrypt-data-using-openssl-ecdsa Ana06 +- nursery/encrypt-data-using-openssl-rsa Ana06 - ### Bug Fixes diff --git a/README.md b/README.md index acda0e9c..2fda15dd 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-734-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-737-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index f8bc46cd..cc9e69b6 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit f8bc46cd1b9ff3494caeb7234b05cfc4de50113f +Subproject commit cc9e69b615640469b34cae12f3569db19e7bea7f From 5f39083df6a49f7e0c44614d504b9440733c6dc0 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 3 Jan 2023 10:17:36 +0000 Subject: [PATCH 67/74] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index da6fed53..e2782b30 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit da6fed53395be292ffec57a2732f0f6105c03487 +Subproject commit e2782b30f644a8bf652763047319ccad4476d227 From 966e38babfe83443c7a0ba83ce5e01d371b24e18 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 3 Jan 2023 10:19:17 +0000 Subject: [PATCH 68/74] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index cc9e69b6..63b63f85 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit cc9e69b615640469b34cae12f3569db19e7bea7f +Subproject commit 63b63f856d75a603213610f569dc8f58e14e29dd From 2219139605bb1dc0ccfb1b1f4fde4872724c498b Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 3 Jan 2023 10:20:18 +0000 Subject: [PATCH 69/74] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index e2782b30..4784dee3 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit e2782b30f644a8bf652763047319ccad4476d227 +Subproject commit 4784dee36e23a68c98bab09ec5b21cc7d16e84ff From c0a8a9128142664e080774dec449db3a984b0ebe Mon Sep 17 00:00:00 2001 From: mr-tz Date: Mon, 2 Jan 2023 19:14:52 +0100 Subject: [PATCH 70/74] update Actions --- .github/workflows/build.yml | 2 +- .github/workflows/publish.yml | 2 +- .github/workflows/tests.yml | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e793376c..52be6841 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -33,7 +33,7 @@ jobs: submodules: true # using Python 3.8 to support running across multiple operating systems including Windows 7 - name: Set up Python 3.8 - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: python-version: 3.8 - if: matrix.os == 'ubuntu-18.04' diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 338fc0a6..65278522 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -13,7 +13,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: '3.7' - name: Install dependencies diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4c25a31c..6678e0aa 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -28,7 +28,7 @@ jobs: - name: Checkout capa uses: actions/checkout@v3 - name: Set up Python 3.8 - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: python-version: "3.8" - name: Install dependencies @@ -50,7 +50,7 @@ jobs: with: submodules: recursive - name: Set up Python 3.8 - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: python-version: "3.8" - name: Install capa @@ -80,7 +80,7 @@ jobs: with: submodules: recursive - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install pyyaml From 974d79f2be35eb89becb2abbd845e251bd4e61bc Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 3 Jan 2023 10:42:41 +0000 Subject: [PATCH 71/74] Sync capa rules submodule --- CHANGELOG.md | 3 ++- README.md | 2 +- rules | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70275804..eeffe25a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ ### Breaking Changes -### New Rules (36) +### New Rules (37) - collection/use-dotnet-library-sharpclipboard @johnk3r - data-manipulation/encryption/aes/use-dotnet-library-encryptdecryptutils @johnk3r @@ -51,6 +51,7 @@ - nursery/encrypt-data-using-openssl-dsa Ana06 - nursery/encrypt-data-using-openssl-ecdsa Ana06 - nursery/encrypt-data-using-openssl-rsa Ana06 +- runtime/dotnet/execute-via-dotnet-startup-hook william.ballenthin@mandiant.com - ### Bug Fixes diff --git a/README.md b/README.md index 2fda15dd..b2e38cab 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-737-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-738-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index 63b63f85..ef9e0ebc 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 63b63f856d75a603213610f569dc8f58e14e29dd +Subproject commit ef9e0ebc673e6e40d8a7f3ef4a35327e10feae74 From 4f75b3d9f6697d1f45f7c7e3e1a5bb303fe7e420 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 3 Jan 2023 10:46:49 +0000 Subject: [PATCH 72/74] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index ef9e0ebc..25910846 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit ef9e0ebc673e6e40d8a7f3ef4a35327e10feae74 +Subproject commit 25910846089949e3f39ea7d8572e98e2ccf6cc7e From c959506ae9a470a6e436959e3f55bf65c680656f Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 3 Jan 2023 14:58:40 +0000 Subject: [PATCH 73/74] Sync capa rules submodule --- CHANGELOG.md | 8 +++++++- README.md | 2 +- rules | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eeffe25a..8a355615 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ ### Breaking Changes -### New Rules (37) +### New Rules (43) - collection/use-dotnet-library-sharpclipboard @johnk3r - data-manipulation/encryption/aes/use-dotnet-library-encryptdecryptutils @johnk3r @@ -52,6 +52,12 @@ - nursery/encrypt-data-using-openssl-ecdsa Ana06 - nursery/encrypt-data-using-openssl-rsa Ana06 - runtime/dotnet/execute-via-dotnet-startup-hook william.ballenthin@mandiant.com +- host-interaction/console/manipulate-console-buffer william.ballenthin@mandiant.com michael.hunhoff@mandiant.com +- nursery/access-wmi-data-in-dotnet michael.hunhoff@mandiant.com +- nursery/allocate-unmanaged-memory-via-dotnet michael.hunhoff@mandiant.com +- nursery/generate-random-bytes-in-dotnet michael.hunhoff@mandiant.com +- nursery/manipulate-console-window michael.hunhoff@mandiant.com +- nursery/obfuscated-with-koivm michael.hunhoff@mandiant.com - ### Bug Fixes diff --git a/README.md b/README.md index b2e38cab..ccdf0db9 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-738-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-742-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git a/rules b/rules index 25910846..519b87e4 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 25910846089949e3f39ea7d8572e98e2ccf6cc7e +Subproject commit 519b87e44639b463b7b95518e1a998fac8701ec4 From 90591811dfb39a32f860cc5f56d62f84a7c001e2 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Tue, 3 Jan 2023 09:09:05 -0700 Subject: [PATCH 74/74] explorer: improve rules error messaging and documentation (#1249) --- CHANGELOG.md | 1 + capa/ida/plugin/README.md | 24 +++++++------ capa/ida/plugin/form.py | 72 +++++++++++++++++++++++++++------------ 3 files changed, 66 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a355615..85102707 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -75,6 +75,7 @@ - fix: accept only plaintext pasted content #1194 @williballenthin - fix: UnboundLocalError #1217 @williballenthin - extractor: add support for COFF files and extern functions #1223 @mike-hunhoff +- doc: improve error messaging and documentation related to capa rule set #1249 @mike-hunhoff - fix: assume 32-bit displacement for offsets #1250 @mike-hunhoff ### Development diff --git a/capa/ida/plugin/README.md b/capa/ida/plugin/README.md index 0a8883ef..23002b97 100644 --- a/capa/ida/plugin/README.md +++ b/capa/ida/plugin/README.md @@ -1,8 +1,8 @@ ![capa explorer](../../../.github/capa-explorer-logo.png) capa explorer is an IDAPython plugin that integrates the FLARE team's open-source framework, capa, with IDA Pro. capa is a framework that uses a well-defined collection of rules to -identify capabilities in a program. You can run capa against a PE file or shellcode and it tells you what it thinks the program can do. For example, it might suggest that -the program is a backdoor, can install services, or relies on HTTP to communicate. capa explorer runs capa directly against your IDA Pro database (IDB) without requiring access +identify capabilities in a program. You can run capa against a PE file, ELF file, or shellcode and it tells you what it thinks the program can do. For example, it might suggest that +the program is a backdoor, can install services, or relies on HTTP to communicate. capa explorer runs capa analysis on your IDA Pro database (IDB) without needing access to the original binary file. Once a database has been analyzed, capa explorer helps you identify interesting areas of a program and build new capa rules using features extracted from your IDB. We love using capa explorer during malware analysis because it teaches us what parts of a program suggest a behavior. As we click on rows, capa explorer jumps directly @@ -21,10 +21,10 @@ We can use capa explorer to navigate our Disassembly view directly to the suspec Using the `Rule Information` and `Details` columns capa explorer shows us that the suspect function matched `self delete via COMSPEC environment variable` because it contains capa rule matches for `create process`, `get COMSPEC environment variable`, and `query environment variable`, references to the strings `COMSPEC`, ` > nul`, and `/c del `, and calls to the Windows API functions `GetEnvironmentVariableA` and `ShellExecuteEx`. -capa explorer also helps you build new capa rules. To start select the `Rule Generator` tab, navigate to a function in your Disassembly view, +capa explorer also helps you build and test new capa rules. To start, select the `Rule Generator` tab, navigate to a function in your Disassembly view, and click `Analyze`. capa explorer will extract features from the function and display them in the `Features` pane. You can add features listed in this pane to the `Editor` pane by either double-clicking a feature or using multi-select + right-click to add multiple features at once. The `Preview` and `Editor` panes help edit your rule. Use the `Preview` pane -to modify the rule text directly and the `Editor` pane to construct and rearrange your hierarchy of statements and features. When you finish a rule you can save it directly to a file by clicking `Save`. +to modify rule text directly and the `Editor` pane to construct and rearrange your hierarchy of statements and features. When you finish a rule you can save it directly to a file by clicking `Save`. ![](../../../doc/img/rulegen_expanded.png) @@ -36,11 +36,15 @@ For more information on the FLARE team's open-source framework, capa, check out You can install capa explorer using the following steps: -1. Install capa and its dependencies from PyPI for the Python interpreter used by your IDA installation: +1. Install capa and its dependencies from PyPI using the Python interpreter configured for your IDA installation: ``` $ pip install flare-capa ``` -2. Download the [standard collection of capa rules](https://github.com/mandiant/capa-rules) (capa explorer needs capa rules to analyze a database) +2. Download and extract the [official capa rules](https://github.com/mandiant/capa-rules/releases) that match the version of capa you have installed + 1. Use the following command to view the version of capa you have installed: + ```commandline + $ pip show flare-capa + ``` 3. Copy [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ida/plugin/capa_explorer.py) to your IDA plugins directory ### Supported File Types @@ -59,15 +63,15 @@ capa explorer is limited to the file types supported by capa, which include: 3. Select the `Program Analysis` tab 4. Click the `Analyze` button -When running capa explorer for the first time you are prompted to select a file directory containing capa rules. The plugin conveniently -remembers your selection for future runs; you can change this selection and other default settings by clicking `Settings`. We recommend -downloading and using the [standard collection of capa rules](https://github.com/mandiant/capa-rules) when getting started with the plugin. +The first time you run capa explorer you will be asked to specify a local directory containing capa rules to use for analysis. We recommend downloading and extracting the [official capa rules](https://github.com/mandiant/capa-rules/releases) that match +the version of capa you have installed (see installation instructions above for more details). capa explorer remembers your selection for future analysis which you +can update using the `Settings` button. #### Tips for Program Analysis * Start analysis by clicking the `Analyze` button * Reset the plugin user interface and remove highlighting from your Disassembly view by clicking the `Reset` button -* Change your capa rules directory and other default settings by clicking `Settings` +* Change your local capa rules directory and other default settings by clicking the `Settings` button * Hover your cursor over a rule match to view the source content of the rule * Double-click the `Address` column to navigate your Disassembly view to the address of the associated feature * Double-click a result in the `Rule Information` column to expand its children diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index e2bde4e8..065b09f4 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -49,6 +49,11 @@ CAPA_SETTINGS_RULE_PATH = "rule_path" CAPA_SETTINGS_RULEGEN_AUTHOR = "rulegen_author" CAPA_SETTINGS_RULEGEN_SCOPE = "rulegen_scope" + +CAPA_OFFICIAL_RULESET_URL = f"https://github.com/mandiant/capa-rules/releases/tag/v{capa.version.__version__}" +CAPA_RULESET_DOC_URL = "https://github.com/mandiant/capa/blob/master/doc/rules.md" + + from enum import IntFlag @@ -214,6 +219,12 @@ class CapaSettingsInputDialog(QtWidgets.QDialog): self.edit_rule_path = QLineEditClicked(settings.user.get(CAPA_SETTINGS_RULE_PATH, "")) self.edit_rule_author = QtWidgets.QLineEdit(settings.user.get(CAPA_SETTINGS_RULEGEN_AUTHOR, "")) self.edit_rule_scope = QtWidgets.QComboBox() + self.edit_rules_link = QtWidgets.QLabel() + + self.edit_rules_link.setText( + f'Download and extract official capa rules' + ) + self.edit_rules_link.setOpenExternalLinks(True) scopes = ("file", "function", "basic block") @@ -223,7 +234,8 @@ class CapaSettingsInputDialog(QtWidgets.QDialog): buttons = QtWidgets.QDialogButtonBox(QtWidgets.QDialogButtonBox.Ok | QtWidgets.QDialogButtonBox.Cancel, self) layout = QtWidgets.QFormLayout(self) - layout.addRow("capa rules path", self.edit_rule_path) + layout.addRow("capa rules", self.edit_rule_path) + layout.addRow("", self.edit_rules_link) layout.addRow("Default rule author", self.edit_rule_author) layout.addRow("Default rule scope", self.edit_rule_scope) @@ -614,7 +626,7 @@ class CapaExplorerForm(idaapi.PluginForm): """ if post: if idaapi.get_imagebase() != meta.get("prev_base", -1): - capa.ida.helpers.inform_user_ida_ui("Running capa analysis again after program rebase") + capa.ida.helpers.inform_user_ida_ui("Running capa analysis using new program base") self.slot_analyze() else: meta["prev_base"] = idaapi.get_imagebase() @@ -629,15 +641,36 @@ class CapaExplorerForm(idaapi.PluginForm): try: # resolve rules directory - check self and settings first, then ask user if not os.path.exists(settings.user.get(CAPA_SETTINGS_RULE_PATH, "")): - idaapi.info("Please select a file directory containing capa rules.") + # configure rules selection messagebox + rules_message = QtWidgets.QMessageBox() + rules_message.setIcon(QtWidgets.QMessageBox.Information) + rules_message.setWindowTitle("capa explorer") + rules_message.setText("You must specify a directory containing capa rules before running analysis.") + rules_message.setInformativeText( + "Click 'Ok' to specify a local directory of rules or you can download and extract the official " + f"rules from the URL listed in the details." + ) + rules_message.setDetailedText(f"{CAPA_OFFICIAL_RULESET_URL}") + rules_message.setStandardButtons(QtWidgets.QMessageBox.Ok | QtWidgets.QMessageBox.Cancel) + + # display rules selection messagebox, check user button selection + pressed = rules_message.exec_() + if pressed == QtWidgets.QMessageBox.Cancel: + raise UserCancelledError() + path = self.ask_user_directory() if not path: - logger.warning( - "You must select a file directory containing capa rules before analysis can be run. The standard collection of capa rules can be downloaded from https://github.com/mandiant/capa-rules." - ) - return False + raise UserCancelledError() + settings.user[CAPA_SETTINGS_RULE_PATH] = path + except UserCancelledError as e: + capa.ida.helpers.inform_user_ida_ui("Analysis requires capa rules") + logger.warning( + f"You must specify a directory containing capa rules before running analysis. Download and extract the official rules from {CAPA_OFFICIAL_RULESET_URL} (recommended)." + ) + return False except Exception as e: + capa.ida.helpers.inform_user_ida_ui("Failed to load capa rules") logger.error("Failed to load capa rules (error: %s).", e) return False @@ -700,19 +733,16 @@ class CapaExplorerForm(idaapi.PluginForm): capa.ida.helpers.inform_user_ida_ui( "Failed to load capa rules from %s" % settings.user[CAPA_SETTINGS_RULE_PATH] ) - logger.error("Failed to load rules from %s (error: %s).", settings.user[CAPA_SETTINGS_RULE_PATH], e) + + logger.error("Failed to load capa rules from %s (error: %s).", settings.user[CAPA_SETTINGS_RULE_PATH], e) logger.error( - "Make sure your file directory contains properly formatted capa rules. You can download the standard " - "collection of capa rules from https://github.com/mandiant/capa-rules/releases." - ) - logger.error( - "Please ensure you're using the rules that correspond to your major version of capa (%s)", - capa.version.get_major_version(), - ) - logger.error( - "Or, for more details, see the rule set documentation here: %s", - "https://github.com/mandiant/capa/blob/master/doc/rules.md", + "Make sure your file directory contains properly " + "formatted capa rules. You can download and extract the official rules from %s. " + "Or, for more details, see the rules documentation here: %s", + CAPA_OFFICIAL_RULESET_URL, + CAPA_RULESET_DOC_URL, ) + settings.user[CAPA_SETTINGS_RULE_PATH] = "" return False @@ -831,7 +861,7 @@ class CapaExplorerForm(idaapi.PluginForm): self.model_data.render_capa_doc(self.doc, self.view_show_results_by_function.isChecked()) self.set_view_status_label( - "capa rules directory: %s (%d rules)" % (settings.user[CAPA_SETTINGS_RULE_PATH], len(self.rules_cache)) + "capa rules: %s (%d rules)" % (settings.user[CAPA_SETTINGS_RULE_PATH], len(self.rules_cache)) ) except Exception as e: logger.error("Failed to render results (error: %s)", e, exc_info=True) @@ -876,7 +906,7 @@ class CapaExplorerForm(idaapi.PluginForm): if not self.load_capa_rules(): return False else: - logger.info('Using cached ruleset, click "Reset" to reload rules from disk.') + logger.info('Using cached capa rules, click "Reset" to load rules from disk.') assert self.rules_cache is not None assert self.ruleset_cache is not None @@ -1034,7 +1064,7 @@ class CapaExplorerForm(idaapi.PluginForm): self.view_rulegen_status_label.clear() if not is_analyze: - # clear rules and ruleset cache only if user clicked "Reset" + # clear rules and rule set cache only if user clicked "Reset" self.rules_cache = None self.ruleset_cache = None