mirror of https://github.com/mandiant/capa.git (synced 2025-12-22 07:10:29 -08:00)
resolve merge conflict

.github/workflows/build.yml (vendored), 9 lines changed:

@@ -15,7 +15,7 @@ jobs:
       fail-fast: true
       matrix:
         include:
-          - os: ubuntu-18.04
+          - os: ubuntu-20.04
             # use old linux so that the shared library versioning is more portable
             artifact_name: capa
             asset_name: linux
@@ -36,7 +36,7 @@ jobs:
         uses: actions/setup-python@d27e3f3d7c64b4bbf8e4abfb9b63b83e846e0435 # v4.5.0
         with:
           python-version: 3.8
-      - if: matrix.os == 'ubuntu-18.04'
+      - if: matrix.os == 'ubuntu-20.04'
         run: sudo apt-get install -y libyaml-dev
       - name: Upgrade pip, setuptools
         run: python -m pip install --upgrade pip setuptools
@@ -65,10 +65,7 @@ jobs:
       matrix:
         include:
           # OSs not already tested above
-          - os: ubuntu-18.04
-            artifact_name: capa
-            asset_name: linux
-          - os: ubuntu-20.04
+          - os: ubuntu-22.04
             artifact_name: capa
             asset_name: linux
           - os: windows-2022

.github/workflows/tests.yml (vendored), 3 lines changed:

@@ -76,6 +76,8 @@ jobs:
           python-version: "3.8"
         - os: ubuntu-20.04
           python-version: "3.9"
+        - os: ubuntu-20.04
+          python-version: "3.10"
     steps:
     - name: Checkout capa with submodules
       uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
@@ -96,6 +98,7 @@ jobs:
   binja-tests:
     name: Binary Ninja tests for ${{ matrix.python-version }} on ${{ matrix.os }}
     runs-on: ubuntu-20.04
+    needs: [code_style, rule_linter]
    strategy:
       fail-fast: false
       matrix:

CHANGELOG.md, 10 lines changed:

@@ -4,13 +4,14 @@

 ### New Features
 - add protobuf format for result documents #1219 @williballenthin @mr-tz
-
 - extractor: add Binary Ninja feature extractor @xusheng6
 - new cli flag `--os` to override auto-detected operating system for a sample @captainGeech42
+- Change colour/highlight to "cyan" instead of "blue" for easy noticing. #1384 @ggold7046
+- add new format to parse output json back to capa #1396 @ooprathamm

 ### Breaking Changes

-### New Rules (22)
+### New Rules (26)

 - persistence/scheduled-tasks/schedule-task-via-at joren485
 - data-manipulation/prng/generate-random-numbers-via-rtlgenrandom william.ballenthin@mandiant.com
@@ -34,9 +35,14 @@
 - nursery/set-web-proxy-in-dotnet michael.hunhoff@mandiant.com
 - nursery/check-for-windows-sandbox-via-subdirectory echernofsky@google.com
 - nursery/enumerate-pe-sections-in-dotnet @mr-tz
+- nursery/destroy-software-breakpoint-capability echernofsky@google.com
+- nursery/send-data-to-internet michael.hunhoff@mandiant.com
+- nursery/compiled-with-cx_freeze @mr-tz
+- nursery/contain-a-thread-local-storage-tls-section-in-dotnet michael.hunhoff@mandiant.com
 -

 ### Bug Fixes
+- extractor: removed '.dynsym' as the library name for ELF imports #1318 @stevemk14ebr
 - extractor: fix vivisect loop detection corner case #1310 @mr-tz
 - match: extend OS characteristic to match OS_ANY to all supported OSes #1324 @mike-hunhoff
 - extractor: fix IDA and vivisect string and bytes features overlap and tests #1327 #1336 @xusheng6

README.md:

@@ -2,7 +2,7 @@

 [](https://pypi.org/project/flare-capa)
 [](https://github.com/mandiant/capa/releases)
 [](https://github.com/mandiant/capa-rules)
 [](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
 [](https://github.com/mandiant/capa/releases)
 [](LICENSE.txt)

capa/features/common.py:

@@ -450,6 +450,7 @@ FORMAT_AUTO = "auto"
 FORMAT_SC32 = "sc32"
 FORMAT_SC64 = "sc64"
 FORMAT_FREEZE = "freeze"
+FORMAT_RESULT = "result"
 FORMAT_UNKNOWN = "unknown"


capa/features/extractors/binja/file.py:

@@ -103,7 +103,7 @@ def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:

         ordinal = sym.ordinal
         if ordinal != 0 and (lib_name != ""):
-            ordinal_name = "#%d" % (ordinal)
+            ordinal_name = f"#{ordinal}"
             for name in capa.features.extractors.helpers.generate_symbols(lib_name, ordinal_name):
                 yield Import(name), addr

@@ -147,7 +147,7 @@ def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
         # no file type to return when processing a binary file, but we want to continue processing
         return
     else:
-        raise NotImplementedError("unexpected file format: %d" % view_type)
+        raise NotImplementedError(f"unexpected file format: {view_type}")


 def extract_features(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:

capa/features/extractors/binja/find_binja_api.py:

@@ -26,7 +26,7 @@ if spec is not None:


 def find_binja_path() -> str:
-    raw_output = subprocess.check_output(["python", "-c", "%s" % code]).decode("ascii").strip()
+    raw_output = subprocess.check_output(["python", "-c", code]).decode("ascii").strip()
     return bytes.fromhex(raw_output).decode("utf8")

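Note: the fix above drops a pointless "%s" interpolation; subprocess passes each argv element verbatim, with no shell quoting involved. A minimal standalone sketch of the same pattern (the snippet and path are hypothetical, and `python` must be on PATH):

    import subprocess

    # the child prints a hex-encoded path, as find_binja_path() expects
    code = "print(bytes('/opt/binaryninja', 'utf8').hex())"
    raw_output = subprocess.check_output(["python", "-c", code]).decode("ascii").strip()
    print(bytes.fromhex(raw_output).decode("utf8"))  # -> /opt/binaryninja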

capa/features/extractors/common.py:

@@ -12,11 +12,14 @@ import capa.features.extractors.pefile
 import capa.features.extractors.strings
 from capa.features.common import (
     OS,
+    OS_ANY,
     OS_AUTO,
+    ARCH_ANY,
     FORMAT_PE,
     FORMAT_ELF,
     OS_WINDOWS,
     FORMAT_FREEZE,
+    FORMAT_RESULT,
     Arch,
     Format,
     String,
@@ -27,6 +30,11 @@ from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress

 logger = logging.getLogger(__name__)

+# match strings for formats
+MATCH_PE = b"MZ"
+MATCH_ELF = b"\x7fELF"
+MATCH_RESULT = b'{"meta":'
+

 def extract_file_strings(buf, **kwargs) -> Iterator[Tuple[String, Address]]:
     """
@@ -40,12 +48,14 @@ def extract_file_strings(buf, **kwargs) -> Iterator[Tuple[String, Address]]:


 def extract_format(buf) -> Iterator[Tuple[Feature, Address]]:
-    if buf.startswith(b"MZ"):
+    if buf.startswith(MATCH_PE):
         yield Format(FORMAT_PE), NO_ADDRESS
-    elif buf.startswith(b"\x7fELF"):
+    elif buf.startswith(MATCH_ELF):
         yield Format(FORMAT_ELF), NO_ADDRESS
     elif is_freeze(buf):
         yield Format(FORMAT_FREEZE), NO_ADDRESS
+    elif buf.startswith(MATCH_RESULT):
+        yield Format(FORMAT_RESULT), NO_ADDRESS
     else:
         # we likely end up here:
         #  1. handling a file format (e.g. macho)
@@ -56,10 +66,13 @@ def extract_format(buf) -> Iterator[Tuple[Feature, Address]]:


 def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
-    if buf.startswith(b"MZ"):
+    if buf.startswith(MATCH_PE):
         yield from capa.features.extractors.pefile.extract_file_arch(pe=pefile.PE(data=buf))

-    elif buf.startswith(b"\x7fELF"):
+    elif buf.startswith(MATCH_RESULT):
+        yield Arch(ARCH_ANY), NO_ADDRESS
+
+    elif buf.startswith(MATCH_ELF):
         with contextlib.closing(io.BytesIO(buf)) as f:
             arch = capa.features.extractors.elf.detect_elf_arch(f)

@@ -88,9 +101,11 @@ def extract_os(buf, os=OS_AUTO) -> Iterator[Tuple[Feature, Address]]:
     if os != OS_AUTO:
         yield OS(os), NO_ADDRESS

-    if buf.startswith(b"MZ"):
+    if buf.startswith(MATCH_PE):
         yield OS(OS_WINDOWS), NO_ADDRESS
-    elif buf.startswith(b"\x7fELF"):
+    elif buf.startswith(MATCH_RESULT):
+        yield OS(OS_ANY), NO_ADDRESS
+    elif buf.startswith(MATCH_ELF):
         with contextlib.closing(io.BytesIO(buf)) as f:
             os = capa.features.extractors.elf.detect_elf_os(f)

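Note: pulling the magic bytes into named constants lets the same prefixes drive extract_format, extract_arch, and extract_os. A self-contained sketch of the sniffing logic (constant values copied from the diff above):

    MATCH_PE = b"MZ"
    MATCH_ELF = b"\x7fELF"
    MATCH_RESULT = b'{"meta":'

    def sniff_format(buf: bytes) -> str:
        # compare the buffer prefix against each known magic value
        if buf.startswith(MATCH_PE):
            return "pe"
        elif buf.startswith(MATCH_ELF):
            return "elf"
        elif buf.startswith(MATCH_RESULT):
            return "result"
        return "unknown"

    assert sniff_format(b"MZ\x90\x00") == "pe"
    assert sniff_format(b'{"meta": {}}') == "result"
    # pretty-printed JSON does not begin with the exact prefix b'{"meta":'
    assert sniff_format(b'{\n  "meta": {}}') == "unknown"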

capa/features/extractors/elf.py:

@@ -88,6 +88,7 @@ class Shdr:
     offset: int
     size: int
     link: int
+    entsize: int
     buf: bytes


@@ -320,12 +321,12 @@ class ELF:
         shent = self.shbuf[shent_offset : shent_offset + self.e_shentsize]

         if self.bitness == 32:
-            sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link = struct.unpack_from(
-                self.endian + "IIIIIII", shent, 0x0
+            sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link, _, _, sh_entsize = struct.unpack_from(
+                self.endian + "IIIIIIIIII", shent, 0x0
             )
         elif self.bitness == 64:
-            sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link = struct.unpack_from(
-                self.endian + "IIQQQQI", shent, 0x0
+            sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link, _, _, sh_entsize = struct.unpack_from(
+                self.endian + "IIQQQQIIQQ", shent, 0x0
             )
         else:
             raise NotImplementedError()
@@ -337,7 +338,7 @@ class ELF:
         if len(buf) != sh_size:
             raise ValueError("failed to read section header content")

-        return Shdr(sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link, buf)
+        return Shdr(sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link, sh_entsize, buf)

     @property
     def section_headers(self):
@@ -502,6 +503,23 @@ class ELF:

             yield read_cstr(strtab, d_val)

+    @property
+    def symtab(self) -> Optional[Tuple[Shdr, Shdr]]:
+        """
+        fetch the Shdr for the symtab and the associated strtab.
+        """
+        SHT_SYMTAB = 0x2
+        for shdr in self.section_headers:
+            if shdr.type != SHT_SYMTAB:
+                continue
+
+            # the linked section contains strings referenced by the symtab structures.
+            strtab_shdr = self.parse_section_header(shdr.link)
+
+            return shdr, strtab_shdr
+
+        return None
+

 @dataclass
 class ABITag:
@@ -603,11 +621,76 @@ class SHNote:
         return ABITag(os, kmajor, kminor, kpatch)


-def guess_os_from_osabi(elf) -> Optional[OS]:
+@dataclass
+class Symbol:
+    name_offset: int
+    value: int
+    size: int
+    info: int
+    other: int
+    shndx: int
+
+
+class SymTab:
+    def __init__(
+        self,
+        endian: str,
+        bitness: int,
+        symtab: Shdr,
+        strtab: Shdr,
+    ) -> None:
+        self.symbols: List[Symbol] = []
+
+        self.symtab = symtab
+        self.strtab = strtab
+
+        self._parse(endian, bitness, symtab.buf)
+
+    def _parse(self, endian: str, bitness: int, symtab_buf: bytes) -> None:
+        """
+        return the symbol's information in
+        the order specified by sys/elf32.h
+        """
+        for i in range(int(len(self.symtab.buf) / self.symtab.entsize)):
+            if bitness == 32:
+                name_offset, value, size, info, other, shndx = struct.unpack_from(
+                    endian + "IIIBBH", symtab_buf, i * self.symtab.entsize
+                )
+            elif bitness == 64:
+                name_offset, info, other, shndx, value, size = struct.unpack_from(
+                    endian + "IBBBQQ", symtab_buf, i * self.symtab.entsize
+                )
+
+            self.symbols.append(Symbol(name_offset, value, size, info, other, shndx))
+
+    def get_name(self, symbol: Symbol) -> str:
+        """
+        fetch a symbol's name from symtab's
+        associated strings' section (SHT_STRTAB)
+        """
+        if not self.strtab:
+            raise ValueError("no strings found")
+
+        for i in range(symbol.name_offset, self.strtab.size):
+            if self.strtab.buf[i] == 0:
+                return self.strtab.buf[symbol.name_offset : i].decode("utf-8")
+
+        raise ValueError("symbol name not found")
+
+    def get_symbols(self) -> Iterator[Symbol]:
+        """
+        return a tuple: (name, value, size, info, other, shndx)
+        for each symbol contained in the symbol table
+        """
+        for symbol in self.symbols:
+            yield symbol
+
+
+def guess_os_from_osabi(elf: ELF) -> Optional[OS]:
     return elf.ei_osabi


-def guess_os_from_ph_notes(elf) -> Optional[OS]:
+def guess_os_from_ph_notes(elf: ELF) -> Optional[OS]:
     # search for PT_NOTE sections that specify an OS
     # for example, on Linux there is a GNU section with minimum kernel version
     PT_NOTE = 0x4
@@ -646,7 +729,7 @@ def guess_os_from_ph_notes(elf) -> Optional[OS]:
     return None


-def guess_os_from_sh_notes(elf) -> Optional[OS]:
+def guess_os_from_sh_notes(elf: ELF) -> Optional[OS]:
     # search for notes stored in sections that aren't visible in program headers.
     # e.g. .note.Linux in Linux kernel modules.
     SHT_NOTE = 0x7
@@ -679,7 +762,7 @@ def guess_os_from_sh_notes(elf) -> Optional[OS]:
     return None


-def guess_os_from_linker(elf) -> Optional[OS]:
+def guess_os_from_linker(elf: ELF) -> Optional[OS]:
     # search for recognizable dynamic linkers (interpreters)
     # for example, on linux, we see file paths like: /lib64/ld-linux-x86-64.so.2
     linker = elf.linker
@@ -689,7 +772,7 @@ def guess_os_from_linker(elf) -> Optional[OS]:
     return None


-def guess_os_from_abi_versions_needed(elf) -> Optional[OS]:
+def guess_os_from_abi_versions_needed(elf: ELF) -> Optional[OS]:
     # then lets look for GLIBC symbol versioning requirements.
     # this will let us guess about linux/hurd in some cases.

@@ -720,7 +803,7 @@ def guess_os_from_abi_versions_needed(elf) -> Optional[OS]:
     return None


-def guess_os_from_needed_dependencies(elf) -> Optional[OS]:
+def guess_os_from_needed_dependencies(elf: ELF) -> Optional[OS]:
     for needed in elf.needed:
         if needed.startswith("libmachuser.so"):
             return OS.HURD
@@ -730,6 +813,33 @@ def guess_os_from_needed_dependencies(elf) -> Optional[OS]:
     return None


+def guess_os_from_symtab(elf: ELF) -> Optional[OS]:
+    shdrs = elf.symtab
+    if not shdrs:
+        # executable does not contain a symbol table
+        # or the symbol's names are stripped
+        return None
+
+    symtab_shdr, strtab_shdr = shdrs
+    symtab = SymTab(elf.endian, elf.bitness, symtab_shdr, strtab_shdr)
+
+    keywords = {
+        OS.LINUX: [
+            "linux",
+            "/linux/",
+        ],
+    }
+
+    for symbol in symtab.get_symbols():
+        sym_name = symtab.get_name(symbol)
+
+        for os, hints in keywords.items():
+            if any(map(lambda x: x in sym_name, hints)):
+                return os
+
+    return None
+
+
 def detect_elf_os(f) -> str:
     """
     f: type Union[BinaryIO, IDAIO]
@@ -754,6 +864,9 @@ def detect_elf_os(f) -> str:
     needed_dependencies_guess = guess_os_from_needed_dependencies(elf)
     logger.debug("guess: needed dependencies: %s", needed_dependencies_guess)

+    symtab_guess = guess_os_from_symtab(elf)
+    logger.debug("guess: pertinent symbol name: %s", symtab_guess)
+
     ret = None

     if osabi_guess:
@@ -774,6 +887,9 @@ def detect_elf_os(f) -> str:
     elif needed_dependencies_guess:
         ret = needed_dependencies_guess

+    elif symtab_guess:
+        ret = symtab_guess
+
     return ret.value if ret is not None else "unknown"

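Note: the widened format strings "IIIIIIIIII" and "IIQQQQIIQQ" extend the section-header unpacking through sh_entsize, the per-entry size the new SymTab uses as its stride. A standalone sketch of that stride-and-unpack walk, here using the ELF spec's Elf64_Sym field widths (st_name, st_info, st_other, st_shndx, st_value, st_size) over a hand-built two-entry table:

    import struct

    endian, fmt = "<", "IBBHQQ"
    entsize = struct.calcsize(endian + fmt)  # 24 bytes per Elf64_Sym

    # entry 0: the mandatory null symbol; entry 1: a symbol named at strtab offset 1
    symtab_buf = struct.pack(endian + fmt, 0, 0, 0, 0, 0, 0)
    symtab_buf += struct.pack(endian + fmt, 1, 0x12, 0, 1, 0x401000, 32)

    for i in range(len(symtab_buf) // entsize):
        name_offset, info, other, shndx, value, size = struct.unpack_from(
            endian + fmt, symtab_buf, i * entsize
        )
        print(i, name_offset, hex(value), size)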

capa/features/extractors/ida/helpers.py:

@@ -90,8 +90,11 @@ def get_file_imports() -> Dict[int, Tuple[str, str, int]]:
         if not library:
             continue

-        # IDA uses section names for the library of ELF imports, like ".dynsym"
-        library = library.lstrip(".")
+        # IDA uses section names for the library of ELF imports, like ".dynsym".
+        # These are not useful to us, we may need to expand this list over time
+        # TODO: exhaust this list, see #1419
+        if library == ".dynsym":
+            library = ""

     def inspect_import(ea, function, ordinal):
         if function and function.startswith("__imp_"):
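Note: a worked illustration of why lstrip(".") was wrong here (pure Python, no IDA required): the old code turned the ELF section name ".dynsym" into what looked like a legitimate library named "dynsym", while the new code discards it entirely:

    library = ".dynsym"                     # IDA reports the section name for ELF imports
    assert library.lstrip(".") == "dynsym"  # old behavior: a bogus library name

    if library == ".dynsym":                # new behavior: recognized section names are dropped
        library = ""
    assert library == ""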

capa/features/insn.py:

@@ -70,7 +70,7 @@ class Number(Feature):
         elif isinstance(self.value, float):
             return str(self.value)
         else:
-            raise ValueError("invalid value type %s" % (type(self.value)))
+            raise ValueError(f"invalid value type {type(self.value)}")


 # max recognized structure size (and therefore, offset size)

capa/main.py, 114 lines changed:

@@ -69,6 +69,7 @@ from capa.features.common import (
     FORMAT_SC64,
     FORMAT_DOTNET,
     FORMAT_FREEZE,
+    FORMAT_RESULT,
 )
 from capa.features.address import NO_ADDRESS, Address
 from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
@@ -498,7 +499,7 @@ def get_workspace(path, format_, sigpaths):
 def get_extractor(
     path: str,
     format_: str,
-    os: str,
+    os_: str,
     backend: str,
     sigpaths: List[str],
     should_save_workspace=False,
@@ -517,7 +518,7 @@ def get_extractor(
         if not is_supported_arch(path):
             raise UnsupportedArchError()

-        if os == OS_AUTO and not is_supported_os(path):
+        if os_ == OS_AUTO and not is_supported_os(path):
             raise UnsupportedOSError()

     if format_ == FORMAT_DOTNET:
@@ -548,7 +549,7 @@ def get_extractor(
         with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
             bv: BinaryView = BinaryViewType.get_view_of_file(path)
             if bv is None:
-                raise RuntimeError("Binary Ninja cannot open file %s" % (path))
+                raise RuntimeError(f"Binary Ninja cannot open file {path}")

             return capa.features.extractors.binja.extractor.BinjaFeatureExtractor(bv)

@@ -569,7 +570,7 @@ def get_extractor(
         else:
             logger.debug("CAPA_SAVE_WORKSPACE unset, not saving workspace")

-        return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path, os)
+        return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path, os_)


 def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]:
@@ -913,12 +914,12 @@ def install_common_args(parser, wanted=None):
             (OS_MACOS,),
             (OS_WINDOWS,),
         ]
-        os_help = ", ".join(["%s (%s)" % (o[0], o[1]) if len(o) == 2 else o[0] for o in oses])
+        os_help = ", ".join([f"{o[0]} ({o[1]})" if len(o) == 2 else o[0] for o in oses])
         parser.add_argument(
             "--os",
             choices=[o[0] for o in oses],
             default=OS_AUTO,
-            help="select sample OS: %s" % os_help,
+            help=f"select sample OS: {os_help}",
         )

     if "rules" in wanted:
@@ -1182,53 +1183,72 @@ def main(argv=None):
         logger.debug("file limitation short circuit, won't analyze fully.")
         return E_FILE_LIMITATION

-    if format_ == FORMAT_FREEZE:
-        with open(args.sample, "rb") as f:
-            extractor = capa.features.freeze.load(f.read())
+    # TODO: #1411 use a real type, not a dict here.
+    meta: Dict[str, Any]
+    capabilities: MatchResults
+    counts: Dict[str, Any]
+
+    if format_ == FORMAT_RESULT:
+        # result document directly parses into meta, capabilities
+        result_doc = capa.render.result_document.ResultDocument.parse_file(args.sample)
+        meta, capabilities = result_doc.to_capa()
+
     else:
-        try:
-            if format_ == FORMAT_PE:
-                sig_paths = get_signatures(args.signatures)
-            else:
-                sig_paths = []
-                logger.debug("skipping library code matching: only have native PE signatures")
-        except IOError as e:
-            logger.error("%s", str(e))
-            return E_INVALID_SIG
-
-        should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
-
-        try:
-            extractor = get_extractor(
-                args.sample,
-                format_,
-                args.os,
-                args.backend,
-                sig_paths,
-                should_save_workspace,
-                disable_progress=args.quiet,
-            )
-        except UnsupportedFormatError:
-            log_unsupported_format_error()
-            return E_INVALID_FILE_TYPE
-        except UnsupportedArchError:
-            log_unsupported_arch_error()
-            return E_INVALID_FILE_ARCH
-        except UnsupportedOSError:
-            log_unsupported_os_error()
-            return E_INVALID_FILE_OS
-
-    meta = collect_metadata(argv, args.sample, args.format, args.os, args.rules, extractor)
-
-    capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
-    meta["analysis"].update(counts)
-    meta["analysis"]["layout"] = compute_layout(rules, extractor, capabilities)
-
-    if has_file_limitation(rules, capabilities):
-        # bail if capa encountered file limitation e.g. a packed binary
-        # do show the output in verbose mode, though.
-        if not (args.verbose or args.vverbose or args.json):
-            return E_FILE_LIMITATION
+        # all other formats we must create an extractor
+        # and use that to extract meta and capabilities
+
+        if format_ == FORMAT_FREEZE:
+            # freeze format deserializes directly into an extractor
+            with open(args.sample, "rb") as f:
+                extractor = capa.features.freeze.load(f.read())
+        else:
+            # all other formats we must create an extractor,
+            # such as viv, binary ninja, etc. workspaces
+            # and use those for extracting.
+
+            try:
+                if format_ == FORMAT_PE:
+                    sig_paths = get_signatures(args.signatures)
+                else:
+                    sig_paths = []
+                    logger.debug("skipping library code matching: only have native PE signatures")
+            except IOError as e:
+                logger.error("%s", str(e))
+                return E_INVALID_SIG
+
+            should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
+
+            try:
+                extractor = get_extractor(
+                    args.sample,
+                    format_,
+                    args.os,
+                    args.backend,
+                    sig_paths,
+                    should_save_workspace,
+                    disable_progress=args.quiet,
+                )
+            except UnsupportedFormatError:
+                log_unsupported_format_error()
+                return E_INVALID_FILE_TYPE
+            except UnsupportedArchError:
+                log_unsupported_arch_error()
+                return E_INVALID_FILE_ARCH
+            except UnsupportedOSError:
+                log_unsupported_os_error()
+                return E_INVALID_FILE_OS
+
+        meta = collect_metadata(argv, args.sample, args.format, args.os, args.rules, extractor)
+
+        capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
+        meta["analysis"].update(counts)
+        meta["analysis"]["layout"] = compute_layout(rules, extractor, capabilities)
+
+        if has_file_limitation(rules, capabilities):
+            # bail if capa encountered file limitation e.g. a packed binary
+            # do show the output in verbose mode, though.
+            if not (args.verbose or args.vverbose or args.json):
+                return E_FILE_LIMITATION

     if args.json:
         print(capa.render.json.render(meta, rules, capabilities))
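Note: after this restructuring, main() has three entry paths into (meta, capabilities): result documents parse directly, freeze files deserialize into an extractor, and everything else builds a fresh analysis workspace. A minimal sketch of the new result-document path, assuming "results.json" (hypothetical name) was written by an earlier capa run with JSON output:

    import capa.render.result_document

    # ResultDocument is a pydantic v1 model, so parse_file() is available
    result_doc = capa.render.result_document.ResultDocument.parse_file("results.json")
    meta, capabilities = result_doc.to_capa()  # same shapes main() otherwise builds via an extractor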

capa/render/proto/__init__.py:

@@ -502,27 +502,36 @@ def metadata_from_pb2(meta: capa_pb2.Metadata) -> rd.Metadata:
         rules=tuple(meta.analysis.rules),
         base_address=addr_from_pb2(meta.analysis.base_address),
         layout=rd.Layout(
-            functions=[
-                rd.FunctionLayout(
-                    address=addr_from_pb2(f.address),
-                    matched_basic_blocks=[
-                        rd.BasicBlockLayout(address=addr_from_pb2(bb.address)) for bb in f.matched_basic_blocks
-                    ],
-                )
-                for f in meta.analysis.layout.functions
-            ]
+            functions=tuple(
+                [
+                    rd.FunctionLayout(
+                        address=addr_from_pb2(f.address),
+                        matched_basic_blocks=tuple(
+                            [
+                                rd.BasicBlockLayout(address=addr_from_pb2(bb.address))
+                                for bb in f.matched_basic_blocks
+                            ]
+                        ),
+                    )
+                    for f in meta.analysis.layout.functions
+                ]
+            )
         ),
         feature_counts=rd.FeatureCounts(
             file=meta.analysis.feature_counts.file,
-            functions=[
-                rd.FunctionFeatureCount(address=addr_from_pb2(f.address), count=f.count)
-                for f in meta.analysis.feature_counts.functions
-            ],
+            functions=tuple(
+                [
+                    rd.FunctionFeatureCount(address=addr_from_pb2(f.address), count=f.count)
+                    for f in meta.analysis.feature_counts.functions
+                ]
+            ),
+        ),
+        library_functions=tuple(
+            [
+                rd.LibraryFunction(address=addr_from_pb2(lf.address), name=lf.name)
+                for lf in meta.analysis.library_functions
+            ]
         ),
-        library_functions=[
-            rd.LibraryFunction(address=addr_from_pb2(lf.address), name=lf.name)
-            for lf in meta.analysis.library_functions
-        ],
     ),
 )
@@ -585,13 +594,14 @@ def feature_from_pb2(f: capa_pb2.FeatureNode) -> frzf.Feature:
         return frzf.ExportFeature(export=ff.export, description=ff.description or None)
     elif type_ == "import_":
         ff = f.import_
-        return frzf.ImportFeature(import_=ff.import_, description=ff.description or None)
+        return frzf.ImportFeature(import_=ff.import_, description=ff.description or None)  # type: ignore
+        # Mypy is unable to recognize `import_` as an argument
     elif type_ == "section":
         ff = f.section
         return frzf.SectionFeature(section=ff.section, description=ff.description or None)
     elif type_ == "function_name":
         ff = f.function_name
-        return frzf.FunctionNameFeature(function_name=ff.function_name, description=ff.description or None)
+        return frzf.FunctionNameFeature(function_name=ff.function_name, description=ff.description or None)  # type: ignore
     elif type_ == "substring":
         ff = f.substring
         return frzf.SubstringFeature(substring=ff.substring, description=ff.description or None)
@@ -603,7 +613,8 @@ def feature_from_pb2(f: capa_pb2.FeatureNode) -> frzf.Feature:
         return frzf.StringFeature(string=ff.string, description=ff.description or None)
     elif type_ == "class_":
         ff = f.class_
-        return frzf.ClassFeature(class_=ff.class_, description=ff.description or None)
+        return frzf.ClassFeature(class_=ff.class_, description=ff.description or None)  # type: ignore
+        # Mypy is unable to recognize `class_` as an argument due to aliasing
     elif type_ == "namespace":
         ff = f.namespace
         return frzf.NamespaceFeature(namespace=ff.namespace, description=ff.description or None)
@@ -629,12 +640,13 @@ def feature_from_pb2(f: capa_pb2.FeatureNode) -> frzf.Feature:
         ff = f.operand_number
         return frzf.OperandNumberFeature(
             index=ff.index, operand_number=number_from_pb2(ff.operand_number), description=ff.description or None
-        )
+        )  # type: ignore
     elif type_ == "operand_offset":
         ff = f.operand_offset
         return frzf.OperandOffsetFeature(
             index=ff.index, operand_offset=int_from_pb2(ff.operand_offset), description=ff.description or None
-        )
+        )  # type: ignore
+        # Mypy is unable to recognize `operand_offset` as an argument due to aliasing
     elif type_ == "basic_block":
         ff = f.basic_block
         return frzf.BasicBlockFeature(description=ff.description or None)
@@ -651,16 +663,16 @@ def match_from_pb2(match: capa_pb2.Match) -> rd.Match:
         return rd.Match(
             success=match.success,
             node=rd.StatementNode(statement=statement_from_pb2(match.statement)),
-            children=children,
-            locations=locations,
+            children=tuple(children),
+            locations=tuple(locations),
             captures={},
         )
     elif node_type == "feature":
         return rd.Match(
             success=match.success,
             node=rd.FeatureNode(feature=feature_from_pb2(match.feature)),
-            children=children,
-            locations=locations,
+            children=tuple(children),
+            locations=tuple(locations),
             captures={capture: tuple(map(addr_from_pb2, locs.address)) for capture, locs in match.captures.items()},
         )
     else:
@@ -694,7 +706,8 @@ def maec_from_pb2(pb: capa_pb2.MaecMetadata) -> rd.MaecMetadata:
         malware_family=pb.malware_family or None,
         malware_category=pb.malware_category or None,
         malware_category_ov=pb.malware_category_ov or None,
-    )
+    )  # type: ignore
+    # Mypy is unable to recognise arguments due to alias


 def rule_metadata_from_pb2(pb: capa_pb2.RuleMetadata) -> rd.RuleMetadata:
@@ -711,7 +724,8 @@ def rule_metadata_from_pb2(pb: capa_pb2.RuleMetadata) -> rd.RuleMetadata:
         lib=pb.lib,
         is_subscope_rule=pb.is_subscope_rule,
         maec=maec_from_pb2(pb.maec),
-    )
+    )  # type: ignore
+    # Mypy is unable to recognise `attack` and `is_subscope_rule` as arguments due to alias


 def doc_from_pb2(doc: capa_pb2.ResultDocument) -> rd.ResultDocument:
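Note: the tuple(...) wrapping matters because these values feed frozen pydantic models, which must hold immutable members to stay hashable; the explicit conversion also keeps type checkers satisfied. A minimal sketch of the constraint with pydantic v1 (the pinned major version; the model names here are illustrative, not capa's exact definitions):

    from typing import Tuple
    from pydantic import BaseModel

    class FrozenModel(BaseModel):
        class Config:
            frozen = True  # immutable instances with a generated __hash__

    class Layout(FrozenModel):
        functions: Tuple[int, ...]  # a List field would make instances unhashable

    layout = Layout(functions=tuple([1, 2, 3]))
    hash(layout)  # works because every field value is immutable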

capa/render/result_document.py:

@@ -6,7 +6,8 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import datetime
-from typing import Any, Dict, Tuple, Union, Optional
+import collections
+from typing import Any, Dict, List, Tuple, Union, Optional

 from pydantic import Field, BaseModel

@@ -125,6 +126,41 @@ class Metadata(FrozenModel):
             ),
         )

+    def to_capa(self) -> Dict[str, Any]:
+        capa_meta = {
+            "timestamp": self.timestamp.isoformat(),
+            "version": self.version,
+            "sample": {
+                "md5": self.sample.md5,
+                "sha1": self.sample.sha1,
+                "sha256": self.sample.sha256,
+                "path": self.sample.path,
+            },
+            "analysis": {
+                "format": self.analysis.format,
+                "arch": self.analysis.arch,
+                "os": self.analysis.os,
+                "extractor": self.analysis.extractor,
+                "rules": self.analysis.rules,
+                "base_address": self.analysis.base_address.to_capa(),
+                "layout": {
+                    "functions": {
+                        f.address.to_capa(): {
+                            "matched_basic_blocks": [bb.address.to_capa() for bb in f.matched_basic_blocks]
+                        }
+                        for f in self.analysis.layout.functions
+                    }
+                },
+                "feature_counts": {
+                    "file": self.analysis.feature_counts.file,
+                    "functions": {fc.address.to_capa(): fc.count for fc in self.analysis.feature_counts.functions},
+                },
+                "library_functions": {lf.address.to_capa(): lf.name for lf in self.analysis.library_functions},
+            },
+        }
+
+        return capa_meta
+

 class CompoundStatementType:
     AND = "and"
@@ -227,6 +263,54 @@ def node_from_capa(node: Union[capa.engine.Statement, capa.engine.Feature]) -> Node:
     assert_never(node)


+def node_to_capa(
+    node: Node, children: List[Union[capa.engine.Statement, capa.engine.Feature]]
+) -> Union[capa.engine.Statement, capa.engine.Feature]:
+    if isinstance(node, StatementNode):
+        if isinstance(node.statement, CompoundStatement):
+            if node.statement.type == CompoundStatementType.AND:
+                return capa.engine.And(description=node.statement.description, children=children)
+
+            elif node.statement.type == CompoundStatementType.OR:
+                return capa.engine.Or(description=node.statement.description, children=children)
+
+            elif node.statement.type == CompoundStatementType.NOT:
+                return capa.engine.Not(description=node.statement.description, child=children[0])
+
+            elif node.statement.type == CompoundStatementType.OPTIONAL:
+                return capa.engine.Some(description=node.statement.description, count=0, children=children)
+
+            else:
+                assert_never(node.statement.type)
+
+        elif isinstance(node.statement, SomeStatement):
+            return capa.engine.Some(
+                description=node.statement.description, count=node.statement.count, children=children
+            )
+
+        elif isinstance(node.statement, RangeStatement):
+            return capa.engine.Range(
+                description=node.statement.description,
+                min=node.statement.min,
+                max=node.statement.max,
+                child=node.statement.child.to_capa(),
+            )
+
+        elif isinstance(node.statement, SubscopeStatement):
+            return capa.engine.Subscope(
+                description=node.statement.description, scope=node.statement.scope, child=children[0]
+            )
+
+        else:
+            assert_never(node.statement)
+
+    elif isinstance(node, FeatureNode):
+        return node.feature.to_capa()
+
+    else:
+        assert_never(node)
+
+
 class Match(FrozenModel):
     """
     args:
@@ -359,6 +443,39 @@ class Match(FrozenModel):
             captures={capture: tuple(captures[capture]) for capture in captures},
         )

+    def to_capa(self, rules_by_name: Dict[str, capa.rules.Rule]) -> capa.engine.Result:
+        children = [child.to_capa(rules_by_name) for child in self.children]
+        statement = node_to_capa(self.node, [child.statement for child in children])
+
+        if isinstance(self.node, FeatureNode):
+            feature = self.node.feature
+
+            if isinstance(feature, (frzf.SubstringFeature, frzf.RegexFeature)):
+                matches = {capture: {loc.to_capa() for loc in locs} for capture, locs in self.captures.items()}
+
+                if isinstance(feature, frzf.SubstringFeature):
+                    assert isinstance(statement, capa.features.common.Substring)
+                    statement = capa.features.common._MatchedSubstring(statement, matches)
+                elif isinstance(feature, frzf.RegexFeature):
+                    assert isinstance(statement, capa.features.common.Regex)
+                    statement = capa.features.common._MatchedRegex(statement, matches)
+                else:
+                    assert_never(feature)
+
+        # apparently we don't have to fixup match and subscope entries here.
+        # at least, default, verbose, and vverbose renderers seem to work well without any special handling here.
+        #
+        # children contains a single tree of results, corresponding to the logic of the matched rule.
+        # self.node.feature.match contains the name of the rule that was matched.
+        # so its all available to reconstruct, if necessary.
+
+        return capa.features.common.Result(
+            success=self.success,
+            statement=statement,
+            locations={loc.to_capa() for loc in self.locations},
+            children=children,
+        )
+

 def parse_parts_id(s: str):
     id_ = ""
@@ -543,3 +660,22 @@ class ResultDocument(FrozenModel):
         )

         return ResultDocument(meta=Metadata.from_capa(meta), rules=rule_matches)
+
+    def to_capa(self) -> Tuple[Dict, Dict]:
+        meta = self.meta.to_capa()
+        capabilities: Dict[
+            str, List[Tuple[capa.features.address.Address, capa.features.common.Result]]
+        ] = collections.defaultdict(list)
+
+        # this doesn't quite work because we don't have the rule source for rules that aren't matched.
+        rules_by_name = {
+            rule_name: capa.rules.Rule.from_yaml(rule_match.source) for rule_name, rule_match in self.rules.items()
+        }
+
+        for rule_name, rule_match in self.rules.items():
+            for addr, match in rule_match.matches:
+                result: capa.engine.Result = match.to_capa(rules_by_name)
+
+                capabilities[rule_name].append((addr.to_capa(), result))
+
+        return meta, capabilities
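Note: node_to_capa() and Match.to_capa() rebuild the rule-engine tree bottom-up: each node's children are converted first, then the parent is reconstructed around them. A generic, runnable sketch of that shape (the types are hypothetical stand-ins for rd.Match and capa.engine statements):

    from dataclasses import dataclass
    from typing import List

    @dataclass
    class SerNode:          # serialized form: plain data, like rd.Match
        op: str
        children: List["SerNode"]

    @dataclass
    class EngineNode:       # behavior-bearing form, like capa.engine.And/Or
        op: str
        children: List["EngineNode"]

    def to_engine(node: SerNode) -> EngineNode:
        # convert children first, then rebuild the parent from the converted children
        return EngineNode(node.op, [to_engine(c) for c in node.children])

    tree = SerNode("and", [SerNode("or", []), SerNode("not", [])])
    assert to_engine(tree).children[0].op == "or"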

capa/render/utils.py:

@@ -16,7 +16,7 @@ import capa.render.result_document as rd

 def bold(s: str) -> str:
     """draw attention to the given string"""
-    return termcolor.colored(s, "blue")
+    return termcolor.colored(s, "cyan")


 def bold2(s: str) -> str:

rules, 2 lines changed:
Submodule rules updated: 4ca8023363...a10ccf3fd8

setup.py, 8 lines changed:

@@ -27,8 +27,8 @@ requirements = [
     "pyelftools==0.29",
     "dnfile==0.13.0",
     "dncil==1.0.2",
-    "pydantic==1.10.6",
-    "protobuf==4.21.12",
+    "pydantic==1.10.7",
+    "protobuf==4.22.1",
 ]

 # this sets __version__
@@ -71,7 +71,7 @@ setuptools.setup(
         "dev": [
             "pytest==7.1.3",
             "pytest-sugar==0.9.4",
-            "pytest-instafail==0.4.2",
+            "pytest-instafail==0.5.0",
             "pytest-cov==4.0.0",
             "pycodestyle==2.10.0",
             "ruff==0.0.259",
@@ -90,7 +90,7 @@ setuptools.setup(
             "types-termcolor==1.1.4",
             "types-psutil==5.8.23",
             "types_requests==2.28.1",
-            "types-protobuf==4.21.0.5",
+            "types-protobuf==4.22.0.1",
         ],
         "build": [
             "pyinstaller==5.9.0",

Submodule tests/data updated: 3cbd7768c2...9b302d4bcf

tests/fixtures.py:

@@ -241,6 +241,8 @@ def get_data_path_by_name(name):
         return os.path.join(CD, "data", "kernel32-64.dll_")
     elif name == "pma01-01":
         return os.path.join(CD, "data", "Practical Malware Analysis Lab 01-01.dll_")
+    elif name == "pma01-01-rd":
+        return os.path.join(CD, "data", "rd", "Practical Malware Analysis Lab 01-01.dll_.json")
     elif name == "pma12-04":
         return os.path.join(CD, "data", "Practical Malware Analysis Lab 12-04.exe_")
     elif name == "pma16-01":
@@ -307,6 +309,8 @@ def get_data_path_by_name(name):
         return os.path.join(CD, "data", "bf7a9c8bdfa6d47e01ad2b056264acc3fd90cf43fe0ed8deec93ab46b47d76cb.elf_")
     elif name.startswith("294b8d"):
         return os.path.join(CD, "data", "294b8db1f2702b60fb2e42fdc50c2cee6a5046112da9a5703a548a4fa50477bc.elf_")
+    elif name.startswith("2bf18d"):
+        return os.path.join(CD, "data", "2bf18d0403677378adad9001b1243211.elf_")
     else:
         raise ValueError(f"unexpected sample fixture: {name}")

@@ -365,6 +369,8 @@ def get_sample_md5_by_name(name):
     elif name.startswith("294b8d"):
         # file name is SHA256 hash
         return "3db3e55b16a7b1b1afb970d5e77c5d98"
+    elif name.startswith("2bf18d"):
+        return "2bf18d0403677378adad9001b1243211"
     else:
         raise ValueError(f"unexpected sample fixture: {name}")

@@ -467,3 +467,12 @@ def test_main_dotnet4(_039a6_dotnetfile_extractor):
|
|||||||
# tests successful execution and one rendering
|
# tests successful execution and one rendering
|
||||||
path = _039a6_dotnetfile_extractor.path
|
path = _039a6_dotnetfile_extractor.path
|
||||||
assert capa.main.main([path, "-vv"]) == 0
|
assert capa.main.main([path, "-vv"]) == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_main_rd():
|
||||||
|
path = fixtures.get_data_path_by_name("pma01-01-rd")
|
||||||
|
assert capa.main.main([path, "-vv"]) == 0
|
||||||
|
assert capa.main.main([path, "-v"]) == 0
|
||||||
|
assert capa.main.main([path, "-j"]) == 0
|
||||||
|
assert capa.main.main([path, "-q"]) == 0
|
||||||
|
assert capa.main.main([path]) == 0
|
||||||
|

tests/test_elf.py:

@@ -20,6 +20,7 @@ def test_elf_sh_notes():
     # guess: sh notes: OS.LINUX
     # guess: linker: None
     # guess: ABI versions needed: None
+    # guess: symtab: None
     # guess: needed dependencies: None
     path = get_data_path_by_name("2f7f5f")
     with open(path, "rb") as f:
@@ -32,6 +33,7 @@ def test_elf_pt_notes():
     # guess: sh notes: OS.LINUX
     # guess: linker: OS.LINUX
     # guess: ABI versions needed: OS.LINUX
+    # guess: symtab: None
     # guess: needed dependencies: None
     path = get_data_path_by_name("7351f.elf")
     with open(path, "rb") as f:
@@ -44,6 +46,7 @@ def test_elf_so_needed():
     # guess: sh notes: OS.HURD
     # guess: linker: None
     # guess: ABI versions needed: OS.HURD
+    # guess: symtab: None
     # guess: needed dependencies: OS.HURD
     path = get_data_path_by_name("b5f052")
     with open(path, "rb") as f:
@@ -56,7 +59,21 @@ def test_elf_abi_version_hurd():
     # guess: sh notes: OS.HURD
     # guess: linker: None
     # guess: ABI versions needed: OS.HURD
+    # guess: symtab: None
     # guess: needed dependencies: None
     path = get_data_path_by_name("bf7a9c")
     with open(path, "rb") as f:
         assert capa.features.extractors.elf.detect_elf_os(f) == "hurd"
+
+
+def test_elf_symbol_table():
+    # guess: osabi: None
+    # guess: ph notes: None
+    # guess: sh notes: None
+    # guess: linker: None
+    # guess: ABI versions needed: None
+    # guess: symtab: OS.LINUX
+    # guess: needed dependencies: None
+    path = get_data_path_by_name("2bf18d")
+    with open(path, "rb") as f:
+        assert capa.features.extractors.elf.detect_elf_os(f) == "linux"

tests/test_result_document.py:

@@ -8,6 +8,7 @@
 import copy

 import pytest
+import fixtures
 from fixtures import *

 import capa
@@ -268,3 +269,18 @@ def assert_round_trip(rd: rdoc.ResultDocument):
 def test_round_trip(request, rd_file):
     rd: rdoc.ResultDocument = request.getfixturevalue(rd_file)
     assert_round_trip(rd)
+
+
+def test_json_to_rdoc():
+    path = fixtures.get_data_path_by_name("pma01-01-rd")
+    assert isinstance(rdoc.ResultDocument.parse_file(path), rdoc.ResultDocument)
+
+
+def test_rdoc_to_capa():
+    path = fixtures.get_data_path_by_name("pma01-01-rd")
+
+    rd = rdoc.ResultDocument.parse_file(path)
+
+    meta, capabilites = rd.to_capa()
+    assert isinstance(meta, dict)
+    assert isinstance(capabilites, dict)

tests/test_proto.py:

@@ -81,4 +81,4 @@ def test_proto_conversion(tmpdir):
     p = run_program(get_script_path("proto-to-results.py"), [pb])
     assert p.returncode == 0

-    assert p.stdout.startswith(b'{\n "meta": ')
+    assert p.stdout.startswith(b'{\n "meta": ') or p.stdout.startswith(b'{\r\n "meta": ')
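Note: the second prefix covers Windows, where a child Python writing to text-mode stdout emits "\r\n" line endings. An equivalent alternative: bytes.startswith() accepts a tuple of prefixes, which folds the `or` into one call:

    assert b'{\r\n "meta": {}'.startswith((b'{\n "meta": ', b'{\r\n "meta": '))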