Merge branch 'mandiant:master' into vmray_extractor

2026-02-04 19:12:01 -08:00 · 2024-06-18 15:42:52 -05:00
parent bdc94c13ac 1975b6455c
commit be274d1d65
19 changed files with 796 additions and 159 deletions
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -37,7 +37,7 @@ jobs:
    - name: Install dependencies
      run: |
        pip install -r requirements.txt
-        pip install -e .[dev]
+        pip install -e .[dev,scripts]
    - name: Lint with ruff
      run: pre-commit run ruff
    - name: Lint with isort
@@ -65,7 +65,7 @@ jobs:
    - name: Install capa
      run: |
        pip install -r requirements.txt
-        pip install -e .[dev]
+        pip install -e .[dev,scripts]
    - name: Run rule linter
      run: python scripts/lint.py rules/

@@ -102,7 +102,7 @@ jobs:
    - name: Install capa
      run: |
        pip install -r requirements.txt
-        pip install -e .[dev]
+        pip install -e .[dev,scripts]
    - name: Run tests (fast)
      # this set of tests runs about 80% of the cases in 20% of the time,
      # and should catch most errors quickly.
@@ -139,7 +139,7 @@ jobs:
      if: ${{ env.BN_SERIAL != 0 }}
      run: |
        pip install -r requirements.txt
-        pip install -e .[dev]
+        pip install -e .[dev,scripts]
    - name: install Binary Ninja
      if: ${{ env.BN_SERIAL != 0 }}
      run: |
@@ -198,7 +198,7 @@ jobs:
    - name: Install capa
      run: |
        pip install -r requirements.txt
-        pip install -e .[dev] 
+        pip install -e .[dev,scripts]
    - name: Run tests
      run: | 
        mkdir ./.github/ghidra/project
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -110,6 +110,16 @@ repos:
        always_run: true
        pass_filenames: false

+-   repo: local
+    hooks:
+    -   id: deptry
+        name: deptry
+        stages: [push, manual]
+        language: system
+        entry: deptry .
+        always_run: true
+        pass_filenames: false
+
 -   repo: local
    hooks:
    -   id: pytest-fast
@@ -128,12 +138,3 @@ repos:
        always_run: true
        pass_filenames: false

-   repo: local
-    hooks:
-    -   id: deptry
-        name: deptry
-        stages: [push, manual]
-        language: system
-        entry: deptry .
-        always_run: true
-        pass_filenames: false
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,46 @@
 # Change Log

 ## master (unreleased)
- Emit "dotnet" as format to ResultDocument when processing .NET files #2024 @samadpls

 ### New Features

+### Breaking Changes
+
+### New Rules (0)
+
+-
+
+### Bug Fixes
+
+- elf: extract import / export symbols from stripped binaries #2096 @ygasparis
+
+### capa explorer IDA Pro plugin
+
+### Development
+
+### Raw diffs
+- [capa v7.1.0...master](https://github.com/mandiant/capa/compare/v7.1.0...master)
+- [capa-rules v7.1.0...master](https://github.com/mandiant/capa-rules/compare/v7.1.0...master)
+
+## v7.1.0
+The v7.1.0 release brings large performance improvements to capa's rule matching engine.
+Additionally, we've fixed various bugs and added new features for people using and developing capa.
+
+Special thanks to our repeat and new contributors:
+* @sjha2048 made their first contribution in https://github.com/mandiant/capa/pull/2000
+* @Rohit1123 made their first contribution in https://github.com/mandiant/capa/pull/1990
+* @psahithireddy made their first contribution in https://github.com/mandiant/capa/pull/2020
+* @Atlas-64 made their first contribution in https://github.com/mandiant/capa/pull/2018
+* @s-ff made their first contribution in https://github.com/mandiant/capa/pull/2011
+* @samadpls made their first contribution in https://github.com/mandiant/capa/pull/2024
+* @acelynnzhang made their first contribution in https://github.com/mandiant/capa/pull/2044
+* @RainRat made their first contribution in https://github.com/mandiant/capa/pull/2058
+* @ReversingWithMe made their first contribution in https://github.com/mandiant/capa/pull/2093
+* @malwarefrank made their first contribution in https://github.com/mandiant/capa/pull/2037
+
+### New Features
+- Emit "dotnet" as format to ResultDocument when processing .NET files #2024 @samadpls
+- ELF: detect OS from statically-linked Go binaries #1978 @williballenthin
 - add function in capa/helpers to load plain and compressed JSON reports #1883 @Rohit1123
 - document Antivirus warnings and VirusTotal false positive detections #2028 @RionEV @mr-tz
 - Add json to sarif conversion script @reversingwithme
@@ -13,11 +49,9 @@
 - optimize rule matching #2080 @williballenthin
 - add aarch64 as a valid architecture #2144 mehunhoff@google.com @williballenthin
 - relax dependency version requirements for the capa library #2053 @williballenthin
+- add scripts dependency group and update documentation #2145 @mr-tz

-### Breaking Changes
-
-
-### New Rules (17)
+### New Rules (25)

 - impact/wipe-disk/delete-drive-layout-via-ioctl william.ballenthin@mandiant.com
 - host-interaction/driver/interact-with-driver-via-ioctl moritz.raabe@mandiant.com
@@ -36,7 +70,14 @@
 - persistence/act-as-time-provider-dll jakub.jozwiak@mandiant.com
 - host-interaction/gui/window/hide/hide-graphical-window-from-taskbar jakub.jozwiak@mandiant.com
 - compiler/dart/compiled-with-dart jakub.jozwiak@mandiant.com
-
+- nursery/bypass-hidden-api-restrictions-via-jni-on-android mehunhoff@google.com
+- nursery/get-current-process-filesystem-mounts-on-linux mehunhoff@google.com
+- nursery/get-current-process-memory-mapping-on-linux mehunhoff@google.com
+- nursery/get-system-property-on-android mehunhoff@google.com
+- nursery/hook-routines-via-lsplant mehunhoff@google.com
+- nursery/load-packed-dex-via-jiagu-on-android mehunhoff@google.com
+- nursery/modify-api-blacklist-or-denylist-via-jni-on-android mehunhoff@google.com
+- nursery/truncate-file-on-linux mehunhoff@google.com

 ### Bug Fixes

@@ -45,7 +86,6 @@
 - binja: add support for forwarded export #1646 @xusheng6
 - cape: support more report formats #2035 @mr-tz

-
 ### capa explorer IDA Pro plugin
 - replace deprecated IDA API find_binary with bin_search #1606 @s-ff

@@ -60,8 +100,8 @@
 - add deptry support #1497 @s-ff

 ### Raw diffs
- [capa v7.0.1...master](https://github.com/mandiant/capa/compare/v7.0.1...master)
- [capa-rules v7.0.1...master](https://github.com/mandiant/capa-rules/compare/v7.0.1...master)
+- [capa v7.0.1...v7.1.0](https://github.com/mandiant/capa/compare/v7.0.1...v7.1.0)
+- [capa-rules v7.0.1...v7.1.0](https://github.com/mandiant/capa-rules/compare/v7.0.1...v7.1.0)

 ## v7.0.1

--- a/capa/features/extractors/dnfile/helpers.py
+++ b/capa/features/extractors/dnfile/helpers.py
@@ -83,7 +83,7 @@ def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str
        return None

    try:
-        user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid)
+        user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get(token.rid)
    except UnicodeDecodeError as e:
        logger.debug("failed to decode #US stream index 0x%08x (%s)", token.rid, e)
        return None
@@ -119,14 +119,14 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
        access: Optional[str]

        # assume .NET imports starting with get_/set_ are used to access a property
-        if member_ref.Name.startswith("get_"):
+        member_ref_name: str = str(member_ref.Name)
+        if member_ref_name.startswith("get_"):
            access = FeatureAccess.READ
-        elif member_ref.Name.startswith("set_"):
+        elif member_ref_name.startswith("set_"):
            access = FeatureAccess.WRITE
        else:
            access = None

-        member_ref_name: str = member_ref.Name
        if member_ref_name.startswith(("get_", "set_")):
            # remove get_/set_ from MemberRef name
            member_ref_name = member_ref_name[4:]
@@ -212,7 +212,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
            token: int = calculate_dotnet_token_value(method.table.number, method.row_index)
            access: Optional[str] = accessor_map.get(token)

-            method_name: str = method.row.Name
+            method_name: str = str(method.row.Name)
            if method_name.startswith(("get_", "set_")):
                # remove get_/set_
                method_name = method_name[4:]
@@ -289,8 +289,8 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod]
            logger.debug("ImplMap[0x%X] ImportScope row is None", rid)
            module = ""
        else:
-            module = impl_map.ImportScope.row.Name
-        method: str = impl_map.ImportName
+            module = str(impl_map.ImportScope.row.Name)
+        method: str = str(impl_map.ImportName)

        member_forward_table: int
        if impl_map.MemberForwarded.table is None:
@@ -320,8 +320,11 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> O
    if row_index - 1 <= 0:
        return None

+    table: Optional[dnfile.base.ClrMetaDataTable] = pe.net.mdtables.tables.get(table_index)
+    if table is None:
+        return None
+
    try:
-        table = pe.net.mdtables.tables.get(table_index, [])
        return table[row_index - 1]
    except IndexError:
        return None
@@ -334,7 +337,7 @@ def resolve_nested_typedef_name(

    if index in nested_class_table:
        typedef_name = []
-        name = typedef.TypeName
+        name = str(typedef.TypeName)

        # Append the current typedef name
        typedef_name.append(name)
@@ -343,24 +346,24 @@ def resolve_nested_typedef_name(
            # Iterate through the typedef table to resolve the nested name
            table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index])
            if table_row is None:
-                return typedef.TypeNamespace, tuple(typedef_name[::-1])
+                return str(typedef.TypeNamespace), tuple(typedef_name[::-1])

-            name = table_row.TypeName
+            name = str(table_row.TypeName)
            typedef_name.append(name)
            index = nested_class_table[index]

        # Document the root enclosing details
        table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index])
        if table_row is None:
-            return typedef.TypeNamespace, tuple(typedef_name[::-1])
+            return str(typedef.TypeNamespace), tuple(typedef_name[::-1])

-        enclosing_name = table_row.TypeName
+        enclosing_name = str(table_row.TypeName)
        typedef_name.append(enclosing_name)

-        return table_row.TypeNamespace, tuple(typedef_name[::-1])
+        return str(table_row.TypeNamespace), tuple(typedef_name[::-1])

    else:
-        return typedef.TypeNamespace, (typedef.TypeName,)
+        return str(typedef.TypeNamespace), (str(typedef.TypeName),)


 def resolve_nested_typeref_name(
@@ -370,29 +373,29 @@ def resolve_nested_typeref_name(
    # If the ResolutionScope decodes to a typeRef type then it is nested
    if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef):
        typeref_name = []
-        name = typeref.TypeName
+        name = str(typeref.TypeName)
        # Not appending the current typeref name to avoid potential duplicate

        # Validate index
        table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, index)
        if table_row is None:
-            return typeref.TypeNamespace, (typeref.TypeName,)
+            return str(typeref.TypeNamespace), (str(typeref.TypeName),)

        while isinstance(table_row.ResolutionScope.table, dnfile.mdtable.TypeRef):
            # Iterate through the typeref table to resolve the nested name
            typeref_name.append(name)
-            name = table_row.TypeName
+            name = str(table_row.TypeName)
            table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, table_row.ResolutionScope.row_index)
            if table_row is None:
-                return typeref.TypeNamespace, tuple(typeref_name[::-1])
+                return str(typeref.TypeNamespace), tuple(typeref_name[::-1])

        # Document the root enclosing details
-        typeref_name.append(table_row.TypeName)
+        typeref_name.append(str(table_row.TypeName))

-        return table_row.TypeNamespace, tuple(typeref_name[::-1])
+        return str(table_row.TypeNamespace), tuple(typeref_name[::-1])

    else:
-        return typeref.TypeNamespace, (typeref.TypeName,)
+        return str(typeref.TypeNamespace), (str(typeref.TypeName),)


 def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]:
--- a/capa/features/extractors/dotnetfile.py
+++ b/capa/features/extractors/dotnetfile.py
@@ -78,12 +78,12 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
    for _, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
        # emit internal .NET namespaces
        assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
-        namespaces.add(typedef.TypeNamespace)
+        namespaces.add(str(typedef.TypeNamespace))

    for _, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number):
        # emit external .NET namespaces
        assert isinstance(typeref, dnfile.mdtable.TypeRefRow)
-        namespaces.add(typeref.TypeNamespace)
+        namespaces.add(str(typeref.TypeNamespace))

    # namespaces may be empty, discard
    namespaces.discard("")
--- a/capa/features/extractors/elf.py
+++ b/capa/features/extractors/elf.py
@@ -58,6 +58,10 @@ class OS(str, Enum):
    SYLLABLE = "syllable"
    NACL = "nacl"
    ANDROID = "android"
+    DRAGONFLYBSD = "dragonfly BSD"
+    ILLUMOS = "illumos"
+    ZOS = "z/os"
+    UNIX = "unix"


 # via readelf: https://github.com/bminor/binutils-gdb/blob/c0e94211e1ac05049a4ce7c192c9d14d1764eb3e/binutils/readelf.c#L19635-L19658
@@ -81,6 +85,8 @@ class Phdr:
    paddr: int
    filesz: int
    buf: bytes
+    flags: int
+    memsz: int


@dataclass
@@ -315,24 +321,23 @@ class ELF:
        phent_offset = i * self.e_phentsize
        phent = self.phbuf[phent_offset : phent_offset + self.e_phentsize]

-        (p_type,) = struct.unpack_from(self.endian + "I", phent, 0x0)
-        logger.debug("ph:p_type: 0x%04x", p_type)
-
        if self.bitness == 32:
-            p_offset, p_vaddr, p_paddr, p_filesz = struct.unpack_from(self.endian + "IIII", phent, 0x4)
+            p_type, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz, p_flags = struct.unpack_from(
+                self.endian + "IIIIIII", phent, 0x0
+            )
        elif self.bitness == 64:
-            p_offset, p_vaddr, p_paddr, p_filesz = struct.unpack_from(self.endian + "QQQQ", phent, 0x8)
+            p_type, p_flags, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz = struct.unpack_from(
+                self.endian + "IIQQQQQ", phent, 0x0
+            )
        else:
            raise NotImplementedError()

-        logger.debug("ph:p_offset: 0x%02x p_filesz: 0x%04x", p_offset, p_filesz)
-
        self.f.seek(p_offset)
        buf = self.f.read(p_filesz)
        if len(buf) != p_filesz:
            raise ValueError("failed to read program header content")

-        return Phdr(p_type, p_offset, p_vaddr, p_paddr, p_filesz, buf)
+        return Phdr(p_type, p_offset, p_vaddr, p_paddr, p_filesz, buf, p_flags, p_memsz)

    @property
    def program_headers(self):
@@ -357,8 +362,6 @@ class ELF:
        else:
            raise NotImplementedError()

-        logger.debug("sh:sh_offset: 0x%02x sh_size: 0x%04x", sh_offset, sh_size)
-
        self.f.seek(sh_offset)
        buf = self.f.read(sh_size)
        if len(buf) != sh_size:
@@ -867,6 +870,8 @@ def guess_os_from_ident_directive(elf: ELF) -> Optional[OS]:
            return OS.LINUX
        elif "Red Hat" in comment:
            return OS.LINUX
+        elif "Alpine" in comment:
+            return OS.LINUX
        elif "Android" in comment:
            return OS.ANDROID

@@ -952,11 +957,506 @@ def guess_os_from_symtab(elf: ELF) -> Optional[OS]:

        for os, hints in keywords.items():
            if any(hint in sym_name for hint in hints):
+                logger.debug("symtab: %s looks like %s", sym_name, os)
                return os

    return None


+def is_go_binary(elf: ELF) -> bool:
+    """Return True when the ELF has a `.note.go.buildid` or `.go.buildinfo` section."""
+    section_hits = (
+        (".note.go.buildid", "go buildinfo: found section .note.go.buildid"),
+        # The `go version` command enumerates sections for the name `.go.buildinfo`
+        # (in addition to looking for the BUILDINFO_MAGIC) to check if an executable is go or not.
+        # See references to the `errNotGoExe` error here:
+        # https://github.com/golang/go/blob/master/src/debug/buildinfo/buildinfo.go#L41
+        (".go.buildinfo", "go buildinfo: found section .go.buildinfo"),
+    )
+    # one full pass over the section headers per wanted name, in the order listed above.
+    for wanted_name, message in section_hits:
+        if any(section.get_name(elf) == wanted_name for section in elf.section_headers):
+            logger.debug(message)
+            return True
+    # other strategy used by FLOSS: search for known runtime strings.
+    # https://github.com/mandiant/flare-floss/blob/b2ca8adfc5edf278861dd6bff67d73da39683b46/floss/language/identify.py#L88
+    return False
+
+
+def get_go_buildinfo_data(elf: ELF) -> Optional[bytes]:
+    """Return the bytes expected to hold Go buildinfo, or None when no candidate region exists."""
+    # first choice: the content of the dedicated `.go.buildinfo` section.
+    for section in elf.section_headers:
+        if section.get_name(elf) != ".go.buildinfo":
+            continue
+        logger.debug("go buildinfo: found section .go.buildinfo")
+        return section.buf
+
+    # fallback: the first PT_LOAD segment whose flags have PF_W set and PF_X clear.
+    PT_LOAD = 0x1
+    PF_X, PF_W = 1, 2
+    for segment in elf.program_headers:
+        is_writable_data = segment.flags & PF_W and not segment.flags & PF_X
+        if segment.type == PT_LOAD and is_writable_data:
+            logger.debug("go buildinfo: found data segment")
+            return segment.buf
+    return None
+
+
+def read_data(elf: ELF, rva: int, size: int) -> Optional[bytes]:
+    """Read up to `size` bytes at virtual address `rva` from the segment containing it, else None."""
+    # ELF segments are for runtime data,
+    # ELF sections are for link-time data.
+    # So we want to read Program Headers/Segments.
+    for phdr in elf.program_headers:
+        if not (phdr.vaddr <= rva < phdr.vaddr + phdr.memsz):
+            continue
+
+        segment_offset = rva - phdr.vaddr
+        data = phdr.buf[segment_offset : segment_offset + size]
+        # bytes past the file-backed content but inside memsz read as NULLs.
+        # pad only the requested window rather than materializing all of memsz,
+        # which comes from the file and may be far larger than the read.
+        # assume page alignment is already handled.
+        missing = min(size, phdr.memsz - segment_offset) - len(data)
+        return data + b"\x00" * missing if missing > 0 else data
+    return None
+
+
+def read_go_slice(elf: ELF, rva: int) -> Optional[bytes]:
+    """Follow the (address, length) header at `rva` and return the bytes it describes, else None."""
+    if elf.bitness == 32:
+        struct_size = 8
+        struct_format = elf.endian + "II"
+    elif elf.bitness == 64:
+        struct_size = 16
+        struct_format = elf.endian + "QQ"
+    else:
+        raise ValueError("invalid psize")
+    struct_buf = read_data(elf, rva, struct_size)
+    # read_data may return fewer bytes than requested near the end of a segment;
+    # a truncated header cannot be unpacked, so treat it like a failed read.
+    if struct_buf is None or len(struct_buf) < struct_size:
+        return None
+    addr, length = struct.unpack_from(struct_format, struct_buf, 0)
+    return read_data(elf, addr, length)
+
+
+def guess_os_from_go_buildinfo(elf: ELF) -> Optional[OS]:
+    """
+    In a binary compiled by Go, the buildinfo structure may contain
+    metadata about the build environment, including the configured
+    GOOS, which specifies the target operating system.
+
+    Search for and parse the buildinfo structure,
+    which may be found in the .go.buildinfo section,
+    and often contains this metadata inline. Otherwise,
+    follow a few byte slices to the relevant information.
+
+    This strategy is derived from GoReSym.
+    """
+    buf = get_go_buildinfo_data(elf)
+    if not buf:
+        logger.debug("go buildinfo: no buildinfo section")
+        return None
+
+    assert isinstance(buf, bytes)
+
+    # The build info blob left by the linker is identified by
+    # a 16-byte header, consisting of:
+    #  - buildInfoMagic (14 bytes),
+    #  - the binary's pointer size (1 byte), and
+    #  - whether the binary is big endian (1 byte).
+    #
+    # Then:
+    #  - virtual address to Go string: runtime.buildVersion
+    #  - virtual address to Go string: runtime.modinfo
+    #
+    #  On 32-bit platforms, the last 8 bytes are unused.
+    #
+    #  If the endianness has the 2 bit set, then the pointers are zero,
+    #  and the 32-byte header is followed by varint-prefixed string data
+    #  for the two string values we care about.
+    # https://github.com/mandiant/GoReSym/blob/0860a1b1b4f3495e9fb7e71eb4386bf3e0a7c500/buildinfo/buildinfo.go#L185-L193
+    BUILDINFO_MAGIC = b"\xFF Go buildinf:"
+
+    try:
+        index = buf.index(BUILDINFO_MAGIC)
+    except ValueError:
+        logger.debug("go buildinfo: no buildinfo magic")
+        return None
+
+    psize, flags = struct.unpack_from("<bb", buf, index + len(BUILDINFO_MAGIC))
+    assert psize in (4, 8)
+    is_big_endian = flags & 0b01
+    has_inline_strings = flags & 0b10
+    logger.debug("go buildinfo: psize: %d big endian: %s inline: %s", psize, is_big_endian, has_inline_strings)
+
+    GOOS_TO_OS = {
+        b"aix": OS.AIX,
+        b"android": OS.ANDROID,
+        b"dragonfly": OS.DRAGONFLYBSD,
+        b"freebsd": OS.FREEBSD,
+        b"hurd": OS.HURD,
+        b"illumos": OS.ILLUMOS,
+        b"linux": OS.LINUX,
+        b"netbsd": OS.NETBSD,
+        b"openbsd": OS.OPENBSD,
+        b"solaris": OS.SOLARIS,
+        b"zos": OS.ZOS,
+        b"windows": None,  # PE format
+        b"plan9": None,  # a.out format
+        b"ios": None,  # Mach-O format
+        b"darwin": None,  # Mach-O format
+        b"nacl": None,  # dropped in Go 1.14
+        b"js": None,
+    }
+
+    if has_inline_strings:
+        # This is the common case/path. Most samples will have an inline GOOS string.
+        #
+        # To find samples on VT, use these VTGrep searches:
+        #
+        #   content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 02}
+        #   content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 02}
+
+        # If present, the GOOS key will be found within
+        # the current buildinfo data region.
+        #
+        # Brute force the k-v pair, like `GOOS=linux`,
+        # rather than try to parse the data, which would be fragile.
+        for key, os in GOOS_TO_OS.items():
+            if (b"GOOS=" + key) in buf:
+                logger.debug("go buildinfo: found os: %s", os)
+                return os
+    else:
+        # This is the uncommon path. Most samples will have an inline GOOS string.
+        #
+        # To find samples on VT, use the referenced VTGrep content searches.
+        info_format = {
+            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 00}
+            # like: 71e617e5cc7fda89bf67422ff60f437e9d54622382c5ed6ff31f75e601f9b22e
+            # in which the modinfo doesn't have GOOS.
+            (4, False): "<II",
+            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 00}
+            # like: 93d3b3e2a904c6c909e20f2f76c3c2e8d0c81d535eb46e5493b5701f461816c3
+            # in which the modinfo doesn't have GOOS.
+            (8, False): "<QQ",
+            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 01}
+            # (no matches on VT today)
+            (4, True): ">II",
+            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 01}
+            # like: d44ba497964050c0e3dd2a192c511e4c3c4f17717f0322a554d64b797ee4690a
+            # in which the modinfo doesn't have GOOS.
+            (8, True): ">QQ",
+        }
+
+        build_version_address, modinfo_address = struct.unpack_from(
+            info_format[(psize, is_big_endian)], buf, index + 0x10
+        )
+        logger.debug("go buildinfo: build version address: 0x%x", build_version_address)
+        logger.debug("go buildinfo: modinfo address: 0x%x", modinfo_address)
+
+        build_version = read_go_slice(elf, build_version_address)
+        if build_version:
+            logger.debug("go buildinfo: build version: %s", build_version.decode("utf-8", errors="replace"))
+
+        modinfo = read_go_slice(elf, modinfo_address)
+        if modinfo:
+            if len(modinfo) >= 0x21 and modinfo[-0x11] == ord("\n"):
+                # Strip module framing: 16-byte sentinel strings delimiting the module info,
+                # so framed data is at least 0x21 bytes. These are
+                # cmd/go/internal/modload/build.infoStart and infoEnd, which should probably be:
+                #   infoStart, _ = hex.DecodeString("3077af0c9274080241e1c107e6d618e6")
+                #   infoEnd, _   = hex.DecodeString("f932433186182072008242104116d8f2")
+                modinfo = modinfo[0x10:-0x10]
+            logger.debug("go buildinfo: modinfo: %s", modinfo.decode("utf-8", errors="replace"))
+
+        if not modinfo:
+            return None
+
+        for key, os in GOOS_TO_OS.items():
+            # Brute force the k-v pair, like `GOOS=linux`,
+            # rather than try to parse the data, which would be fragile.
+            if (b"GOOS=" + key) in modinfo:
+                logger.debug("go buildinfo: found os: %s", os)
+                return os
+
+    return None
+
+
+def guess_os_from_go_source(elf: ELF) -> Optional[OS]:
+    """
+    In a binary compiled by Go, runtime metadata may contain
+    references to the source filenames, including the
+    src/runtime/os_* files, whose name indicates the
+    target operating system.
+
+    Confirm the given ELF seems to be built by Go,
+    and then look for strings that look like
+    Go source filenames.
+
+    This strategy is derived from GoReSym.
+    """
+    if not is_go_binary(elf):
+        return None
+
+    for phdr in elf.program_headers:
+        buf = phdr.buf
+        NEEDLE_OS = b"/src/runtime/os_"
+        try:
+            index = buf.index(NEEDLE_OS)
+        except ValueError:
+            continue
+        # decode leniently: junk bytes after the needle must not raise and abort the scan.
+        rest = buf[index + len(NEEDLE_OS) : index + len(NEEDLE_OS) + 32]
+        filename = rest.partition(b".go")[0].decode("utf-8", errors="replace")
+        logger.debug("go source: filename: /src/runtime/os_%s.go", filename)
+
+        # via: https://cs.opensource.google/go/go/+/master:src/runtime/;bpv=1;bpt=0
+        # candidates today:
+        #   - aix
+        #   - android
+        #   - darwin
+        #   - darwin_arm64
+        #   - dragonfly
+        #   - freebsd
+        #   - freebsd2
+        #   - freebsd_amd64
+        #   - freebsd_arm
+        #   - freebsd_arm64
+        #   - freebsd_noauxv
+        #   - freebsd_riscv64
+        #   - illumos
+        #   - js
+        #   - linux
+        #   - linux_arm
+        #   - linux_arm64
+        #   - linux_be64
+        #   - linux_generic
+        #   - linux_loong64
+        #   - linux_mips64x
+        #   - linux_mipsx
+        #   - linux_noauxv
+        #   - linux_novdso
+        #   - linux_ppc64x
+        #   - linux_riscv64
+        #   - linux_s390x
+        #   - linux_x86
+        #   - netbsd
+        #   - netbsd_386
+        #   - netbsd_amd64
+        #   - netbsd_arm
+        #   - netbsd_arm64
+        #   - nonopenbsd
+        #   - only_solaris
+        #   - openbsd
+        #   - openbsd_arm
+        #   - openbsd_arm64
+        #   - openbsd_libc
+        #   - openbsd_mips64
+        #   - openbsd_syscall
+        #   - openbsd_syscall1
+        #   - openbsd_syscall2
+        #   - plan9
+        #   - plan9_arm
+        #   - solaris
+        #   - unix
+        #   - unix_nonlinux
+        #   - wasip1
+        #   - wasm
+        #   - windows
+        #   - windows_arm
+        #   - windows_arm64
+
+        OS_FILENAME_TO_OS = {
+            "aix": OS.AIX,
+            "android": OS.ANDROID,
+            "dragonfly": OS.DRAGONFLYBSD,
+            "freebsd": OS.FREEBSD,
+            "freebsd2": OS.FREEBSD,
+            "freebsd_": OS.FREEBSD,
+            "illumos": OS.ILLUMOS,
+            "linux": OS.LINUX,
+            "netbsd": OS.NETBSD,
+            "only_solaris": OS.SOLARIS,
+            "openbsd": OS.OPENBSD,
+            "solaris": OS.SOLARIS,
+            "unix_nonlinux": OS.UNIX,
+        }
+
+        for prefix, os in OS_FILENAME_TO_OS.items():
+            if filename.startswith(prefix):
+                return os
+
+    for phdr in elf.program_headers:
+        buf = phdr.buf
+        NEEDLE_RT0 = b"/src/runtime/rt0_"
+        try:
+            index = buf.index(NEEDLE_RT0)
+        except ValueError:
+            continue
+        # decode leniently: junk bytes after the needle must not raise and abort the scan.
+        rest = buf[index + len(NEEDLE_RT0) : index + len(NEEDLE_RT0) + 32]
+        filename = rest.partition(b".s")[0].decode("utf-8", errors="replace")
+        logger.debug("go source: filename: /src/runtime/rt0_%s.s", filename)
+
+        # via: https://cs.opensource.google/go/go/+/master:src/runtime/;bpv=1;bpt=0
+        # candidates today:
+        #   - aix_ppc64
+        #   - android_386
+        #   - android_amd64
+        #   - android_arm
+        #   - android_arm64
+        #   - darwin_amd64
+        #   - darwin_arm64
+        #   - dragonfly_amd64
+        #   - freebsd_386
+        #   - freebsd_amd64
+        #   - freebsd_arm
+        #   - freebsd_arm64
+        #   - freebsd_riscv64
+        #   - illumos_amd64
+        #   - ios_amd64
+        #   - ios_arm64
+        #   - js_wasm
+        #   - linux_386
+        #   - linux_amd64
+        #   - linux_arm
+        #   - linux_arm64
+        #   - linux_loong64
+        #   - linux_mips64x
+        #   - linux_mipsx
+        #   - linux_ppc64
+        #   - linux_ppc64le
+        #   - linux_riscv64
+        #   - linux_s390x
+        #   - netbsd_386
+        #   - netbsd_amd64
+        #   - netbsd_arm
+        #   - netbsd_arm64
+        #   - openbsd_386
+        #   - openbsd_amd64
+        #   - openbsd_arm
+        #   - openbsd_arm64
+        #   - openbsd_mips64
+        #   - openbsd_ppc64
+        #   - openbsd_riscv64
+        #   - plan9_386
+        #   - plan9_amd64
+        #   - plan9_arm
+        #   - solaris_amd64
+        #   - wasip1_wasm
+        #   - windows_386
+        #   - windows_amd64
+        #   - windows_arm
+        #   - windows_arm64
+
+        RT0_FILENAME_TO_OS = {
+            "aix": OS.AIX,
+            "android": OS.ANDROID,
+            "dragonfly": OS.DRAGONFLYBSD,
+            "freebsd": OS.FREEBSD,
+            "illumos": OS.ILLUMOS,
+            "linux": OS.LINUX,
+            "netbsd": OS.NETBSD,
+            "openbsd": OS.OPENBSD,
+            "solaris": OS.SOLARIS,
+        }
+
+        for prefix, os in RT0_FILENAME_TO_OS.items():
+            if filename.startswith(prefix):
+                return os
+
+    return None
+
+
+def guess_os_from_vdso_strings(elf: ELF) -> Optional[OS]:
+    """
+    The "vDSO" (virtual dynamic shared object) is a small shared
+    library that the kernel automatically maps into the address space
+    of all user-space applications.
+
+    Some statically linked executables include small dynamic linker
+    routines that find these vDSO symbols, using the ASCII
+    symbol name and version. We can therefore recognize the pairs
+    (symbol, version) to guess the binary targets Linux.
+    """
+    for phdr in elf.program_headers:
+        buf = phdr.buf
+
+        # We don't really use the arch, but it's interesting for documentation.
+        # I suppose we could restrict the arch here to what's in the ELF header,
+        # but that's even more work. Let's see if this is sufficient.
+        for arch, symbol, version in (
+            # via: https://man7.org/linux/man-pages/man7/vdso.7.html
+            ("arm", b"__vdso_gettimeofday", b"LINUX_2.6"),
+            ("arm", b"__vdso_clock_gettime", b"LINUX_2.6"),
+            ("aarch64", b"__kernel_rt_sigreturn", b"LINUX_2.6.39"),
+            ("aarch64", b"__kernel_gettimeofday", b"LINUX_2.6.39"),
+            ("aarch64", b"__kernel_clock_gettime", b"LINUX_2.6.39"),
+            ("aarch64", b"__kernel_clock_getres", b"LINUX_2.6.39"),
+            ("mips", b"__kernel_gettimeofday", b"LINUX_2.6"),
+            ("mips", b"__kernel_clock_gettime", b"LINUX_2.6"),
+            ("ia64", b"__kernel_sigtramp", b"LINUX_2.5"),
+            ("ia64", b"__kernel_syscall_via_break", b"LINUX_2.5"),
+            ("ia64", b"__kernel_syscall_via_epc", b"LINUX_2.5"),
+            ("ppc/32", b"__kernel_clock_getres", b"LINUX_2.6.15"),
+            ("ppc/32", b"__kernel_clock_gettime", b"LINUX_2.6.15"),
+            ("ppc/32", b"__kernel_clock_gettime64", b"LINUX_5.11"),
+            ("ppc/32", b"__kernel_datapage_offset", b"LINUX_2.6.15"),
+            ("ppc/32", b"__kernel_get_syscall_map", b"LINUX_2.6.15"),
+            ("ppc/32", b"__kernel_get_tbfreq", b"LINUX_2.6.15"),
+            ("ppc/32", b"__kernel_getcpu", b"LINUX_2.6.15"),
+            ("ppc/32", b"__kernel_gettimeofday", b"LINUX_2.6.15"),
+            ("ppc/32", b"__kernel_sigtramp_rt32", b"LINUX_2.6.15"),
+            ("ppc/32", b"__kernel_sigtramp32", b"LINUX_2.6.15"),
+            ("ppc/32", b"__kernel_sync_dicache", b"LINUX_2.6.15"),
+            ("ppc/32", b"__kernel_sync_dicache_p5", b"LINUX_2.6.15"),
+            ("ppc/64", b"__kernel_clock_getres", b"LINUX_2.6.15"),
+            ("ppc/64", b"__kernel_clock_gettime", b"LINUX_2.6.15"),
+            ("ppc/64", b"__kernel_datapage_offset", b"LINUX_2.6.15"),
+            ("ppc/64", b"__kernel_get_syscall_map", b"LINUX_2.6.15"),
+            ("ppc/64", b"__kernel_get_tbfreq", b"LINUX_2.6.15"),
+            ("ppc/64", b"__kernel_getcpu", b"LINUX_2.6.15"),
+            ("ppc/64", b"__kernel_gettimeofday", b"LINUX_2.6.15"),
+            ("ppc/64", b"__kernel_sigtramp_rt64", b"LINUX_2.6.15"),
+            ("ppc/64", b"__kernel_sync_dicache", b"LINUX_2.6.15"),
+            ("ppc/64", b"__kernel_sync_dicache_p5", b"LINUX_2.6.15"),
+            ("riscv", b"__vdso_rt_sigreturn", b"LINUX_4.15"),
+            ("riscv", b"__vdso_gettimeofday", b"LINUX_4.15"),
+            ("riscv", b"__vdso_clock_gettime", b"LINUX_4.15"),
+            ("riscv", b"__vdso_clock_getres", b"LINUX_4.15"),
+            ("riscv", b"__vdso_getcpu", b"LINUX_4.15"),
+            ("riscv", b"__vdso_flush_icache", b"LINUX_4.15"),
+            ("s390", b"__kernel_clock_getres", b"LINUX_2.6.29"),
+            ("s390", b"__kernel_clock_gettime", b"LINUX_2.6.29"),
+            ("s390", b"__kernel_gettimeofday", b"LINUX_2.6.29"),
+            ("superh", b"__kernel_rt_sigreturn", b"LINUX_2.6"),
+            ("superh", b"__kernel_sigreturn", b"LINUX_2.6"),
+            ("superh", b"__kernel_vsyscall", b"LINUX_2.6"),
+            ("i386", b"__kernel_sigreturn", b"LINUX_2.5"),
+            ("i386", b"__kernel_rt_sigreturn", b"LINUX_2.5"),
+            ("i386", b"__kernel_vsyscall", b"LINUX_2.5"),
+            ("i386", b"__vdso_clock_gettime", b"LINUX_2.6"),
+            ("i386", b"__vdso_gettimeofday", b"LINUX_2.6"),
+            ("i386", b"__vdso_time", b"LINUX_2.6"),
+            ("x86-64", b"__vdso_clock_gettime", b"LINUX_2.6"),
+            ("x86-64", b"__vdso_getcpu", b"LINUX_2.6"),
+            ("x86-64", b"__vdso_gettimeofday", b"LINUX_2.6"),
+            ("x86-64", b"__vdso_time", b"LINUX_2.6"),
+            ("x86/32", b"__vdso_clock_gettime", b"LINUX_2.6"),
+            ("x86/32", b"__vdso_getcpu", b"LINUX_2.6"),
+            ("x86/32", b"__vdso_gettimeofday", b"LINUX_2.6"),
+            ("x86/32", b"__vdso_time", b"LINUX_2.6"),
+        ):
+            if symbol in buf and version in buf:
+                logger.debug("vdso string: %s %s %s", arch, symbol.decode("ascii"), version.decode("ascii"))
+                return OS.LINUX
+
+    return None
+
+
 def detect_elf_os(f) -> str:
    """
    f: type Union[BinaryIO, IDAIO, GHIDRAIO]
@@ -1023,6 +1523,27 @@ def detect_elf_os(f) -> str:
        logger.warning("Error guessing OS from symbol table: %s", e)
        symtab_guess = None

+    try:
+        goos_guess = guess_os_from_go_buildinfo(elf)
+        logger.debug("guess: Go buildinfo: %s", goos_guess)
+    except Exception as e:
+        logger.warning("Error guessing OS from Go buildinfo: %s", e)
+        goos_guess = None
+
+    try:
+        gosrc_guess = guess_os_from_go_source(elf)
+        logger.debug("guess: Go source: %s", gosrc_guess)
+    except Exception as e:
+        logger.warning("Error guessing OS from Go source path: %s", e)
+        gosrc_guess = None
+
+    try:
+        vdso_guess = guess_os_from_vdso_strings(elf)
+        logger.debug("guess: vdso strings: %s", vdso_guess)
+    except Exception as e:
+        logger.warning("Error guessing OS from vdso strings: %s", e)
+        vdso_guess = None  # must reset vdso_guess itself; it is read in the fallback chain below
+
    ret = None

    if osabi_guess:
@@ -1046,11 +1567,24 @@ def detect_elf_os(f) -> str:
    elif symtab_guess:
        ret = symtab_guess

+    elif goos_guess:
+        ret = goos_guess
+
+    elif gosrc_guess:
+        # prefer goos_guess to this method,
+        # which is just string interpretation.
+        ret = gosrc_guess
+
    elif ident_guess:
        # at the bottom because we don't trust this too much
        # due to potential for bugs with cross-compilation.
        ret = ident_guess

+    elif vdso_guess:
+        # at the bottom because this is just scanning strings,
+        # which isn't very authoritative.
+        ret = vdso_guess
+
    return ret.value if ret is not None else "unknown"


--- a/capa/features/extractors/elffile.py
+++ b/capa/features/extractors/elffile.py
@@ -10,8 +10,7 @@ import logging
 from typing import Tuple, Iterator
 from pathlib import Path

-from elftools.elf.elffile import ELFFile, SymbolTableSection
-from elftools.elf.relocation import RelocationSection
+from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection

 import capa.features.extractors.common
 from capa.features.file import Export, Import, Section
@@ -47,17 +46,37 @@ def extract_file_export_names(elf: ELFFile, **kwargs):

            yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)

+    for segment in elf.iter_segments():
+        if not isinstance(segment, DynamicSegment):
+            continue
+
+        logger.debug("Dynamic Segment contains %s symbols: ", segment.num_symbols())
+
+        for symbol in segment.iter_symbols():
+            # The following conditions are based on the following article
+            # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
+            if not symbol.name:
+                continue
+            if symbol.entry.st_info.type not in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]:
+                continue
+            if symbol.entry.st_value == 0:
+                continue
+            if symbol.entry.st_shndx == "SHN_UNDEF":
+                continue
+
+            yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)
+

 def extract_file_import_names(elf: ELFFile, **kwargs):
    # Create a dictionary to store symbol names by their index
    symbol_names = {}

    # Extract symbol names and store them in the dictionary
-    for section in elf.iter_sections():
-        if not isinstance(section, SymbolTableSection):
+    for segment in elf.iter_segments():
+        if not isinstance(segment, DynamicSegment):
            continue

-        for _, symbol in enumerate(section.iter_symbols()):
+        for _, symbol in enumerate(segment.iter_symbols()):
            # The following conditions are based on the following article
            # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
            if not symbol.name:
@@ -73,21 +92,19 @@ def extract_file_import_names(elf: ELFFile, **kwargs):

            symbol_names[_] = symbol.name

-    for section in elf.iter_sections():
-        if not isinstance(section, RelocationSection):
+    for segment in elf.iter_segments():
+        if not isinstance(segment, DynamicSegment):
            continue

-        if section["sh_entsize"] == 0:
-            logger.debug("Symbol table '%s' has a sh_entsize of zero!", section.name)
-            continue
+        relocation_tables = segment.get_relocation_tables()
+        logger.debug("Dynamic Segment contains %s relocation tables:", len(relocation_tables))

-        logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_relocations())
-
-        for relocation in section.iter_relocations():
-            # Extract the symbol name from the symbol table using the symbol index in the relocation
-            if relocation["r_info_sym"] not in symbol_names:
-                continue
-            yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"])
+        for relocation_table in relocation_tables.values():
+            for relocation in relocation_table.iter_relocations():
+                # Extract the symbol name from the symbol table using the symbol index in the relocation
+                if relocation["r_info_sym"] not in symbol_names:
+                    continue
+                yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"])


 def extract_file_section_names(elf: ELFFile, **kwargs):
--- a/capa/loader.py
+++ b/capa/loader.py
@@ -8,6 +8,7 @@
 import sys
 import logging
 import datetime
+import contextlib
 from typing import Set, Dict, List, Optional
 from pathlib import Path

@@ -154,6 +155,18 @@ def get_workspace(path: Path, input_format: str, sigpaths: List[Path]):

    viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths])

+    with contextlib.suppress(Exception):
+        # unfortunately viv raises a raw Exception (not any subclass).
+        # This happens when the module isn't found, such as with a viv upgrade.
+        #
+        # Remove the symbolic switch case solver.
+        # This is only enabled for ELF files, not PE files.
+        # During the following performance investigation, this analysis module
+        # had some terrible worst-case behavior.
+        # We can put up with slightly worse CFG reconstruction in order to avoid this.
+        # https://github.com/mandiant/capa/issues/1989#issuecomment-1948022767
+        vw.delFuncAnalysisModule("vivisect.analysis.generic.symswitchcase")
+
    vw.analyze()

    logger.debug("%s", get_meta_str(vw))
--- a/capa/version.py
+++ b/capa/version.py
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-__version__ = "7.0.1"
+__version__ = "7.1.0"


 def get_major_version():
--- a/doc/installation.md
+++ b/doc/installation.md
@@ -107,7 +107,8 @@ We use the following tools to ensure consistent code style and formatting:

 To install these development dependencies, run:

-`$ pip install -e /local/path/to/src[dev]`
+- `$ pip install -e /local/path/to/src[dev]` or
+- `$ pip install -e /local/path/to/src[dev,scripts]` to also install all script dependencies

 We use [pre-commit](https://pre-commit.com/) so that its trivial to run the same linters & configuration locally as in CI.

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -101,9 +101,7 @@ dependencies = [
    # https://github.com/mandiant/capa/issues/1966
    "networkx>=3,<3.2",

-    # TODO(williballenthin): dnfile 0.15 changes UserString API and we havent updated yet.
-    # https://github.com/mandiant/capa/pull/2037
-    "dnfile>=0.14.1,<0.15",
+    "dnfile>=0.15.0",
 ]
 dynamic = ["version"]

@@ -136,14 +134,12 @@ dev = [
    "flake8-simplify==0.21.0",
    "flake8-use-pathlib==0.3.0",
    "flake8-copyright==0.2.4",
-    "ruff==0.4.7",
+    "ruff==0.4.8",
    "black==24.4.2",
    "isort==5.13.2",
    "mypy==1.10.0",
-    "psutil==5.9.2",
-    "stix2==3.0.1",
-    "requests==2.31.0",
    "mypy-protobuf==3.6.0",
+    "PyGithub==2.3.0",
    # type stubs for mypy
    "types-backports==0.1.3",
    "types-colorama==0.4.15.11",
@@ -164,6 +160,13 @@ build = [
    "setuptools==70.0.0",
    "build==1.2.1"
 ]
+scripts = [
+    "jschema_to_python==1.2.3",
+    "psutil==5.9.2",
+    "stix2==3.0.1",
+    "sarif_om==1.0.4",
+    "requests==2.31.0",
+]

 [tool.deptry]
 extend_exclude = [
@@ -213,6 +216,7 @@ DEP002 = [
    "mypy",
    "mypy-protobuf",
    "pre-commit",
+    "PyGithub",
    "pyinstaller",
    "pytest",
    "pytest-cov",
@@ -236,6 +240,9 @@ DEP003 = [
    "typing_extensions" # TODO(s-ff): remove when Python 3.9 is deprecated, see #1699
 ]

+[tool.deptry.package_module_name_map]
+PyGithub = "github"
+
 [project.urls]
 Homepage = "https://github.com/mandiant/capa"
 Repository = "https://github.com/mandiant/capa.git"
--- a/requirements.txt
+++ b/requirements.txt
@@ -30,7 +30,7 @@ pydantic==2.7.3
 pydantic-core==2.18.4
 pyelftools==0.31
 pygments==2.18.0
-python-flirt==0.8.6
+python-flirt==0.8.10
 pyyaml==6.0.1
 rich==13.7.1
 ruamel-yaml==0.18.6
@@ -41,6 +41,6 @@ sortedcontainers==2.4.0
 tabulate==0.9.0
 termcolor==2.4.0
 tqdm==4.66.4
-viv-utils==0.7.9
+viv-utils==0.7.11
 vivisect==1.1.1
 wcwidth==0.2.13
--- a/2
+++ b/2
--- a/scripts/capa2sarif.py
+++ b/scripts/capa2sarif.py
@@ -72,9 +72,7 @@ def _parse_args() -> argparse.Namespace:
        help="Compatibility for Radare r2sarif plugin v2.0",
    )
    parser.add_argument("-t", "--tag", help="Filter on rule meta field values (ruleid)")
-    parser.add_argument(
-        "--version", action="version", version=f"%(prog)s {__version__}"
-    )
+    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")

    return parser.parse_args()

@@ -89,24 +87,18 @@ def main() -> int:
        with Path(args.capa_output).open() as capa_output:
            json_data = json.load(capa_output)
    except ValueError:
-        logger.error(
-            "Input data was not valid JSON, input should be a capa json output file."
-        )
+        logger.error("Input data was not valid JSON, input should be a capa json output file.")
        return -1
    except json.JSONDecodeError:
        # An exception has occured
-        logger.error(
-            "Input data was not valid JSON, input should be a capa json output file."
-        )
+        logger.error("Input data was not valid JSON, input should be a capa json output file.")
        return -2

    # Marshall json into Sarif
    # Create baseline sarif structure to be populated from json data
-    sarif_structure: Optional[dict] = _sarif_boilerplate(
-        json_data["meta"], json_data["rules"]
-    )
+    sarif_structure: Optional[dict] = _sarif_boilerplate(json_data["meta"], json_data["rules"])
    if sarif_structure is None:
-        logger.errort("An Error has occured creating default sarif structure.")
+        logger.error("An Error has occured creating default sarif structure.")
        return -3

    _populate_artifact(sarif_structure, json_data["meta"])
@@ -120,9 +112,7 @@ def main() -> int:

        # artifacts must include a description as well with a text field.
        if "artifacts" in sarif_structure["runs"][0]:
-            sarif_structure["runs"][0]["artifacts"][0]["description"] = {
-                "text": "placeholder"
-            }
+            sarif_structure["runs"][0]["artifacts"][0]["description"] = {"text": "placeholder"}

        # For better compliance with Ghidra table. Iteraction through properties['additionalProperties']
        """
@@ -170,13 +160,9 @@ def _sarif_boilerplate(data_meta: dict, data_rules: dict) -> Optional[dict]:
                "id": id,
                "name": data_rules[key]["meta"]["name"],
                "shortDescription": {"text": data_rules[key]["meta"]["name"]},
-                "messageStrings": {
-                    "default": {"text": data_rules[key]["meta"]["name"]}
-                },
+                "messageStrings": {"default": {"text": data_rules[key]["meta"]["name"]}},
                "properties": {
-                    "namespace": data_rules[key]["meta"]["namespace"]
-                    if "namespace" in data_rules[key]["meta"]
-                    else [],
+                    "namespace": data_rules[key]["meta"]["namespace"] if "namespace" in data_rules[key]["meta"] else [],
                    "scopes": data_rules[key]["meta"]["scopes"],
                    "references": data_rules[key]["meta"]["references"],
                    "lib": data_rules[key]["meta"]["lib"],
@@ -258,39 +244,36 @@ def _populate_invocations(sarif_log: dict, meta_data: dict) -> None:

 def _enumerate_evidence(node: dict, related_count: int) -> List[dict]:
    related_locations = []
-    if node.get("success") and node.get("node").get("type") != "statement":
+    if node.get("success") and node.get("node", {}).get("type") != "statement":
        label = ""
-        if node.get("node").get("type") == "feature":
-            if node.get("node").get("feature").get("type") == "api":
-                label = "api: " + node.get("node").get("feature").get("api")
-            elif node.get("node").get("feature").get("type") == "match":
-                label = "match: " + node.get("node").get("feature").get("match")
-            elif node.get("node").get("feature").get("type") == "number":
-                label = f"number: {node.get('node').get('feature').get('description')} ({node.get('node').get('feature').get('number')})"
-            elif node.get("node").get("feature").get("type") == "offset":
-                label = f"offset: {node.get('node').get('feature').get('description')} ({node.get('node').get('feature').get('offset')})"
-            elif node.get("node").get("feature").get("type") == "mnemonic":
-                label = f"mnemonic: {node.get('node').get('feature').get('mnemonic')}"
-            elif node.get("node").get("feature").get("type") == "characteristic":
-                label = f"characteristic: {node.get('node').get('feature').get('characteristic')}"
-            elif node.get("node").get("feature").get("type") == "os":
-                label = f"os: {node.get('node').get('feature').get('os')}"
-            elif node.get("node").get("feature").get("type") == "operand number":
-                label = f"operand: ({node.get('node').get('feature').get('index')} ) {node.get('node').get('feature').get('description')} ({node.get('node').get('feature').get('operand_number')})"
+        if node.get("node", {}).get("type") == "feature":
+            if node.get("node", {}).get("feature", {}).get("type") == "api":
+                label = "api: " + node.get("node", {}).get("feature", {}).get("api")
+            elif node.get("node", {}).get("feature", {}).get("type") == "match":
+                label = "match: " + node.get("node", {}).get("feature", {}).get("match")
+            elif node.get("node", {}).get("feature", {}).get("type") == "number":
+                label = f"number: {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('number')})"
+            elif node.get("node", {}).get("feature", {}).get("type") == "offset":
+                label = f"offset: {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('offset')})"
+            elif node.get("node", {}).get("feature", {}).get("type") == "mnemonic":
+                label = f"mnemonic: {node.get('node', {}).get('feature', {}).get('mnemonic')}"
+            elif node.get("node", {}).get("feature", {}).get("type") == "characteristic":
+                label = f"characteristic: {node.get('node', {}).get('feature', {}).get('characteristic')}"
+            elif node.get("node", {}).get("feature", {}).get("type") == "os":
+                label = f"os: {node.get('node', {}).get('feature', {}).get('os')}"
+            elif node.get("node", {}).get("feature", {}).get("type") == "operand number":
+                label = f"operand: ({node.get('node', {}).get('feature', {}).get('index')} ) {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('operand_number')})"
            else:
                logger.error(
                    "Not implemented %s",
-                    node.get("node").get("feature").get("type"),
-                    file=sys.stderr,
+                    node.get("node", {}).get("feature", {}).get("type"),
                )
                return []
        else:
-            logger.error(
-                "Not implemented %s", node.get("node").get("type"), file=sys.stderr
-            )
+            logger.error("Not implemented %s", node.get("node", {}).get("type"))
            return []

-        for loc in node.get("locations"):
+        for loc in node.get("locations", []):
            if loc["type"] != "absolute":
                continue

@@ -303,8 +286,8 @@ def _enumerate_evidence(node: dict, related_count: int) -> List[dict]:
            )
            related_count += 1

-    if node.get("success") and node.get("node").get("type") == "statement":
-        for child in node.get("children"):
+    if node.get("success") and node.get("node", {}).get("type") == "statement":
+        for child in node.get("children", []):
            related_locations += _enumerate_evidence(child, related_count)

    return related_locations
--- a/tests/data
+++ b/tests/data
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -389,6 +389,8 @@ def get_data_path_by_name(name) -> Path:
        return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
    elif name.startswith("1038a2"):
        return CD / "data" / "1038a23daad86042c66bfe6c9d052d27048de9653bde5750dc0f240c792d9ac8.elf_"
+    elif name.startswith("3da7c"):
+        return CD / "data" / "3da7c2c70a2d93ac4643f20339d5c7d61388bddd77a4a5fd732311efad78e535.elf_"
    elif name.startswith("nested_typedef"):
        return CD / "data" / "dotnet" / "dd9098ff91717f4906afe9dafdfa2f52.exe_"
    elif name.startswith("nested_typeref"):
--- a/tests/test_elffile_features.py
+++ b/tests/test_elffile_features.py
@@ -14,17 +14,11 @@ from capa.features.extractors.elffile import extract_file_export_names, extract_

 CD = Path(__file__).resolve().parent
 SAMPLE_PATH = CD / "data" / "055da8e6ccfe5a9380231ea04b850e18.elf_"
+STRIPPED_SAMPLE_PATH = CD / "data" / "bb38149ff4b5c95722b83f24ca27a42b.elf_"


-def test_elffile_import_features():
-    expected_imports = [
-        "memfrob",
-        "puts",
-        "__libc_start_main",
-        "malloc",
-        "__cxa_finalize",
-    ]
-    path = Path(SAMPLE_PATH)
+def check_import_features(sample_path, expected_imports):
+    path = Path(sample_path)
    elf = ELFFile(io.BytesIO(path.read_bytes()))
    # Extract imports
    imports = list(extract_file_import_names(elf))
@@ -40,6 +34,52 @@ def test_elffile_import_features():
        assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in imports."


+def check_export_features(sample_path, expected_exports):
+    path = Path(sample_path)
+    elf = ELFFile(io.BytesIO(path.read_bytes()))
+    # Extract exports
+    exports = list(extract_file_export_names(elf))
+
+    # Verify that at least one export was found
+    assert len(exports) > 0, "No exports were found."
+
+    # Extract the symbol names from the extracted exports
+    extracted_symbol_names = [exported[0].value for exported in exports]
+
+    # Check if all expected symbol names are found
+    for symbol_name in expected_exports:
+        assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports."
+
+
+def test_stripped_elffile_import_features():
+    expected_imports = ["__cxa_atexit", "__cxa_finalize", "__stack_chk_fail", "fclose", "fopen", "__android_log_print"]
+    check_import_features(STRIPPED_SAMPLE_PATH, expected_imports)
+
+
+def test_stripped_elffile_export_features():
+    expected_exports = [
+        "_ZN7_JNIEnv14GetArrayLengthEP7_jarray",
+        "Java_o_ac_a",
+        "Java_o_ac_b",
+        "_Z6existsPKc",
+        "_ZN7_JNIEnv17GetStringUTFCharsEP8_jstringPh",
+        "_ZN7_JNIEnv21GetObjectArrayElementEP13_jobjectArrayi",
+        "_ZN7_JNIEnv21ReleaseStringUTFCharsEP8_jstringPKc",
+    ]
+    check_export_features(STRIPPED_SAMPLE_PATH, expected_exports)
+
+
+def test_elffile_import_features():
+    expected_imports = [
+        "memfrob",
+        "puts",
+        "__libc_start_main",
+        "malloc",
+        "__cxa_finalize",
+    ]
+    check_import_features(SAMPLE_PATH, expected_imports)
+
+
 def test_elffile_export_features():
    expected_exports = [
        "deregister_tm_clones",
@@ -55,17 +95,4 @@ def test_elffile_export_features():
        "_IO_stdin_used",
        "__libc_csu_init",
    ]
-    path = Path(SAMPLE_PATH)
-    elf = ELFFile(io.BytesIO(path.read_bytes()))
-    # Extract imports
-    exports = list(extract_file_export_names(elf))
-
-    # Verify that at least one export was found
-    assert len(exports) > 0, "No exports were found."
-
-    # Extract the symbol names from the extracted imports
-    extracted_symbol_names = [exported[0].value for exported in exports]
-
-    # Check if all expected symbol names are found
-    for symbol_name in expected_exports:
-        assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports."
+    check_export_features(SAMPLE_PATH, expected_exports)
--- a/tests/test_os_detection.py
+++ b/tests/test_os_detection.py
@@ -92,6 +92,12 @@ def test_elf_android_notes():
        assert capa.features.extractors.elf.detect_elf_os(f) == "android"


+def test_elf_go_buildinfo():
+    path = get_data_path_by_name("3da7c")
+    with Path(path).open("rb") as f:
+        assert capa.features.extractors.elf.detect_elf_os(f) == "linux"
+
+
 def test_elf_parse_capa_pyinstaller_header():
    # error after misidentified large pydata section with address 0; fixed in #1454
    # compressed ELF header of capa-v5.1.0-linux
--- a/tests/test_scripts.py
+++ b/tests/test_scripts.py
@@ -40,7 +40,10 @@ def get_rule_path():
    [
        pytest.param("capa2yara.py", [get_rules_path()]),
        pytest.param("capafmt.py", [get_rule_path()]),
-        pytest.param("capa2sarif.py", [Path(__file__).resolve().parent / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json"]),
+        pytest.param(
+            "capa2sarif.py",
+            [Path(__file__).resolve().parent / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json"],
+        ),
        # testing some variations of linter script
        pytest.param("lint.py", ["-t", "create directory", get_rules_path()]),
        # `create directory` rule has native and .NET example PEs