merge upstream

2025-12-05 20:40:05 -08:00 · 2024-06-18 10:57:57 -06:00
parent ac6924552b 1360e08389
commit 08cf8f8d03
20 changed files with 1185 additions and 88 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -51,7 +51,9 @@ jobs:
      - name: Upgrade pip, setuptools
        run: python -m pip install --upgrade pip setuptools
      - name: Install capa with build requirements
-        run: pip install -e .[build]
+        run: |
+          pip install -r requirements.txt
+          pip install -e .[build]
      - name: Build standalone executable
        run: pyinstaller --log-level DEBUG .github/pyinstaller/pyinstaller.spec
      - name: Does it run (PE)?
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -25,6 +25,7 @@ jobs:
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
+          pip install -r requirements.txt
          pip install -e .[build]
      - name: build package
        run: |
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -35,7 +35,9 @@ jobs:
      with:
        python-version: "3.11"
    - name: Install dependencies
-      run: pip install -e .[dev]
+      run: |
+        pip install -r requirements.txt
+        pip install -e .[dev,scripts]
    - name: Lint with ruff
      run: pre-commit run ruff
    - name: Lint with isort
@@ -61,7 +63,9 @@ jobs:
      with:
        python-version: "3.11"
    - name: Install capa
-      run: pip install -e .[dev]
+      run: |
+        pip install -r requirements.txt
+        pip install -e .[dev,scripts]
    - name: Run rule linter
      run: python scripts/lint.py rules/

@@ -96,7 +100,9 @@ jobs:
      if: matrix.os == 'ubuntu-20.04'
      run: sudo apt-get install -y libyaml-dev
    - name: Install capa
-      run: pip install -e .[dev]
+      run: |
+        pip install -r requirements.txt
+        pip install -e .[dev,scripts]
    - name: Run tests (fast)
      # this set of tests runs about 80% of the cases in 20% of the time,
      # and should catch most errors quickly.
@@ -131,7 +137,9 @@ jobs:
      run: sudo apt-get install -y libyaml-dev
    - name: Install capa
      if: ${{ env.BN_SERIAL != 0 }}
-      run: pip install -e .[dev]
+      run: |
+        pip install -r requirements.txt
+        pip install -e .[dev,scripts]
    - name: install Binary Ninja
      if: ${{ env.BN_SERIAL != 0 }}
      run: |
@@ -188,7 +196,9 @@ jobs:
    - name: Install pyyaml
      run: sudo apt-get install -y libyaml-dev
    - name: Install capa
-      run: pip install -e .[dev] 
+      run: |
+        pip install -r requirements.txt
+        pip install -e .[dev,scripts]
    - name: Run tests
      run: | 
        mkdir ./.github/ghidra/project
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -110,6 +110,16 @@ repos:
        always_run: true
        pass_filenames: false

+-   repo: local
+    hooks:
+    -   id: deptry
+        name: deptry
+        stages: [push, manual]
+        language: system
+        entry: deptry .
+        always_run: true
+        pass_filenames: false
+
 -   repo: local
    hooks:
    -   id: pytest-fast
@@ -128,12 +138,3 @@ repos:
        always_run: true
        pass_filenames: false

-   repo: local
-    hooks:
-    -   id: deptry
-        name: deptry
-        stages: [push, manual]
-        language: system
-        entry: deptry .
-        always_run: true
-        pass_filenames: false
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,20 +1,55 @@
 # Change Log

 ## master (unreleased)
- Emit "dotnet" as format to ResultDocument when processing .NET files #2024 @samadpls

 ### New Features

+### Breaking Changes
+
+### New Rules (0)
+
+-
+
+### Bug Fixes
+
+### capa explorer IDA Pro plugin
+
+### Development
+
+### Raw diffs
+- [capa v7.1.0...master](https://github.com/mandiant/capa/compare/v7.1.0...master)
+- [capa-rules v7.1.0...master](https://github.com/mandiant/capa-rules/compare/v7.1.0...master)
+
+## v7.1.0
+The v7.1.0 release brings large performance improvements to capa's rule matching engine.
+Additionally, we've fixed various bugs and added new features for people using and developing capa.
+
+Special thanks to our repeat and new contributors:
+* @sjha2048 made their first contribution in https://github.com/mandiant/capa/pull/2000
+* @Rohit1123 made their first contribution in https://github.com/mandiant/capa/pull/1990
+* @psahithireddy made their first contribution in https://github.com/mandiant/capa/pull/2020
+* @Atlas-64 made their first contribution in https://github.com/mandiant/capa/pull/2018
+* @s-ff made their first contribution in https://github.com/mandiant/capa/pull/2011
+* @samadpls made their first contribution in https://github.com/mandiant/capa/pull/2024
+* @acelynnzhang made their first contribution in https://github.com/mandiant/capa/pull/2044
+* @RainRat made their first contribution in https://github.com/mandiant/capa/pull/2058
+* @ReversingWithMe made their first contribution in https://github.com/mandiant/capa/pull/2093
+* @malwarefrank made their first contribution in https://github.com/mandiant/capa/pull/2037
+
+### New Features
+- Emit "dotnet" as format to ResultDocument when processing .NET files #2024 @samadpls
+- ELF: detect OS from statically-linked Go binaries #1978 @williballenthin
 - add function in capa/helpers to load plain and compressed JSON reports #1883 @Rohit1123
 - document Antivirus warnings and VirusTotal false positive detections #2028 @RionEV @mr-tz
+- Add json to sarif conversion script @reversingwithme
 - render maec/* fields #843 @s-ff
 - replace Halo spinner with Rich #2086 @s-ff
 - optimize rule matching #2080 @williballenthin
+- add aarch64 as a valid architecture #2144 mehunhoff@google.com @williballenthin
+- relax dependency version requirements for the capa library #2053 @williballenthin
+- add scripts dependency group and update documentation #2145 @mr-tz

-### Breaking Changes
-
-
-### New Rules (17)
+### New Rules (25)

 - impact/wipe-disk/delete-drive-layout-via-ioctl william.ballenthin@mandiant.com
 - host-interaction/driver/interact-with-driver-via-ioctl moritz.raabe@mandiant.com
@@ -33,7 +68,14 @@
 - persistence/act-as-time-provider-dll jakub.jozwiak@mandiant.com
 - host-interaction/gui/window/hide/hide-graphical-window-from-taskbar jakub.jozwiak@mandiant.com
 - compiler/dart/compiled-with-dart jakub.jozwiak@mandiant.com
-
+- nursery/bypass-hidden-api-restrictions-via-jni-on-android mehunhoff@google.com
+- nursery/get-current-process-filesystem-mounts-on-linux mehunhoff@google.com
+- nursery/get-current-process-memory-mapping-on-linux mehunhoff@google.com
+- nursery/get-system-property-on-android mehunhoff@google.com
+- nursery/hook-routines-via-lsplant mehunhoff@google.com
+- nursery/load-packed-dex-via-jiagu-on-android mehunhoff@google.com
+- nursery/modify-api-blacklist-or-denylist-via-jni-on-android mehunhoff@google.com
+- nursery/truncate-file-on-linux mehunhoff@google.com

 ### Bug Fixes

@@ -43,7 +85,6 @@
 - cape: support more report formats #2035 @mr-tz
 - elf: extract import / export symbols from stripped binaries #2096 @ygasparis

-
 ### capa explorer IDA Pro plugin
 - replace deprecated IDA API find_binary with bin_search #1606 @s-ff

@@ -58,8 +99,8 @@
 - add deptry support #1497 @s-ff

 ### Raw diffs
- [capa v7.0.1...master](https://github.com/mandiant/capa/compare/v7.0.1...master)
- [capa-rules v7.0.1...master](https://github.com/mandiant/capa-rules/compare/v7.0.1...master)
+- [capa v7.0.1...v7.1.0](https://github.com/mandiant/capa/compare/v7.0.1...v7.1.0)
+- [capa-rules v7.0.1...v7.1.0](https://github.com/mandiant/capa-rules/compare/v7.0.1...v7.1.0)

 ## v7.0.1

--- a/capa/features/common.py
+++ b/capa/features/common.py
@@ -409,9 +409,10 @@ class Bytes(Feature):
 # other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types
 ARCH_I386 = "i386"
 ARCH_AMD64 = "amd64"
+ARCH_AARCH64 = "aarch64"
 # dotnet
 ARCH_ANY = "any"
-VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY)
+VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_AARCH64, ARCH_ANY)


 class Arch(Feature):
--- a/capa/features/extractors/dnfile/helpers.py
+++ b/capa/features/extractors/dnfile/helpers.py
@@ -83,7 +83,7 @@ def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str
        return None

    try:
-        user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid)
+        user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get(token.rid)
    except UnicodeDecodeError as e:
        logger.debug("failed to decode #US stream index 0x%08x (%s)", token.rid, e)
        return None
@@ -119,14 +119,14 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
        access: Optional[str]

        # assume .NET imports starting with get_/set_ are used to access a property
-        if member_ref.Name.startswith("get_"):
+        member_ref_name: str = str(member_ref.Name)
+        if member_ref_name.startswith("get_"):
            access = FeatureAccess.READ
-        elif member_ref.Name.startswith("set_"):
+        elif member_ref_name.startswith("set_"):
            access = FeatureAccess.WRITE
        else:
            access = None

-        member_ref_name: str = member_ref.Name
        if member_ref_name.startswith(("get_", "set_")):
            # remove get_/set_ from MemberRef name
            member_ref_name = member_ref_name[4:]
@@ -212,7 +212,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
            token: int = calculate_dotnet_token_value(method.table.number, method.row_index)
            access: Optional[str] = accessor_map.get(token)

-            method_name: str = method.row.Name
+            method_name: str = str(method.row.Name)
            if method_name.startswith(("get_", "set_")):
                # remove get_/set_
                method_name = method_name[4:]
@@ -289,8 +289,8 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod]
            logger.debug("ImplMap[0x%X] ImportScope row is None", rid)
            module = ""
        else:
-            module = impl_map.ImportScope.row.Name
-        method: str = impl_map.ImportName
+            module = str(impl_map.ImportScope.row.Name)
+        method: str = str(impl_map.ImportName)

        member_forward_table: int
        if impl_map.MemberForwarded.table is None:
@@ -320,8 +320,11 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> O
    if row_index - 1 <= 0:
        return None

+    table: Optional[dnfile.base.ClrMetaDataTable] = pe.net.mdtables.tables.get(table_index)
+    if table is None:
+        return None
+
    try:
-        table = pe.net.mdtables.tables.get(table_index, [])
        return table[row_index - 1]
    except IndexError:
        return None
@@ -334,7 +337,7 @@ def resolve_nested_typedef_name(

    if index in nested_class_table:
        typedef_name = []
-        name = typedef.TypeName
+        name = str(typedef.TypeName)

        # Append the current typedef name
        typedef_name.append(name)
@@ -343,24 +346,24 @@ def resolve_nested_typedef_name(
            # Iterate through the typedef table to resolve the nested name
            table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index])
            if table_row is None:
-                return typedef.TypeNamespace, tuple(typedef_name[::-1])
+                return str(typedef.TypeNamespace), tuple(typedef_name[::-1])

-            name = table_row.TypeName
+            name = str(table_row.TypeName)
            typedef_name.append(name)
            index = nested_class_table[index]

        # Document the root enclosing details
        table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index])
        if table_row is None:
-            return typedef.TypeNamespace, tuple(typedef_name[::-1])
+            return str(typedef.TypeNamespace), tuple(typedef_name[::-1])

-        enclosing_name = table_row.TypeName
+        enclosing_name = str(table_row.TypeName)
        typedef_name.append(enclosing_name)

-        return table_row.TypeNamespace, tuple(typedef_name[::-1])
+        return str(table_row.TypeNamespace), tuple(typedef_name[::-1])

    else:
-        return typedef.TypeNamespace, (typedef.TypeName,)
+        return str(typedef.TypeNamespace), (str(typedef.TypeName),)


 def resolve_nested_typeref_name(
@@ -370,29 +373,29 @@ def resolve_nested_typeref_name(
    # If the ResolutionScope decodes to a typeRef type then it is nested
    if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef):
        typeref_name = []
-        name = typeref.TypeName
+        name = str(typeref.TypeName)
        # Not appending the current typeref name to avoid potential duplicate

        # Validate index
        table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, index)
        if table_row is None:
-            return typeref.TypeNamespace, (typeref.TypeName,)
+            return str(typeref.TypeNamespace), (str(typeref.TypeName),)

        while isinstance(table_row.ResolutionScope.table, dnfile.mdtable.TypeRef):
            # Iterate through the typeref table to resolve the nested name
            typeref_name.append(name)
-            name = table_row.TypeName
+            name = str(table_row.TypeName)
            table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, table_row.ResolutionScope.row_index)
            if table_row is None:
-                return typeref.TypeNamespace, tuple(typeref_name[::-1])
+                return str(typeref.TypeNamespace), tuple(typeref_name[::-1])

        # Document the root enclosing details
-        typeref_name.append(table_row.TypeName)
+        typeref_name.append(str(table_row.TypeName))

-        return table_row.TypeNamespace, tuple(typeref_name[::-1])
+        return str(table_row.TypeNamespace), tuple(typeref_name[::-1])

    else:
-        return typeref.TypeNamespace, (typeref.TypeName,)
+        return str(typeref.TypeNamespace), (str(typeref.TypeName),)


 def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]:
--- a/capa/features/extractors/dotnetfile.py
+++ b/capa/features/extractors/dotnetfile.py
@@ -78,12 +78,12 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
    for _, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
        # emit internal .NET namespaces
        assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
-        namespaces.add(typedef.TypeNamespace)
+        namespaces.add(str(typedef.TypeNamespace))

    for _, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number):
        # emit external .NET namespaces
        assert isinstance(typeref, dnfile.mdtable.TypeRefRow)
-        namespaces.add(typeref.TypeNamespace)
+        namespaces.add(str(typeref.TypeNamespace))

    # namespaces may be empty, discard
    namespaces.discard("")
--- a/capa/features/extractors/elf.py
+++ b/capa/features/extractors/elf.py
@@ -58,6 +58,10 @@ class OS(str, Enum):
    SYLLABLE = "syllable"
    NACL = "nacl"
    ANDROID = "android"
+    DRAGONFLYBSD = "dragonfly BSD"
+    ILLUMOS = "illumos"
+    ZOS = "z/os"
+    UNIX = "unix"


 # via readelf: https://github.com/bminor/binutils-gdb/blob/c0e94211e1ac05049a4ce7c192c9d14d1764eb3e/binutils/readelf.c#L19635-L19658
@@ -81,6 +85,8 @@ class Phdr:
    paddr: int
    filesz: int
    buf: bytes
+    flags: int
+    memsz: int


@dataclass
@@ -315,24 +321,23 @@ class ELF:
        phent_offset = i * self.e_phentsize
        phent = self.phbuf[phent_offset : phent_offset + self.e_phentsize]

-        (p_type,) = struct.unpack_from(self.endian + "I", phent, 0x0)
-        logger.debug("ph:p_type: 0x%04x", p_type)
-
        if self.bitness == 32:
-            p_offset, p_vaddr, p_paddr, p_filesz = struct.unpack_from(self.endian + "IIII", phent, 0x4)
+            p_type, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz, p_flags = struct.unpack_from(
+                self.endian + "IIIIIII", phent, 0x0
+            )
        elif self.bitness == 64:
-            p_offset, p_vaddr, p_paddr, p_filesz = struct.unpack_from(self.endian + "QQQQ", phent, 0x8)
+            p_type, p_flags, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz = struct.unpack_from(
+                self.endian + "IIQQQQQ", phent, 0x0
+            )
        else:
            raise NotImplementedError()

-        logger.debug("ph:p_offset: 0x%02x p_filesz: 0x%04x", p_offset, p_filesz)
-
        self.f.seek(p_offset)
        buf = self.f.read(p_filesz)
        if len(buf) != p_filesz:
            raise ValueError("failed to read program header content")

-        return Phdr(p_type, p_offset, p_vaddr, p_paddr, p_filesz, buf)
+        return Phdr(p_type, p_offset, p_vaddr, p_paddr, p_filesz, buf, p_flags, p_memsz)

    @property
    def program_headers(self):
@@ -357,8 +362,6 @@ class ELF:
        else:
            raise NotImplementedError()

-        logger.debug("sh:sh_offset: 0x%02x sh_size: 0x%04x", sh_offset, sh_size)
-
        self.f.seek(sh_offset)
        buf = self.f.read(sh_size)
        if len(buf) != sh_size:
@@ -867,6 +870,8 @@ def guess_os_from_ident_directive(elf: ELF) -> Optional[OS]:
            return OS.LINUX
        elif "Red Hat" in comment:
            return OS.LINUX
+        elif "Alpine" in comment:
+            return OS.LINUX
        elif "Android" in comment:
            return OS.ANDROID

@@ -952,11 +957,506 @@ def guess_os_from_symtab(elf: ELF) -> Optional[OS]:

        for os, hints in keywords.items():
            if any(hint in sym_name for hint in hints):
+                logger.debug("symtab: %s looks like %s", sym_name, os)
                return os

    return None


+def is_go_binary(elf: ELF) -> bool:
+    for shdr in elf.section_headers:
+        if shdr.get_name(elf) == ".note.go.buildid":
+            logger.debug("go buildinfo: found section .note.go.buildid")
+            return True
+
+    # The `go version` command enumerates sections for the name `.go.buildinfo`
+    # (in addition to looking for the BUILDINFO_MAGIC) to check if an executable is a Go binary or not.
+    # See references to the `errNotGoExe` error here:
+    # https://github.com/golang/go/blob/master/src/debug/buildinfo/buildinfo.go#L41
+    for shdr in elf.section_headers:
+        if shdr.get_name(elf) == ".go.buildinfo":
+            logger.debug("go buildinfo: found section .go.buildinfo")
+            return True
+
+    # other strategy used by FLOSS: search for known runtime strings.
+    # https://github.com/mandiant/flare-floss/blob/b2ca8adfc5edf278861dd6bff67d73da39683b46/floss/language/identify.py#L88
+    return False
+
+
+def get_go_buildinfo_data(elf: ELF) -> Optional[bytes]:
+    for shdr in elf.section_headers:
+        if shdr.get_name(elf) == ".go.buildinfo":
+            logger.debug("go buildinfo: found section .go.buildinfo")
+            return shdr.buf
+
+    PT_LOAD = 0x1
+    PF_X = 1
+    PF_W = 2
+    for phdr in elf.program_headers:
+        if phdr.type != PT_LOAD:
+            continue
+
+        if (phdr.flags & (PF_X | PF_W)) == PF_W:
+            logger.debug("go buildinfo: found data segment")
+            return phdr.buf
+
+    return None
+
+
+def read_data(elf: ELF, rva: int, size: int) -> Optional[bytes]:
+    # ELF segments are for runtime data,
+    # ELF sections are for link-time data.
+    # So we want to read Program Headers/Segments.
+    for phdr in elf.program_headers:
+        if phdr.vaddr <= rva < phdr.vaddr + phdr.memsz:
+            segment_data = phdr.buf
+
+            # pad the segment with NULLs
+            # assume page alignment is already handled.
+            # might need more hardening here.
+            if len(segment_data) < phdr.memsz:
+                segment_data += b"\x00" * (phdr.memsz - len(segment_data))
+
+            segment_offset = rva - phdr.vaddr
+            return segment_data[segment_offset : segment_offset + size]
+
+    return None
+
+
+def read_go_slice(elf: ELF, rva: int) -> Optional[bytes]:
+    if elf.bitness == 32:
+        struct_size = 8
+        struct_format = elf.endian + "II"
+    elif elf.bitness == 64:
+        struct_size = 16
+        struct_format = elf.endian + "QQ"
+    else:
+        raise ValueError("invalid psize")
+
+    struct_buf = read_data(elf, rva, struct_size)
+    if not struct_buf:
+        return None
+
+    addr, length = struct.unpack_from(struct_format, struct_buf, 0)
+
+    return read_data(elf, addr, length)
+
+
+def guess_os_from_go_buildinfo(elf: ELF) -> Optional[OS]:
+    """
+    In a binary compiled by Go, the buildinfo structure may contain
+    metadata about the build environment, including the configured
+    GOOS, which specifies the target operating system.
+
+    Search for and parse the buildinfo structure,
+    which may be found in the .go.buildinfo section,
+    and often contains this metadata inline. Otherwise,
+    follow a few byte slices to the relevant information.
+
+    This strategy is derived from GoReSym.
+    """
+    buf = get_go_buildinfo_data(elf)
+    if not buf:
+        logger.debug("go buildinfo: no buildinfo section")
+        return None
+
+    assert isinstance(buf, bytes)
+
+    # The build info blob left by the linker is identified by
+    # a 16-byte header, consisting of:
+    #  - buildInfoMagic (14 bytes),
+    #  - the binary's pointer size (1 byte), and
+    #  - whether the binary is big endian (1 byte).
+    #
+    # Then:
+    #  - virtual address to Go string: runtime.buildVersion
+    #  - virtual address to Go string: runtime.modinfo
+    #
+    #  On 32-bit platforms, the last 8 bytes are unused.
+    #
+    #  If the endianness byte has bit 0x2 set, then the pointers are zero,
+    #  and the 32-byte header is followed by varint-prefixed string data
+    #  for the two string values we care about.
+    # https://github.com/mandiant/GoReSym/blob/0860a1b1b4f3495e9fb7e71eb4386bf3e0a7c500/buildinfo/buildinfo.go#L185-L193
+    BUILDINFO_MAGIC = b"\xFF Go buildinf:"
+
+    try:
+        index = buf.index(BUILDINFO_MAGIC)
+    except ValueError:
+        logger.debug("go buildinfo: no buildinfo magic")
+        return None
+
+    psize, flags = struct.unpack_from("<bb", buf, index + len(BUILDINFO_MAGIC))
+    assert psize in (4, 8)
+    is_big_endian = flags & 0b01
+    has_inline_strings = flags & 0b10
+    logger.debug("go buildinfo: psize: %d big endian: %s inline: %s", psize, is_big_endian, has_inline_strings)
+
+    GOOS_TO_OS = {
+        b"aix": OS.AIX,
+        b"android": OS.ANDROID,
+        b"dragonfly": OS.DRAGONFLYBSD,
+        b"freebsd": OS.FREEBSD,
+        b"hurd": OS.HURD,
+        b"illumos": OS.ILLUMOS,
+        b"linux": OS.LINUX,
+        b"netbsd": OS.NETBSD,
+        b"openbsd": OS.OPENBSD,
+        b"solaris": OS.SOLARIS,
+        b"zos": OS.ZOS,
+        b"windows": None,  # PE format
+        b"plan9": None,  # a.out format
+        b"ios": None,  # Mach-O format
+        b"darwin": None,  # Mach-O format
+        b"nacl": None,  # dropped in GO 1.14
+        b"js": None,
+    }
+
+    if has_inline_strings:
+        # This is the common case/path. Most samples will have an inline GOOS string.
+        #
+        # To find samples on VT, use these VTGrep searches:
+        #
+        #   content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 02}
+        #   content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 02}
+
+        # If present, the GOOS key will be found within
+        # the current buildinfo data region.
+        #
+        # Brute force the k-v pair, like `GOOS=linux`,
+        # rather than try to parse the data, which would be fragile.
+        for key, os in GOOS_TO_OS.items():
+            if (b"GOOS=" + key) in buf:
+                logger.debug("go buildinfo: found os: %s", os)
+                return os
+    else:
+        # This is the uncommon path. Most samples will have an inline GOOS string.
+        #
+        # To find samples on VT, use the referenced VTGrep content searches.
+        info_format = {
+            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 00}
+            # like: 71e617e5cc7fda89bf67422ff60f437e9d54622382c5ed6ff31f75e601f9b22e
+            # in which the modinfo doesn't have GOOS.
+            (4, False): "<II",
+            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 00}
+            # like: 93d3b3e2a904c6c909e20f2f76c3c2e8d0c81d535eb46e5493b5701f461816c3
+            # in which the modinfo doesn't have GOOS.
+            (8, False): "<QQ",
+            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 01}
+            # (no matches on VT today)
+            (4, True): ">II",
+            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 01}
+            # like: d44ba497964050c0e3dd2a192c511e4c3c4f17717f0322a554d64b797ee4690a
+            # in which the modinfo doesn't have GOOS.
+            (8, True): ">QQ",
+        }
+
+        build_version_address, modinfo_address = struct.unpack_from(
+            info_format[(psize, is_big_endian)], buf, index + 0x10
+        )
+        logger.debug("go buildinfo: build version address: 0x%x", build_version_address)
+        logger.debug("go buildinfo: modinfo address: 0x%x", modinfo_address)
+
+        build_version = read_go_slice(elf, build_version_address)
+        if build_version:
+            logger.debug("go buildinfo: build version: %s", build_version.decode("utf-8"))
+
+        modinfo = read_go_slice(elf, modinfo_address)
+        if modinfo:
+            if modinfo[-0x11] == ord("\n"):
+                # Strip module framing: sentinel strings delimiting the module info.
+                # These are cmd/go/internal/modload/build.infoStart and infoEnd.
+                # Which should probably be:
+                # 	infoStart, _ = hex.DecodeString("3077af0c9274080241e1c107e6d618e6")
+                #   infoEnd, _   = hex.DecodeString("f932433186182072008242104116d8f2")
+                modinfo = modinfo[0x10:-0x10]
+            logger.debug("go buildinfo: modinfo: %s", modinfo.decode("utf-8"))
+
+        if not modinfo:
+            return None
+
+        for key, os in GOOS_TO_OS.items():
+            # Brute force the k-v pair, like `GOOS=linux`,
+            # rather than try to parse the data, which would be fragile.
+            if (b"GOOS=" + key) in modinfo:
+                logger.debug("go buildinfo: found os: %s", os)
+                return os
+
+    return None
+
+
+def guess_os_from_go_source(elf: ELF) -> Optional[OS]:
+    """
+    In a binary compiled by Go, runtime metadata may contain
+    references to the source filenames, including the
+    src/runtime/os_* files, whose name indicates the
+    target operating system.
+
+    Confirm the given ELF seems to be built by Go,
+    and then look for strings that look like
+    Go source filenames.
+
+    This strategy is derived from GoReSym.
+    """
+    if not is_go_binary(elf):
+        return None
+
+    for phdr in elf.program_headers:
+        buf = phdr.buf
+        NEEDLE_OS = b"/src/runtime/os_"
+        try:
+            index = buf.index(NEEDLE_OS)
+        except ValueError:
+            continue
+
+        rest = buf[index + len(NEEDLE_OS) : index + len(NEEDLE_OS) + 32]
+        filename = rest.partition(b".go")[0].decode("utf-8")
+        logger.debug("go source: filename: /src/runtime/os_%s.go", filename)
+
+        # via: https://cs.opensource.google/go/go/+/master:src/runtime/;bpv=1;bpt=0
+        # candidates today:
+        #   - aix
+        #   - android
+        #   - darwin
+        #   - darwin_arm64
+        #   - dragonfly
+        #   - freebsd
+        #   - freebsd2
+        #   - freebsd_amd64
+        #   - freebsd_arm
+        #   - freebsd_arm64
+        #   - freebsd_noauxv
+        #   - freebsd_riscv64
+        #   - illumos
+        #   - js
+        #   - linux
+        #   - linux_arm
+        #   - linux_arm64
+        #   - linux_be64
+        #   - linux_generic
+        #   - linux_loong64
+        #   - linux_mips64x
+        #   - linux_mipsx
+        #   - linux_noauxv
+        #   - linux_novdso
+        #   - linux_ppc64x
+        #   - linux_riscv64
+        #   - linux_s390x
+        #   - linux_x86
+        #   - netbsd
+        #   - netbsd_386
+        #   - netbsd_amd64
+        #   - netbsd_arm
+        #   - netbsd_arm64
+        #   - nonopenbsd
+        #   - only_solaris
+        #   - openbsd
+        #   - openbsd_arm
+        #   - openbsd_arm64
+        #   - openbsd_libc
+        #   - openbsd_mips64
+        #   - openbsd_syscall
+        #   - openbsd_syscall1
+        #   - openbsd_syscall2
+        #   - plan9
+        #   - plan9_arm
+        #   - solaris
+        #   - unix
+        #   - unix_nonlinux
+        #   - wasip1
+        #   - wasm
+        #   - windows
+        #   - windows_arm
+        #   - windows_arm64
+
+        OS_FILENAME_TO_OS = {
+            "aix": OS.AIX,
+            "android": OS.ANDROID,
+            "dragonfly": OS.DRAGONFLYBSD,
+            "freebsd": OS.FREEBSD,
+            "freebsd2": OS.FREEBSD,
+            "freebsd_": OS.FREEBSD,
+            "illumos": OS.ILLUMOS,
+            "linux": OS.LINUX,
+            "netbsd": OS.NETBSD,
+            "only_solaris": OS.SOLARIS,
+            "openbsd": OS.OPENBSD,
+            "solaris": OS.SOLARIS,
+            "unix_nonlinux": OS.UNIX,
+        }
+
+        for prefix, os in OS_FILENAME_TO_OS.items():
+            if filename.startswith(prefix):
+                return os
+
+    for phdr in elf.program_headers:
+        buf = phdr.buf
+        NEEDLE_RT0 = b"/src/runtime/rt0_"
+        try:
+            index = buf.index(NEEDLE_RT0)
+        except ValueError:
+            continue
+
+        rest = buf[index + len(NEEDLE_RT0) : index + len(NEEDLE_RT0) + 32]
+        filename = rest.partition(b".s")[0].decode("utf-8")
+        logger.debug("go source: filename: /src/runtime/rt0_%s.s", filename)
+
+        # via: https://cs.opensource.google/go/go/+/master:src/runtime/;bpv=1;bpt=0
+        # candidates today:
+        #   - aix_ppc64
+        #   - android_386
+        #   - android_amd64
+        #   - android_arm
+        #   - android_arm64
+        #   - darwin_amd64
+        #   - darwin_arm64
+        #   - dragonfly_amd64
+        #   - freebsd_386
+        #   - freebsd_amd64
+        #   - freebsd_arm
+        #   - freebsd_arm64
+        #   - freebsd_riscv64
+        #   - illumos_amd64
+        #   - ios_amd64
+        #   - ios_arm64
+        #   - js_wasm
+        #   - linux_386
+        #   - linux_amd64
+        #   - linux_arm
+        #   - linux_arm64
+        #   - linux_loong64
+        #   - linux_mips64x
+        #   - linux_mipsx
+        #   - linux_ppc64
+        #   - linux_ppc64le
+        #   - linux_riscv64
+        #   - linux_s390x
+        #   - netbsd_386
+        #   - netbsd_amd64
+        #   - netbsd_arm
+        #   - netbsd_arm64
+        #   - openbsd_386
+        #   - openbsd_amd64
+        #   - openbsd_arm
+        #   - openbsd_arm64
+        #   - openbsd_mips64
+        #   - openbsd_ppc64
+        #   - openbsd_riscv64
+        #   - plan9_386
+        #   - plan9_amd64
+        #   - plan9_arm
+        #   - solaris_amd64
+        #   - wasip1_wasm
+        #   - windows_386
+        #   - windows_amd64
+        #   - windows_arm
+        #   - windows_arm64
+
+        RT0_FILENAME_TO_OS = {
+            "aix": OS.AIX,
+            "android": OS.ANDROID,
+            "dragonfly": OS.DRAGONFLYBSD,
+            "freebsd": OS.FREEBSD,
+            "illumos": OS.ILLUMOS,
+            "linux": OS.LINUX,
+            "netbsd": OS.NETBSD,
+            "openbsd": OS.OPENBSD,
+            "solaris": OS.SOLARIS,
+        }
+
+        for prefix, os in RT0_FILENAME_TO_OS.items():
+            if filename.startswith(prefix):
+                return os
+
+    return None
+
+
+def guess_os_from_vdso_strings(elf: ELF) -> Optional[OS]:
+    """
+    Guess that the binary targets Linux by spotting vDSO symbol/version strings.
+
+    The "vDSO" (virtual dynamic shared object) is a small shared library
+    that the kernel automatically maps into the address space of all
+    user-space applications. Some statically linked executables embed
+    small dynamic linker routines that find the vDSO symbols by their
+    ASCII name and version, so a known (symbol, version) pair found in
+    the same program header's data suggests the binary targets Linux.
+    """
+    # via: https://man7.org/linux/man-pages/man7/vdso.7.html
+    # The arch is not used for matching, it is only logged, which is interesting for documentation.
+    # We could restrict candidates to the arch in the ELF header, but that's more work; this may suffice.
+    known_pairs = (
+        ("arm", b"__vdso_gettimeofday", b"LINUX_2.6"),
+        ("arm", b"__vdso_clock_gettime", b"LINUX_2.6"),
+        ("aarch64", b"__kernel_rt_sigreturn", b"LINUX_2.6.39"),
+        ("aarch64", b"__kernel_gettimeofday", b"LINUX_2.6.39"),
+        ("aarch64", b"__kernel_clock_gettime", b"LINUX_2.6.39"),
+        ("aarch64", b"__kernel_clock_getres", b"LINUX_2.6.39"),
+        ("mips", b"__kernel_gettimeofday", b"LINUX_2.6"),
+        ("mips", b"__kernel_clock_gettime", b"LINUX_2.6"),
+        ("ia64", b"__kernel_sigtramp", b"LINUX_2.5"),
+        ("ia64", b"__kernel_syscall_via_break", b"LINUX_2.5"),
+        ("ia64", b"__kernel_syscall_via_epc", b"LINUX_2.5"),
+        ("ppc/32", b"__kernel_clock_getres", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_clock_gettime", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_clock_gettime64", b"LINUX_5.11"),
+        ("ppc/32", b"__kernel_datapage_offset", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_get_syscall_map", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_get_tbfreq", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_getcpu", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_gettimeofday", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_sigtramp_rt32", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_sigtramp32", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_sync_dicache", b"LINUX_2.6.15"),
+        ("ppc/32", b"__kernel_sync_dicache_p5", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_clock_getres", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_clock_gettime", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_datapage_offset", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_get_syscall_map", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_get_tbfreq", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_getcpu", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_gettimeofday", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_sigtramp_rt64", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_sync_dicache", b"LINUX_2.6.15"),
+        ("ppc/64", b"__kernel_sync_dicache_p5", b"LINUX_2.6.15"),
+        ("riscv", b"__vdso_rt_sigreturn", b"LINUX_4.15"),
+        ("riscv", b"__vdso_gettimeofday", b"LINUX_4.15"),
+        ("riscv", b"__vdso_clock_gettime", b"LINUX_4.15"),
+        ("riscv", b"__vdso_clock_getres", b"LINUX_4.15"),
+        ("riscv", b"__vdso_getcpu", b"LINUX_4.15"),
+        ("riscv", b"__vdso_flush_icache", b"LINUX_4.15"),
+        ("s390", b"__kernel_clock_getres", b"LINUX_2.6.29"),
+        ("s390", b"__kernel_clock_gettime", b"LINUX_2.6.29"),
+        ("s390", b"__kernel_gettimeofday", b"LINUX_2.6.29"),
+        ("superh", b"__kernel_rt_sigreturn", b"LINUX_2.6"),
+        ("superh", b"__kernel_sigreturn", b"LINUX_2.6"),
+        ("superh", b"__kernel_vsyscall", b"LINUX_2.6"),
+        ("i386", b"__kernel_sigreturn", b"LINUX_2.5"),
+        ("i386", b"__kernel_rt_sigreturn", b"LINUX_2.5"),
+        ("i386", b"__kernel_vsyscall", b"LINUX_2.5"),
+        ("i386", b"__vdso_clock_gettime", b"LINUX_2.6"),
+        ("i386", b"__vdso_gettimeofday", b"LINUX_2.6"),
+        ("i386", b"__vdso_time", b"LINUX_2.6"),
+        ("x86-64", b"__vdso_clock_gettime", b"LINUX_2.6"),
+        ("x86-64", b"__vdso_getcpu", b"LINUX_2.6"),
+        ("x86-64", b"__vdso_gettimeofday", b"LINUX_2.6"),
+        ("x86-64", b"__vdso_time", b"LINUX_2.6"),
+        ("x86/32", b"__vdso_clock_gettime", b"LINUX_2.6"),
+        ("x86/32", b"__vdso_getcpu", b"LINUX_2.6"),
+        ("x86/32", b"__vdso_gettimeofday", b"LINUX_2.6"),
+        ("x86/32", b"__vdso_time", b"LINUX_2.6"),
+    )
+    for phdr in elf.program_headers:
+        segment = phdr.buf
+        for arch, symbol, version in known_pairs:
+            if symbol not in segment or version not in segment:
+                continue
+            logger.debug("vdso string: %s %s %s", arch, symbol.decode("ascii"), version.decode("ascii"))
+            return OS.LINUX
+
+    return None
+
+
 def detect_elf_os(f) -> str:
    """
    f: type Union[BinaryIO, IDAIO, GHIDRAIO]
@@ -1023,6 +1523,27 @@ def detect_elf_os(f) -> str:
        logger.warning("Error guessing OS from symbol table: %s", e)
        symtab_guess = None

+    try:
+        goos_guess = guess_os_from_go_buildinfo(elf)
+        logger.debug("guess: Go buildinfo: %s", goos_guess)
+    except Exception as e:
+        logger.warning("Error guessing OS from Go buildinfo: %s", e)
+        goos_guess = None
+
+    try:
+        gosrc_guess = guess_os_from_go_source(elf)
+        logger.debug("guess: Go source: %s", gosrc_guess)
+    except Exception as e:
+        logger.warning("Error guessing OS from Go source path: %s", e)
+        gosrc_guess = None
+
+    try:
+        vdso_guess = guess_os_from_vdso_strings(elf)
+        logger.debug("guess: vdso strings: %s", vdso_guess)
+    except Exception as e:
+        logger.warning("Error guessing OS from vdso strings: %s", e)
+        vdso_guess = None  # best-effort heuristic: a failure here just means "no guess"
+
    ret = None

    if osabi_guess:
@@ -1046,11 +1567,24 @@ def detect_elf_os(f) -> str:
    elif symtab_guess:
        ret = symtab_guess

+    elif goos_guess:
+        ret = goos_guess
+
+    elif gosrc_guess:
+        # prefer goos_guess to this method,
+        # which is just string interpretation.
+        ret = gosrc_guess
+
    elif ident_guess:
        # at the bottom because we don't trust this too much
        # due to potential for bugs with cross-compilation.
        ret = ident_guess

+    elif vdso_guess:
+        # at the bottom because this is just scanning strings,
+        # which isn't very authoritative.
+        ret = vdso_guess
+
    return ret.value if ret is not None else "unknown"


--- a/capa/loader.py
+++ b/capa/loader.py
@@ -8,6 +8,7 @@
 import sys
 import logging
 import datetime
+import contextlib
 from typing import Set, Dict, List, Optional
 from pathlib import Path

@@ -154,6 +155,18 @@ def get_workspace(path: Path, input_format: str, sigpaths: List[Path]):

    viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths])

+    with contextlib.suppress(Exception):
+        # unfortunately viv raises a raw Exception (not any subclass).
+        # This happens when the module isn't found, such as with a viv upgrade.
+        #
+        # Remove the symbolic switch case solver.
+        # This is only enabled for ELF files, not PE files.
+        # During the following performance investigation, this analysis module
+        # had some terrible worst-case behavior.
+        # We can put up with slightly worse CFG reconstruction in order to avoid this.
+        # https://github.com/mandiant/capa/issues/1989#issuecomment-1948022767
+        vw.delFuncAnalysisModule("vivisect.analysis.generic.symswitchcase")
+
    vw.analyze()

    logger.debug("%s", get_meta_str(vw))
--- a/capa/version.py
+++ b/capa/version.py
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-__version__ = "7.0.1"
+__version__ = "7.1.0"


 def get_major_version():
--- a/doc/installation.md
+++ b/doc/installation.md
@@ -91,6 +91,12 @@ For more details about creating and using virtual environments, check out the [v

 ##### Install development dependencies

+When developing capa, please use the pinned dependencies found in `requirements.txt`.
+This ensures that everyone has the exact same, reproducible environment.
+Please install these dependencies before installing capa (from source or from PyPI):
+
+`$ pip install -r requirements.txt`
+
 We use the following tools to ensure consistent code style and formatting:
  - [black](https://github.com/psf/black) code formatter
  - [isort](https://pypi.org/project/isort/) code formatter
@@ -101,7 +107,8 @@ We use the following tools to ensure consistent code style and formatting:

 To install these development dependencies, run:

-`$ pip install -e /local/path/to/src[dev]`
+- `$ pip install -e /local/path/to/src[dev]` or
+- `$ pip install -e /local/path/to/src[dev,scripts]` to also install all script dependencies

 We use [pre-commit](https://pre-commit.com/) so that its trivial to run the same linters & configuration locally as in CI.

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,25 +32,76 @@ classifiers = [
    "Topic :: Security",
 ]
 dependencies = [
-    "tqdm==4.66.4",
-    "pyyaml==6.0.1",
-    "tabulate==0.9.0",
-    "colorama==0.4.6",
-    "termcolor==2.4.0",
-    "wcwidth==0.2.13",
-    "ida-settings==2.1.0",
-    "viv-utils[flirt]==0.7.9",
-    "networkx==3.1",
-    "ruamel.yaml==0.18.6",
-    "vivisect==1.1.1",
-    "pefile==2023.2.7",
-    "pyelftools==0.31",
-    "dnfile==0.14.1",
-    "dncil==1.0.2",
-    "pydantic==2.7.1",
-    "rich==13.7.1",
-    "humanize==4.9.0",
-    "protobuf==5.27.0",
+    # ---------------------------------------
+    # As a library, capa uses lower version bounds
+    # when specifying its dependencies. This lets
+    # other programs that use capa (and other libraries)
+    # find a compatible set of dependency versions.
+    #
+    # We can optionally pin to specific versions or
+    # limit the upper bound when there's a good reason;
+    # but the default is to assume all greater versions
+    # probably work with capa until proven otherwise.
+    #
+    # The following link provides good background:
+    # https://iscinumpy.dev/post/bound-version-constraints/
+    #
+    # When we develop capa, and when we distribute it as
+    # a standalone binary, we'll use specific versions
+    # that are pinned in requirements.txt.
+    # But the requirements for a library are specified here
+    # and are looser.
+    #
+    # Related discussions:
+    # 
+    #   - https://github.com/mandiant/capa/issues/2053
+    #   - https://github.com/mandiant/capa/pull/2059
+    #   - https://github.com/mandiant/capa/pull/2079
+    #
+    # ---------------------------------------
+    # The following dependency versions were imported
+    # during June 2024 by truncating specific versions to
+    # their major-most version (major version when possible, 
+    # or minor otherwise).
+    # As specific constraints are identified, please provide
+    # comments and context.
+    "tqdm>=4",
+    "pyyaml>=6",
+    "tabulate>=0.9",
+    "colorama>=0.4",
+    "termcolor>=2",
+    "wcwidth>=0.2",
+    "ida-settings>=2",
+    "ruamel.yaml>=0.18",
+    "pefile>=2023.2.7",
+    "pyelftools>=0.31",
+    "pydantic>=2",
+    "rich>=13",
+    "humanize>=4",
+    "protobuf>=5",
+
+    # ---------------------------------------
+    # Dependencies that we develop
+    #
+    # These dependencies are often actively influenced by capa,
+    # so we provide a minimum patch version that includes the
+    # latest bug fixes we need here.
+    "viv-utils[flirt]>=0.7.9",
+    "vivisect>=1.1.1",
+    "dncil>=1.0.2",
+
+    # ---------------------------------------
+    # Dependencies with version caps
+    #
+    # These dependencies must not exceed the version cap,
+    # typically due to dropping support for python releases
+    # we still support.
+
+    # TODO(williballenthin): networkx 3.2 doesn't support python 3.8 while capa does.
+    # https://github.com/mandiant/capa/issues/1966
+    "networkx>=3,<3.2",
+
+    "dnfile>=0.15.0",
 ]
 dynamic = ["version"]

@@ -63,6 +114,10 @@ namespaces = false

 [project.optional-dependencies]
 dev = [
+    # Dev and build dependencies are not relaxed because
+    # we want all developer environments to be consistent.
+    # These dependencies are not used in production environments
+    # and should not conflict with other libraries/tooling.
    "pre-commit==3.5.0",
    "pytest==8.0.0",
    "pytest-sugar==1.0.0",
@@ -79,14 +134,12 @@ dev = [
    "flake8-simplify==0.21.0",
    "flake8-use-pathlib==0.3.0",
    "flake8-copyright==0.2.4",
-    "ruff==0.4.7",
+    "ruff==0.4.8",
    "black==24.4.2",
    "isort==5.13.2",
    "mypy==1.10.0",
-    "psutil==5.9.2",
-    "stix2==3.0.1",
-    "requests==2.31.0",
    "mypy-protobuf==3.6.0",
+    "PyGithub==2.3.0",
    # type stubs for mypy
    "types-backports==0.1.3",
    "types-colorama==0.4.15.11",
@@ -99,10 +152,21 @@ dev = [
    "deptry==0.16.1"
 ]
 build = [
-    "pyinstaller==6.7.0",
-    "setuptools==69.5.1",
+    # Dev and build dependencies are not relaxed because
+    # we want all developer environments to be consistent.
+    # These dependencies are not used in production environments
+    # and should not conflict with other libraries/tooling.
+    "pyinstaller==6.8.0",
+    "setuptools==70.0.0",
    "build==1.2.1"
 ]
+scripts = [
+    "jschema_to_python==1.2.3",
+    "psutil==5.9.2",
+    "stix2==3.0.1",
+    "sarif_om==1.0.4",
+    "requests==2.31.0",
+]

 [tool.deptry]
 extend_exclude = [
@@ -152,6 +216,7 @@ DEP002 = [
    "mypy",
    "mypy-protobuf",
    "pre-commit",
+    "PyGithub",
    "pyinstaller",
    "pytest",
    "pytest-cov",
@@ -175,6 +240,9 @@ DEP003 = [
    "typing_extensions" # TODO(s-ff): remove when Python 3.9 is deprecated, see #1699
 ]

+[tool.deptry.package_module_name_map]
+PyGithub = "github"
+
 [project.urls]
 Homepage = "https://github.com/mandiant/capa"
 Repository = "https://github.com/mandiant/capa.git"
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,46 @@
+# Dependencies with specific version constraints
+# used during development and building the standalone executables.
+# For these environments, use `pip install -r requirements.txt`
+# before installing capa from source/pypi. This will ensure
+# the following specific versions are used.
+#
+# Initially generated via: pip freeze | grep -v -- "-e"
+# Kept up to date by dependabot.
+annotated-types==0.7.0
+colorama==0.4.6
+cxxfilt==0.2.2
+dncil==1.0.2
+dnfile==0.15.0
+funcy==2.0
+humanize==4.9.0
+ida-netnode==3.0
+ida-settings==2.1.0
+intervaltree==3.1.0
+markdown-it-py==3.0.0
+mdurl==0.1.2
+msgpack==1.0.8
+networkx==3.1
+pefile==2023.2.7
+pip==24.0
+protobuf==5.27.1
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pycparser==2.22
+pydantic==2.7.3
+pydantic-core==2.18.4
+pyelftools==0.31
+pygments==2.18.0
+python-flirt==0.8.10
+pyyaml==6.0.1
+rich==13.7.1
+ruamel-yaml==0.18.6
+ruamel-yaml-clib==0.2.8
+setuptools==70.0.0
+six==1.16.0
+sortedcontainers==2.4.0
+tabulate==0.9.0
+termcolor==2.4.0
+tqdm==4.66.4
+viv-utils==0.7.11
+vivisect==1.1.1
+wcwidth==0.2.13
--- a/2
+++ b/2
--- a/scripts/capa2sarif.py
+++ b/scripts/capa2sarif.py
@@ -0,0 +1,358 @@
+# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+"""
+Convert capa json output to sarif schema
+    usage: capa2sarif.py [-h] [-g] [-r] [-t TAG] [--version] capa_output
+
+Capa to SARIF analysis file
+positional arguments:
+    capa_output         Path to capa JSON output file
+optional arguments:
+  -h, --help            show this help message and exit
+  --version             show program's version number and exit
+  -t TAG, --tag TAG     filter on rule meta field values (ruleid)
+
+Requires:
+    - sarif_om 1.0.4
+    - jschema_to_python 1.2.3
+"""
+import sys
+import json
+import logging
+import argparse
+from typing import List, Optional
+from pathlib import Path
+
+from capa.version import __version__
+
+logger = logging.getLogger("capa2sarif")
+
+# Third-party dependencies from the `scripts` extra: fail fast with an install hint when one is missing.
+try:
+    from sarif_om import Run, Tool, SarifLog, ToolComponent
+except ImportError as e:
+    logger.error(
+        "Required import `sarif_om` is not installed. This is solved by installing `python3 -m pip install 'sarif_om>=1.0.4'`. %s",
+        e,
+    )
+    sys.exit(-4)
+
+try:
+    from jschema_to_python.to_json import to_json
+except ImportError as e:
+    logger.error(
+        "Required import `jschema_to_python` is not installed. This is solved by installing `python3 -m pip install 'jschema_to_python>=1.2.3'`, %s",
+        e,
+    )
+    sys.exit(-4)
+
+
+def _parse_args() -> argparse.Namespace:
+    """
+    Parse the command line of the script.
+
+    @returns argparse.Namespace with capa_output, ghidra_compat, radare_compat and tag
+    """
+    parser = argparse.ArgumentParser(description="Capa to SARIF analysis file")
+
+    # Positional argument
+    parser.add_argument("capa_output", help="Path to capa JSON output file")
+
+    # Optional boolean switches, one per supported SARIF consumer
+    compat_switches = (
+        ("-g", "--ghidra-compat", "Compatibility for Ghidra 11.0.X"),
+        ("-r", "--radare-compat", "Compatibility for Radare r2sarif plugin v2.0"),
+    )
+    for short_flag, long_flag, help_text in compat_switches:
+        parser.add_argument(short_flag, long_flag, action="store_true", help=help_text)
+
+    parser.add_argument("-t", "--tag", help="Filter on rule meta field values (ruleid)")
+    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
+
+    return parser.parse_args()
+
+
+def main() -> int:
+    """
+    Convert a capa JSON results file into a SARIF document printed on stdout.
+
+    @returns 0 on success, -1 when the input is not valid JSON,
+      -3 when the baseline SARIF structure could not be created.
+    """
+    logging.basicConfig(level=logging.INFO)
+    logging.getLogger().setLevel(logging.INFO)
+
+    args = _parse_args()
+
+    try:
+        with Path(args.capa_output).open() as capa_output:
+            json_data = json.load(capa_output)
+    except ValueError:
+        # also covers json.JSONDecodeError, which is a subclass of ValueError
+        logger.error("Input data was not valid JSON, input should be a capa json output file.")
+        return -1
+
+    # Marshall json into Sarif
+    # Create baseline sarif structure to be populated from json data
+    sarif_structure: Optional[dict] = _sarif_boilerplate(json_data["meta"], json_data["rules"])
+    if sarif_structure is None:
+        logger.error("An Error has occured creating default sarif structure.")
+        return -3
+
+    _populate_artifact(sarif_structure, json_data["meta"])
+    _populate_invocations(sarif_structure, json_data["meta"])
+    _populate_results(sarif_structure, json_data["rules"], args.ghidra_compat)
+
+    if args.ghidra_compat:
+        # Ghidra can't handle this structure as of 11.0.x
+        if "invocations" in sarif_structure["runs"][0]:
+            del sarif_structure["runs"][0]["invocations"]
+
+        # artifacts must include a description as well with a text field.
+        if "artifacts" in sarif_structure["runs"][0]:
+            sarif_structure["runs"][0]["artifacts"][0]["description"] = {"text": "placeholder"}
+
+        # For better compliance with Ghidra table. Iteration through properties['additionalProperties'], e.g.:
+        #   "additionalProperties": {
+        #       "to": "<hex addr>",
+        #       "offset": 0,
+        #       "primary": true,
+        #       "index": <>"",
+        #       "kind": "<kind column value>",
+        #       "opIndex": 0, "sourceType": ""
+        #   }
+
+    if args.radare_compat:
+        # Add just enough for passing tests
+        _add_filler_optional(json_data, sarif_structure)
+
+    print(json.dumps(sarif_structure, indent=4))  # noqa: T201
+    return 0
+
+
+def _sarif_boilerplate(data_meta: dict, data_rules: dict) -> Optional[dict]:
+    """
+    Build the baseline SARIF log: a single run whose tool driver lists the given rules.
+    The run's results, artifacts and invocations lists start out empty.
+
+    @param data_meta: dict - Capa meta output (not read by this function)
+    @param data_rules: dict - Capa rules output; each entry's "meta" describes one rule
+    @returns the SARIF log as a dict, or None when its JSON text cannot be parsed back
+    """
+    # Only track rules that appear in this log, not full 1k
+    rules = []
+    for rule in data_rules.values():
+        meta = rule["meta"]
+        n_attack, n_mbc = len(meta["attack"]), len(meta["mbc"])
+        # ruleID preference: first ATT&CK id, then first MBC id, then the rule name
+        # (a long name is not good practice for a ruleID, so it is only the last resort)
+        if n_attack > 0:
+            rule_id = meta["attack"][0]["id"]
+        elif n_mbc > 0:
+            rule_id = meta["mbc"][0]["id"]
+        else:
+            rule_id = meta["name"]
+
+        rules.append(
+            {
+                "id": rule_id,
+                "name": meta["name"],
+                "shortDescription": {"text": meta["name"]},
+                "messageStrings": {"default": {"text": meta["name"]}},
+                "properties": {
+                    "namespace": meta.get("namespace", []),
+                    "scopes": meta["scopes"],
+                    "references": meta["references"],
+                    "lib": meta["lib"],
+                },
+            }
+        )
+
+    driver = ToolComponent(
+        name="Capa",
+        version=__version__,
+        information_uri="https://github.com/mandiant/capa",
+        rules=rules,
+    )
+    run = Run(tool=Tool(driver=driver), results=[], artifacts=[], invocations=[])
+
+    # Create a SARIF Log object, populate with a single run
+    sarif_log = SarifLog(
+        version="2.1.0",
+        schema_uri="https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json",
+        runs=[run],
+    )
+
+    # Serialize the SARIF objects to JSON text, then parse it back into plain dicts and lists
+    try:
+        return json.loads(to_json(sarif_log))
+    except json.JSONDecodeError:
+        # the serialized text was not parseable
+        return None
+
+
+def _populate_artifact(sarif_log: dict, meta_data: dict) -> None:
+    """
+    Append the analyzed sample (path and hashes) to the first run's artifacts.
+
+    @param sarif_log: dict - sarif data structure including runs
+    @param meta_data: dict - Capa meta output
+    @returns None, updates sarif_log via side-effects
+    """
+    sample = meta_data["sample"]
+    location = {"uri": sample["path"]}
+    hashes = {
+        "md5": sample["md5"],
+        "sha-1": sample["sha1"],
+        "sha-256": sample["sha256"],
+    }
+    artifact = {"location": location, "roles": ["analysisTarget"], "hashes": hashes}
+    sarif_log["runs"][0]["artifacts"].append(artifact)
+
+
+def _populate_invocations(sarif_log: dict, meta_data: dict) -> None:
+    """
+    Append a record of the capa invocation to the first run's invocations.
+
+    @param sarif_log: dict - sarif data structure including runs
+    @param meta_data: dict - Capa meta output
+    @returns None, updates sarif_log via side-effects
+    """
+    timestamp, argv, analysis = meta_data["timestamp"], meta_data["argv"], meta_data["analysis"]
+    command_line = "capa " + " ".join(argv)
+    arguments = argv if len(argv) > 0 else []
+    # Format in Zulu time, this may require a conversion from local timezone
+    end_time = f"{timestamp}Z"
+    properties = {name: analysis[name] for name in ("format", "arch", "os", "extractor")}
+    properties["rule_location"] = analysis["rules"]
+    properties["base_address"] = analysis["base_address"]
+    sarif_log["runs"][0]["invocations"].append(
+        {
+            "commandLine": command_line,
+            "arguments": arguments,
+            "endTimeUtc": end_time,
+            "executionSuccessful": True,
+            "properties": properties,
+        }
+    )
+
+
+def _enumerate_evidence(node: dict, related_count: int) -> List[dict]:
+    """
+    Collect SARIF relatedLocations for the successful feature matches under `node`.
+    Unsupported node or feature types are logged and contribute no locations.
+
+    @param node: dict - capa match node ("success", "node", "locations", "children")
+    @param related_count: int - id to assign to the first location that is emitted
+    @returns list of relatedLocation dicts with consecutive, non-repeating ids
+    """
+    related_locations: List[dict] = []
+    if not node.get("success"):
+        return related_locations
+
+    node_type = node.get("node", {}).get("type")
+    if node_type == "statement":
+        for child in node.get("children", []):
+            # offset by what earlier siblings produced, otherwise every child restarts at the same id
+            related_locations += _enumerate_evidence(child, related_count + len(related_locations))
+        return related_locations
+
+    if node_type != "feature":
+        logger.error("Not implemented %s", node_type)
+        return []
+
+    feature = node.get("node", {}).get("feature", {})
+    kind = feature.get("type")
+    # most feature types keep their value under a key named like the type itself
+    if kind in ("api", "match", "mnemonic", "characteristic", "os"):
+        label = f"{kind}: {feature.get(kind)}"
+    elif kind in ("number", "offset"):
+        label = f"{kind}: {feature.get('description')} ({feature.get(kind)})"
+    elif kind == "operand number":
+        label = f"operand: ({feature.get('index')} ) {feature.get('description')} ({feature.get('operand_number')})"
+    else:
+        logger.error("Not implemented %s", kind)
+        return []
+
+    for loc in node.get("locations", []):
+        if loc["type"] != "absolute":
+            continue
+        related_locations.append(
+            {
+                "id": related_count + len(related_locations),
+                "message": {"text": label},
+                "physicalLocation": {"address": {"absoluteAddress": loc["value"]}},
+            }
+        )
+
+    return related_locations
+
+
+def _populate_results(sarif_log: dict, data_rules: dict, ghidra_compat: bool) -> None:
+    """
+    Append one SARIF result per rule match to the first run's results.
+
+    @param sarif_log: dict - sarif data structure including runs
+    @param data_rules: dict - Capa rules output (each entry has "meta" and "matches")
+    @param ghidra_compat: bool - emit upper-case level/kind and leave out relatedLocations
+    @returns None, updates sarif_log via side-effects
+    """
+    results = sarif_log["runs"][0]["results"]
+    # with ghidra_compat the level/kind values are emitted in upper case
+    level, kind = ("none", "informational") if not ghidra_compat else ("NONE", "INFORMATIONAL")
+
+    for rule in data_rules.values():
+        meta = rule["meta"]
+        # ruleId preference: first ATT&CK id, then first MBC id, then the rule name
+        # (a long name is not good practice for a ruleId, so it is only the last resort)
+        n_attack, n_mbc = len(meta["attack"]), len(meta["mbc"])
+        if n_attack > 0:
+            rule_id = meta["attack"][0]["id"]
+        elif n_mbc > 0:
+            rule_id = meta["mbc"][0]["id"]
+        else:
+            rule_id = meta["name"]
+
+        for address, details in rule["matches"]:
+            evidence = _enumerate_evidence(details, 0)
+            finding = {
+                "ruleId": rule_id,
+                "level": level,
+                "message": {"text": meta["name"]},
+                "kind": kind,
+                "locations": [
+                    {
+                        "physicalLocation": {
+                            "address": {
+                                "absoluteAddress": address["value"],
+                            }
+                        },
+                    }
+                ],
+            }
+            if not ghidra_compat:
+                finding["relatedLocations"] = evidence
+
+            results.append(finding)
+
+
+def _add_filler_optional(capa_result: dict, sarif_log: dict) -> None:
+    """
+    Update sarif file with just enough fields to pass radare tests.
+    Values that are already present in the artifact are left untouched.
+    """
+    image_base = capa_result["meta"]["analysis"]["base_address"]["value"]
+    # Assume there is only one run, and one binary artifact
+    first_artifact = sarif_log["runs"][0]["artifacts"][0]
+    # setdefault creates each missing level and keeps whatever already exists
+    extra = first_artifact.setdefault("properties", {}).setdefault("additionalProperties", {})
+    extra.setdefault("imageBase", image_base)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/tests/data
+++ b/tests/data
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -389,6 +389,8 @@ def get_data_path_by_name(name) -> Path:
        return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
    elif name.startswith("1038a2"):
        return CD / "data" / "1038a23daad86042c66bfe6c9d052d27048de9653bde5750dc0f240c792d9ac8.elf_"
+    elif name.startswith("3da7c"):
+        return CD / "data" / "3da7c2c70a2d93ac4643f20339d5c7d61388bddd77a4a5fd732311efad78e535.elf_"
    elif name.startswith("nested_typedef"):
        return CD / "data" / "dotnet" / "dd9098ff91717f4906afe9dafdfa2f52.exe_"
    elif name.startswith("nested_typeref"):
--- a/tests/test_os_detection.py
+++ b/tests/test_os_detection.py
@@ -92,6 +92,12 @@ def test_elf_android_notes():
        assert capa.features.extractors.elf.detect_elf_os(f) == "android"


+def test_elf_go_buildinfo():
+    # the 3da7c... ELF fixture must be reported as a Linux binary
+    with Path(get_data_path_by_name("3da7c")).open("rb") as sample:
+        assert capa.features.extractors.elf.detect_elf_os(sample) == "linux"
+
+
 def test_elf_parse_capa_pyinstaller_header():
    # error after misidentified large pydata section with address 0; fixed in #1454
    # compressed ELF header of capa-v5.1.0-linux
--- a/tests/test_scripts.py
+++ b/tests/test_scripts.py
@@ -40,6 +40,10 @@ def get_rule_path():
    [
        pytest.param("capa2yara.py", [get_rules_path()]),
        pytest.param("capafmt.py", [get_rule_path()]),
+        pytest.param(
+            "capa2sarif.py",
+            [Path(__file__).resolve().parent / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json"],
+        ),
        # testing some variations of linter script
        pytest.param("lint.py", ["-t", "create directory", get_rules_path()]),
        # `create directory` rule has native and .NET example PEs