fix lints

features: add BinExport2 declarations
2026-03-14 22:17:13 -07:00 · 2024-06-10 14:49:03 -06:00 · 2024-06-10 14:48:36 -06:00
27 changed files with 168 additions and 1333 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -32,7 +32,7 @@ jobs:
            artifact_name: capa.exe
            asset_name: windows
            python_version: 3.8
-          - os: macos-12
+          - os: macos-12
            # use older macOS for assumed better portability
            artifact_name: capa
            asset_name: macos
@@ -51,9 +51,7 @@ jobs:
      - name: Upgrade pip, setuptools
        run: python -m pip install --upgrade pip setuptools
      - name: Install capa with build requirements
-        run: |
-          pip install -r requirements.txt
-          pip install -e .[build]
+        run: pip install -e .[build]
      - name: Build standalone executable
        run: pyinstaller --log-level DEBUG .github/pyinstaller/pyinstaller.spec
      - name: Does it run (PE)?
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -25,7 +25,6 @@ jobs:
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
-          pip install -r requirements.txt
          pip install -e .[build]
      - name: build package
        run: |
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -35,9 +35,7 @@ jobs:
      with:
        python-version: "3.11"
    - name: Install dependencies
-      run: |
-        pip install -r requirements.txt
-        pip install -e .[dev,scripts]
+      run: pip install -e .[dev]
    - name: Lint with ruff
      run: pre-commit run ruff
    - name: Lint with isort
@@ -63,9 +61,7 @@ jobs:
      with:
        python-version: "3.11"
    - name: Install capa
-      run: |
-        pip install -r requirements.txt
-        pip install -e .[dev,scripts]
+      run: pip install -e .[dev]
    - name: Run rule linter
      run: python scripts/lint.py rules/

@@ -76,7 +72,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        os: [ubuntu-20.04, windows-2019, macos-12]
+        os: [ubuntu-20.04, windows-2019, macos-12]
        # across all operating systems
        python-version: ["3.8", "3.11"]
        include:
@@ -100,9 +96,7 @@ jobs:
      if: matrix.os == 'ubuntu-20.04'
      run: sudo apt-get install -y libyaml-dev
    - name: Install capa
-      run: |
-        pip install -r requirements.txt
-        pip install -e .[dev,scripts]
+      run: pip install -e .[dev]
    - name: Run tests (fast)
      # this set of tests runs about 80% of the cases in 20% of the time,
      # and should catch most errors quickly.
@@ -137,9 +131,7 @@ jobs:
      run: sudo apt-get install -y libyaml-dev
    - name: Install capa
      if: ${{ env.BN_SERIAL != 0 }}
-      run: |
-        pip install -r requirements.txt
-        pip install -e .[dev,scripts]
+      run: pip install -e .[dev]
    - name: install Binary Ninja
      if: ${{ env.BN_SERIAL != 0 }}
      run: |
@@ -196,9 +188,7 @@ jobs:
    - name: Install pyyaml
      run: sudo apt-get install -y libyaml-dev
    - name: Install capa
-      run: |
-        pip install -r requirements.txt
-        pip install -e .[dev,scripts]
+      run: pip install -e .[dev]
    - name: Run tests
      run: | 
        mkdir ./.github/ghidra/project
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -110,16 +110,6 @@ repos:
        always_run: true
        pass_filenames: false

-   repo: local
-    hooks:
-    -   id: deptry
-        name: deptry
-        stages: [push, manual]
-        language: system
-        entry: deptry .
-        always_run: true
-        pass_filenames: false
-
 -   repo: local
    hooks:
    -   id: pytest-fast
@@ -138,3 +128,12 @@ repos:
        always_run: true
        pass_filenames: false

+-   repo: local
+    hooks:
+    -   id: deptry
+        name: deptry
+        stages: [push, manual]
+        language: system
+        entry: deptry .
+        always_run: true
+        pass_filenames: false
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,58 +1,20 @@
 # Change Log

 ## master (unreleased)
-
-### New Features
-
-### Breaking Changes
-
-### New Rules (0)
-
-
-
-### Bug Fixes
-
- elf: extract import / export symbols from stripped binaries #2096 @ygasparis
-
-### capa explorer IDA Pro plugin
-
-### Development
- CI: use macos-12 since macos-11 is deprecated and will be removed on June 28th, 2024 #2173 @mr-tz
-
-### Raw diffs
- [capa v7.1.0...master](https://github.com/mandiant/capa/compare/v7.1.0...master)
- [capa-rules v7.1.0...master](https://github.com/mandiant/capa-rules/compare/v7.1.0...master)
-
-## v7.1.0
-The v7.1.0 release brings large performance improvements to capa's rule matching engine.
-Additionally, we've fixed various bugs and added new features for people using and developing capa.
-
-Special thanks to our repeat and new contributors:
-* @sjha2048 made their first contribution in https://github.com/mandiant/capa/pull/2000
-* @Rohit1123 made their first contribution in https://github.com/mandiant/capa/pull/1990
-* @psahithireddy made their first contribution in https://github.com/mandiant/capa/pull/2020
-* @Atlas-64 made their first contribution in https://github.com/mandiant/capa/pull/2018
-* @s-ff made their first contribution in https://github.com/mandiant/capa/pull/2011
-* @samadpls made their first contribution in https://github.com/mandiant/capa/pull/2024
-* @acelynnzhang made their first contribution in https://github.com/mandiant/capa/pull/2044
-* @RainRat made their first contribution in https://github.com/mandiant/capa/pull/2058
-* @ReversingWithMe made their first contribution in https://github.com/mandiant/capa/pull/2093
-* @malwarefrank made their first contribution in https://github.com/mandiant/capa/pull/2037
-
-### New Features
 - Emit "dotnet" as format to ResultDocument when processing .NET files #2024 @samadpls
- ELF: detect OS from statically-linked Go binaries #1978 @williballenthin
+
+### New Features
+
 - add function in capa/helpers to load plain and compressed JSON reports #1883 @Rohit1123
 - document Antivirus warnings and VirusTotal false positive detections #2028 @RionEV @mr-tz
- Add json to sarif conversion script @reversingwithme
 - render maec/* fields #843 @s-ff
 - replace Halo spinner with Rich #2086 @s-ff
 - optimize rule matching #2080 @williballenthin
- add aarch64 as a valid architecture #2144 mehunhoff@google.com @williballenthin
- relax dependency version requirements for the capa library #2053 @williballenthin
- add scripts dependency group and update documentation #2145 @mr-tz

-### New Rules (25)
+### Breaking Changes
+
+
+### New Rules (17)

 - impact/wipe-disk/delete-drive-layout-via-ioctl william.ballenthin@mandiant.com
 - host-interaction/driver/interact-with-driver-via-ioctl moritz.raabe@mandiant.com
@@ -71,14 +33,7 @@ Special thanks to our repeat and new contributors:
 - persistence/act-as-time-provider-dll jakub.jozwiak@mandiant.com
 - host-interaction/gui/window/hide/hide-graphical-window-from-taskbar jakub.jozwiak@mandiant.com
 - compiler/dart/compiled-with-dart jakub.jozwiak@mandiant.com
- nursery/bypass-hidden-api-restrictions-via-jni-on-android mehunhoff@google.com
- nursery/get-current-process-filesystem-mounts-on-linux mehunhoff@google.com
- nursery/get-current-process-memory-mapping-on-linux mehunhoff@google.com
- nursery/get-system-property-on-android mehunhoff@google.com
- nursery/hook-routines-via-lsplant mehunhoff@google.com
- nursery/load-packed-dex-via-jiagu-on-android mehunhoff@google.com
- nursery/modify-api-blacklist-or-denylist-via-jni-on-android mehunhoff@google.com
- nursery/truncate-file-on-linux mehunhoff@google.com
+-

 ### Bug Fixes

@@ -87,6 +42,7 @@ Special thanks to our repeat and new contributors:
 - binja: add support for forwarded export #1646 @xusheng6
 - cape: support more report formats #2035 @mr-tz

+
 ### capa explorer IDA Pro plugin
 - replace deprecated IDA API find_binary with bin_search #1606 @s-ff

@@ -101,8 +57,8 @@ Special thanks to our repeat and new contributors:
 - add deptry support #1497 @s-ff

 ### Raw diffs
- [capa v7.0.1...v7.1.0](https://github.com/mandiant/capa/compare/v7.0.1...v7.1.0)
- [capa-rules v7.0.1...v7.1.0](https://github.com/mandiant/capa-rules/compare/v7.0.1...v7.1.0)
+- [capa v7.0.1...master](https://github.com/mandiant/capa/compare/v7.0.1...master)
+- [capa-rules v7.0.1...master](https://github.com/mandiant/capa-rules/compare/v7.0.1...master)

 ## v7.0.1

--- a/capa/features/common.py
+++ b/capa/features/common.py
@@ -412,7 +412,7 @@ ARCH_AMD64 = "amd64"
 ARCH_AARCH64 = "aarch64"
 # dotnet
 ARCH_ANY = "any"
-VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_AARCH64, ARCH_ANY)
+VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY, ARCH_AARCH64)


 class Arch(Feature):
@@ -460,6 +460,7 @@ VALID_FORMAT = (FORMAT_PE, FORMAT_ELF, FORMAT_DOTNET)
 FORMAT_AUTO = "auto"
 FORMAT_SC32 = "sc32"
 FORMAT_SC64 = "sc64"
+FORMAT_BINEXPORT2 = "binexport2"
 FORMAT_CAPE = "cape"
 FORMAT_FREEZE = "freeze"
 FORMAT_RESULT = "result"
@@ -471,6 +472,7 @@ STATIC_FORMATS = {
    FORMAT_DOTNET,
    FORMAT_FREEZE,
    FORMAT_RESULT,
+    FORMAT_BINEXPORT2,
 }
 DYNAMIC_FORMATS = {
    FORMAT_CAPE,
--- a/capa/features/extractors/binja/extractor.py
+++ b/capa/features/extractors/binja/extractor.py
@@ -28,7 +28,7 @@ from capa.features.extractors.base_extractor import (

 class BinjaFeatureExtractor(StaticFeatureExtractor):
    def __init__(self, bv: binja.BinaryView):
-        super().__init__(hashes=SampleHashes.from_bytes(bv.file.raw.read(0, bv.file.raw.length)))
+        super().__init__(hashes=SampleHashes.from_bytes(bv.file.raw.read(0, len(bv.file.raw))))
        self.bv = bv
        self.global_features: List[Tuple[Feature, Address]] = []
        self.global_features.extend(capa.features.extractors.binja.file.extract_file_format(self.bv))
--- a/capa/features/extractors/cape/global_.py
+++ b/capa/features/extractors/cape/global_.py
@@ -48,7 +48,7 @@ def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
    else:
        logger.warning("unknown file format, file command output: %s", report.target.file.type)
        raise ValueError(
-            f"unrecognized file format from the CAPE report; output of file command: {report.target.file.type}"
+            f"unrecognized file format from the CAPE report; output of file command: {report.target.file.type}"
        )


@@ -73,7 +73,7 @@ def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
        else:
            # if the operating system information is missing from the cape report, it's likely a bug
            logger.warning("unrecognized OS: %s", file_output)
-            raise ValueError(f"unrecognized OS from the CAPE report; output of file command: {file_output}")
+            raise ValueError(f"unrecognized OS from the CAPE report; output of file command: {file_output}")
    else:
        # the sample is shellcode
        logger.debug("unsupported file format, file command output: %s", file_output)
--- a/capa/features/extractors/dnfile/helpers.py
+++ b/capa/features/extractors/dnfile/helpers.py
@@ -83,7 +83,7 @@ def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str
        return None

    try:
-        user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get(token.rid)
+        user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid)
    except UnicodeDecodeError as e:
        logger.debug("failed to decode #US stream index 0x%08x (%s)", token.rid, e)
        return None
@@ -119,14 +119,14 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
        access: Optional[str]

        # assume .NET imports starting with get_/set_ are used to access a property
-        member_ref_name: str = str(member_ref.Name)
-        if member_ref_name.startswith("get_"):
+        if member_ref.Name.startswith("get_"):
            access = FeatureAccess.READ
-        elif member_ref_name.startswith("set_"):
+        elif member_ref.Name.startswith("set_"):
            access = FeatureAccess.WRITE
        else:
            access = None

+        member_ref_name: str = member_ref.Name
        if member_ref_name.startswith(("get_", "set_")):
            # remove get_/set_ from MemberRef name
            member_ref_name = member_ref_name[4:]
@@ -212,7 +212,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
            token: int = calculate_dotnet_token_value(method.table.number, method.row_index)
            access: Optional[str] = accessor_map.get(token)

-            method_name: str = str(method.row.Name)
+            method_name: str = method.row.Name
            if method_name.startswith(("get_", "set_")):
                # remove get_/set_
                method_name = method_name[4:]
@@ -289,8 +289,8 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod]
            logger.debug("ImplMap[0x%X] ImportScope row is None", rid)
            module = ""
        else:
-            module = str(impl_map.ImportScope.row.Name)
-        method: str = str(impl_map.ImportName)
+            module = impl_map.ImportScope.row.Name
+        method: str = impl_map.ImportName

        member_forward_table: int
        if impl_map.MemberForwarded.table is None:
@@ -320,11 +320,8 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> O
    if row_index - 1 <= 0:
        return None

-    table: Optional[dnfile.base.ClrMetaDataTable] = pe.net.mdtables.tables.get(table_index)
-    if table is None:
-        return None
-
    try:
+        table = pe.net.mdtables.tables.get(table_index, [])
        return table[row_index - 1]
    except IndexError:
        return None
@@ -337,7 +334,7 @@ def resolve_nested_typedef_name(

    if index in nested_class_table:
        typedef_name = []
-        name = str(typedef.TypeName)
+        name = typedef.TypeName

        # Append the current typedef name
        typedef_name.append(name)
@@ -346,24 +343,24 @@ def resolve_nested_typedef_name(
            # Iterate through the typedef table to resolve the nested name
            table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index])
            if table_row is None:
-                return str(typedef.TypeNamespace), tuple(typedef_name[::-1])
+                return typedef.TypeNamespace, tuple(typedef_name[::-1])

-            name = str(table_row.TypeName)
+            name = table_row.TypeName
            typedef_name.append(name)
            index = nested_class_table[index]

        # Document the root enclosing details
        table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index])
        if table_row is None:
-            return str(typedef.TypeNamespace), tuple(typedef_name[::-1])
+            return typedef.TypeNamespace, tuple(typedef_name[::-1])

-        enclosing_name = str(table_row.TypeName)
+        enclosing_name = table_row.TypeName
        typedef_name.append(enclosing_name)

-        return str(table_row.TypeNamespace), tuple(typedef_name[::-1])
+        return table_row.TypeNamespace, tuple(typedef_name[::-1])

    else:
-        return str(typedef.TypeNamespace), (str(typedef.TypeName),)
+        return typedef.TypeNamespace, (typedef.TypeName,)


 def resolve_nested_typeref_name(
@@ -373,29 +370,29 @@ def resolve_nested_typeref_name(
    # If the ResolutionScope decodes to a typeRef type then it is nested
    if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef):
        typeref_name = []
-        name = str(typeref.TypeName)
+        name = typeref.TypeName
        # Not appending the current typeref name to avoid potential duplicate

        # Validate index
        table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, index)
        if table_row is None:
-            return str(typeref.TypeNamespace), (str(typeref.TypeName),)
+            return typeref.TypeNamespace, (typeref.TypeName,)

        while isinstance(table_row.ResolutionScope.table, dnfile.mdtable.TypeRef):
            # Iterate through the typeref table to resolve the nested name
            typeref_name.append(name)
-            name = str(table_row.TypeName)
+            name = table_row.TypeName
            table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, table_row.ResolutionScope.row_index)
            if table_row is None:
-                return str(typeref.TypeNamespace), tuple(typeref_name[::-1])
+                return typeref.TypeNamespace, tuple(typeref_name[::-1])

        # Document the root enclosing details
-        typeref_name.append(str(table_row.TypeName))
+        typeref_name.append(table_row.TypeName)

-        return str(table_row.TypeNamespace), tuple(typeref_name[::-1])
+        return table_row.TypeNamespace, tuple(typeref_name[::-1])

    else:
-        return str(typeref.TypeNamespace), (str(typeref.TypeName),)
+        return typeref.TypeNamespace, (typeref.TypeName,)


 def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]:
--- a/capa/features/extractors/dotnetfile.py
+++ b/capa/features/extractors/dotnetfile.py
@@ -78,12 +78,12 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
    for _, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
        # emit internal .NET namespaces
        assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
-        namespaces.add(str(typedef.TypeNamespace))
+        namespaces.add(typedef.TypeNamespace)

    for _, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number):
        # emit external .NET namespaces
        assert isinstance(typeref, dnfile.mdtable.TypeRefRow)
-        namespaces.add(str(typeref.TypeNamespace))
+        namespaces.add(typeref.TypeNamespace)

    # namespaces may be empty, discard
    namespaces.discard("")
--- a/capa/features/extractors/elf.py
+++ b/capa/features/extractors/elf.py
@@ -58,10 +58,6 @@ class OS(str, Enum):
    SYLLABLE = "syllable"
    NACL = "nacl"
    ANDROID = "android"
-    DRAGONFLYBSD = "dragonfly BSD"
-    ILLUMOS = "illumos"
-    ZOS = "z/os"
-    UNIX = "unix"


 # via readelf: https://github.com/bminor/binutils-gdb/blob/c0e94211e1ac05049a4ce7c192c9d14d1764eb3e/binutils/readelf.c#L19635-L19658
@@ -85,8 +81,6 @@ class Phdr:
    paddr: int
    filesz: int
    buf: bytes
-    flags: int
-    memsz: int


@dataclass
@@ -321,23 +315,24 @@ class ELF:
        phent_offset = i * self.e_phentsize
        phent = self.phbuf[phent_offset : phent_offset + self.e_phentsize]

+        (p_type,) = struct.unpack_from(self.endian + "I", phent, 0x0)
+        logger.debug("ph:p_type: 0x%04x", p_type)
+
        if self.bitness == 32:
-            p_type, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz, p_flags = struct.unpack_from(
-                self.endian + "IIIIIII", phent, 0x0
-            )
+            p_offset, p_vaddr, p_paddr, p_filesz = struct.unpack_from(self.endian + "IIII", phent, 0x4)
        elif self.bitness == 64:
-            p_type, p_flags, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz = struct.unpack_from(
-                self.endian + "IIQQQQQ", phent, 0x0
-            )
+            p_offset, p_vaddr, p_paddr, p_filesz = struct.unpack_from(self.endian + "QQQQ", phent, 0x8)
        else:
            raise NotImplementedError()

+        logger.debug("ph:p_offset: 0x%02x p_filesz: 0x%04x", p_offset, p_filesz)
+
        self.f.seek(p_offset)
        buf = self.f.read(p_filesz)
        if len(buf) != p_filesz:
            raise ValueError("failed to read program header content")

-        return Phdr(p_type, p_offset, p_vaddr, p_paddr, p_filesz, buf, p_flags, p_memsz)
+        return Phdr(p_type, p_offset, p_vaddr, p_paddr, p_filesz, buf)

    @property
    def program_headers(self):
@@ -362,6 +357,8 @@ class ELF:
        else:
            raise NotImplementedError()

+        logger.debug("sh:sh_offset: 0x%02x sh_size: 0x%04x", sh_offset, sh_size)
+
        self.f.seek(sh_offset)
        buf = self.f.read(sh_size)
        if len(buf) != sh_size:
@@ -870,8 +867,6 @@ def guess_os_from_ident_directive(elf: ELF) -> Optional[OS]:
            return OS.LINUX
        elif "Red Hat" in comment:
            return OS.LINUX
-        elif "Alpine" in comment:
-            return OS.LINUX
        elif "Android" in comment:
            return OS.ANDROID

@@ -957,506 +952,11 @@ def guess_os_from_symtab(elf: ELF) -> Optional[OS]:

        for os, hints in keywords.items():
            if any(hint in sym_name for hint in hints):
-                logger.debug("symtab: %s looks like %s", sym_name, os)
                return os

    return None


-def is_go_binary(elf: ELF) -> bool:
-    for shdr in elf.section_headers:
-        if shdr.get_name(elf) == ".note.go.buildid":
-            logger.debug("go buildinfo: found section .note.go.buildid")
-            return True
-
-    # The `go version` command enumerates sections for the name `.go.buildinfo`
-    # (in addition to looking for the BUILDINFO_MAGIC) to check if an executable is go or not.
-    # See references to the `errNotGoExe` error here:
-    # https://github.com/golang/go/blob/master/src/debug/buildinfo/buildinfo.go#L41
-    for shdr in elf.section_headers:
-        if shdr.get_name(elf) == ".go.buildinfo":
-            logger.debug("go buildinfo: found section .go.buildinfo")
-            return True
-
-    # other strategy used by FLOSS: search for known runtime strings.
-    # https://github.com/mandiant/flare-floss/blob/b2ca8adfc5edf278861dd6bff67d73da39683b46/floss/language/identify.py#L88
-    return False
-
-
-def get_go_buildinfo_data(elf: ELF) -> Optional[bytes]:
-    for shdr in elf.section_headers:
-        if shdr.get_name(elf) == ".go.buildinfo":
-            logger.debug("go buildinfo: found section .go.buildinfo")
-            return shdr.buf
-
-    PT_LOAD = 0x1
-    PF_X = 1
-    PF_W = 2
-    for phdr in elf.program_headers:
-        if phdr.type != PT_LOAD:
-            continue
-
-        if (phdr.flags & (PF_X | PF_W)) == PF_W:
-            logger.debug("go buildinfo: found data segment")
-            return phdr.buf
-
-    return None
-
-
-def read_data(elf: ELF, rva: int, size: int) -> Optional[bytes]:
-    # ELF segments are for runtime data,
-    # ELF sections are for link-time data.
-    # So we want to read Program Headers/Segments.
-    for phdr in elf.program_headers:
-        if phdr.vaddr <= rva < phdr.vaddr + phdr.memsz:
-            segment_data = phdr.buf
-
-            # pad the section with NULLs
-            # assume page alignment is already handled.
-            # might need more hardening here.
-            if len(segment_data) < phdr.memsz:
-                segment_data += b"\x00" * (phdr.memsz - len(segment_data))
-
-            segment_offset = rva - phdr.vaddr
-            return segment_data[segment_offset : segment_offset + size]
-
-    return None
-
-
-def read_go_slice(elf: ELF, rva: int) -> Optional[bytes]:
-    if elf.bitness == 32:
-        struct_size = 8
-        struct_format = elf.endian + "II"
-    elif elf.bitness == 64:
-        struct_size = 16
-        struct_format = elf.endian + "QQ"
-    else:
-        raise ValueError("invalid psize")
-
-    struct_buf = read_data(elf, rva, struct_size)
-    if not struct_buf:
-        return None
-
-    addr, length = struct.unpack_from(struct_format, struct_buf, 0)
-
-    return read_data(elf, addr, length)
-
-
-def guess_os_from_go_buildinfo(elf: ELF) -> Optional[OS]:
-    """
-    In a binary compiled by Go, the buildinfo structure may contain
-    metadata about the build environment, including the configured
-    GOOS, which specifies the target operating system.
-
-    Search for and parse the buildinfo structure,
-    which may be found in the .go.buildinfo section,
-    and often contains this metadata inline. Otherwise,
-    follow a few byte slices to the relevant information.
-
-    This strategy is derived from GoReSym.
-    """
-    buf = get_go_buildinfo_data(elf)
-    if not buf:
-        logger.debug("go buildinfo: no buildinfo section")
-        return None
-
-    assert isinstance(buf, bytes)
-
-    # The build info blob left by the linker is identified by
-    # a 16-byte header, consisting of:
-    #  - buildInfoMagic (14 bytes),
-    #  - the binary's pointer size (1 byte), and
-    #  - whether the binary is big endian (1 byte).
-    #
-    # Then:
-    #  - virtual address to Go string: runtime.buildVersion
-    #  - virtual address to Go string: runtime.modinfo
-    #
-    #  On 32-bit platforms, the last 8 bytes are unused.
-    #
-    #  If the endianness has the 2 bit set, then the pointers are zero,
-    #  and the 32-byte header is followed by varint-prefixed string data
-    #  for the two string values we care about.
-    # https://github.com/mandiant/GoReSym/blob/0860a1b1b4f3495e9fb7e71eb4386bf3e0a7c500/buildinfo/buildinfo.go#L185-L193
-    BUILDINFO_MAGIC = b"\xFF Go buildinf:"
-
-    try:
-        index = buf.index(BUILDINFO_MAGIC)
-    except ValueError:
-        logger.debug("go buildinfo: no buildinfo magic")
-        return None
-
-    psize, flags = struct.unpack_from("<bb", buf, index + len(BUILDINFO_MAGIC))
-    assert psize in (4, 8)
-    is_big_endian = flags & 0b01
-    has_inline_strings = flags & 0b10
-    logger.debug("go buildinfo: psize: %d big endian: %s inline: %s", psize, is_big_endian, has_inline_strings)
-
-    GOOS_TO_OS = {
-        b"aix": OS.AIX,
-        b"android": OS.ANDROID,
-        b"dragonfly": OS.DRAGONFLYBSD,
-        b"freebsd": OS.FREEBSD,
-        b"hurd": OS.HURD,
-        b"illumos": OS.ILLUMOS,
-        b"linux": OS.LINUX,
-        b"netbsd": OS.NETBSD,
-        b"openbsd": OS.OPENBSD,
-        b"solaris": OS.SOLARIS,
-        b"zos": OS.ZOS,
-        b"windows": None,  # PE format
-        b"plan9": None,  # a.out format
-        b"ios": None,  # Mach-O format
-        b"darwin": None,  # Mach-O format
-        b"nacl": None,  # dropped in GO 1.14
-        b"js": None,
-    }
-
-    if has_inline_strings:
-        # This is the common case/path. Most samples will have an inline GOOS string.
-        #
-        # To find samples on VT, use these VTGrep searches:
-        #
-        #   content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 02}
-        #   content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 02}
-
-        # If present, the GOOS key will be found within
-        # the current buildinfo data region.
-        #
-        # Brute force the k-v pair, like `GOOS=linux`,
-        # rather than try to parse the data, which would be fragile.
-        for key, os in GOOS_TO_OS.items():
-            if (b"GOOS=" + key) in buf:
-                logger.debug("go buildinfo: found os: %s", os)
-                return os
-    else:
-        # This is the uncommon path. Most samples will have an inline GOOS string.
-        #
-        # To find samples on VT, use the referenced VTGrep content searches.
-        info_format = {
-            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 00}
-            # like: 71e617e5cc7fda89bf67422ff60f437e9d54622382c5ed6ff31f75e601f9b22e
-            # in which the modinfo doesn't have GOOS.
-            (4, False): "<II",
-            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 00}
-            # like: 93d3b3e2a904c6c909e20f2f76c3c2e8d0c81d535eb46e5493b5701f461816c3
-            # in which the modinfo doesn't have GOOS.
-            (8, False): "<QQ",
-            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 01}
-            # (no matches on VT today)
-            (4, True): ">II",
-            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 01}
-            # like: d44ba497964050c0e3dd2a192c511e4c3c4f17717f0322a554d64b797ee4690a
-            # in which the modinfo doesn't have GOOS.
-            (8, True): ">QQ",
-        }
-
-        build_version_address, modinfo_address = struct.unpack_from(
-            info_format[(psize, is_big_endian)], buf, index + 0x10
-        )
-        logger.debug("go buildinfo: build version address: 0x%x", build_version_address)
-        logger.debug("go buildinfo: modinfo address: 0x%x", modinfo_address)
-
-        build_version = read_go_slice(elf, build_version_address)
-        if build_version:
-            logger.debug("go buildinfo: build version: %s", build_version.decode("utf-8"))
-
-        modinfo = read_go_slice(elf, modinfo_address)
-        if modinfo:
-            if modinfo[-0x11] == ord("\n"):
-                # Strip module framing: sentinel strings delimiting the module info.
-                # These are cmd/go/internal/modload/build.infoStart and infoEnd.
-                # Which should probably be:
-                # 	infoStart, _ = hex.DecodeString("3077af0c9274080241e1c107e6d618e6")
-                #   infoEnd, _   = hex.DecodeString("f932433186182072008242104116d8f2")
-                modinfo = modinfo[0x10:-0x10]
-            logger.debug("go buildinfo: modinfo: %s", modinfo.decode("utf-8"))
-
-        if not modinfo:
-            return None
-
-        for key, os in GOOS_TO_OS.items():
-            # Brute force the k-v pair, like `GOOS=linux`,
-            # rather than try to parse the data, which would be fragile.
-            if (b"GOOS=" + key) in modinfo:
-                logger.debug("go buildinfo: found os: %s", os)
-                return os
-
-    return None
-
-
-def guess_os_from_go_source(elf: ELF) -> Optional[OS]:
-    """
-    In a binary compiled by Go, runtime metadata may contain
-    references to the source filenames, including the
-    src/runtime/os_* files, whose name indicates the
-    target operating system.
-
-    Confirm the given ELF seems to be built by Go,
-    and then look for strings that look like
-    Go source filenames.
-
-    This strategy is derived from GoReSym.
-    """
-    if not is_go_binary(elf):
-        return None
-
-    for phdr in elf.program_headers:
-        buf = phdr.buf
-        NEEDLE_OS = b"/src/runtime/os_"
-        try:
-            index = buf.index(NEEDLE_OS)
-        except ValueError:
-            continue
-
-        rest = buf[index + len(NEEDLE_OS) : index + len(NEEDLE_OS) + 32]
-        filename = rest.partition(b".go")[0].decode("utf-8")
-        logger.debug("go source: filename: /src/runtime/os_%s.go", filename)
-
-        # via: https://cs.opensource.google/go/go/+/master:src/runtime/;bpv=1;bpt=0
-        # candidates today:
-        #   - aix
-        #   - android
-        #   - darwin
-        #   - darwin_arm64
-        #   - dragonfly
-        #   - freebsd
-        #   - freebsd2
-        #   - freebsd_amd64
-        #   - freebsd_arm
-        #   - freebsd_arm64
-        #   - freebsd_noauxv
-        #   - freebsd_riscv64
-        #   - illumos
-        #   - js
-        #   - linux
-        #   - linux_arm
-        #   - linux_arm64
-        #   - linux_be64
-        #   - linux_generic
-        #   - linux_loong64
-        #   - linux_mips64x
-        #   - linux_mipsx
-        #   - linux_noauxv
-        #   - linux_novdso
-        #   - linux_ppc64x
-        #   - linux_riscv64
-        #   - linux_s390x
-        #   - linux_x86
-        #   - netbsd
-        #   - netbsd_386
-        #   - netbsd_amd64
-        #   - netbsd_arm
-        #   - netbsd_arm64
-        #   - nonopenbsd
-        #   - only_solaris
-        #   - openbsd
-        #   - openbsd_arm
-        #   - openbsd_arm64
-        #   - openbsd_libc
-        #   - openbsd_mips64
-        #   - openbsd_syscall
-        #   - openbsd_syscall1
-        #   - openbsd_syscall2
-        #   - plan9
-        #   - plan9_arm
-        #   - solaris
-        #   - unix
-        #   - unix_nonlinux
-        #   - wasip1
-        #   - wasm
-        #   - windows
-        #   - windows_arm
-        #   - windows_arm64
-
-        OS_FILENAME_TO_OS = {
-            "aix": OS.AIX,
-            "android": OS.ANDROID,
-            "dragonfly": OS.DRAGONFLYBSD,
-            "freebsd": OS.FREEBSD,
-            "freebsd2": OS.FREEBSD,
-            "freebsd_": OS.FREEBSD,
-            "illumos": OS.ILLUMOS,
-            "linux": OS.LINUX,
-            "netbsd": OS.NETBSD,
-            "only_solaris": OS.SOLARIS,
-            "openbsd": OS.OPENBSD,
-            "solaris": OS.SOLARIS,
-            "unix_nonlinux": OS.UNIX,
-        }
-
-        for prefix, os in OS_FILENAME_TO_OS.items():
-            if filename.startswith(prefix):
-                return os
-
-    for phdr in elf.program_headers:
-        buf = phdr.buf
-        NEEDLE_RT0 = b"/src/runtime/rt0_"
-        try:
-            index = buf.index(NEEDLE_RT0)
-        except ValueError:
-            continue
-
-        rest = buf[index + len(NEEDLE_RT0) : index + len(NEEDLE_RT0) + 32]
-        filename = rest.partition(b".s")[0].decode("utf-8")
-        logger.debug("go source: filename: /src/runtime/rt0_%s.s", filename)
-
-        # via: https://cs.opensource.google/go/go/+/master:src/runtime/;bpv=1;bpt=0
-        # candidates today:
-        #   - aix_ppc64
-        #   - android_386
-        #   - android_amd64
-        #   - android_arm
-        #   - android_arm64
-        #   - darwin_amd64
-        #   - darwin_arm64
-        #   - dragonfly_amd64
-        #   - freebsd_386
-        #   - freebsd_amd64
-        #   - freebsd_arm
-        #   - freebsd_arm64
-        #   - freebsd_riscv64
-        #   - illumos_amd64
-        #   - ios_amd64
-        #   - ios_arm64
-        #   - js_wasm
-        #   - linux_386
-        #   - linux_amd64
-        #   - linux_arm
-        #   - linux_arm64
-        #   - linux_loong64
-        #   - linux_mips64x
-        #   - linux_mipsx
-        #   - linux_ppc64
-        #   - linux_ppc64le
-        #   - linux_riscv64
-        #   - linux_s390x
-        #   - netbsd_386
-        #   - netbsd_amd64
-        #   - netbsd_arm
-        #   - netbsd_arm64
-        #   - openbsd_386
-        #   - openbsd_amd64
-        #   - openbsd_arm
-        #   - openbsd_arm64
-        #   - openbsd_mips64
-        #   - openbsd_ppc64
-        #   - openbsd_riscv64
-        #   - plan9_386
-        #   - plan9_amd64
-        #   - plan9_arm
-        #   - solaris_amd64
-        #   - wasip1_wasm
-        #   - windows_386
-        #   - windows_amd64
-        #   - windows_arm
-        #   - windows_arm64
-
-        RT0_FILENAME_TO_OS = {
-            "aix": OS.AIX,
-            "android": OS.ANDROID,
-            "dragonfly": OS.DRAGONFLYBSD,
-            "freebsd": OS.FREEBSD,
-            "illumos": OS.ILLUMOS,
-            "linux": OS.LINUX,
-            "netbsd": OS.NETBSD,
-            "openbsd": OS.OPENBSD,
-            "solaris": OS.SOLARIS,
-        }
-
-        for prefix, os in RT0_FILENAME_TO_OS.items():
-            if filename.startswith(prefix):
-                return os
-
-    return None
-
-
-def guess_os_from_vdso_strings(elf: ELF) -> Optional[OS]:
-    """
-    The "vDSO" (virtual dynamic shared object) is a small shared
-    library that the kernel automatically maps into the address space
-    of all user-space applications.
-
-    Some statically linked executables include small dynamic linker
-    routines that finds these vDSO symbols, using the ASCII
-    symbol name and version. We can therefore recognize the pairs
-    (symbol, version) to guess the binary targets Linux.
-    """
-    for phdr in elf.program_headers:
-        buf = phdr.buf
-
-        # We don't really use the arch, but its interesting for documentation
-        # I suppose we could restrict the arch here to what's in the ELF header,
-        # but that's even more work. Let's see if this is sufficient.
-        for arch, symbol, version in (
-            # via: https://man7.org/linux/man-pages/man7/vdso.7.html
-            ("arm", b"__vdso_gettimeofday", b"LINUX_2.6"),
-            ("arm", b"__vdso_clock_gettime", b"LINUX_2.6"),
-            ("aarch64", b"__kernel_rt_sigreturn", b"LINUX_2.6.39"),
-            ("aarch64", b"__kernel_gettimeofday", b"LINUX_2.6.39"),
-            ("aarch64", b"__kernel_clock_gettime", b"LINUX_2.6.39"),
-            ("aarch64", b"__kernel_clock_getres", b"LINUX_2.6.39"),
-            ("mips", b"__kernel_gettimeofday", b"LINUX_2.6"),
-            ("mips", b"__kernel_clock_gettime", b"LINUX_2.6"),
-            ("ia64", b"__kernel_sigtramp", b"LINUX_2.5"),
-            ("ia64", b"__kernel_syscall_via_break", b"LINUX_2.5"),
-            ("ia64", b"__kernel_syscall_via_epc", b"LINUX_2.5"),
-            ("ppc/32", b"__kernel_clock_getres", b"LINUX_2.6.15"),
-            ("ppc/32", b"__kernel_clock_gettime", b"LINUX_2.6.15"),
-            ("ppc/32", b"__kernel_clock_gettime64", b"LINUX_5.11"),
-            ("ppc/32", b"__kernel_datapage_offset", b"LINUX_2.6.15"),
-            ("ppc/32", b"__kernel_get_syscall_map", b"LINUX_2.6.15"),
-            ("ppc/32", b"__kernel_get_tbfreq", b"LINUX_2.6.15"),
-            ("ppc/32", b"__kernel_getcpu", b"LINUX_2.6.15"),
-            ("ppc/32", b"__kernel_gettimeofday", b"LINUX_2.6.15"),
-            ("ppc/32", b"__kernel_sigtramp_rt32", b"LINUX_2.6.15"),
-            ("ppc/32", b"__kernel_sigtramp32", b"LINUX_2.6.15"),
-            ("ppc/32", b"__kernel_sync_dicache", b"LINUX_2.6.15"),
-            ("ppc/32", b"__kernel_sync_dicache_p5", b"LINUX_2.6.15"),
-            ("ppc/64", b"__kernel_clock_getres", b"LINUX_2.6.15"),
-            ("ppc/64", b"__kernel_clock_gettime", b"LINUX_2.6.15"),
-            ("ppc/64", b"__kernel_datapage_offset", b"LINUX_2.6.15"),
-            ("ppc/64", b"__kernel_get_syscall_map", b"LINUX_2.6.15"),
-            ("ppc/64", b"__kernel_get_tbfreq", b"LINUX_2.6.15"),
-            ("ppc/64", b"__kernel_getcpu", b"LINUX_2.6.15"),
-            ("ppc/64", b"__kernel_gettimeofday", b"LINUX_2.6.15"),
-            ("ppc/64", b"__kernel_sigtramp_rt64", b"LINUX_2.6.15"),
-            ("ppc/64", b"__kernel_sync_dicache", b"LINUX_2.6.15"),
-            ("ppc/64", b"__kernel_sync_dicache_p5", b"LINUX_2.6.15"),
-            ("riscv", b"__vdso_rt_sigreturn", b"LINUX_4.15"),
-            ("riscv", b"__vdso_gettimeofday", b"LINUX_4.15"),
-            ("riscv", b"__vdso_clock_gettime", b"LINUX_4.15"),
-            ("riscv", b"__vdso_clock_getres", b"LINUX_4.15"),
-            ("riscv", b"__vdso_getcpu", b"LINUX_4.15"),
-            ("riscv", b"__vdso_flush_icache", b"LINUX_4.15"),
-            ("s390", b"__kernel_clock_getres", b"LINUX_2.6.29"),
-            ("s390", b"__kernel_clock_gettime", b"LINUX_2.6.29"),
-            ("s390", b"__kernel_gettimeofday", b"LINUX_2.6.29"),
-            ("superh", b"__kernel_rt_sigreturn", b"LINUX_2.6"),
-            ("superh", b"__kernel_sigreturn", b"LINUX_2.6"),
-            ("superh", b"__kernel_vsyscall", b"LINUX_2.6"),
-            ("i386", b"__kernel_sigreturn", b"LINUX_2.5"),
-            ("i386", b"__kernel_rt_sigreturn", b"LINUX_2.5"),
-            ("i386", b"__kernel_vsyscall", b"LINUX_2.5"),
-            ("i386", b"__vdso_clock_gettime", b"LINUX_2.6"),
-            ("i386", b"__vdso_gettimeofday", b"LINUX_2.6"),
-            ("i386", b"__vdso_time", b"LINUX_2.6"),
-            ("x86-64", b"__vdso_clock_gettime", b"LINUX_2.6"),
-            ("x86-64", b"__vdso_getcpu", b"LINUX_2.6"),
-            ("x86-64", b"__vdso_gettimeofday", b"LINUX_2.6"),
-            ("x86-64", b"__vdso_time", b"LINUX_2.6"),
-            ("x86/32", b"__vdso_clock_gettime", b"LINUX_2.6"),
-            ("x86/32", b"__vdso_getcpu", b"LINUX_2.6"),
-            ("x86/32", b"__vdso_gettimeofday", b"LINUX_2.6"),
-            ("x86/32", b"__vdso_time", b"LINUX_2.6"),
-        ):
-            if symbol in buf and version in buf:
-                logger.debug("vdso string: %s %s %s", arch, symbol.decode("ascii"), version.decode("ascii"))
-                return OS.LINUX
-
-    return None
-
-
 def detect_elf_os(f) -> str:
    """
    f: type Union[BinaryIO, IDAIO, GHIDRAIO]
@@ -1523,27 +1023,6 @@ def detect_elf_os(f) -> str:
        logger.warning("Error guessing OS from symbol table: %s", e)
        symtab_guess = None

-    try:
-        goos_guess = guess_os_from_go_buildinfo(elf)
-        logger.debug("guess: Go buildinfo: %s", goos_guess)
-    except Exception as e:
-        logger.warning("Error guessing OS from Go buildinfo: %s", e)
-        goos_guess = None
-
-    try:
-        gosrc_guess = guess_os_from_go_source(elf)
-        logger.debug("guess: Go source: %s", gosrc_guess)
-    except Exception as e:
-        logger.warning("Error guessing OS from Go source path: %s", e)
-        gosrc_guess = None
-
-    try:
-        vdso_guess = guess_os_from_vdso_strings(elf)
-        logger.debug("guess: vdso strings: %s", vdso_guess)
-    except Exception as e:
-        logger.warning("Error guessing OS from vdso strings: %s", e)
-        symtab_guess = None
-
    ret = None

    if osabi_guess:
@@ -1567,24 +1046,11 @@ def detect_elf_os(f) -> str:
    elif symtab_guess:
        ret = symtab_guess

-    elif goos_guess:
-        ret = goos_guess
-
-    elif gosrc_guess:
-        # prefer goos_guess to this method,
-        # which is just string interpretation.
-        ret = gosrc_guess
-
    elif ident_guess:
        # at the bottom because we don't trust this too much
        # due to potential for bugs with cross-compilation.
        ret = ident_guess

-    elif vdso_guess:
-        # at the bottom because this is just scanning strings,
-        # which isn't very authoritative.
-        ret = vdso_guess
-
    return ret.value if ret is not None else "unknown"


--- a/capa/features/extractors/elffile.py
+++ b/capa/features/extractors/elffile.py
@@ -10,7 +10,8 @@ import logging
 from typing import Tuple, Iterator
 from pathlib import Path

-from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection
+from elftools.elf.elffile import ELFFile, SymbolTableSection
+from elftools.elf.relocation import RelocationSection

 import capa.features.extractors.common
 from capa.features.file import Export, Import, Section
@@ -46,37 +47,17 @@ def extract_file_export_names(elf: ELFFile, **kwargs):

            yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)

-    for segment in elf.iter_segments():
-        if not isinstance(segment, DynamicSegment):
-            continue
-
-        logger.debug("Dynamic Segment contains %s symbols: ", segment.num_symbols())
-
-        for symbol in segment.iter_symbols():
-            # The following conditions are based on the following article
-            # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
-            if not symbol.name:
-                continue
-            if symbol.entry.st_info.type not in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]:
-                continue
-            if symbol.entry.st_value == 0:
-                continue
-            if symbol.entry.st_shndx == "SHN_UNDEF":
-                continue
-
-            yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)
-

 def extract_file_import_names(elf: ELFFile, **kwargs):
    # Create a dictionary to store symbol names by their index
    symbol_names = {}

    # Extract symbol names and store them in the dictionary
-    for segment in elf.iter_segments():
-        if not isinstance(segment, DynamicSegment):
+    for section in elf.iter_sections():
+        if not isinstance(section, SymbolTableSection):
            continue

-        for _, symbol in enumerate(segment.iter_symbols()):
+        for _, symbol in enumerate(section.iter_symbols()):
            # The following conditions are based on the following article
            # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
            if not symbol.name:
@@ -92,19 +73,21 @@ def extract_file_import_names(elf: ELFFile, **kwargs):

            symbol_names[_] = symbol.name

-    for segment in elf.iter_segments():
-        if not isinstance(segment, DynamicSegment):
+    for section in elf.iter_sections():
+        if not isinstance(section, RelocationSection):
            continue

-        relocation_tables = segment.get_relocation_tables()
-        logger.debug("Dynamic Segment contains %s relocation tables:", len(relocation_tables))
+        if section["sh_entsize"] == 0:
+            logger.debug("Symbol table '%s' has a sh_entsize of zero!", section.name)
+            continue

-        for relocation_table in relocation_tables.values():
-            for relocation in relocation_table.iter_relocations():
-                # Extract the symbol name from the symbol table using the symbol index in the relocation
-                if relocation["r_info_sym"] not in symbol_names:
-                    continue
-                yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"])
+        logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_relocations())
+
+        for relocation in section.iter_relocations():
+            # Extract the symbol name from the symbol table using the symbol index in the relocation
+            if relocation["r_info_sym"] not in symbol_names:
+                continue
+            yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"])


 def extract_file_section_names(elf: ELFFile, **kwargs):
--- a/capa/helpers.py
+++ b/capa/helpers.py
@@ -26,11 +26,13 @@ from capa.features.common import (
    FORMAT_DOTNET,
    FORMAT_FREEZE,
    FORMAT_UNKNOWN,
+    FORMAT_BINEXPORT2,
    Format,
 )

 EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
 EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
+EXTENSIONS_BINEXPORT2 = ("BinExport", "BinExport2")
 EXTENSIONS_DYNAMIC = ("json", "json_", "json.gz")
 EXTENSIONS_ELF = "elf_"
 EXTENSIONS_FREEZE = "frz"
@@ -105,15 +107,8 @@ def get_format_from_extension(sample: Path) -> str:
        format_ = get_format_from_report(sample)
    elif sample.name.endswith(EXTENSIONS_FREEZE):
        format_ = FORMAT_FREEZE
-    return format_
-
-
-def get_auto_format(path: Path) -> str:
-    format_ = get_format(path)
-    if format_ == FORMAT_UNKNOWN:
-        format_ = get_format_from_extension(path)
-    if format_ == FORMAT_UNKNOWN:
-        raise UnsupportedFormatError()
+    elif sample.name.endswith(EXTENSIONS_BINEXPORT2):
+        format_ = FORMAT_BINEXPORT2
    return format_


@@ -136,6 +131,15 @@ def get_format(sample: Path) -> str:
    return FORMAT_UNKNOWN


+def get_auto_format(path: Path) -> str:
+    format_ = get_format(path)
+    if format_ == FORMAT_UNKNOWN:
+        format_ = get_format_from_extension(path)
+    if format_ == FORMAT_UNKNOWN:
+        raise UnsupportedFormatError()
+    return format_
+
+
@contextlib.contextmanager
 def redirecting_print_to_tqdm(disable_progress):
    """
--- a/capa/loader.py
+++ b/capa/loader.py
@@ -8,7 +8,6 @@
 import sys
 import logging
 import datetime
-import contextlib
 from typing import Set, Dict, List, Optional
 from pathlib import Path

@@ -155,18 +154,6 @@ def get_workspace(path: Path, input_format: str, sigpaths: List[Path]):

    viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths])

-    with contextlib.suppress(Exception):
-        # unfortuately viv raises a raw Exception (not any subclass).
-        # This happens when the module isn't found, such as with a viv upgrade.
-        #
-        # Remove the symbolic switch case solver.
-        # This is only enabled for ELF files, not PE files.
-        # During the following performance investigation, this analysis module
-        # had some terrible worst-case behavior.
-        # We can put up with slightly worse CFG reconstruction in order to avoid this.
-        # https://github.com/mandiant/capa/issues/1989#issuecomment-1948022767
-        vw.delFuncAnalysisModule("vivisect.analysis.generic.symswitchcase")
-
    vw.analyze()

    logger.debug("%s", get_meta_str(vw))
--- a/capa/version.py
+++ b/capa/version.py
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-__version__ = "7.1.0"
+__version__ = "7.0.1"


 def get_major_version():
--- a/doc/installation.md
+++ b/doc/installation.md
@@ -91,12 +91,6 @@ For more details about creating and using virtual environments, check out the [v

 ##### Install development dependencies

-When developing capa, please use the pinned dependencies found in `requirements.txt`.
-This ensures that everyone has the exact same, reproducible environment.
-Please install these dependencies before install capa (from source or from PyPI):
-
-`$ pip install -r requirements.txt`
-
 We use the following tools to ensure consistent code style and formatting:
  - [black](https://github.com/psf/black) code formatter
  - [isort](https://pypi.org/project/isort/) code formatter
@@ -107,8 +101,7 @@ We use the following tools to ensure consistent code style and formatting:

 To install these development dependencies, run:

- `$ pip install -e /local/path/to/src[dev]` or
- `$ pip install -e /local/path/to/src[dev,scripts]` to also install all script dependencies
+`$ pip install -e /local/path/to/src[dev]`

 We use [pre-commit](https://pre-commit.com/) so that its trivial to run the same linters & configuration locally as in CI.

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,76 +32,25 @@ classifiers = [
    "Topic :: Security",
 ]
 dependencies = [
-    # ---------------------------------------
-    # As a library, capa uses lower version bounds
-    # when specifying its dependencies. This lets
-    # other programs that use capa (and other libraries)
-    # to find a compatible set of dependency versions.
-    #
-    # We can optionally pin to specific versions or
-    # limit the upper bound when there's a good reason;
-    # but the default is to assume all greater versions
-    # probably work with capa until proven otherwise.
-    #
-    # The following link provides good background:
-    # https://iscinumpy.dev/post/bound-version-constraints/
-    #
-    # When we develop capa, and when we distribute it as
-    # a standalone binary, we'll use specific versions
-    # that are pinned in requirements.txt.
-    # But the requirements for a library are specified here
-    # and are looser.
-    #
-    # Related discussions:
-    # 
-    #   - https://github.com/mandiant/capa/issues/2053
-    #   - https://github.com/mandiant/capa/pull/2059
-    #   - https://github.com/mandiant/capa/pull/2079
-    #
-    # ---------------------------------------
-    # The following dependency versions were imported
-    # during June 2024 by truncating specific versions to
-    # their major-most version (major version when possible, 
-    # or minor otherwise).
-    # As specific constraints are identified, please provide
-    # comments and context.
-    "tqdm>=4",
-    "pyyaml>=6",
-    "tabulate>=0.9",
-    "colorama>=0.4",
-    "termcolor>=2",
-    "wcwidth>=0.2",
-    "ida-settings>=2",
-    "ruamel.yaml>=0.18",
-    "pefile>=2023.2.7",
-    "pyelftools>=0.31",
-    "pydantic>=2",
-    "rich>=13",
-    "humanize>=4",
-    "protobuf>=5",
-
-    # ---------------------------------------
-    # Dependencies that we develop
-    #
-    # These dependencies are often actively influenced by capa,
-    # so we provide a minimum patch version that includes the
-    # latest bug fixes we need here.
-    "viv-utils[flirt]>=0.7.9",
-    "vivisect>=1.1.1",
-    "dncil>=1.0.2",
-
-    # ---------------------------------------
-    # Dependencies with version caps
-    #
-    # These dependencies must not exceed the version cap,
-    # typically due to dropping support for python releases
-    # we still support.
-
-    # TODO(williballenthin): networkx 3.2 doesn't support python 3.8 while capa does.
-    # https://github.com/mandiant/capa/issues/1966
-    "networkx>=3,<3.2",
-
-    "dnfile>=0.15.0",
+    "tqdm==4.66.4",
+    "pyyaml==6.0.1",
+    "tabulate==0.9.0",
+    "colorama==0.4.6",
+    "termcolor==2.4.0",
+    "wcwidth==0.2.13",
+    "ida-settings==2.1.0",
+    "viv-utils[flirt]==0.7.9",
+    "networkx==3.1",
+    "ruamel.yaml==0.18.6",
+    "vivisect==1.1.1",
+    "pefile==2023.2.7",
+    "pyelftools==0.31",
+    "dnfile==0.14.1",
+    "dncil==1.0.2",
+    "pydantic==2.7.1",
+    "rich==13.7.1",
+    "humanize==4.9.0",
+    "protobuf==5.27.0",
 ]
 dynamic = ["version"]

@@ -114,19 +63,15 @@ namespaces = false

 [project.optional-dependencies]
 dev = [
-    # Dev and build dependencies are not relaxed because
-    # we want all developer environments to be consistent.
-    # These dependencies are not used in production environments
-    # and should not conflict with other libraries/tooling.
    "pre-commit==3.5.0",
    "pytest==8.0.0",
    "pytest-sugar==1.0.0",
    "pytest-instafail==0.5.0",
    "pytest-cov==5.0.0",
-    "flake8==7.1.0",
+    "flake8==7.0.0",
    "flake8-bugbear==24.4.26",
    "flake8-encodings==0.5.1",
-    "flake8-comprehensions==3.15.0",
+    "flake8-comprehensions==3.14.0",
    "flake8-logging-format==0.9.0",
    "flake8-no-implicit-concat==0.3.5",
    "flake8-print==5.0.0",
@@ -134,12 +79,14 @@ dev = [
    "flake8-simplify==0.21.0",
    "flake8-use-pathlib==0.3.0",
    "flake8-copyright==0.2.4",
-    "ruff==0.5.0",
+    "ruff==0.4.7",
    "black==24.4.2",
    "isort==5.13.2",
    "mypy==1.10.0",
+    "psutil==5.9.2",
+    "stix2==3.0.1",
+    "requests==2.31.0",
    "mypy-protobuf==3.6.0",
-    "PyGithub==2.3.0",
    # type stubs for mypy
    "types-backports==0.1.3",
    "types-colorama==0.4.15.11",
@@ -152,21 +99,10 @@ dev = [
    "deptry==0.16.1"
 ]
 build = [
-    # Dev and build dependencies are not relaxed because
-    # we want all developer environments to be consistent.
-    # These dependencies are not used in production environments
-    # and should not conflict with other libraries/tooling.
-    "pyinstaller==6.8.0",
-    "setuptools==70.0.0",
+    "pyinstaller==6.7.0",
+    "setuptools==69.5.1",
    "build==1.2.1"
 ]
-scripts = [
-    "jschema_to_python==1.2.3",
-    "psutil==6.0.0",
-    "stix2==3.0.1",
-    "sarif_om==1.0.4",
-    "requests==2.32.3",
-]

 [tool.deptry]
 extend_exclude = [
@@ -216,7 +152,6 @@ DEP002 = [
    "mypy",
    "mypy-protobuf",
    "pre-commit",
-    "PyGithub",
    "pyinstaller",
    "pytest",
    "pytest-cov",
@@ -240,9 +175,6 @@ DEP003 = [
    "typing_extensions" # TODO(s-ff): remove when Python 3.9 is deprecated, see #1699
 ]

-[tool.deptry.package_module_name_map]
-PyGithub = "github"
-
 [project.urls]
 Homepage = "https://github.com/mandiant/capa"
 Repository = "https://github.com/mandiant/capa.git"
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,46 +0,0 @@
-# Dependencies with specific version constraints
-# used during development and building the standalone executables.
-# For these environments, use `pip install -r requirements.txt`
-# before installing capa from source/pypi. This will ensure
-# the following specific versions are used.
-#
-# Initially generated via: pip freeze | grep -v -- "-e"
-# Kept up to date by dependabot.
-annotated-types==0.7.0
-colorama==0.4.6
-cxxfilt==0.2.2
-dncil==1.0.2
-dnfile==0.15.0
-funcy==2.0
-humanize==4.9.0
-ida-netnode==3.0
-ida-settings==2.1.0
-intervaltree==3.1.0
-markdown-it-py==3.0.0
-mdurl==0.1.2
-msgpack==1.0.8
-networkx==3.1
-pefile==2023.2.7
-pip==24.0
-protobuf==5.27.1
-pyasn1==0.4.8
-pyasn1-modules==0.2.8
-pycparser==2.22
-pydantic==2.7.3
-pydantic-core==2.18.4
-pyelftools==0.31
-pygments==2.18.0
-python-flirt==0.8.10
-pyyaml==6.0.1
-rich==13.7.1
-ruamel-yaml==0.18.6
-ruamel-yaml-clib==0.2.8
-setuptools==70.0.0
-six==1.16.0
-sortedcontainers==2.4.0
-tabulate==0.9.0
-termcolor==2.4.0
-tqdm==4.66.4
-viv-utils==0.7.11
-vivisect==1.1.1
-wcwidth==0.2.13
--- a/2
+++ b/2
--- a/scripts/capa2sarif.py
+++ b/scripts/capa2sarif.py
@@ -1,358 +0,0 @@
-# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at: [package root]/LICENSE.txt
-# Unless required by applicable law or agreed to in writing, software distributed under the License
-#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and limitations under the License.
-"""
-Convert capa json output to sarif schema
-    usage: capa2sarif.py [-h] [-g] [-r] [-t TAG] [--version] capa_output
-
-Capa to SARIF analysis file
-positional arguments:
-    capa_output         Path to capa JSON output file
-optional arguments:
-  -h, --help            show this help message and exit
-  --version             show program's version number and exit
-  -t TAG, --tag TAG     filter on rule meta field values (ruleid)
-
-Requires:
-    - sarif_om 1.0.4
-    - jschema_to_python 1.2.3
-"""
-import sys
-import json
-import logging
-import argparse
-from typing import List, Optional
-from pathlib import Path
-
-from capa.version import __version__
-
-logger = logging.getLogger("capa2sarif")
-
-# Dependencies
-try:
-    from sarif_om import Run, Tool, SarifLog, ToolComponent
-except ImportError as e:
-    logger.error(
-        "Required import `sarif_om` is not installed. This is solved by installing `python3 -m pip install sarif_om>=1.0.4`. %s",
-        e,
-    )
-    exit(-4)
-
-try:
-    from jschema_to_python.to_json import to_json
-except ImportError as e:
-    logger.error(
-        "Required import `jschema_to_python` is not installed. This is solved by installing `python3 -m pip install jschema_to_python>=1.2.3`, %s",
-        e,
-    )
-    exit(-4)
-
-
-def _parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(description="Capa to SARIF analysis file")
-
-    # Positional argument
-    parser.add_argument("capa_output", help="Path to capa JSON output file")
-
-    # Optional arguments
-    parser.add_argument(
-        "-g",
-        "--ghidra-compat",
-        action="store_true",
-        help="Compatibility for Ghidra 11.0.X",
-    )
-    parser.add_argument(
-        "-r",
-        "--radare-compat",
-        action="store_true",
-        help="Compatibility for Radare r2sarif plugin v2.0",
-    )
-    parser.add_argument("-t", "--tag", help="Filter on rule meta field values (ruleid)")
-    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
-
-    return parser.parse_args()
-
-
-def main() -> int:
-    logging.basicConfig(level=logging.INFO)
-    logging.getLogger().setLevel(logging.INFO)
-
-    args = _parse_args()
-
-    try:
-        with Path(args.capa_output).open() as capa_output:
-            json_data = json.load(capa_output)
-    except ValueError:
-        logger.error("Input data was not valid JSON, input should be a capa json output file.")
-        return -1
-    except json.JSONDecodeError:
-        # An exception has occured
-        logger.error("Input data was not valid JSON, input should be a capa json output file.")
-        return -2
-
-    # Marshall json into Sarif
-    # Create baseline sarif structure to be populated from json data
-    sarif_structure: Optional[dict] = _sarif_boilerplate(json_data["meta"], json_data["rules"])
-    if sarif_structure is None:
-        logger.error("An Error has occured creating default sarif structure.")
-        return -3
-
-    _populate_artifact(sarif_structure, json_data["meta"])
-    _populate_invocations(sarif_structure, json_data["meta"])
-    _populate_results(sarif_structure, json_data["rules"], args.ghidra_compat)
-
-    if args.ghidra_compat:
-        # Ghidra can't handle this structure as of 11.0.x
-        if "invocations" in sarif_structure["runs"][0]:
-            del sarif_structure["runs"][0]["invocations"]
-
-        # artifacts must include a description as well with a text field.
-        if "artifacts" in sarif_structure["runs"][0]:
-            sarif_structure["runs"][0]["artifacts"][0]["description"] = {"text": "placeholder"}
-
-        # For better compliance with Ghidra table. Iteraction through properties['additionalProperties']
-        """
-        "additionalProperties": {
-            "to": "<hex addr>",
-            "offset": 0,
-            "primary": true,
-            "index": <>"",
-            "kind": "<kind column value>",
-            "opIndex": 0,
-            "sourceType": ""
-        }
-    """
-
-    if args.radare_compat:
-        # Add just enough for passing tests
-        _add_filler_optional(json_data, sarif_structure)
-
-    print(json.dumps(sarif_structure, indent=4))  # noqa: T201
-    return 0
-
-
-def _sarif_boilerplate(data_meta: dict, data_rules: dict) -> Optional[dict]:
-    # Only track rules that appear in this log, not full 1k
-    rules = []
-    # Parse rules from parsed sarif structure
-    for key in data_rules:
-        # Use attack as default, if both exist then only use attack, if neither exist use the name of rule for ruleID
-        #   this is not good practice to use long name for ruleID
-        attack_length = len(data_rules[key]["meta"]["attack"])
-        mbc_length = len(data_rules[key]["meta"]["mbc"])
-        if attack_length or mbc_length:
-            id = (
-                data_rules[key]["meta"]["attack"][0]["id"]
-                if attack_length > 0
-                else data_rules[key]["meta"]["mbc"][0]["id"]
-            )
-        else:
-            id = data_rules[key]["meta"]["name"]
-
-        # Append current rule
-        rules.append(
-            {
-                # Default to attack identifier, fall back to MBC, mainly relevant if both are present
-                "id": id,
-                "name": data_rules[key]["meta"]["name"],
-                "shortDescription": {"text": data_rules[key]["meta"]["name"]},
-                "messageStrings": {"default": {"text": data_rules[key]["meta"]["name"]}},
-                "properties": {
-                    "namespace": data_rules[key]["meta"]["namespace"] if "namespace" in data_rules[key]["meta"] else [],
-                    "scopes": data_rules[key]["meta"]["scopes"],
-                    "references": data_rules[key]["meta"]["references"],
-                    "lib": data_rules[key]["meta"]["lib"],
-                },
-            }
-        )
-
-    tool = Tool(
-        driver=ToolComponent(
-            name="Capa",
-            version=__version__,
-            information_uri="https://github.com/mandiant/capa",
-            rules=rules,
-        )
-    )
-
-    # Create a SARIF Log object, populate with a single run
-    sarif_log = SarifLog(
-        version="2.1.0",
-        schema_uri="https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json",
-        runs=[Run(tool=tool, results=[], artifacts=[], invocations=[])],
-    )
-
-    # Convert the SARIF log to a dictionary and then to a JSON string
-    try:
-        sarif_outline = json.loads(to_json(sarif_log))
-    except json.JSONDecodeError:
-        # An exception has occured
-        return None
-
-    return sarif_outline
-
-
-def _populate_artifact(sarif_log: dict, meta_data: dict) -> None:
-    """
-    @param sarif_log: dict - sarif data structure including runs
-    @param meta_data: dict - Capa meta output
-    @returns None, updates sarif_log via side-effects
-    """
-    sample = meta_data["sample"]
-    artifact = {
-        "location": {"uri": sample["path"]},
-        "roles": ["analysisTarget"],
-        "hashes": {
-            "md5": sample["md5"],
-            "sha-1": sample["sha1"],
-            "sha-256": sample["sha256"],
-        },
-    }
-    sarif_log["runs"][0]["artifacts"].append(artifact)
-
-
-def _populate_invocations(sarif_log: dict, meta_data: dict) -> None:
-    """
-    @param sarif_log: dict - sarif data structure including runs
-    @param meta_data: dict - Capa meta output
-    @returns None, updates sarif_log via side-effects
-    """
-    analysis_time = meta_data["timestamp"]
-    argv = meta_data["argv"]
-    analysis = meta_data["analysis"]
-    invoke = {
-        "commandLine": "capa " + " ".join(argv),
-        "arguments": argv if len(argv) > 0 else [],
-        # Format in Zulu time, this may require a conversion from local timezone
-        "endTimeUtc": f"{analysis_time}Z",
-        "executionSuccessful": True,
-        "properties": {
-            "format": analysis["format"],
-            "arch": analysis["arch"],
-            "os": analysis["os"],
-            "extractor": analysis["extractor"],
-            "rule_location": analysis["rules"],
-            "base_address": analysis["base_address"],
-        },
-    }
-    sarif_log["runs"][0]["invocations"].append(invoke)
-
-
-def _enumerate_evidence(node: dict, related_count: int) -> List[dict]:
-    related_locations = []
-    if node.get("success") and node.get("node", {}).get("type") != "statement":
-        label = ""
-        if node.get("node", {}).get("type") == "feature":
-            if node.get("node", {}).get("feature", {}).get("type") == "api":
-                label = "api: " + node.get("node", {}).get("feature", {}).get("api")
-            elif node.get("node", {}).get("feature", {}).get("type") == "match":
-                label = "match: " + node.get("node", {}).get("feature", {}).get("match")
-            elif node.get("node", {}).get("feature", {}).get("type") == "number":
-                label = f"number: {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('number')})"
-            elif node.get("node", {}).get("feature", {}).get("type") == "offset":
-                label = f"offset: {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('offset')})"
-            elif node.get("node", {}).get("feature", {}).get("type") == "mnemonic":
-                label = f"mnemonic: {node.get('node', {}).get('feature', {}).get('mnemonic')}"
-            elif node.get("node", {}).get("feature", {}).get("type") == "characteristic":
-                label = f"characteristic: {node.get('node', {}).get('feature', {}).get('characteristic')}"
-            elif node.get("node", {}).get("feature", {}).get("type") == "os":
-                label = f"os: {node.get('node', {}).get('feature', {}).get('os')}"
-            elif node.get("node", {}).get("feature", {}).get("type") == "operand number":
-                label = f"operand: ({node.get('node', {}).get('feature', {}).get('index')} ) {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('operand_number')})"
-            else:
-                logger.error(
-                    "Not implemented %s",
-                    node.get("node", {}).get("feature", {}).get("type"),
-                )
-                return []
-        else:
-            logger.error("Not implemented %s", node.get("node", {}).get("type"))
-            return []
-
-        for loc in node.get("locations", []):
-            if loc["type"] != "absolute":
-                continue
-
-            related_locations.append(
-                {
-                    "id": related_count,
-                    "message": {"text": label},
-                    "physicalLocation": {"address": {"absoluteAddress": loc["value"]}},
-                }
-            )
-            related_count += 1
-
-    if node.get("success") and node.get("node", {}).get("type") == "statement":
-        for child in node.get("children", []):
-            related_locations += _enumerate_evidence(child, related_count)
-
-    return related_locations
-
-
-def _populate_results(sarif_log: dict, data_rules: dict, ghidra_compat: bool) -> None:
-    """
-    @param sarif_log: dict - sarif data structure including runs
-    @param meta_data: dict - Capa meta output
-    @returns None, updates sarif_log via side-effects
-    """
-    results = sarif_log["runs"][0]["results"]
-
-    # Parse rules from parsed sarif structure
-    for key in data_rules:
-        # Use attack as default, if both exist then only use attack, if neither exist use the name of rule for ruleID
-        #   this is not good practice to use long name for ruleID.
-        attack_length = len(data_rules[key]["meta"]["attack"])
-        mbc_length = len(data_rules[key]["meta"]["mbc"])
-        if attack_length or mbc_length:
-            id = (
-                data_rules[key]["meta"]["attack"][0]["id"]
-                if attack_length > 0
-                else data_rules[key]["meta"]["mbc"][0]["id"]
-            )
-        else:
-            id = data_rules[key]["meta"]["name"]
-
-        for address, details in data_rules[key]["matches"]:
-            related_cnt = 0
-            related_locations = _enumerate_evidence(details, related_cnt)
-
-            res = {
-                "ruleId": id,
-                "level": "none" if not ghidra_compat else "NONE",
-                "message": {"text": data_rules[key]["meta"]["name"]},
-                "kind": "informational" if not ghidra_compat else "INFORMATIONAL",
-                "locations": [
-                    {
-                        "physicalLocation": {
-                            "address": {
-                                "absoluteAddress": address["value"],
-                            }
-                        },
-                    }
-                ],
-            }
-            if not ghidra_compat:
-                res["relatedLocations"] = related_locations
-
-            results.append(res)
-
-
-def _add_filler_optional(capa_result: dict, sarif_log: dict) -> None:
-    """Update sarif file with just enough fields to pass radare tests"""
-    base_address = capa_result["meta"]["analysis"]["base_address"]["value"]
-    # Assume there is only one run, and one binary artifact
-    artifact = sarif_log["runs"][0]["artifacts"][0]
-    if "properties" not in artifact:
-        artifact["properties"] = {}
-    if "additionalProperties" not in artifact["properties"]:
-        artifact["properties"]["additionalProperties"] = {}
-    if "imageBase" not in artifact["properties"]["additionalProperties"]:
-        artifact["properties"]["additionalProperties"]["imageBase"] = base_address
-
-
-if __name__ == "__main__":
-    sys.exit(main())
--- a/scripts/import-to-bn.py
+++ b/scripts/import-to-bn.py
@@ -69,8 +69,7 @@ def load_analysis(bv):
        return 0
    binaryninja.log_info(f"Using capa file {path}")

-    with Path(path).open("r", encoding="utf-8") as file:
-        doc = json.load(file)
+    doc = json.loads(path.read_bytes().decode("utf-8"))

    if "meta" not in doc or "rules" not in doc:
        binaryninja.log_error("doesn't appear to be a capa report")
@@ -84,35 +83,20 @@ def load_analysis(bv):
        binaryninja.log_error("sample mismatch")
        return -2

-    # Retreive base address
-    capa_base_address = 0
-    if "analysis" in doc["meta"] and "base_address" in doc["meta"]["analysis"]:
-        if doc["meta"]["analysis"]["base_address"]["type"] == "absolute":
-            capa_base_address = int(doc["meta"]["analysis"]["base_address"]["value"])
-
    rows = []
    for rule in doc["rules"].values():
        if rule["meta"].get("lib"):
            continue
        if rule["meta"].get("capa/subscope"):
            continue
-        if rule["meta"]["scopes"].get("static") != "function":
+        if rule["meta"]["scope"] != "function":
            continue

        name = rule["meta"]["name"]
        ns = rule["meta"].get("namespace", "")
-        for matches in rule["matches"]:
-            for match in matches:
-                if "type" not in match.keys():
-                    continue
-                if "value" not in match.keys():
-                    continue
-                va = match["value"]
-                # Substract va and CAPA base_address
-                va = int(va) - capa_base_address
-                # Add binja base address
-                va = va + bv.start
-                rows.append((ns, name, va))
+        for va in rule["matches"].keys():
+            va = int(va)
+            rows.append((ns, name, va))

    # order by (namespace, name) so that like things show up together
    rows = sorted(rows)
--- a/scripts/show-features.py
+++ b/scripts/show-features.py
@@ -171,8 +171,8 @@ def print_dynamic_analysis(extractor: DynamicFeatureExtractor, args):
    process_handles = tuple(extractor.get_processes())

    if args.process:
-        process_handles = tuple(filter(lambda ph: extractor.get_process_name(ph) == args.process, process_handles))
-        if args.process not in [extractor.get_process_name(ph) for ph in process_handles]:
+        process_handles = tuple(filter(lambda ph: ph.inner["name"] == args.process, process_handles))
+        if args.process not in [ph.inner["name"] for ph in args.process]:
            print(f"{args.process} not a process")
            return -1

@@ -227,13 +227,13 @@ def print_static_features(functions, extractor: StaticFeatureExtractor):

 def print_dynamic_features(processes, extractor: DynamicFeatureExtractor):
    for p in processes:
-        print(f"proc: {extractor.get_process_name(p)} (ppid={p.address.ppid}, pid={p.address.pid})")
+        print(f"proc: {p.inner.process_name} (ppid={p.address.ppid}, pid={p.address.pid})")

        for feature, addr in extractor.extract_process_features(p):
            if is_global_feature(feature):
                continue

-            print(f" proc: {extractor.get_process_name(p)}: {feature}")
+            print(f" proc: {p.inner.process_name}: {feature}")

            for t in extractor.get_threads(p):
                print(f"  thread: {t.address.tid}")
--- a/tests/data
+++ b/tests/data
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -389,8 +389,6 @@ def get_data_path_by_name(name) -> Path:
        return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
    elif name.startswith("1038a2"):
        return CD / "data" / "1038a23daad86042c66bfe6c9d052d27048de9653bde5750dc0f240c792d9ac8.elf_"
-    elif name.startswith("3da7c"):
-        return CD / "data" / "3da7c2c70a2d93ac4643f20339d5c7d61388bddd77a4a5fd732311efad78e535.elf_"
    elif name.startswith("nested_typedef"):
        return CD / "data" / "dotnet" / "dd9098ff91717f4906afe9dafdfa2f52.exe_"
    elif name.startswith("nested_typeref"):
--- a/tests/test_elffile_features.py
+++ b/tests/test_elffile_features.py
@@ -14,11 +14,17 @@ from capa.features.extractors.elffile import extract_file_export_names, extract_

 CD = Path(__file__).resolve().parent
 SAMPLE_PATH = CD / "data" / "055da8e6ccfe5a9380231ea04b850e18.elf_"
-STRIPPED_SAMPLE_PATH = CD / "data" / "bb38149ff4b5c95722b83f24ca27a42b.elf_"


-def check_import_features(sample_path, expected_imports):
-    path = Path(sample_path)
+def test_elffile_import_features():
+    expected_imports = [
+        "memfrob",
+        "puts",
+        "__libc_start_main",
+        "malloc",
+        "__cxa_finalize",
+    ]
+    path = Path(SAMPLE_PATH)
    elf = ELFFile(io.BytesIO(path.read_bytes()))
    # Extract imports
    imports = list(extract_file_import_names(elf))
@@ -34,52 +40,6 @@ def check_import_features(sample_path, expected_imports):
        assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in imports."


-def check_export_features(sample_path, expected_exports):
-    path = Path(sample_path)
-    elf = ELFFile(io.BytesIO(path.read_bytes()))
-    # Extract imports
-    exports = list(extract_file_export_names(elf))
-
-    # Verify that at least one export was found
-    assert len(exports) > 0, "No exports were found."
-
-    # Extract the symbol names from the extracted imports
-    extracted_symbol_names = [exported[0].value for exported in exports]
-
-    # Check if all expected symbol names are found
-    for symbol_name in expected_exports:
-        assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports."
-
-
-def test_stripped_elffile_import_features():
-    expected_imports = ["__cxa_atexit", "__cxa_finalize", "__stack_chk_fail", "fclose", "fopen", "__android_log_print"]
-    check_import_features(STRIPPED_SAMPLE_PATH, expected_imports)
-
-
-def test_stripped_elffile_export_features():
-    expected_exports = [
-        "_ZN7_JNIEnv14GetArrayLengthEP7_jarray",
-        "Java_o_ac_a",
-        "Java_o_ac_b",
-        "_Z6existsPKc",
-        "_ZN7_JNIEnv17GetStringUTFCharsEP8_jstringPh",
-        "_ZN7_JNIEnv21GetObjectArrayElementEP13_jobjectArrayi",
-        "_ZN7_JNIEnv21ReleaseStringUTFCharsEP8_jstringPKc",
-    ]
-    check_export_features(STRIPPED_SAMPLE_PATH, expected_exports)
-
-
-def test_elffile_import_features():
-    expected_imports = [
-        "memfrob",
-        "puts",
-        "__libc_start_main",
-        "malloc",
-        "__cxa_finalize",
-    ]
-    check_import_features(SAMPLE_PATH, expected_imports)
-
-
 def test_elffile_export_features():
    expected_exports = [
        "deregister_tm_clones",
@@ -95,4 +55,17 @@ def test_elffile_export_features():
        "_IO_stdin_used",
        "__libc_csu_init",
    ]
-    check_export_features(SAMPLE_PATH, expected_exports)
+    path = Path(SAMPLE_PATH)
+    elf = ELFFile(io.BytesIO(path.read_bytes()))
+    # Extract imports
+    exports = list(extract_file_export_names(elf))
+
+    # Verify that at least one export was found
+    assert len(exports) > 0, "No exports were found."
+
+    # Extract the symbol names from the extracted imports
+    extracted_symbol_names = [exported[0].value for exported in exports]
+
+    # Check if all expected symbol names are found
+    for symbol_name in expected_exports:
+        assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports."
--- a/tests/test_os_detection.py
+++ b/tests/test_os_detection.py
@@ -92,12 +92,6 @@ def test_elf_android_notes():
        assert capa.features.extractors.elf.detect_elf_os(f) == "android"


-def test_elf_go_buildinfo():
-    path = get_data_path_by_name("3da7c")
-    with Path(path).open("rb") as f:
-        assert capa.features.extractors.elf.detect_elf_os(f) == "linux"
-
-
 def test_elf_parse_capa_pyinstaller_header():
    # error after misidentified large pydata section with address 0; fixed in #1454
    # compressed ELF header of capa-v5.1.0-linux
--- a/tests/test_scripts.py
+++ b/tests/test_scripts.py
@@ -23,21 +23,10 @@ def get_script_path(s: str):
    return str(CD / ".." / "scripts" / s)


-def get_binary_file_path():
+def get_file_path():
    return str(CD / "data" / "9324d1a8ae37a36ae560c37448c9705a.exe_")


-def get_report_file_path():
-    return str(
-        CD
-        / "data"
-        / "dynamic"
-        / "cape"
-        / "v2.4"
-        / "fb7ade52dc5a1d6128b9c217114a46d0089147610f99f5122face29e429a1e74.json.gz"
-    )
-
-
 def get_rules_path():
    return str(CD / ".." / "rules")

@@ -51,21 +40,16 @@ def get_rule_path():
    [
        pytest.param("capa2yara.py", [get_rules_path()]),
        pytest.param("capafmt.py", [get_rule_path()]),
-        pytest.param(
-            "capa2sarif.py",
-            [Path(__file__).resolve().parent / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json"],
-        ),
        # testing some variations of linter script
        pytest.param("lint.py", ["-t", "create directory", get_rules_path()]),
        # `create directory` rule has native and .NET example PEs
        pytest.param("lint.py", ["--thorough", "-t", "create directory", get_rules_path()]),
-        pytest.param("match-function-id.py", [get_binary_file_path()]),
-        pytest.param("show-capabilities-by-function.py", [get_binary_file_path()]),
-        pytest.param("show-features.py", [get_binary_file_path()]),
-        pytest.param("show-features.py", ["-F", "0x407970", get_binary_file_path()]),
-        pytest.param("show-features.py", ["-P", "MicrosoftEdgeUpdate.exe", get_report_file_path()]),
-        pytest.param("show-unused-features.py", [get_binary_file_path()]),
-        pytest.param("capa_as_library.py", [get_binary_file_path()]),
+        pytest.param("match-function-id.py", [get_file_path()]),
+        pytest.param("show-capabilities-by-function.py", [get_file_path()]),
+        pytest.param("show-features.py", [get_file_path()]),
+        pytest.param("show-features.py", ["-F", "0x407970", get_file_path()]),
+        pytest.param("show-unused-features.py", [get_file_path()]),
+        pytest.param("capa_as_library.py", [get_file_path()]),
    ],
 )
 def test_scripts(script, args):
Author	SHA1	Message	Date
Mike Hunhoff	fb72e5e8fd	fix lints	2024-06-10 14:49:03 -06:00
Willi Ballenthin	89ebfe6b0c	features: add BinExport2 declarations	2024-06-10 14:48:36 -06:00