Compare commits

..

2 Commits

Author SHA1 Message Date
Mike Hunhoff
fb72e5e8fd fix lints 2024-06-10 14:49:03 -06:00
Willi Ballenthin
89ebfe6b0c features: add BinExport2 declarations 2024-06-10 14:48:36 -06:00
27 changed files with 168 additions and 1333 deletions

View File

@@ -32,7 +32,7 @@ jobs:
artifact_name: capa.exe
asset_name: windows
python_version: 3.8
- os: macos-12
- os: macos-11
# use older macOS for assumed better portability
artifact_name: capa
asset_name: macos
@@ -51,9 +51,7 @@ jobs:
- name: Upgrade pip, setuptools
run: python -m pip install --upgrade pip setuptools
- name: Install capa with build requirements
run: |
pip install -r requirements.txt
pip install -e .[build]
run: pip install -e .[build]
- name: Build standalone executable
run: pyinstaller --log-level DEBUG .github/pyinstaller/pyinstaller.spec
- name: Does it run (PE)?

View File

@@ -25,7 +25,6 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -e .[build]
- name: build package
run: |

View File

@@ -35,9 +35,7 @@ jobs:
with:
python-version: "3.11"
- name: Install dependencies
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
run: pip install -e .[dev]
- name: Lint with ruff
run: pre-commit run ruff
- name: Lint with isort
@@ -63,9 +61,7 @@ jobs:
with:
python-version: "3.11"
- name: Install capa
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
run: pip install -e .[dev]
- name: Run rule linter
run: python scripts/lint.py rules/
@@ -76,7 +72,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04, windows-2019, macos-12]
os: [ubuntu-20.04, windows-2019, macos-11]
# across all operating systems
python-version: ["3.8", "3.11"]
include:
@@ -100,9 +96,7 @@ jobs:
if: matrix.os == 'ubuntu-20.04'
run: sudo apt-get install -y libyaml-dev
- name: Install capa
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
run: pip install -e .[dev]
- name: Run tests (fast)
# this set of tests runs about 80% of the cases in 20% of the time,
# and should catch most errors quickly.
@@ -137,9 +131,7 @@ jobs:
run: sudo apt-get install -y libyaml-dev
- name: Install capa
if: ${{ env.BN_SERIAL != 0 }}
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
run: pip install -e .[dev]
- name: install Binary Ninja
if: ${{ env.BN_SERIAL != 0 }}
run: |
@@ -196,9 +188,7 @@ jobs:
- name: Install pyyaml
run: sudo apt-get install -y libyaml-dev
- name: Install capa
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
run: pip install -e .[dev]
- name: Run tests
run: |
mkdir ./.github/ghidra/project

View File

@@ -110,16 +110,6 @@ repos:
always_run: true
pass_filenames: false
- repo: local
hooks:
- id: deptry
name: deptry
stages: [push, manual]
language: system
entry: deptry .
always_run: true
pass_filenames: false
- repo: local
hooks:
- id: pytest-fast
@@ -138,3 +128,12 @@ repos:
always_run: true
pass_filenames: false
- repo: local
hooks:
- id: deptry
name: deptry
stages: [push, manual]
language: system
entry: deptry .
always_run: true
pass_filenames: false

View File

@@ -1,58 +1,20 @@
# Change Log
## master (unreleased)
### New Features
### Breaking Changes
### New Rules (0)
-
### Bug Fixes
- elf: extract import / export symbols from stripped binaries #2096 @ygasparis
### capa explorer IDA Pro plugin
### Development
- CI: use macos-12 since macos-11 is deprecated and will be removed on June 28th, 2024 #2173 @mr-tz
### Raw diffs
- [capa v7.1.0...master](https://github.com/mandiant/capa/compare/v7.1.0...master)
- [capa-rules v7.1.0...master](https://github.com/mandiant/capa-rules/compare/v7.1.0...master)
## v7.1.0
The v7.1.0 release brings large performance improvements to capa's rule matching engine.
Additionally, we've fixed various bugs and added new features for people using and developing capa.
Special thanks to our repeat and new contributors:
* @sjha2048 made their first contribution in https://github.com/mandiant/capa/pull/2000
* @Rohit1123 made their first contribution in https://github.com/mandiant/capa/pull/1990
* @psahithireddy made their first contribution in https://github.com/mandiant/capa/pull/2020
* @Atlas-64 made their first contribution in https://github.com/mandiant/capa/pull/2018
* @s-ff made their first contribution in https://github.com/mandiant/capa/pull/2011
* @samadpls made their first contribution in https://github.com/mandiant/capa/pull/2024
* @acelynnzhang made their first contribution in https://github.com/mandiant/capa/pull/2044
* @RainRat made their first contribution in https://github.com/mandiant/capa/pull/2058
* @ReversingWithMe made their first contribution in https://github.com/mandiant/capa/pull/2093
* @malwarefrank made their first contribution in https://github.com/mandiant/capa/pull/2037
### New Features
- Emit "dotnet" as format to ResultDocument when processing .NET files #2024 @samadpls
- ELF: detect OS from statically-linked Go binaries #1978 @williballenthin
### New Features
- add function in capa/helpers to load plain and compressed JSON reports #1883 @Rohit1123
- document Antivirus warnings and VirusTotal false positive detections #2028 @RionEV @mr-tz
- Add json to sarif conversion script @reversingwithme
- render maec/* fields #843 @s-ff
- replace Halo spinner with Rich #2086 @s-ff
- optimize rule matching #2080 @williballenthin
- add aarch64 as a valid architecture #2144 mehunhoff@google.com @williballenthin
- relax dependency version requirements for the capa library #2053 @williballenthin
- add scripts dependency group and update documentation #2145 @mr-tz
### New Rules (25)
### Breaking Changes
### New Rules (17)
- impact/wipe-disk/delete-drive-layout-via-ioctl william.ballenthin@mandiant.com
- host-interaction/driver/interact-with-driver-via-ioctl moritz.raabe@mandiant.com
@@ -71,14 +33,7 @@ Special thanks to our repeat and new contributors:
- persistence/act-as-time-provider-dll jakub.jozwiak@mandiant.com
- host-interaction/gui/window/hide/hide-graphical-window-from-taskbar jakub.jozwiak@mandiant.com
- compiler/dart/compiled-with-dart jakub.jozwiak@mandiant.com
- nursery/bypass-hidden-api-restrictions-via-jni-on-android mehunhoff@google.com
- nursery/get-current-process-filesystem-mounts-on-linux mehunhoff@google.com
- nursery/get-current-process-memory-mapping-on-linux mehunhoff@google.com
- nursery/get-system-property-on-android mehunhoff@google.com
- nursery/hook-routines-via-lsplant mehunhoff@google.com
- nursery/load-packed-dex-via-jiagu-on-android mehunhoff@google.com
- nursery/modify-api-blacklist-or-denylist-via-jni-on-android mehunhoff@google.com
- nursery/truncate-file-on-linux mehunhoff@google.com
-
### Bug Fixes
@@ -87,6 +42,7 @@ Special thanks to our repeat and new contributors:
- binja: add support for forwarded export #1646 @xusheng6
- cape: support more report formats #2035 @mr-tz
### capa explorer IDA Pro plugin
- replace deprecated IDA API find_binary with bin_search #1606 @s-ff
@@ -101,8 +57,8 @@ Special thanks to our repeat and new contributors:
- add deptry support #1497 @s-ff
### Raw diffs
- [capa v7.0.1...v7.1.0](https://github.com/mandiant/capa/compare/v7.0.1...v7.1.0)
- [capa-rules v7.0.1...v7.1.0](https://github.com/mandiant/capa-rules/compare/v7.0.1...v7.1.0)
- [capa v7.0.1...master](https://github.com/mandiant/capa/compare/v7.0.1...master)
- [capa-rules v7.0.1...master](https://github.com/mandiant/capa-rules/compare/v7.0.1...master)
## v7.0.1

View File

@@ -412,7 +412,7 @@ ARCH_AMD64 = "amd64"
ARCH_AARCH64 = "aarch64"
# dotnet
ARCH_ANY = "any"
VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_AARCH64, ARCH_ANY)
VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY, ARCH_AARCH64)
class Arch(Feature):
@@ -460,6 +460,7 @@ VALID_FORMAT = (FORMAT_PE, FORMAT_ELF, FORMAT_DOTNET)
FORMAT_AUTO = "auto"
FORMAT_SC32 = "sc32"
FORMAT_SC64 = "sc64"
FORMAT_BINEXPORT2 = "binexport2"
FORMAT_CAPE = "cape"
FORMAT_FREEZE = "freeze"
FORMAT_RESULT = "result"
@@ -471,6 +472,7 @@ STATIC_FORMATS = {
FORMAT_DOTNET,
FORMAT_FREEZE,
FORMAT_RESULT,
FORMAT_BINEXPORT2,
}
DYNAMIC_FORMATS = {
FORMAT_CAPE,

View File

@@ -28,7 +28,7 @@ from capa.features.extractors.base_extractor import (
class BinjaFeatureExtractor(StaticFeatureExtractor):
def __init__(self, bv: binja.BinaryView):
super().__init__(hashes=SampleHashes.from_bytes(bv.file.raw.read(0, bv.file.raw.length)))
super().__init__(hashes=SampleHashes.from_bytes(bv.file.raw.read(0, len(bv.file.raw))))
self.bv = bv
self.global_features: List[Tuple[Feature, Address]] = []
self.global_features.extend(capa.features.extractors.binja.file.extract_file_format(self.bv))

View File

@@ -48,7 +48,7 @@ def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
else:
logger.warning("unknown file format, file command output: %s", report.target.file.type)
raise ValueError(
f"unrecognized file format from the CAPE report; output of file command: {report.target.file.type}"
"unrecognized file format from the CAPE report; output of file command: {report.target.file.type}"
)
@@ -73,7 +73,7 @@ def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
else:
# if the operating system information is missing from the cape report, it's likely a bug
logger.warning("unrecognized OS: %s", file_output)
raise ValueError(f"unrecognized OS from the CAPE report; output of file command: {file_output}")
raise ValueError("unrecognized OS from the CAPE report; output of file command: {file_output}")
else:
# the sample is shellcode
logger.debug("unsupported file format, file command output: %s", file_output)

View File

@@ -83,7 +83,7 @@ def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str
return None
try:
user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get(token.rid)
user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid)
except UnicodeDecodeError as e:
logger.debug("failed to decode #US stream index 0x%08x (%s)", token.rid, e)
return None
@@ -119,14 +119,14 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
access: Optional[str]
# assume .NET imports starting with get_/set_ are used to access a property
member_ref_name: str = str(member_ref.Name)
if member_ref_name.startswith("get_"):
if member_ref.Name.startswith("get_"):
access = FeatureAccess.READ
elif member_ref_name.startswith("set_"):
elif member_ref.Name.startswith("set_"):
access = FeatureAccess.WRITE
else:
access = None
member_ref_name: str = member_ref.Name
if member_ref_name.startswith(("get_", "set_")):
# remove get_/set_ from MemberRef name
member_ref_name = member_ref_name[4:]
@@ -212,7 +212,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
token: int = calculate_dotnet_token_value(method.table.number, method.row_index)
access: Optional[str] = accessor_map.get(token)
method_name: str = str(method.row.Name)
method_name: str = method.row.Name
if method_name.startswith(("get_", "set_")):
# remove get_/set_
method_name = method_name[4:]
@@ -289,8 +289,8 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod]
logger.debug("ImplMap[0x%X] ImportScope row is None", rid)
module = ""
else:
module = str(impl_map.ImportScope.row.Name)
method: str = str(impl_map.ImportName)
module = impl_map.ImportScope.row.Name
method: str = impl_map.ImportName
member_forward_table: int
if impl_map.MemberForwarded.table is None:
@@ -320,11 +320,8 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> O
if row_index - 1 <= 0:
return None
table: Optional[dnfile.base.ClrMetaDataTable] = pe.net.mdtables.tables.get(table_index)
if table is None:
return None
try:
table = pe.net.mdtables.tables.get(table_index, [])
return table[row_index - 1]
except IndexError:
return None
@@ -337,7 +334,7 @@ def resolve_nested_typedef_name(
if index in nested_class_table:
typedef_name = []
name = str(typedef.TypeName)
name = typedef.TypeName
# Append the current typedef name
typedef_name.append(name)
@@ -346,24 +343,24 @@ def resolve_nested_typedef_name(
# Iterate through the typedef table to resolve the nested name
table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index])
if table_row is None:
return str(typedef.TypeNamespace), tuple(typedef_name[::-1])
return typedef.TypeNamespace, tuple(typedef_name[::-1])
name = str(table_row.TypeName)
name = table_row.TypeName
typedef_name.append(name)
index = nested_class_table[index]
# Document the root enclosing details
table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index])
if table_row is None:
return str(typedef.TypeNamespace), tuple(typedef_name[::-1])
return typedef.TypeNamespace, tuple(typedef_name[::-1])
enclosing_name = str(table_row.TypeName)
enclosing_name = table_row.TypeName
typedef_name.append(enclosing_name)
return str(table_row.TypeNamespace), tuple(typedef_name[::-1])
return table_row.TypeNamespace, tuple(typedef_name[::-1])
else:
return str(typedef.TypeNamespace), (str(typedef.TypeName),)
return typedef.TypeNamespace, (typedef.TypeName,)
def resolve_nested_typeref_name(
@@ -373,29 +370,29 @@ def resolve_nested_typeref_name(
# If the ResolutionScope decodes to a typeRef type then it is nested
if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef):
typeref_name = []
name = str(typeref.TypeName)
name = typeref.TypeName
# Not appending the current typeref name to avoid potential duplicate
# Validate index
table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, index)
if table_row is None:
return str(typeref.TypeNamespace), (str(typeref.TypeName),)
return typeref.TypeNamespace, (typeref.TypeName,)
while isinstance(table_row.ResolutionScope.table, dnfile.mdtable.TypeRef):
# Iterate through the typeref table to resolve the nested name
typeref_name.append(name)
name = str(table_row.TypeName)
name = table_row.TypeName
table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, table_row.ResolutionScope.row_index)
if table_row is None:
return str(typeref.TypeNamespace), tuple(typeref_name[::-1])
return typeref.TypeNamespace, tuple(typeref_name[::-1])
# Document the root enclosing details
typeref_name.append(str(table_row.TypeName))
typeref_name.append(table_row.TypeName)
return str(table_row.TypeNamespace), tuple(typeref_name[::-1])
return table_row.TypeNamespace, tuple(typeref_name[::-1])
else:
return str(typeref.TypeNamespace), (str(typeref.TypeName),)
return typeref.TypeNamespace, (typeref.TypeName,)
def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]:

View File

@@ -78,12 +78,12 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
for _, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
# emit internal .NET namespaces
assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
namespaces.add(str(typedef.TypeNamespace))
namespaces.add(typedef.TypeNamespace)
for _, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number):
# emit external .NET namespaces
assert isinstance(typeref, dnfile.mdtable.TypeRefRow)
namespaces.add(str(typeref.TypeNamespace))
namespaces.add(typeref.TypeNamespace)
# namespaces may be empty, discard
namespaces.discard("")

View File

@@ -58,10 +58,6 @@ class OS(str, Enum):
SYLLABLE = "syllable"
NACL = "nacl"
ANDROID = "android"
DRAGONFLYBSD = "dragonfly BSD"
ILLUMOS = "illumos"
ZOS = "z/os"
UNIX = "unix"
# via readelf: https://github.com/bminor/binutils-gdb/blob/c0e94211e1ac05049a4ce7c192c9d14d1764eb3e/binutils/readelf.c#L19635-L19658
@@ -85,8 +81,6 @@ class Phdr:
paddr: int
filesz: int
buf: bytes
flags: int
memsz: int
@dataclass
@@ -321,23 +315,24 @@ class ELF:
phent_offset = i * self.e_phentsize
phent = self.phbuf[phent_offset : phent_offset + self.e_phentsize]
(p_type,) = struct.unpack_from(self.endian + "I", phent, 0x0)
logger.debug("ph:p_type: 0x%04x", p_type)
if self.bitness == 32:
p_type, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz, p_flags = struct.unpack_from(
self.endian + "IIIIIII", phent, 0x0
)
p_offset, p_vaddr, p_paddr, p_filesz = struct.unpack_from(self.endian + "IIII", phent, 0x4)
elif self.bitness == 64:
p_type, p_flags, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz = struct.unpack_from(
self.endian + "IIQQQQQ", phent, 0x0
)
p_offset, p_vaddr, p_paddr, p_filesz = struct.unpack_from(self.endian + "QQQQ", phent, 0x8)
else:
raise NotImplementedError()
logger.debug("ph:p_offset: 0x%02x p_filesz: 0x%04x", p_offset, p_filesz)
self.f.seek(p_offset)
buf = self.f.read(p_filesz)
if len(buf) != p_filesz:
raise ValueError("failed to read program header content")
return Phdr(p_type, p_offset, p_vaddr, p_paddr, p_filesz, buf, p_flags, p_memsz)
return Phdr(p_type, p_offset, p_vaddr, p_paddr, p_filesz, buf)
@property
def program_headers(self):
@@ -362,6 +357,8 @@ class ELF:
else:
raise NotImplementedError()
logger.debug("sh:sh_offset: 0x%02x sh_size: 0x%04x", sh_offset, sh_size)
self.f.seek(sh_offset)
buf = self.f.read(sh_size)
if len(buf) != sh_size:
@@ -870,8 +867,6 @@ def guess_os_from_ident_directive(elf: ELF) -> Optional[OS]:
return OS.LINUX
elif "Red Hat" in comment:
return OS.LINUX
elif "Alpine" in comment:
return OS.LINUX
elif "Android" in comment:
return OS.ANDROID
@@ -957,506 +952,11 @@ def guess_os_from_symtab(elf: ELF) -> Optional[OS]:
for os, hints in keywords.items():
if any(hint in sym_name for hint in hints):
logger.debug("symtab: %s looks like %s", sym_name, os)
return os
return None
def is_go_binary(elf: ELF) -> bool:
for shdr in elf.section_headers:
if shdr.get_name(elf) == ".note.go.buildid":
logger.debug("go buildinfo: found section .note.go.buildid")
return True
# The `go version` command enumerates sections for the name `.go.buildinfo`
# (in addition to looking for the BUILDINFO_MAGIC) to check if an executable is go or not.
# See references to the `errNotGoExe` error here:
# https://github.com/golang/go/blob/master/src/debug/buildinfo/buildinfo.go#L41
for shdr in elf.section_headers:
if shdr.get_name(elf) == ".go.buildinfo":
logger.debug("go buildinfo: found section .go.buildinfo")
return True
# other strategy used by FLOSS: search for known runtime strings.
# https://github.com/mandiant/flare-floss/blob/b2ca8adfc5edf278861dd6bff67d73da39683b46/floss/language/identify.py#L88
return False
def get_go_buildinfo_data(elf: ELF) -> Optional[bytes]:
for shdr in elf.section_headers:
if shdr.get_name(elf) == ".go.buildinfo":
logger.debug("go buildinfo: found section .go.buildinfo")
return shdr.buf
PT_LOAD = 0x1
PF_X = 1
PF_W = 2
for phdr in elf.program_headers:
if phdr.type != PT_LOAD:
continue
if (phdr.flags & (PF_X | PF_W)) == PF_W:
logger.debug("go buildinfo: found data segment")
return phdr.buf
return None
def read_data(elf: ELF, rva: int, size: int) -> Optional[bytes]:
# ELF segments are for runtime data,
# ELF sections are for link-time data.
# So we want to read Program Headers/Segments.
for phdr in elf.program_headers:
if phdr.vaddr <= rva < phdr.vaddr + phdr.memsz:
segment_data = phdr.buf
# pad the section with NULLs
# assume page alignment is already handled.
# might need more hardening here.
if len(segment_data) < phdr.memsz:
segment_data += b"\x00" * (phdr.memsz - len(segment_data))
segment_offset = rva - phdr.vaddr
return segment_data[segment_offset : segment_offset + size]
return None
def read_go_slice(elf: ELF, rva: int) -> Optional[bytes]:
if elf.bitness == 32:
struct_size = 8
struct_format = elf.endian + "II"
elif elf.bitness == 64:
struct_size = 16
struct_format = elf.endian + "QQ"
else:
raise ValueError("invalid psize")
struct_buf = read_data(elf, rva, struct_size)
if not struct_buf:
return None
addr, length = struct.unpack_from(struct_format, struct_buf, 0)
return read_data(elf, addr, length)
def guess_os_from_go_buildinfo(elf: ELF) -> Optional[OS]:
"""
In a binary compiled by Go, the buildinfo structure may contain
metadata about the build environment, including the configured
GOOS, which specifies the target operating system.
Search for and parse the buildinfo structure,
which may be found in the .go.buildinfo section,
and often contains this metadata inline. Otherwise,
follow a few byte slices to the relevant information.
This strategy is derived from GoReSym.
"""
buf = get_go_buildinfo_data(elf)
if not buf:
logger.debug("go buildinfo: no buildinfo section")
return None
assert isinstance(buf, bytes)
# The build info blob left by the linker is identified by
# a 16-byte header, consisting of:
# - buildInfoMagic (14 bytes),
# - the binary's pointer size (1 byte), and
# - whether the binary is big endian (1 byte).
#
# Then:
# - virtual address to Go string: runtime.buildVersion
# - virtual address to Go string: runtime.modinfo
#
# On 32-bit platforms, the last 8 bytes are unused.
#
# If the endianness has the 2 bit set, then the pointers are zero,
# and the 32-byte header is followed by varint-prefixed string data
# for the two string values we care about.
# https://github.com/mandiant/GoReSym/blob/0860a1b1b4f3495e9fb7e71eb4386bf3e0a7c500/buildinfo/buildinfo.go#L185-L193
BUILDINFO_MAGIC = b"\xFF Go buildinf:"
try:
index = buf.index(BUILDINFO_MAGIC)
except ValueError:
logger.debug("go buildinfo: no buildinfo magic")
return None
psize, flags = struct.unpack_from("<bb", buf, index + len(BUILDINFO_MAGIC))
assert psize in (4, 8)
is_big_endian = flags & 0b01
has_inline_strings = flags & 0b10
logger.debug("go buildinfo: psize: %d big endian: %s inline: %s", psize, is_big_endian, has_inline_strings)
GOOS_TO_OS = {
b"aix": OS.AIX,
b"android": OS.ANDROID,
b"dragonfly": OS.DRAGONFLYBSD,
b"freebsd": OS.FREEBSD,
b"hurd": OS.HURD,
b"illumos": OS.ILLUMOS,
b"linux": OS.LINUX,
b"netbsd": OS.NETBSD,
b"openbsd": OS.OPENBSD,
b"solaris": OS.SOLARIS,
b"zos": OS.ZOS,
b"windows": None, # PE format
b"plan9": None, # a.out format
b"ios": None, # Mach-O format
b"darwin": None, # Mach-O format
b"nacl": None, # dropped in GO 1.14
b"js": None,
}
if has_inline_strings:
# This is the common case/path. Most samples will have an inline GOOS string.
#
# To find samples on VT, use these VTGrep searches:
#
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 02}
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 02}
# If present, the GOOS key will be found within
# the current buildinfo data region.
#
# Brute force the k-v pair, like `GOOS=linux`,
# rather than try to parse the data, which would be fragile.
for key, os in GOOS_TO_OS.items():
if (b"GOOS=" + key) in buf:
logger.debug("go buildinfo: found os: %s", os)
return os
else:
# This is the uncommon path. Most samples will have an inline GOOS string.
#
# To find samples on VT, use the referenced VTGrep content searches.
info_format = {
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 00}
# like: 71e617e5cc7fda89bf67422ff60f437e9d54622382c5ed6ff31f75e601f9b22e
# in which the modinfo doesn't have GOOS.
(4, False): "<II",
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 00}
# like: 93d3b3e2a904c6c909e20f2f76c3c2e8d0c81d535eb46e5493b5701f461816c3
# in which the modinfo doesn't have GOOS.
(8, False): "<QQ",
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 01}
# (no matches on VT today)
(4, True): ">II",
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 01}
# like: d44ba497964050c0e3dd2a192c511e4c3c4f17717f0322a554d64b797ee4690a
# in which the modinfo doesn't have GOOS.
(8, True): ">QQ",
}
build_version_address, modinfo_address = struct.unpack_from(
info_format[(psize, is_big_endian)], buf, index + 0x10
)
logger.debug("go buildinfo: build version address: 0x%x", build_version_address)
logger.debug("go buildinfo: modinfo address: 0x%x", modinfo_address)
build_version = read_go_slice(elf, build_version_address)
if build_version:
logger.debug("go buildinfo: build version: %s", build_version.decode("utf-8"))
modinfo = read_go_slice(elf, modinfo_address)
if modinfo:
if modinfo[-0x11] == ord("\n"):
# Strip module framing: sentinel strings delimiting the module info.
# These are cmd/go/internal/modload/build.infoStart and infoEnd.
# Which should probably be:
# infoStart, _ = hex.DecodeString("3077af0c9274080241e1c107e6d618e6")
# infoEnd, _ = hex.DecodeString("f932433186182072008242104116d8f2")
modinfo = modinfo[0x10:-0x10]
logger.debug("go buildinfo: modinfo: %s", modinfo.decode("utf-8"))
if not modinfo:
return None
for key, os in GOOS_TO_OS.items():
# Brute force the k-v pair, like `GOOS=linux`,
# rather than try to parse the data, which would be fragile.
if (b"GOOS=" + key) in modinfo:
logger.debug("go buildinfo: found os: %s", os)
return os
return None
def guess_os_from_go_source(elf: ELF) -> Optional[OS]:
"""
In a binary compiled by Go, runtime metadata may contain
references to the source filenames, including the
src/runtime/os_* files, whose name indicates the
target operating system.
Confirm the given ELF seems to be built by Go,
and then look for strings that look like
Go source filenames.
This strategy is derived from GoReSym.
"""
if not is_go_binary(elf):
return None
for phdr in elf.program_headers:
buf = phdr.buf
NEEDLE_OS = b"/src/runtime/os_"
try:
index = buf.index(NEEDLE_OS)
except ValueError:
continue
rest = buf[index + len(NEEDLE_OS) : index + len(NEEDLE_OS) + 32]
filename = rest.partition(b".go")[0].decode("utf-8")
logger.debug("go source: filename: /src/runtime/os_%s.go", filename)
# via: https://cs.opensource.google/go/go/+/master:src/runtime/;bpv=1;bpt=0
# candidates today:
# - aix
# - android
# - darwin
# - darwin_arm64
# - dragonfly
# - freebsd
# - freebsd2
# - freebsd_amd64
# - freebsd_arm
# - freebsd_arm64
# - freebsd_noauxv
# - freebsd_riscv64
# - illumos
# - js
# - linux
# - linux_arm
# - linux_arm64
# - linux_be64
# - linux_generic
# - linux_loong64
# - linux_mips64x
# - linux_mipsx
# - linux_noauxv
# - linux_novdso
# - linux_ppc64x
# - linux_riscv64
# - linux_s390x
# - linux_x86
# - netbsd
# - netbsd_386
# - netbsd_amd64
# - netbsd_arm
# - netbsd_arm64
# - nonopenbsd
# - only_solaris
# - openbsd
# - openbsd_arm
# - openbsd_arm64
# - openbsd_libc
# - openbsd_mips64
# - openbsd_syscall
# - openbsd_syscall1
# - openbsd_syscall2
# - plan9
# - plan9_arm
# - solaris
# - unix
# - unix_nonlinux
# - wasip1
# - wasm
# - windows
# - windows_arm
# - windows_arm64
OS_FILENAME_TO_OS = {
"aix": OS.AIX,
"android": OS.ANDROID,
"dragonfly": OS.DRAGONFLYBSD,
"freebsd": OS.FREEBSD,
"freebsd2": OS.FREEBSD,
"freebsd_": OS.FREEBSD,
"illumos": OS.ILLUMOS,
"linux": OS.LINUX,
"netbsd": OS.NETBSD,
"only_solaris": OS.SOLARIS,
"openbsd": OS.OPENBSD,
"solaris": OS.SOLARIS,
"unix_nonlinux": OS.UNIX,
}
for prefix, os in OS_FILENAME_TO_OS.items():
if filename.startswith(prefix):
return os
for phdr in elf.program_headers:
buf = phdr.buf
NEEDLE_RT0 = b"/src/runtime/rt0_"
try:
index = buf.index(NEEDLE_RT0)
except ValueError:
continue
rest = buf[index + len(NEEDLE_RT0) : index + len(NEEDLE_RT0) + 32]
filename = rest.partition(b".s")[0].decode("utf-8")
logger.debug("go source: filename: /src/runtime/rt0_%s.s", filename)
# via: https://cs.opensource.google/go/go/+/master:src/runtime/;bpv=1;bpt=0
# candidates today:
# - aix_ppc64
# - android_386
# - android_amd64
# - android_arm
# - android_arm64
# - darwin_amd64
# - darwin_arm64
# - dragonfly_amd64
# - freebsd_386
# - freebsd_amd64
# - freebsd_arm
# - freebsd_arm64
# - freebsd_riscv64
# - illumos_amd64
# - ios_amd64
# - ios_arm64
# - js_wasm
# - linux_386
# - linux_amd64
# - linux_arm
# - linux_arm64
# - linux_loong64
# - linux_mips64x
# - linux_mipsx
# - linux_ppc64
# - linux_ppc64le
# - linux_riscv64
# - linux_s390x
# - netbsd_386
# - netbsd_amd64
# - netbsd_arm
# - netbsd_arm64
# - openbsd_386
# - openbsd_amd64
# - openbsd_arm
# - openbsd_arm64
# - openbsd_mips64
# - openbsd_ppc64
# - openbsd_riscv64
# - plan9_386
# - plan9_amd64
# - plan9_arm
# - solaris_amd64
# - wasip1_wasm
# - windows_386
# - windows_amd64
# - windows_arm
# - windows_arm64
RT0_FILENAME_TO_OS = {
"aix": OS.AIX,
"android": OS.ANDROID,
"dragonfly": OS.DRAGONFLYBSD,
"freebsd": OS.FREEBSD,
"illumos": OS.ILLUMOS,
"linux": OS.LINUX,
"netbsd": OS.NETBSD,
"openbsd": OS.OPENBSD,
"solaris": OS.SOLARIS,
}
for prefix, os in RT0_FILENAME_TO_OS.items():
if filename.startswith(prefix):
return os
return None
def guess_os_from_vdso_strings(elf: ELF) -> Optional[OS]:
"""
The "vDSO" (virtual dynamic shared object) is a small shared
library that the kernel automatically maps into the address space
of all user-space applications.
Some statically linked executables include small dynamic linker
routines that finds these vDSO symbols, using the ASCII
symbol name and version. We can therefore recognize the pairs
(symbol, version) to guess the binary targets Linux.
"""
for phdr in elf.program_headers:
buf = phdr.buf
# We don't really use the arch, but its interesting for documentation
# I suppose we could restrict the arch here to what's in the ELF header,
# but that's even more work. Let's see if this is sufficient.
for arch, symbol, version in (
# via: https://man7.org/linux/man-pages/man7/vdso.7.html
("arm", b"__vdso_gettimeofday", b"LINUX_2.6"),
("arm", b"__vdso_clock_gettime", b"LINUX_2.6"),
("aarch64", b"__kernel_rt_sigreturn", b"LINUX_2.6.39"),
("aarch64", b"__kernel_gettimeofday", b"LINUX_2.6.39"),
("aarch64", b"__kernel_clock_gettime", b"LINUX_2.6.39"),
("aarch64", b"__kernel_clock_getres", b"LINUX_2.6.39"),
("mips", b"__kernel_gettimeofday", b"LINUX_2.6"),
("mips", b"__kernel_clock_gettime", b"LINUX_2.6"),
("ia64", b"__kernel_sigtramp", b"LINUX_2.5"),
("ia64", b"__kernel_syscall_via_break", b"LINUX_2.5"),
("ia64", b"__kernel_syscall_via_epc", b"LINUX_2.5"),
("ppc/32", b"__kernel_clock_getres", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_clock_gettime", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_clock_gettime64", b"LINUX_5.11"),
("ppc/32", b"__kernel_datapage_offset", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_get_syscall_map", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_get_tbfreq", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_getcpu", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_gettimeofday", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_sigtramp_rt32", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_sigtramp32", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_sync_dicache", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_sync_dicache_p5", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_clock_getres", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_clock_gettime", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_datapage_offset", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_get_syscall_map", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_get_tbfreq", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_getcpu", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_gettimeofday", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_sigtramp_rt64", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_sync_dicache", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_sync_dicache_p5", b"LINUX_2.6.15"),
("riscv", b"__vdso_rt_sigreturn", b"LINUX_4.15"),
("riscv", b"__vdso_gettimeofday", b"LINUX_4.15"),
("riscv", b"__vdso_clock_gettime", b"LINUX_4.15"),
("riscv", b"__vdso_clock_getres", b"LINUX_4.15"),
("riscv", b"__vdso_getcpu", b"LINUX_4.15"),
("riscv", b"__vdso_flush_icache", b"LINUX_4.15"),
("s390", b"__kernel_clock_getres", b"LINUX_2.6.29"),
("s390", b"__kernel_clock_gettime", b"LINUX_2.6.29"),
("s390", b"__kernel_gettimeofday", b"LINUX_2.6.29"),
("superh", b"__kernel_rt_sigreturn", b"LINUX_2.6"),
("superh", b"__kernel_sigreturn", b"LINUX_2.6"),
("superh", b"__kernel_vsyscall", b"LINUX_2.6"),
("i386", b"__kernel_sigreturn", b"LINUX_2.5"),
("i386", b"__kernel_rt_sigreturn", b"LINUX_2.5"),
("i386", b"__kernel_vsyscall", b"LINUX_2.5"),
("i386", b"__vdso_clock_gettime", b"LINUX_2.6"),
("i386", b"__vdso_gettimeofday", b"LINUX_2.6"),
("i386", b"__vdso_time", b"LINUX_2.6"),
("x86-64", b"__vdso_clock_gettime", b"LINUX_2.6"),
("x86-64", b"__vdso_getcpu", b"LINUX_2.6"),
("x86-64", b"__vdso_gettimeofday", b"LINUX_2.6"),
("x86-64", b"__vdso_time", b"LINUX_2.6"),
("x86/32", b"__vdso_clock_gettime", b"LINUX_2.6"),
("x86/32", b"__vdso_getcpu", b"LINUX_2.6"),
("x86/32", b"__vdso_gettimeofday", b"LINUX_2.6"),
("x86/32", b"__vdso_time", b"LINUX_2.6"),
):
if symbol in buf and version in buf:
logger.debug("vdso string: %s %s %s", arch, symbol.decode("ascii"), version.decode("ascii"))
return OS.LINUX
return None
def detect_elf_os(f) -> str:
"""
f: type Union[BinaryIO, IDAIO, GHIDRAIO]
@@ -1523,27 +1023,6 @@ def detect_elf_os(f) -> str:
logger.warning("Error guessing OS from symbol table: %s", e)
symtab_guess = None
try:
goos_guess = guess_os_from_go_buildinfo(elf)
logger.debug("guess: Go buildinfo: %s", goos_guess)
except Exception as e:
logger.warning("Error guessing OS from Go buildinfo: %s", e)
goos_guess = None
try:
gosrc_guess = guess_os_from_go_source(elf)
logger.debug("guess: Go source: %s", gosrc_guess)
except Exception as e:
logger.warning("Error guessing OS from Go source path: %s", e)
gosrc_guess = None
try:
vdso_guess = guess_os_from_vdso_strings(elf)
logger.debug("guess: vdso strings: %s", vdso_guess)
except Exception as e:
logger.warning("Error guessing OS from vdso strings: %s", e)
symtab_guess = None
ret = None
if osabi_guess:
@@ -1567,24 +1046,11 @@ def detect_elf_os(f) -> str:
elif symtab_guess:
ret = symtab_guess
elif goos_guess:
ret = goos_guess
elif gosrc_guess:
# prefer goos_guess to this method,
# which is just string interpretation.
ret = gosrc_guess
elif ident_guess:
# at the bottom because we don't trust this too much
# due to potential for bugs with cross-compilation.
ret = ident_guess
elif vdso_guess:
# at the bottom because this is just scanning strings,
# which isn't very authoritative.
ret = vdso_guess
return ret.value if ret is not None else "unknown"

View File

@@ -10,7 +10,8 @@ import logging
from typing import Tuple, Iterator
from pathlib import Path
from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection
from elftools.elf.elffile import ELFFile, SymbolTableSection
from elftools.elf.relocation import RelocationSection
import capa.features.extractors.common
from capa.features.file import Export, Import, Section
@@ -46,37 +47,17 @@ def extract_file_export_names(elf: ELFFile, **kwargs):
yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)
for segment in elf.iter_segments():
if not isinstance(segment, DynamicSegment):
continue
logger.debug("Dynamic Segment contains %s symbols: ", segment.num_symbols())
for symbol in segment.iter_symbols():
# The following conditions are based on the following article
# http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
if not symbol.name:
continue
if symbol.entry.st_info.type not in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]:
continue
if symbol.entry.st_value == 0:
continue
if symbol.entry.st_shndx == "SHN_UNDEF":
continue
yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)
def extract_file_import_names(elf: ELFFile, **kwargs):
# Create a dictionary to store symbol names by their index
symbol_names = {}
# Extract symbol names and store them in the dictionary
for segment in elf.iter_segments():
if not isinstance(segment, DynamicSegment):
for section in elf.iter_sections():
if not isinstance(section, SymbolTableSection):
continue
for _, symbol in enumerate(segment.iter_symbols()):
for _, symbol in enumerate(section.iter_symbols()):
# The following conditions are based on the following article
# http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
if not symbol.name:
@@ -92,19 +73,21 @@ def extract_file_import_names(elf: ELFFile, **kwargs):
symbol_names[_] = symbol.name
for segment in elf.iter_segments():
if not isinstance(segment, DynamicSegment):
for section in elf.iter_sections():
if not isinstance(section, RelocationSection):
continue
relocation_tables = segment.get_relocation_tables()
logger.debug("Dynamic Segment contains %s relocation tables:", len(relocation_tables))
if section["sh_entsize"] == 0:
logger.debug("Symbol table '%s' has a sh_entsize of zero!", section.name)
continue
for relocation_table in relocation_tables.values():
for relocation in relocation_table.iter_relocations():
# Extract the symbol name from the symbol table using the symbol index in the relocation
if relocation["r_info_sym"] not in symbol_names:
continue
yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"])
logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_relocations())
for relocation in section.iter_relocations():
# Extract the symbol name from the symbol table using the symbol index in the relocation
if relocation["r_info_sym"] not in symbol_names:
continue
yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"])
def extract_file_section_names(elf: ELFFile, **kwargs):

View File

@@ -26,11 +26,13 @@ from capa.features.common import (
FORMAT_DOTNET,
FORMAT_FREEZE,
FORMAT_UNKNOWN,
FORMAT_BINEXPORT2,
Format,
)
EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
EXTENSIONS_BINEXPORT2 = ("BinExport", "BinExport2")
EXTENSIONS_DYNAMIC = ("json", "json_", "json.gz")
EXTENSIONS_ELF = "elf_"
EXTENSIONS_FREEZE = "frz"
@@ -105,15 +107,8 @@ def get_format_from_extension(sample: Path) -> str:
format_ = get_format_from_report(sample)
elif sample.name.endswith(EXTENSIONS_FREEZE):
format_ = FORMAT_FREEZE
return format_
def get_auto_format(path: Path) -> str:
format_ = get_format(path)
if format_ == FORMAT_UNKNOWN:
format_ = get_format_from_extension(path)
if format_ == FORMAT_UNKNOWN:
raise UnsupportedFormatError()
elif sample.name.endswith(EXTENSIONS_BINEXPORT2):
format_ = FORMAT_BINEXPORT2
return format_
@@ -136,6 +131,15 @@ def get_format(sample: Path) -> str:
return FORMAT_UNKNOWN
def get_auto_format(path: Path) -> str:
format_ = get_format(path)
if format_ == FORMAT_UNKNOWN:
format_ = get_format_from_extension(path)
if format_ == FORMAT_UNKNOWN:
raise UnsupportedFormatError()
return format_
@contextlib.contextmanager
def redirecting_print_to_tqdm(disable_progress):
"""

View File

@@ -8,7 +8,6 @@
import sys
import logging
import datetime
import contextlib
from typing import Set, Dict, List, Optional
from pathlib import Path
@@ -155,18 +154,6 @@ def get_workspace(path: Path, input_format: str, sigpaths: List[Path]):
viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths])
with contextlib.suppress(Exception):
# unfortuately viv raises a raw Exception (not any subclass).
# This happens when the module isn't found, such as with a viv upgrade.
#
# Remove the symbolic switch case solver.
# This is only enabled for ELF files, not PE files.
# During the following performance investigation, this analysis module
# had some terrible worst-case behavior.
# We can put up with slightly worse CFG reconstruction in order to avoid this.
# https://github.com/mandiant/capa/issues/1989#issuecomment-1948022767
vw.delFuncAnalysisModule("vivisect.analysis.generic.symswitchcase")
vw.analyze()
logger.debug("%s", get_meta_str(vw))

View File

@@ -5,7 +5,7 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
__version__ = "7.1.0"
__version__ = "7.0.1"
def get_major_version():

View File

@@ -91,12 +91,6 @@ For more details about creating and using virtual environments, check out the [v
##### Install development dependencies
When developing capa, please use the pinned dependencies found in `requirements.txt`.
This ensures that everyone has the exact same, reproducible environment.
Please install these dependencies before install capa (from source or from PyPI):
`$ pip install -r requirements.txt`
We use the following tools to ensure consistent code style and formatting:
- [black](https://github.com/psf/black) code formatter
- [isort](https://pypi.org/project/isort/) code formatter
@@ -107,8 +101,7 @@ We use the following tools to ensure consistent code style and formatting:
To install these development dependencies, run:
- `$ pip install -e /local/path/to/src[dev]` or
- `$ pip install -e /local/path/to/src[dev,scripts]` to also install all script dependencies
`$ pip install -e /local/path/to/src[dev]`
We use [pre-commit](https://pre-commit.com/) so that its trivial to run the same linters & configuration locally as in CI.

View File

@@ -32,76 +32,25 @@ classifiers = [
"Topic :: Security",
]
dependencies = [
# ---------------------------------------
# As a library, capa uses lower version bounds
# when specifying its dependencies. This lets
# other programs that use capa (and other libraries)
# to find a compatible set of dependency versions.
#
# We can optionally pin to specific versions or
# limit the upper bound when there's a good reason;
# but the default is to assume all greater versions
# probably work with capa until proven otherwise.
#
# The following link provides good background:
# https://iscinumpy.dev/post/bound-version-constraints/
#
# When we develop capa, and when we distribute it as
# a standalone binary, we'll use specific versions
# that are pinned in requirements.txt.
# But the requirements for a library are specified here
# and are looser.
#
# Related discussions:
#
# - https://github.com/mandiant/capa/issues/2053
# - https://github.com/mandiant/capa/pull/2059
# - https://github.com/mandiant/capa/pull/2079
#
# ---------------------------------------
# The following dependency versions were imported
# during June 2024 by truncating specific versions to
# their major-most version (major version when possible,
# or minor otherwise).
# As specific constraints are identified, please provide
# comments and context.
"tqdm>=4",
"pyyaml>=6",
"tabulate>=0.9",
"colorama>=0.4",
"termcolor>=2",
"wcwidth>=0.2",
"ida-settings>=2",
"ruamel.yaml>=0.18",
"pefile>=2023.2.7",
"pyelftools>=0.31",
"pydantic>=2",
"rich>=13",
"humanize>=4",
"protobuf>=5",
# ---------------------------------------
# Dependencies that we develop
#
# These dependencies are often actively influenced by capa,
# so we provide a minimum patch version that includes the
# latest bug fixes we need here.
"viv-utils[flirt]>=0.7.9",
"vivisect>=1.1.1",
"dncil>=1.0.2",
# ---------------------------------------
# Dependencies with version caps
#
# These dependencies must not exceed the version cap,
# typically due to dropping support for python releases
# we still support.
# TODO(williballenthin): networkx 3.2 doesn't support python 3.8 while capa does.
# https://github.com/mandiant/capa/issues/1966
"networkx>=3,<3.2",
"dnfile>=0.15.0",
"tqdm==4.66.4",
"pyyaml==6.0.1",
"tabulate==0.9.0",
"colorama==0.4.6",
"termcolor==2.4.0",
"wcwidth==0.2.13",
"ida-settings==2.1.0",
"viv-utils[flirt]==0.7.9",
"networkx==3.1",
"ruamel.yaml==0.18.6",
"vivisect==1.1.1",
"pefile==2023.2.7",
"pyelftools==0.31",
"dnfile==0.14.1",
"dncil==1.0.2",
"pydantic==2.7.1",
"rich==13.7.1",
"humanize==4.9.0",
"protobuf==5.27.0",
]
dynamic = ["version"]
@@ -114,19 +63,15 @@ namespaces = false
[project.optional-dependencies]
dev = [
# Dev and build dependencies are not relaxed because
# we want all developer environments to be consistent.
# These dependencies are not used in production environments
# and should not conflict with other libraries/tooling.
"pre-commit==3.5.0",
"pytest==8.0.0",
"pytest-sugar==1.0.0",
"pytest-instafail==0.5.0",
"pytest-cov==5.0.0",
"flake8==7.1.0",
"flake8==7.0.0",
"flake8-bugbear==24.4.26",
"flake8-encodings==0.5.1",
"flake8-comprehensions==3.15.0",
"flake8-comprehensions==3.14.0",
"flake8-logging-format==0.9.0",
"flake8-no-implicit-concat==0.3.5",
"flake8-print==5.0.0",
@@ -134,12 +79,14 @@ dev = [
"flake8-simplify==0.21.0",
"flake8-use-pathlib==0.3.0",
"flake8-copyright==0.2.4",
"ruff==0.5.0",
"ruff==0.4.7",
"black==24.4.2",
"isort==5.13.2",
"mypy==1.10.0",
"psutil==5.9.2",
"stix2==3.0.1",
"requests==2.31.0",
"mypy-protobuf==3.6.0",
"PyGithub==2.3.0",
# type stubs for mypy
"types-backports==0.1.3",
"types-colorama==0.4.15.11",
@@ -152,21 +99,10 @@ dev = [
"deptry==0.16.1"
]
build = [
# Dev and build dependencies are not relaxed because
# we want all developer environments to be consistent.
# These dependencies are not used in production environments
# and should not conflict with other libraries/tooling.
"pyinstaller==6.8.0",
"setuptools==70.0.0",
"pyinstaller==6.7.0",
"setuptools==69.5.1",
"build==1.2.1"
]
scripts = [
"jschema_to_python==1.2.3",
"psutil==6.0.0",
"stix2==3.0.1",
"sarif_om==1.0.4",
"requests==2.32.3",
]
[tool.deptry]
extend_exclude = [
@@ -216,7 +152,6 @@ DEP002 = [
"mypy",
"mypy-protobuf",
"pre-commit",
"PyGithub",
"pyinstaller",
"pytest",
"pytest-cov",
@@ -240,9 +175,6 @@ DEP003 = [
"typing_extensions" # TODO(s-ff): remove when Python 3.9 is deprecated, see #1699
]
[tool.deptry.package_module_name_map]
PyGithub = "github"
[project.urls]
Homepage = "https://github.com/mandiant/capa"
Repository = "https://github.com/mandiant/capa.git"

View File

@@ -1,46 +0,0 @@
# Dependencies with specific version constraints
# used during development and building the standalone executables.
# For these environments, use `pip install -r requirements.txt`
# before installing capa from source/pypi. This will ensure
# the following specific versions are used.
#
# Initially generated via: pip freeze | grep -v -- "-e"
# Kept up to date by dependabot.
annotated-types==0.7.0
colorama==0.4.6
cxxfilt==0.2.2
dncil==1.0.2
dnfile==0.15.0
funcy==2.0
humanize==4.9.0
ida-netnode==3.0
ida-settings==2.1.0
intervaltree==3.1.0
markdown-it-py==3.0.0
mdurl==0.1.2
msgpack==1.0.8
networkx==3.1
pefile==2023.2.7
pip==24.0
protobuf==5.27.1
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycparser==2.22
pydantic==2.7.3
pydantic-core==2.18.4
pyelftools==0.31
pygments==2.18.0
python-flirt==0.8.10
pyyaml==6.0.1
rich==13.7.1
ruamel-yaml==0.18.6
ruamel-yaml-clib==0.2.8
setuptools==70.0.0
six==1.16.0
sortedcontainers==2.4.0
tabulate==0.9.0
termcolor==2.4.0
tqdm==4.66.4
viv-utils==0.7.11
vivisect==1.1.1
wcwidth==0.2.13

2
rules

Submodule rules updated: e63c454fbb...9e0ffdf7c5

View File

@@ -1,358 +0,0 @@
# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
"""
Convert capa json output to sarif schema
usage: capa2sarif.py [-h] [-g] [-r] [-t TAG] [--version] capa_output
Capa to SARIF analysis file
positional arguments:
capa_output Path to capa JSON output file
optional arguments:
-h, --help show this help message and exit
--version show program's version number and exit
-t TAG, --tag TAG filter on rule meta field values (ruleid)
Requires:
- sarif_om 1.0.4
- jschema_to_python 1.2.3
"""
import sys
import json
import logging
import argparse
from typing import List, Optional
from pathlib import Path
from capa.version import __version__
logger = logging.getLogger("capa2sarif")
# Dependencies
try:
from sarif_om import Run, Tool, SarifLog, ToolComponent
except ImportError as e:
logger.error(
"Required import `sarif_om` is not installed. This is solved by installing `python3 -m pip install sarif_om>=1.0.4`. %s",
e,
)
exit(-4)
try:
from jschema_to_python.to_json import to_json
except ImportError as e:
logger.error(
"Required import `jschema_to_python` is not installed. This is solved by installing `python3 -m pip install jschema_to_python>=1.2.3`, %s",
e,
)
exit(-4)
def _parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Capa to SARIF analysis file")
# Positional argument
parser.add_argument("capa_output", help="Path to capa JSON output file")
# Optional arguments
parser.add_argument(
"-g",
"--ghidra-compat",
action="store_true",
help="Compatibility for Ghidra 11.0.X",
)
parser.add_argument(
"-r",
"--radare-compat",
action="store_true",
help="Compatibility for Radare r2sarif plugin v2.0",
)
parser.add_argument("-t", "--tag", help="Filter on rule meta field values (ruleid)")
parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
return parser.parse_args()
def main() -> int:
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
args = _parse_args()
try:
with Path(args.capa_output).open() as capa_output:
json_data = json.load(capa_output)
except ValueError:
logger.error("Input data was not valid JSON, input should be a capa json output file.")
return -1
except json.JSONDecodeError:
# An exception has occured
logger.error("Input data was not valid JSON, input should be a capa json output file.")
return -2
# Marshall json into Sarif
# Create baseline sarif structure to be populated from json data
sarif_structure: Optional[dict] = _sarif_boilerplate(json_data["meta"], json_data["rules"])
if sarif_structure is None:
logger.error("An Error has occured creating default sarif structure.")
return -3
_populate_artifact(sarif_structure, json_data["meta"])
_populate_invocations(sarif_structure, json_data["meta"])
_populate_results(sarif_structure, json_data["rules"], args.ghidra_compat)
if args.ghidra_compat:
# Ghidra can't handle this structure as of 11.0.x
if "invocations" in sarif_structure["runs"][0]:
del sarif_structure["runs"][0]["invocations"]
# artifacts must include a description as well with a text field.
if "artifacts" in sarif_structure["runs"][0]:
sarif_structure["runs"][0]["artifacts"][0]["description"] = {"text": "placeholder"}
# For better compliance with Ghidra table. Iteraction through properties['additionalProperties']
"""
"additionalProperties": {
"to": "<hex addr>",
"offset": 0,
"primary": true,
"index": <>"",
"kind": "<kind column value>",
"opIndex": 0,
"sourceType": ""
}
"""
if args.radare_compat:
# Add just enough for passing tests
_add_filler_optional(json_data, sarif_structure)
print(json.dumps(sarif_structure, indent=4)) # noqa: T201
return 0
def _sarif_boilerplate(data_meta: dict, data_rules: dict) -> Optional[dict]:
# Only track rules that appear in this log, not full 1k
rules = []
# Parse rules from parsed sarif structure
for key in data_rules:
# Use attack as default, if both exist then only use attack, if neither exist use the name of rule for ruleID
# this is not good practice to use long name for ruleID
attack_length = len(data_rules[key]["meta"]["attack"])
mbc_length = len(data_rules[key]["meta"]["mbc"])
if attack_length or mbc_length:
id = (
data_rules[key]["meta"]["attack"][0]["id"]
if attack_length > 0
else data_rules[key]["meta"]["mbc"][0]["id"]
)
else:
id = data_rules[key]["meta"]["name"]
# Append current rule
rules.append(
{
# Default to attack identifier, fall back to MBC, mainly relevant if both are present
"id": id,
"name": data_rules[key]["meta"]["name"],
"shortDescription": {"text": data_rules[key]["meta"]["name"]},
"messageStrings": {"default": {"text": data_rules[key]["meta"]["name"]}},
"properties": {
"namespace": data_rules[key]["meta"]["namespace"] if "namespace" in data_rules[key]["meta"] else [],
"scopes": data_rules[key]["meta"]["scopes"],
"references": data_rules[key]["meta"]["references"],
"lib": data_rules[key]["meta"]["lib"],
},
}
)
tool = Tool(
driver=ToolComponent(
name="Capa",
version=__version__,
information_uri="https://github.com/mandiant/capa",
rules=rules,
)
)
# Create a SARIF Log object, populate with a single run
sarif_log = SarifLog(
version="2.1.0",
schema_uri="https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json",
runs=[Run(tool=tool, results=[], artifacts=[], invocations=[])],
)
# Convert the SARIF log to a dictionary and then to a JSON string
try:
sarif_outline = json.loads(to_json(sarif_log))
except json.JSONDecodeError:
# An exception has occured
return None
return sarif_outline
def _populate_artifact(sarif_log: dict, meta_data: dict) -> None:
"""
@param sarif_log: dict - sarif data structure including runs
@param meta_data: dict - Capa meta output
@returns None, updates sarif_log via side-effects
"""
sample = meta_data["sample"]
artifact = {
"location": {"uri": sample["path"]},
"roles": ["analysisTarget"],
"hashes": {
"md5": sample["md5"],
"sha-1": sample["sha1"],
"sha-256": sample["sha256"],
},
}
sarif_log["runs"][0]["artifacts"].append(artifact)
def _populate_invocations(sarif_log: dict, meta_data: dict) -> None:
"""
@param sarif_log: dict - sarif data structure including runs
@param meta_data: dict - Capa meta output
@returns None, updates sarif_log via side-effects
"""
analysis_time = meta_data["timestamp"]
argv = meta_data["argv"]
analysis = meta_data["analysis"]
invoke = {
"commandLine": "capa " + " ".join(argv),
"arguments": argv if len(argv) > 0 else [],
# Format in Zulu time, this may require a conversion from local timezone
"endTimeUtc": f"{analysis_time}Z",
"executionSuccessful": True,
"properties": {
"format": analysis["format"],
"arch": analysis["arch"],
"os": analysis["os"],
"extractor": analysis["extractor"],
"rule_location": analysis["rules"],
"base_address": analysis["base_address"],
},
}
sarif_log["runs"][0]["invocations"].append(invoke)
def _enumerate_evidence(node: dict, related_count: int) -> List[dict]:
related_locations = []
if node.get("success") and node.get("node", {}).get("type") != "statement":
label = ""
if node.get("node", {}).get("type") == "feature":
if node.get("node", {}).get("feature", {}).get("type") == "api":
label = "api: " + node.get("node", {}).get("feature", {}).get("api")
elif node.get("node", {}).get("feature", {}).get("type") == "match":
label = "match: " + node.get("node", {}).get("feature", {}).get("match")
elif node.get("node", {}).get("feature", {}).get("type") == "number":
label = f"number: {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('number')})"
elif node.get("node", {}).get("feature", {}).get("type") == "offset":
label = f"offset: {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('offset')})"
elif node.get("node", {}).get("feature", {}).get("type") == "mnemonic":
label = f"mnemonic: {node.get('node', {}).get('feature', {}).get('mnemonic')}"
elif node.get("node", {}).get("feature", {}).get("type") == "characteristic":
label = f"characteristic: {node.get('node', {}).get('feature', {}).get('characteristic')}"
elif node.get("node", {}).get("feature", {}).get("type") == "os":
label = f"os: {node.get('node', {}).get('feature', {}).get('os')}"
elif node.get("node", {}).get("feature", {}).get("type") == "operand number":
label = f"operand: ({node.get('node', {}).get('feature', {}).get('index')} ) {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('operand_number')})"
else:
logger.error(
"Not implemented %s",
node.get("node", {}).get("feature", {}).get("type"),
)
return []
else:
logger.error("Not implemented %s", node.get("node", {}).get("type"))
return []
for loc in node.get("locations", []):
if loc["type"] != "absolute":
continue
related_locations.append(
{
"id": related_count,
"message": {"text": label},
"physicalLocation": {"address": {"absoluteAddress": loc["value"]}},
}
)
related_count += 1
if node.get("success") and node.get("node", {}).get("type") == "statement":
for child in node.get("children", []):
related_locations += _enumerate_evidence(child, related_count)
return related_locations
def _populate_results(sarif_log: dict, data_rules: dict, ghidra_compat: bool) -> None:
"""
@param sarif_log: dict - sarif data structure including runs
@param meta_data: dict - Capa meta output
@returns None, updates sarif_log via side-effects
"""
results = sarif_log["runs"][0]["results"]
# Parse rules from parsed sarif structure
for key in data_rules:
# Use attack as default, if both exist then only use attack, if neither exist use the name of rule for ruleID
# this is not good practice to use long name for ruleID.
attack_length = len(data_rules[key]["meta"]["attack"])
mbc_length = len(data_rules[key]["meta"]["mbc"])
if attack_length or mbc_length:
id = (
data_rules[key]["meta"]["attack"][0]["id"]
if attack_length > 0
else data_rules[key]["meta"]["mbc"][0]["id"]
)
else:
id = data_rules[key]["meta"]["name"]
for address, details in data_rules[key]["matches"]:
related_cnt = 0
related_locations = _enumerate_evidence(details, related_cnt)
res = {
"ruleId": id,
"level": "none" if not ghidra_compat else "NONE",
"message": {"text": data_rules[key]["meta"]["name"]},
"kind": "informational" if not ghidra_compat else "INFORMATIONAL",
"locations": [
{
"physicalLocation": {
"address": {
"absoluteAddress": address["value"],
}
},
}
],
}
if not ghidra_compat:
res["relatedLocations"] = related_locations
results.append(res)
def _add_filler_optional(capa_result: dict, sarif_log: dict) -> None:
"""Update sarif file with just enough fields to pass radare tests"""
base_address = capa_result["meta"]["analysis"]["base_address"]["value"]
# Assume there is only one run, and one binary artifact
artifact = sarif_log["runs"][0]["artifacts"][0]
if "properties" not in artifact:
artifact["properties"] = {}
if "additionalProperties" not in artifact["properties"]:
artifact["properties"]["additionalProperties"] = {}
if "imageBase" not in artifact["properties"]["additionalProperties"]:
artifact["properties"]["additionalProperties"]["imageBase"] = base_address
if __name__ == "__main__":
sys.exit(main())

View File

@@ -69,8 +69,7 @@ def load_analysis(bv):
return 0
binaryninja.log_info(f"Using capa file {path}")
with Path(path).open("r", encoding="utf-8") as file:
doc = json.load(file)
doc = json.loads(path.read_bytes().decode("utf-8"))
if "meta" not in doc or "rules" not in doc:
binaryninja.log_error("doesn't appear to be a capa report")
@@ -84,35 +83,20 @@ def load_analysis(bv):
binaryninja.log_error("sample mismatch")
return -2
# Retreive base address
capa_base_address = 0
if "analysis" in doc["meta"] and "base_address" in doc["meta"]["analysis"]:
if doc["meta"]["analysis"]["base_address"]["type"] == "absolute":
capa_base_address = int(doc["meta"]["analysis"]["base_address"]["value"])
rows = []
for rule in doc["rules"].values():
if rule["meta"].get("lib"):
continue
if rule["meta"].get("capa/subscope"):
continue
if rule["meta"]["scopes"].get("static") != "function":
if rule["meta"]["scope"] != "function":
continue
name = rule["meta"]["name"]
ns = rule["meta"].get("namespace", "")
for matches in rule["matches"]:
for match in matches:
if "type" not in match.keys():
continue
if "value" not in match.keys():
continue
va = match["value"]
# Substract va and CAPA base_address
va = int(va) - capa_base_address
# Add binja base address
va = va + bv.start
rows.append((ns, name, va))
for va in rule["matches"].keys():
va = int(va)
rows.append((ns, name, va))
# order by (namespace, name) so that like things show up together
rows = sorted(rows)

View File

@@ -171,8 +171,8 @@ def print_dynamic_analysis(extractor: DynamicFeatureExtractor, args):
process_handles = tuple(extractor.get_processes())
if args.process:
process_handles = tuple(filter(lambda ph: extractor.get_process_name(ph) == args.process, process_handles))
if args.process not in [extractor.get_process_name(ph) for ph in process_handles]:
process_handles = tuple(filter(lambda ph: ph.inner["name"] == args.process, process_handles))
if args.process not in [ph.inner["name"] for ph in args.process]:
print(f"{args.process} not a process")
return -1
@@ -227,13 +227,13 @@ def print_static_features(functions, extractor: StaticFeatureExtractor):
def print_dynamic_features(processes, extractor: DynamicFeatureExtractor):
for p in processes:
print(f"proc: {extractor.get_process_name(p)} (ppid={p.address.ppid}, pid={p.address.pid})")
print(f"proc: {p.inner.process_name} (ppid={p.address.ppid}, pid={p.address.pid})")
for feature, addr in extractor.extract_process_features(p):
if is_global_feature(feature):
continue
print(f" proc: {extractor.get_process_name(p)}: {feature}")
print(f" proc: {p.inner.process_name}: {feature}")
for t in extractor.get_threads(p):
print(f" thread: {t.address.tid}")

View File

@@ -389,8 +389,6 @@ def get_data_path_by_name(name) -> Path:
return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
elif name.startswith("1038a2"):
return CD / "data" / "1038a23daad86042c66bfe6c9d052d27048de9653bde5750dc0f240c792d9ac8.elf_"
elif name.startswith("3da7c"):
return CD / "data" / "3da7c2c70a2d93ac4643f20339d5c7d61388bddd77a4a5fd732311efad78e535.elf_"
elif name.startswith("nested_typedef"):
return CD / "data" / "dotnet" / "dd9098ff91717f4906afe9dafdfa2f52.exe_"
elif name.startswith("nested_typeref"):

View File

@@ -14,11 +14,17 @@ from capa.features.extractors.elffile import extract_file_export_names, extract_
CD = Path(__file__).resolve().parent
SAMPLE_PATH = CD / "data" / "055da8e6ccfe5a9380231ea04b850e18.elf_"
STRIPPED_SAMPLE_PATH = CD / "data" / "bb38149ff4b5c95722b83f24ca27a42b.elf_"
def check_import_features(sample_path, expected_imports):
path = Path(sample_path)
def test_elffile_import_features():
expected_imports = [
"memfrob",
"puts",
"__libc_start_main",
"malloc",
"__cxa_finalize",
]
path = Path(SAMPLE_PATH)
elf = ELFFile(io.BytesIO(path.read_bytes()))
# Extract imports
imports = list(extract_file_import_names(elf))
@@ -34,52 +40,6 @@ def check_import_features(sample_path, expected_imports):
assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in imports."
def check_export_features(sample_path, expected_exports):
path = Path(sample_path)
elf = ELFFile(io.BytesIO(path.read_bytes()))
# Extract imports
exports = list(extract_file_export_names(elf))
# Verify that at least one export was found
assert len(exports) > 0, "No exports were found."
# Extract the symbol names from the extracted imports
extracted_symbol_names = [exported[0].value for exported in exports]
# Check if all expected symbol names are found
for symbol_name in expected_exports:
assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports."
def test_stripped_elffile_import_features():
expected_imports = ["__cxa_atexit", "__cxa_finalize", "__stack_chk_fail", "fclose", "fopen", "__android_log_print"]
check_import_features(STRIPPED_SAMPLE_PATH, expected_imports)
def test_stripped_elffile_export_features():
expected_exports = [
"_ZN7_JNIEnv14GetArrayLengthEP7_jarray",
"Java_o_ac_a",
"Java_o_ac_b",
"_Z6existsPKc",
"_ZN7_JNIEnv17GetStringUTFCharsEP8_jstringPh",
"_ZN7_JNIEnv21GetObjectArrayElementEP13_jobjectArrayi",
"_ZN7_JNIEnv21ReleaseStringUTFCharsEP8_jstringPKc",
]
check_export_features(STRIPPED_SAMPLE_PATH, expected_exports)
def test_elffile_import_features():
expected_imports = [
"memfrob",
"puts",
"__libc_start_main",
"malloc",
"__cxa_finalize",
]
check_import_features(SAMPLE_PATH, expected_imports)
def test_elffile_export_features():
expected_exports = [
"deregister_tm_clones",
@@ -95,4 +55,17 @@ def test_elffile_export_features():
"_IO_stdin_used",
"__libc_csu_init",
]
check_export_features(SAMPLE_PATH, expected_exports)
path = Path(SAMPLE_PATH)
elf = ELFFile(io.BytesIO(path.read_bytes()))
# Extract imports
exports = list(extract_file_export_names(elf))
# Verify that at least one export was found
assert len(exports) > 0, "No exports were found."
# Extract the symbol names from the extracted imports
extracted_symbol_names = [exported[0].value for exported in exports]
# Check if all expected symbol names are found
for symbol_name in expected_exports:
assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports."

View File

@@ -92,12 +92,6 @@ def test_elf_android_notes():
assert capa.features.extractors.elf.detect_elf_os(f) == "android"
def test_elf_go_buildinfo():
path = get_data_path_by_name("3da7c")
with Path(path).open("rb") as f:
assert capa.features.extractors.elf.detect_elf_os(f) == "linux"
def test_elf_parse_capa_pyinstaller_header():
# error after misidentified large pydata section with address 0; fixed in #1454
# compressed ELF header of capa-v5.1.0-linux

View File

@@ -23,21 +23,10 @@ def get_script_path(s: str):
return str(CD / ".." / "scripts" / s)
def get_binary_file_path():
def get_file_path():
return str(CD / "data" / "9324d1a8ae37a36ae560c37448c9705a.exe_")
def get_report_file_path():
return str(
CD
/ "data"
/ "dynamic"
/ "cape"
/ "v2.4"
/ "fb7ade52dc5a1d6128b9c217114a46d0089147610f99f5122face29e429a1e74.json.gz"
)
def get_rules_path():
return str(CD / ".." / "rules")
@@ -51,21 +40,16 @@ def get_rule_path():
[
pytest.param("capa2yara.py", [get_rules_path()]),
pytest.param("capafmt.py", [get_rule_path()]),
pytest.param(
"capa2sarif.py",
[Path(__file__).resolve().parent / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json"],
),
# testing some variations of linter script
pytest.param("lint.py", ["-t", "create directory", get_rules_path()]),
# `create directory` rule has native and .NET example PEs
pytest.param("lint.py", ["--thorough", "-t", "create directory", get_rules_path()]),
pytest.param("match-function-id.py", [get_binary_file_path()]),
pytest.param("show-capabilities-by-function.py", [get_binary_file_path()]),
pytest.param("show-features.py", [get_binary_file_path()]),
pytest.param("show-features.py", ["-F", "0x407970", get_binary_file_path()]),
pytest.param("show-features.py", ["-P", "MicrosoftEdgeUpdate.exe", get_report_file_path()]),
pytest.param("show-unused-features.py", [get_binary_file_path()]),
pytest.param("capa_as_library.py", [get_binary_file_path()]),
pytest.param("match-function-id.py", [get_file_path()]),
pytest.param("show-capabilities-by-function.py", [get_file_path()]),
pytest.param("show-features.py", [get_file_path()]),
pytest.param("show-features.py", ["-F", "0x407970", get_file_path()]),
pytest.param("show-unused-features.py", [get_file_path()]),
pytest.param("capa_as_library.py", [get_file_path()]),
],
)
def test_scripts(script, args):