mirror of https://github.com/mandiant/capa.git (synced 2025-12-22 07:10:29 -08:00)
resolve merge conflict

.github/workflows/build.yml (vendored), 9 lines changed:

@@ -15,7 +15,7 @@ jobs:
       fail-fast: true
       matrix:
         include:
-          - os: ubuntu-18.04
+          - os: ubuntu-20.04
             # use old linux so that the shared library versioning is more portable
             artifact_name: capa
             asset_name: linux
@@ -36,7 +36,7 @@ jobs:
         uses: actions/setup-python@d27e3f3d7c64b4bbf8e4abfb9b63b83e846e0435 # v4.5.0
         with:
           python-version: 3.8
-      - if: matrix.os == 'ubuntu-18.04'
+      - if: matrix.os == 'ubuntu-20.04'
         run: sudo apt-get install -y libyaml-dev
       - name: Upgrade pip, setuptools
         run: python -m pip install --upgrade pip setuptools
@@ -65,10 +65,7 @@ jobs:
       matrix:
         include:
           # OSs not already tested above
-          - os: ubuntu-18.04
-            artifact_name: capa
-            asset_name: linux
-          - os: ubuntu-20.04
+          - os: ubuntu-22.04
             artifact_name: capa
             asset_name: linux
           - os: windows-2022

.github/workflows/tests.yml (vendored), 3 lines changed:

@@ -76,6 +76,8 @@ jobs:
           python-version: "3.8"
         - os: ubuntu-20.04
           python-version: "3.9"
+        - os: ubuntu-20.04
+          python-version: "3.10"
     steps:
     - name: Checkout capa with submodules
       uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0
@@ -96,6 +98,7 @@ jobs:
   binja-tests:
     name: Binary Ninja tests for ${{ matrix.python-version }} on ${{ matrix.os }}
     runs-on: ubuntu-20.04
+    needs: [code_style, rule_linter]
    strategy:
       fail-fast: false
       matrix:

CHANGELOG.md, 10 lines changed:

@@ -4,13 +4,14 @@

 ### New Features
 - add protobuf format for result documents #1219 @williballenthin @mr-tz
-
 - extractor: add Binary Ninja feature extractor @xusheng6
 - new cli flag `--os` to override auto-detected operating system for a sample @captainGeech42
+- Change colour/highlight to "cyan" instead of "blue" for easy noticing. #1384 @ggold7046
+- add new format to parse output json back to capa #1396 @ooprathamm

 ### Breaking Changes

-### New Rules (22)
+### New Rules (26)

 - persistence/scheduled-tasks/schedule-task-via-at joren485
 - data-manipulation/prng/generate-random-numbers-via-rtlgenrandom william.ballenthin@mandiant.com
@@ -34,9 +35,14 @@
 - nursery/set-web-proxy-in-dotnet michael.hunhoff@mandiant.com
 - nursery/check-for-windows-sandbox-via-subdirectory echernofsky@google.com
 - nursery/enumerate-pe-sections-in-dotnet @mr-tz
+- nursery/destroy-software-breakpoint-capability echernofsky@google.com
+- nursery/send-data-to-internet michael.hunhoff@mandiant.com
+- nursery/compiled-with-cx_freeze @mr-tz
+- nursery/contain-a-thread-local-storage-tls-section-in-dotnet michael.hunhoff@mandiant.com
 -

 ### Bug Fixes
+- extractor: removed '.dynsym' as the library name for ELF imports #1318 @stevemk14ebr
 - extractor: fix vivisect loop detection corner case #1310 @mr-tz
 - match: extend OS characteristic to match OS_ANY to all supported OSes #1324 @mike-hunhoff
 - extractor: fix IDA and vivisect string and bytes features overlap and tests #1327 #1336 @xusheng6

README.md:

@@ -2,7 +2,7 @@

 [](https://pypi.org/project/flare-capa)
 [](https://github.com/mandiant/capa/releases)
 [](https://github.com/mandiant/capa-rules)
 [](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
 [](https://github.com/mandiant/capa/releases)
 [](LICENSE.txt)

capa/features/common.py:

@@ -450,6 +450,7 @@ FORMAT_AUTO = "auto"
 FORMAT_SC32 = "sc32"
 FORMAT_SC64 = "sc64"
 FORMAT_FREEZE = "freeze"
+FORMAT_RESULT = "result"
 FORMAT_UNKNOWN = "unknown"


capa/features/extractors/binja/file.py:

@@ -103,7 +103,7 @@ def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:

         ordinal = sym.ordinal
         if ordinal != 0 and (lib_name != ""):
-            ordinal_name = "#%d" % (ordinal)
+            ordinal_name = f"#{ordinal}"
             for name in capa.features.extractors.helpers.generate_symbols(lib_name, ordinal_name):
                 yield Import(name), addr

@@ -147,7 +147,7 @@ def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
         # no file type to return when processing a binary file, but we want to continue processing
         return
     else:
-        raise NotImplementedError("unexpected file format: %d" % view_type)
+        raise NotImplementedError(f"unexpected file format: {view_type}")


 def extract_features(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:

capa/features/extractors/binja/find_binja_api.py:

@@ -26,7 +26,7 @@ if spec is not None:


 def find_binja_path() -> str:
-    raw_output = subprocess.check_output(["python", "-c", "%s" % code]).decode("ascii").strip()
+    raw_output = subprocess.check_output(["python", "-c", code]).decode("ascii").strip()
     return bytes.fromhex(raw_output).decode("utf8")

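Note: the fix above drops a pointless "%s" interpolation; subprocess passes each argv element verbatim, with no shell quoting involved. A minimal standalone sketch of the same pattern (the snippet and path are hypothetical, and `python` must be on PATH):

    import subprocess

    # the child prints a hex-encoded path, as find_binja_path() expects
    code = "print(bytes('/opt/binaryninja', 'utf8').hex())"
    raw_output = subprocess.check_output(["python", "-c", code]).decode("ascii").strip()
    print(bytes.fromhex(raw_output).decode("utf8"))  # -> /opt/binaryninja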

capa/features/extractors/common.py:

@@ -12,11 +12,14 @@ import capa.features.extractors.pefile
 import capa.features.extractors.strings
 from capa.features.common import (
     OS,
+    OS_ANY,
     OS_AUTO,
+    ARCH_ANY,
     FORMAT_PE,
     FORMAT_ELF,
     OS_WINDOWS,
     FORMAT_FREEZE,
+    FORMAT_RESULT,
     Arch,
     Format,
     String,
@@ -27,6 +30,11 @@ from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress

 logger = logging.getLogger(__name__)

+# match strings for formats
+MATCH_PE = b"MZ"
+MATCH_ELF = b"\x7fELF"
+MATCH_RESULT = b'{"meta":'
+

 def extract_file_strings(buf, **kwargs) -> Iterator[Tuple[String, Address]]:
     """
@@ -40,12 +48,14 @@ def extract_file_strings(buf, **kwargs) -> Iterator[Tuple[String, Address]]:


 def extract_format(buf) -> Iterator[Tuple[Feature, Address]]:
-    if buf.startswith(b"MZ"):
+    if buf.startswith(MATCH_PE):
         yield Format(FORMAT_PE), NO_ADDRESS
-    elif buf.startswith(b"\x7fELF"):
+    elif buf.startswith(MATCH_ELF):
         yield Format(FORMAT_ELF), NO_ADDRESS
     elif is_freeze(buf):
         yield Format(FORMAT_FREEZE), NO_ADDRESS
+    elif buf.startswith(MATCH_RESULT):
+        yield Format(FORMAT_RESULT), NO_ADDRESS
     else:
         # we likely end up here:
         #  1. handling a file format (e.g. macho)
@@ -56,10 +66,13 @@ def extract_format(buf) -> Iterator[Tuple[Feature, Address]]:


 def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
-    if buf.startswith(b"MZ"):
+    if buf.startswith(MATCH_PE):
         yield from capa.features.extractors.pefile.extract_file_arch(pe=pefile.PE(data=buf))

-    elif buf.startswith(b"\x7fELF"):
+    elif buf.startswith(MATCH_RESULT):
+        yield Arch(ARCH_ANY), NO_ADDRESS
+
+    elif buf.startswith(MATCH_ELF):
         with contextlib.closing(io.BytesIO(buf)) as f:
             arch = capa.features.extractors.elf.detect_elf_arch(f)

@@ -88,9 +101,11 @@ def extract_os(buf, os=OS_AUTO) -> Iterator[Tuple[Feature, Address]]:
     if os != OS_AUTO:
         yield OS(os), NO_ADDRESS

-    if buf.startswith(b"MZ"):
+    if buf.startswith(MATCH_PE):
         yield OS(OS_WINDOWS), NO_ADDRESS
-    elif buf.startswith(b"\x7fELF"):
+    elif buf.startswith(MATCH_RESULT):
+        yield OS(OS_ANY), NO_ADDRESS
+    elif buf.startswith(MATCH_ELF):
         with contextlib.closing(io.BytesIO(buf)) as f:
             os = capa.features.extractors.elf.detect_elf_os(f)

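Note: pulling the magic bytes into named constants lets the same prefixes drive extract_format, extract_arch, and extract_os. A self-contained sketch of the sniffing logic (constant values copied from the diff above):

    MATCH_PE = b"MZ"
    MATCH_ELF = b"\x7fELF"
    MATCH_RESULT = b'{"meta":'

    def sniff_format(buf: bytes) -> str:
        # compare the buffer prefix against each known magic value
        if buf.startswith(MATCH_PE):
            return "pe"
        elif buf.startswith(MATCH_ELF):
            return "elf"
        elif buf.startswith(MATCH_RESULT):
            return "result"
        return "unknown"

    assert sniff_format(b"MZ\x90\x00") == "pe"
    assert sniff_format(b'{"meta": {}}') == "result"
    # pretty-printed JSON does not begin with the exact prefix b'{"meta":'
    assert sniff_format(b'{\n  "meta": {}}') == "unknown"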

capa/features/extractors/elf.py:

@@ -88,6 +88,7 @@ class Shdr:
     offset: int
     size: int
     link: int
+    entsize: int
     buf: bytes


@@ -320,12 +321,12 @@ class ELF:
         shent = self.shbuf[shent_offset : shent_offset + self.e_shentsize]

         if self.bitness == 32:
-            sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link = struct.unpack_from(
-                self.endian + "IIIIIII", shent, 0x0
+            sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link, _, _, sh_entsize = struct.unpack_from(
+                self.endian + "IIIIIIIIII", shent, 0x0
             )
         elif self.bitness == 64:
-            sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link = struct.unpack_from(
-                self.endian + "IIQQQQI", shent, 0x0
+            sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link, _, _, sh_entsize = struct.unpack_from(
+                self.endian + "IIQQQQIIQQ", shent, 0x0
             )
         else:
             raise NotImplementedError()
@@ -337,7 +338,7 @@ class ELF:
         if len(buf) != sh_size:
             raise ValueError("failed to read section header content")

-        return Shdr(sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link, buf)
+        return Shdr(sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link, sh_entsize, buf)

     @property
     def section_headers(self):
@@ -502,6 +503,23 @@ class ELF:

             yield read_cstr(strtab, d_val)

+    @property
+    def symtab(self) -> Optional[Tuple[Shdr, Shdr]]:
+        """
+        fetch the Shdr for the symtab and the associated strtab.
+        """
+        SHT_SYMTAB = 0x2
+        for shdr in self.section_headers:
+            if shdr.type != SHT_SYMTAB:
+                continue
+
+            # the linked section contains strings referenced by the symtab structures.
+            strtab_shdr = self.parse_section_header(shdr.link)
+
+            return shdr, strtab_shdr
+
+        return None
+

 @dataclass
 class ABITag:
@@ -603,11 +621,76 @@ class SHNote:
         return ABITag(os, kmajor, kminor, kpatch)


-def guess_os_from_osabi(elf) -> Optional[OS]:
+@dataclass
+class Symbol:
+    name_offset: int
+    value: int
+    size: int
+    info: int
+    other: int
+    shndx: int
+
+
+class SymTab:
+    def __init__(
+        self,
+        endian: str,
+        bitness: int,
+        symtab: Shdr,
+        strtab: Shdr,
+    ) -> None:
+        self.symbols: List[Symbol] = []
+
+        self.symtab = symtab
+        self.strtab = strtab
+
+        self._parse(endian, bitness, symtab.buf)
+
+    def _parse(self, endian: str, bitness: int, symtab_buf: bytes) -> None:
+        """
+        return the symbol's information in
+        the order specified by sys/elf32.h
+        """
+        for i in range(int(len(self.symtab.buf) / self.symtab.entsize)):
+            if bitness == 32:
+                name_offset, value, size, info, other, shndx = struct.unpack_from(
+                    endian + "IIIBBH", symtab_buf, i * self.symtab.entsize
+                )
+            elif bitness == 64:
+                name_offset, info, other, shndx, value, size = struct.unpack_from(
+                    endian + "IBBBQQ", symtab_buf, i * self.symtab.entsize
+                )
+
+            self.symbols.append(Symbol(name_offset, value, size, info, other, shndx))
+
+    def get_name(self, symbol: Symbol) -> str:
+        """
+        fetch a symbol's name from symtab's
+        associated strings' section (SHT_STRTAB)
+        """
+        if not self.strtab:
+            raise ValueError("no strings found")
+
+        for i in range(symbol.name_offset, self.strtab.size):
+            if self.strtab.buf[i] == 0:
+                return self.strtab.buf[symbol.name_offset : i].decode("utf-8")
+
+        raise ValueError("symbol name not found")
+
+    def get_symbols(self) -> Iterator[Symbol]:
+        """
+        return a tuple: (name, value, size, info, other, shndx)
+        for each symbol contained in the symbol table
+        """
+        for symbol in self.symbols:
+            yield symbol
+
+
+def guess_os_from_osabi(elf: ELF) -> Optional[OS]:
     return elf.ei_osabi


-def guess_os_from_ph_notes(elf) -> Optional[OS]:
+def guess_os_from_ph_notes(elf: ELF) -> Optional[OS]:
     # search for PT_NOTE sections that specify an OS
     # for example, on Linux there is a GNU section with minimum kernel version
     PT_NOTE = 0x4
@@ -646,7 +729,7 @@ def guess_os_from_ph_notes(elf) -> Optional[OS]:
     return None


-def guess_os_from_sh_notes(elf) -> Optional[OS]:
+def guess_os_from_sh_notes(elf: ELF) -> Optional[OS]:
     # search for notes stored in sections that aren't visible in program headers.
     # e.g. .note.Linux in Linux kernel modules.
     SHT_NOTE = 0x7
@@ -679,7 +762,7 @@ def guess_os_from_sh_notes(elf) -> Optional[OS]:
     return None


-def guess_os_from_linker(elf) -> Optional[OS]:
+def guess_os_from_linker(elf: ELF) -> Optional[OS]:
     # search for recognizable dynamic linkers (interpreters)
     # for example, on linux, we see file paths like: /lib64/ld-linux-x86-64.so.2
     linker = elf.linker
@@ -689,7 +772,7 @@ def guess_os_from_linker(elf) -> Optional[OS]:
     return None


-def guess_os_from_abi_versions_needed(elf) -> Optional[OS]:
+def guess_os_from_abi_versions_needed(elf: ELF) -> Optional[OS]:
     # then lets look for GLIBC symbol versioning requirements.
     # this will let us guess about linux/hurd in some cases.

@@ -720,7 +803,7 @@ def guess_os_from_abi_versions_needed(elf) -> Optional[OS]:
     return None


-def guess_os_from_needed_dependencies(elf) -> Optional[OS]:
+def guess_os_from_needed_dependencies(elf: ELF) -> Optional[OS]:
     for needed in elf.needed:
         if needed.startswith("libmachuser.so"):
             return OS.HURD
@@ -730,6 +813,33 @@ def guess_os_from_needed_dependencies(elf) -> Optional[OS]:
     return None


+def guess_os_from_symtab(elf: ELF) -> Optional[OS]:
+    shdrs = elf.symtab
+    if not shdrs:
+        # executable does not contain a symbol table
+        # or the symbol's names are stripped
+        return None
+
+    symtab_shdr, strtab_shdr = shdrs
+    symtab = SymTab(elf.endian, elf.bitness, symtab_shdr, strtab_shdr)
+
+    keywords = {
+        OS.LINUX: [
+            "linux",
+            "/linux/",
+        ],
+    }
+
+    for symbol in symtab.get_symbols():
+        sym_name = symtab.get_name(symbol)
+
+        for os, hints in keywords.items():
+            if any(map(lambda x: x in sym_name, hints)):
+                return os
+
+    return None
+
+
 def detect_elf_os(f) -> str:
     """
     f: type Union[BinaryIO, IDAIO]
@@ -754,6 +864,9 @@ def detect_elf_os(f) -> str:
     needed_dependencies_guess = guess_os_from_needed_dependencies(elf)
     logger.debug("guess: needed dependencies: %s", needed_dependencies_guess)

+    symtab_guess = guess_os_from_symtab(elf)
+    logger.debug("guess: pertinent symbol name: %s", symtab_guess)
+
     ret = None

     if osabi_guess:
@@ -774,6 +887,9 @@ def detect_elf_os(f) -> str:
     elif needed_dependencies_guess:
         ret = needed_dependencies_guess

+    elif symtab_guess:
+        ret = symtab_guess
+
     return ret.value if ret is not None else "unknown"

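Note: the widened format strings "IIIIIIIIII" and "IIQQQQIIQQ" extend the section-header unpacking through sh_entsize, the per-entry size the new SymTab uses as its stride. A standalone sketch of that stride-and-unpack walk, here using the ELF spec's Elf64_Sym field widths (st_name, st_info, st_other, st_shndx, st_value, st_size) over a hand-built two-entry table:

    import struct

    endian, fmt = "<", "IBBHQQ"
    entsize = struct.calcsize(endian + fmt)  # 24 bytes per Elf64_Sym

    # entry 0: the mandatory null symbol; entry 1: a symbol named at strtab offset 1
    symtab_buf = struct.pack(endian + fmt, 0, 0, 0, 0, 0, 0)
    symtab_buf += struct.pack(endian + fmt, 1, 0x12, 0, 1, 0x401000, 32)

    for i in range(len(symtab_buf) // entsize):
        name_offset, info, other, shndx, value, size = struct.unpack_from(
            endian + fmt, symtab_buf, i * entsize
        )
        print(i, name_offset, hex(value), size)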

capa/features/extractors/ida/helpers.py:

@@ -90,8 +90,11 @@ def get_file_imports() -> Dict[int, Tuple[str, str, int]]:
         if not library:
             continue

-        # IDA uses section names for the library of ELF imports, like ".dynsym"
-        library = library.lstrip(".")
+        # IDA uses section names for the library of ELF imports, like ".dynsym".
+        # These are not useful to us, we may need to expand this list over time
+        # TODO: exhaust this list, see #1419
+        if library == ".dynsym":
+            library = ""

     def inspect_import(ea, function, ordinal):
         if function and function.startswith("__imp_"):
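Note: a worked illustration of why lstrip(".") was wrong here (pure Python, no IDA required): the old code turned the ELF section name ".dynsym" into what looked like a legitimate library named "dynsym", while the new code discards it entirely:

    library = ".dynsym"                     # IDA reports the section name for ELF imports
    assert library.lstrip(".") == "dynsym"  # old behavior: a bogus library name

    if library == ".dynsym":                # new behavior: recognized section names are dropped
        library = ""
    assert library == ""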

capa/features/insn.py:

@@ -70,7 +70,7 @@ class Number(Feature):
         elif isinstance(self.value, float):
             return str(self.value)
         else:
-            raise ValueError("invalid value type %s" % (type(self.value)))
+            raise ValueError(f"invalid value type {type(self.value)}")


 # max recognized structure size (and therefore, offset size)

capa/main.py, 114 lines changed:

@@ -69,6 +69,7 @@ from capa.features.common import (
     FORMAT_SC64,
     FORMAT_DOTNET,
     FORMAT_FREEZE,
+    FORMAT_RESULT,
 )
 from capa.features.address import NO_ADDRESS, Address
 from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
@@ -498,7 +499,7 @@ def get_workspace(path, format_, sigpaths):
 def get_extractor(
     path: str,
     format_: str,
-    os: str,
+    os_: str,
     backend: str,
     sigpaths: List[str],
     should_save_workspace=False,
@@ -517,7 +518,7 @@ def get_extractor(
         if not is_supported_arch(path):
             raise UnsupportedArchError()

-        if os == OS_AUTO and not is_supported_os(path):
+        if os_ == OS_AUTO and not is_supported_os(path):
             raise UnsupportedOSError()

     if format_ == FORMAT_DOTNET:
@@ -548,7 +549,7 @@ def get_extractor(
         with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
             bv: BinaryView = BinaryViewType.get_view_of_file(path)
             if bv is None:
-                raise RuntimeError("Binary Ninja cannot open file %s" % (path))
+                raise RuntimeError(f"Binary Ninja cannot open file {path}")

             return capa.features.extractors.binja.extractor.BinjaFeatureExtractor(bv)

@@ -569,7 +570,7 @@ def get_extractor(
         else:
             logger.debug("CAPA_SAVE_WORKSPACE unset, not saving workspace")

-        return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path, os)
+        return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path, os_)


 def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]:
@@ -913,12 +914,12 @@ def install_common_args(parser, wanted=None):
             (OS_MACOS,),
             (OS_WINDOWS,),
         ]
-        os_help = ", ".join(["%s (%s)" % (o[0], o[1]) if len(o) == 2 else o[0] for o in oses])
+        os_help = ", ".join([f"{o[0]} ({o[1]})" if len(o) == 2 else o[0] for o in oses])
         parser.add_argument(
             "--os",
             choices=[o[0] for o in oses],
             default=OS_AUTO,
-            help="select sample OS: %s" % os_help,
+            help=f"select sample OS: {os_help}",
         )

     if "rules" in wanted:
@@ -1182,53 +1183,72 @@ def main(argv=None):
         logger.debug("file limitation short circuit, won't analyze fully.")
         return E_FILE_LIMITATION

-    if format_ == FORMAT_FREEZE:
-        with open(args.sample, "rb") as f:
-            extractor = capa.features.freeze.load(f.read())
+    # TODO: #1411 use a real type, not a dict here.
+    meta: Dict[str, Any]
+    capabilities: MatchResults
+    counts: Dict[str, Any]
+
+    if format_ == FORMAT_RESULT:
+        # result document directly parses into meta, capabilities
+        result_doc = capa.render.result_document.ResultDocument.parse_file(args.sample)
+        meta, capabilities = result_doc.to_capa()
+
     else:
-        try:
-            if format_ == FORMAT_PE:
-                sig_paths = get_signatures(args.signatures)
-            else:
-                sig_paths = []
-                logger.debug("skipping library code matching: only have native PE signatures")
-        except IOError as e:
-            logger.error("%s", str(e))
-            return E_INVALID_SIG
-
-        should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
-
-        try:
-            extractor = get_extractor(
-                args.sample,
-                format_,
-                args.os,
-                args.backend,
-                sig_paths,
-                should_save_workspace,
-                disable_progress=args.quiet,
-            )
-        except UnsupportedFormatError:
-            log_unsupported_format_error()
-            return E_INVALID_FILE_TYPE
-        except UnsupportedArchError:
-            log_unsupported_arch_error()
-            return E_INVALID_FILE_ARCH
-        except UnsupportedOSError:
-            log_unsupported_os_error()
-            return E_INVALID_FILE_OS
-
-    meta = collect_metadata(argv, args.sample, args.format, args.os, args.rules, extractor)
-
-    capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
-    meta["analysis"].update(counts)
-    meta["analysis"]["layout"] = compute_layout(rules, extractor, capabilities)
-
-    if has_file_limitation(rules, capabilities):
-        # bail if capa encountered file limitation e.g. a packed binary
-        # do show the output in verbose mode, though.
-        if not (args.verbose or args.vverbose or args.json):
-            return E_FILE_LIMITATION
+        # all other formats we must create an extractor
+        # and use that to extract meta and capabilities
+
+        if format_ == FORMAT_FREEZE:
+            # freeze format deserializes directly into an extractor
+            with open(args.sample, "rb") as f:
+                extractor = capa.features.freeze.load(f.read())
+        else:
+            # all other formats we must create an extractor,
+            # such as viv, binary ninja, etc. workspaces
+            # and use those for extracting.
+
+            try:
+                if format_ == FORMAT_PE:
+                    sig_paths = get_signatures(args.signatures)
+                else:
+                    sig_paths = []
+                    logger.debug("skipping library code matching: only have native PE signatures")
+            except IOError as e:
+                logger.error("%s", str(e))
+                return E_INVALID_SIG
+
+            should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
+
+            try:
+                extractor = get_extractor(
+                    args.sample,
+                    format_,
+                    args.os,
+                    args.backend,
+                    sig_paths,
+                    should_save_workspace,
+                    disable_progress=args.quiet,
+                )
+            except UnsupportedFormatError:
+                log_unsupported_format_error()
+                return E_INVALID_FILE_TYPE
+            except UnsupportedArchError:
+                log_unsupported_arch_error()
+                return E_INVALID_FILE_ARCH
+            except UnsupportedOSError:
+                log_unsupported_os_error()
+                return E_INVALID_FILE_OS
+
+        meta = collect_metadata(argv, args.sample, args.format, args.os, args.rules, extractor)
+
+        capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
+        meta["analysis"].update(counts)
+        meta["analysis"]["layout"] = compute_layout(rules, extractor, capabilities)
+
+        if has_file_limitation(rules, capabilities):
+            # bail if capa encountered file limitation e.g. a packed binary
+            # do show the output in verbose mode, though.
+            if not (args.verbose or args.vverbose or args.json):
+                return E_FILE_LIMITATION

     if args.json:
         print(capa.render.json.render(meta, rules, capabilities))
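Note: after this restructuring, main() has three entry paths into (meta, capabilities): result documents parse directly, freeze files deserialize into an extractor, and everything else builds a fresh analysis workspace. A minimal sketch of the new result-document path, assuming "results.json" (hypothetical name) was written by an earlier capa run with JSON output:

    import capa.render.result_document

    # ResultDocument is a pydantic v1 model, so parse_file() is available
    result_doc = capa.render.result_document.ResultDocument.parse_file("results.json")
    meta, capabilities = result_doc.to_capa()  # same shapes main() otherwise builds via an extractor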

capa/render/proto/__init__.py:

@@ -502,27 +502,36 @@ def metadata_from_pb2(meta: capa_pb2.Metadata) -> rd.Metadata:
         rules=tuple(meta.analysis.rules),
         base_address=addr_from_pb2(meta.analysis.base_address),
         layout=rd.Layout(
-            functions=[
-                rd.FunctionLayout(
-                    address=addr_from_pb2(f.address),
-                    matched_basic_blocks=[
-                        rd.BasicBlockLayout(address=addr_from_pb2(bb.address)) for bb in f.matched_basic_blocks
-                    ],
-                )
-                for f in meta.analysis.layout.functions
-            ]
+            functions=tuple(
+                [
+                    rd.FunctionLayout(
+                        address=addr_from_pb2(f.address),
+                        matched_basic_blocks=tuple(
+                            [
+                                rd.BasicBlockLayout(address=addr_from_pb2(bb.address))
+                                for bb in f.matched_basic_blocks
+                            ]
+                        ),
+                    )
+                    for f in meta.analysis.layout.functions
+                ]
+            )
         ),
         feature_counts=rd.FeatureCounts(
             file=meta.analysis.feature_counts.file,
-            functions=[
-                rd.FunctionFeatureCount(address=addr_from_pb2(f.address), count=f.count)
-                for f in meta.analysis.feature_counts.functions
-            ],
+            functions=tuple(
+                [
+                    rd.FunctionFeatureCount(address=addr_from_pb2(f.address), count=f.count)
+                    for f in meta.analysis.feature_counts.functions
+                ]
+            ),
+        ),
+        library_functions=tuple(
+            [
+                rd.LibraryFunction(address=addr_from_pb2(lf.address), name=lf.name)
+                for lf in meta.analysis.library_functions
+            ]
         ),
-        library_functions=[
-            rd.LibraryFunction(address=addr_from_pb2(lf.address), name=lf.name)
-            for lf in meta.analysis.library_functions
-        ],
     ),
 )
@@ -585,13 +594,14 @@ def feature_from_pb2(f: capa_pb2.FeatureNode) -> frzf.Feature:
         return frzf.ExportFeature(export=ff.export, description=ff.description or None)
     elif type_ == "import_":
         ff = f.import_
-        return frzf.ImportFeature(import_=ff.import_, description=ff.description or None)
+        return frzf.ImportFeature(import_=ff.import_, description=ff.description or None)  # type: ignore
+        # Mypy is unable to recognize `import_` as an argument
     elif type_ == "section":
         ff = f.section
         return frzf.SectionFeature(section=ff.section, description=ff.description or None)
     elif type_ == "function_name":
         ff = f.function_name
-        return frzf.FunctionNameFeature(function_name=ff.function_name, description=ff.description or None)
+        return frzf.FunctionNameFeature(function_name=ff.function_name, description=ff.description or None)  # type: ignore
     elif type_ == "substring":
         ff = f.substring
         return frzf.SubstringFeature(substring=ff.substring, description=ff.description or None)
@@ -603,7 +613,8 @@ def feature_from_pb2(f: capa_pb2.FeatureNode) -> frzf.Feature:
         return frzf.StringFeature(string=ff.string, description=ff.description or None)
     elif type_ == "class_":
         ff = f.class_
-        return frzf.ClassFeature(class_=ff.class_, description=ff.description or None)
+        return frzf.ClassFeature(class_=ff.class_, description=ff.description or None)  # type: ignore
+        # Mypy is unable to recognize `class_` as an argument due to aliasing
     elif type_ == "namespace":
         ff = f.namespace
         return frzf.NamespaceFeature(namespace=ff.namespace, description=ff.description or None)
@@ -629,12 +640,13 @@ def feature_from_pb2(f: capa_pb2.FeatureNode) -> frzf.Feature:
         ff = f.operand_number
         return frzf.OperandNumberFeature(
             index=ff.index, operand_number=number_from_pb2(ff.operand_number), description=ff.description or None
-        )
+        )  # type: ignore
     elif type_ == "operand_offset":
         ff = f.operand_offset
         return frzf.OperandOffsetFeature(
             index=ff.index, operand_offset=int_from_pb2(ff.operand_offset), description=ff.description or None
-        )
+        )  # type: ignore
+        # Mypy is unable to recognize `operand_offset` as an argument due to aliasing
     elif type_ == "basic_block":
         ff = f.basic_block
         return frzf.BasicBlockFeature(description=ff.description or None)
@@ -651,16 +663,16 @@ def match_from_pb2(match: capa_pb2.Match) -> rd.Match:
         return rd.Match(
             success=match.success,
             node=rd.StatementNode(statement=statement_from_pb2(match.statement)),
-            children=children,
-            locations=locations,
+            children=tuple(children),
+            locations=tuple(locations),
             captures={},
         )
     elif node_type == "feature":
         return rd.Match(
             success=match.success,
             node=rd.FeatureNode(feature=feature_from_pb2(match.feature)),
-            children=children,
-            locations=locations,
+            children=tuple(children),
+            locations=tuple(locations),
             captures={capture: tuple(map(addr_from_pb2, locs.address)) for capture, locs in match.captures.items()},
         )
     else:
@@ -694,7 +706,8 @@ def maec_from_pb2(pb: capa_pb2.MaecMetadata) -> rd.MaecMetadata:
         malware_family=pb.malware_family or None,
         malware_category=pb.malware_category or None,
         malware_category_ov=pb.malware_category_ov or None,
-    )
+    )  # type: ignore
+    # Mypy is unable to recognise arguments due to alias


 def rule_metadata_from_pb2(pb: capa_pb2.RuleMetadata) -> rd.RuleMetadata:
@@ -711,7 +724,8 @@ def rule_metadata_from_pb2(pb: capa_pb2.RuleMetadata) -> rd.RuleMetadata:
         lib=pb.lib,
         is_subscope_rule=pb.is_subscope_rule,
         maec=maec_from_pb2(pb.maec),
-    )
+    )  # type: ignore
+    # Mypy is unable to recognise `attack` and `is_subscope_rule` as arguments due to alias


 def doc_from_pb2(doc: capa_pb2.ResultDocument) -> rd.ResultDocument:
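Note: the tuple(...) wrapping matters because these values feed frozen pydantic models, which must hold immutable members to stay hashable; the explicit conversion also keeps type checkers satisfied. A minimal sketch of the constraint with pydantic v1 (the pinned major version; the model names here are illustrative, not capa's exact definitions):

    from typing import Tuple
    from pydantic import BaseModel

    class FrozenModel(BaseModel):
        class Config:
            frozen = True  # immutable instances with a generated __hash__

    class Layout(FrozenModel):
        functions: Tuple[int, ...]  # a List field would make instances unhashable

    layout = Layout(functions=tuple([1, 2, 3]))
    hash(layout)  # works because every field value is immutable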

capa/render/result_document.py:

@@ -6,7 +6,8 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import datetime
-from typing import Any, Dict, Tuple, Union, Optional
+import collections
+from typing import Any, Dict, List, Tuple, Union, Optional

 from pydantic import Field, BaseModel

@@ -125,6 +126,41 @@ class Metadata(FrozenModel):
             ),
         )

+    def to_capa(self) -> Dict[str, Any]:
+        capa_meta = {
+            "timestamp": self.timestamp.isoformat(),
+            "version": self.version,
+            "sample": {
+                "md5": self.sample.md5,
+                "sha1": self.sample.sha1,
+                "sha256": self.sample.sha256,
+                "path": self.sample.path,
+            },
+            "analysis": {
+                "format": self.analysis.format,
+                "arch": self.analysis.arch,
+                "os": self.analysis.os,
+                "extractor": self.analysis.extractor,
+                "rules": self.analysis.rules,
+                "base_address": self.analysis.base_address.to_capa(),
+                "layout": {
+                    "functions": {
+                        f.address.to_capa(): {
+                            "matched_basic_blocks": [bb.address.to_capa() for bb in f.matched_basic_blocks]
+                        }
+                        for f in self.analysis.layout.functions
+                    }
+                },
+                "feature_counts": {
+                    "file": self.analysis.feature_counts.file,
+                    "functions": {fc.address.to_capa(): fc.count for fc in self.analysis.feature_counts.functions},
+                },
+                "library_functions": {lf.address.to_capa(): lf.name for lf in self.analysis.library_functions},
+            },
+        }
+
+        return capa_meta
+

 class CompoundStatementType:
     AND = "and"
@@ -227,6 +263,54 @@ def node_from_capa(node: Union[capa.engine.Statement, capa.engine.Feature]) -> Node:
     assert_never(node)


+def node_to_capa(
+    node: Node, children: List[Union[capa.engine.Statement, capa.engine.Feature]]
+) -> Union[capa.engine.Statement, capa.engine.Feature]:
+    if isinstance(node, StatementNode):
+        if isinstance(node.statement, CompoundStatement):
+            if node.statement.type == CompoundStatementType.AND:
+                return capa.engine.And(description=node.statement.description, children=children)
+
+            elif node.statement.type == CompoundStatementType.OR:
+                return capa.engine.Or(description=node.statement.description, children=children)
+
+            elif node.statement.type == CompoundStatementType.NOT:
+                return capa.engine.Not(description=node.statement.description, child=children[0])
+
+            elif node.statement.type == CompoundStatementType.OPTIONAL:
+                return capa.engine.Some(description=node.statement.description, count=0, children=children)
+
+            else:
+                assert_never(node.statement.type)
+
+        elif isinstance(node.statement, SomeStatement):
+            return capa.engine.Some(
+                description=node.statement.description, count=node.statement.count, children=children
+            )
+
+        elif isinstance(node.statement, RangeStatement):
+            return capa.engine.Range(
+                description=node.statement.description,
+                min=node.statement.min,
+                max=node.statement.max,
+                child=node.statement.child.to_capa(),
+            )
+
+        elif isinstance(node.statement, SubscopeStatement):
+            return capa.engine.Subscope(
+                description=node.statement.description, scope=node.statement.scope, child=children[0]
+            )
+
+        else:
+            assert_never(node.statement)
+
+    elif isinstance(node, FeatureNode):
+        return node.feature.to_capa()
+
+    else:
+        assert_never(node)
+
+
 class Match(FrozenModel):
     """
     args:
@@ -359,6 +443,39 @@ class Match(FrozenModel):
             captures={capture: tuple(captures[capture]) for capture in captures},
         )

+    def to_capa(self, rules_by_name: Dict[str, capa.rules.Rule]) -> capa.engine.Result:
+        children = [child.to_capa(rules_by_name) for child in self.children]
+        statement = node_to_capa(self.node, [child.statement for child in children])
+
+        if isinstance(self.node, FeatureNode):
+            feature = self.node.feature
+
+            if isinstance(feature, (frzf.SubstringFeature, frzf.RegexFeature)):
+                matches = {capture: {loc.to_capa() for loc in locs} for capture, locs in self.captures.items()}
+
+                if isinstance(feature, frzf.SubstringFeature):
+                    assert isinstance(statement, capa.features.common.Substring)
+                    statement = capa.features.common._MatchedSubstring(statement, matches)
+                elif isinstance(feature, frzf.RegexFeature):
+                    assert isinstance(statement, capa.features.common.Regex)
+                    statement = capa.features.common._MatchedRegex(statement, matches)
+                else:
+                    assert_never(feature)
+
+        # apparently we don't have to fixup match and subscope entries here.
+        # at least, default, verbose, and vverbose renderers seem to work well without any special handling here.
+        #
+        # children contains a single tree of results, corresponding to the logic of the matched rule.
+        # self.node.feature.match contains the name of the rule that was matched.
+        # so its all available to reconstruct, if necessary.
+
+        return capa.features.common.Result(
+            success=self.success,
+            statement=statement,
+            locations={loc.to_capa() for loc in self.locations},
+            children=children,
+        )
+

 def parse_parts_id(s: str):
     id_ = ""
@@ -543,3 +660,22 @@ class ResultDocument(FrozenModel):
         )

         return ResultDocument(meta=Metadata.from_capa(meta), rules=rule_matches)
+
+    def to_capa(self) -> Tuple[Dict, Dict]:
+        meta = self.meta.to_capa()
+        capabilities: Dict[
+            str, List[Tuple[capa.features.address.Address, capa.features.common.Result]]
+        ] = collections.defaultdict(list)
+
+        # this doesn't quite work because we don't have the rule source for rules that aren't matched.
+        rules_by_name = {
+            rule_name: capa.rules.Rule.from_yaml(rule_match.source) for rule_name, rule_match in self.rules.items()
+        }
+
+        for rule_name, rule_match in self.rules.items():
+            for addr, match in rule_match.matches:
+                result: capa.engine.Result = match.to_capa(rules_by_name)
+
+                capabilities[rule_name].append((addr.to_capa(), result))
+
+        return meta, capabilities
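Note: node_to_capa() and Match.to_capa() rebuild the rule-engine tree bottom-up: each node's children are converted first, then the parent is reconstructed around them. A generic, runnable sketch of that shape (the types are hypothetical stand-ins for rd.Match and capa.engine statements):

    from dataclasses import dataclass
    from typing import List

    @dataclass
    class SerNode:          # serialized form: plain data, like rd.Match
        op: str
        children: List["SerNode"]

    @dataclass
    class EngineNode:       # behavior-bearing form, like capa.engine.And/Or
        op: str
        children: List["EngineNode"]

    def to_engine(node: SerNode) -> EngineNode:
        # convert children first, then rebuild the parent from the converted children
        return EngineNode(node.op, [to_engine(c) for c in node.children])

    tree = SerNode("and", [SerNode("or", []), SerNode("not", [])])
    assert to_engine(tree).children[0].op == "or"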

capa/render/utils.py:

@@ -16,7 +16,7 @@ import capa.render.result_document as rd

 def bold(s: str) -> str:
     """draw attention to the given string"""
-    return termcolor.colored(s, "blue")
+    return termcolor.colored(s, "cyan")


 def bold2(s: str) -> str:

rules, 2 lines changed:
Submodule rules updated: 4ca8023363...a10ccf3fd8

setup.py, 8 lines changed:

@@ -27,8 +27,8 @@ requirements = [
     "pyelftools==0.29",
     "dnfile==0.13.0",
     "dncil==1.0.2",
-    "pydantic==1.10.6",
-    "protobuf==4.21.12",
+    "pydantic==1.10.7",
+    "protobuf==4.22.1",
 ]

 # this sets __version__
@@ -71,7 +71,7 @@ setuptools.setup(
         "dev": [
             "pytest==7.1.3",
             "pytest-sugar==0.9.4",
-            "pytest-instafail==0.4.2",
+            "pytest-instafail==0.5.0",
             "pytest-cov==4.0.0",
             "pycodestyle==2.10.0",
             "ruff==0.0.259",
@@ -90,7 +90,7 @@ setuptools.setup(
             "types-termcolor==1.1.4",
             "types-psutil==5.8.23",
             "types_requests==2.28.1",
-            "types-protobuf==4.21.0.5",
+            "types-protobuf==4.22.0.1",
         ],
         "build": [
             "pyinstaller==5.9.0",

Submodule tests/data updated: 3cbd7768c2...9b302d4bcf

tests/fixtures.py:

@@ -241,6 +241,8 @@ def get_data_path_by_name(name):
         return os.path.join(CD, "data", "kernel32-64.dll_")
     elif name == "pma01-01":
         return os.path.join(CD, "data", "Practical Malware Analysis Lab 01-01.dll_")
+    elif name == "pma01-01-rd":
+        return os.path.join(CD, "data", "rd", "Practical Malware Analysis Lab 01-01.dll_.json")
     elif name == "pma12-04":
         return os.path.join(CD, "data", "Practical Malware Analysis Lab 12-04.exe_")
     elif name == "pma16-01":
@@ -307,6 +309,8 @@ def get_data_path_by_name(name):
         return os.path.join(CD, "data", "bf7a9c8bdfa6d47e01ad2b056264acc3fd90cf43fe0ed8deec93ab46b47d76cb.elf_")
     elif name.startswith("294b8d"):
         return os.path.join(CD, "data", "294b8db1f2702b60fb2e42fdc50c2cee6a5046112da9a5703a548a4fa50477bc.elf_")
+    elif name.startswith("2bf18d"):
+        return os.path.join(CD, "data", "2bf18d0403677378adad9001b1243211.elf_")
     else:
         raise ValueError(f"unexpected sample fixture: {name}")

@@ -365,6 +369,8 @@ def get_sample_md5_by_name(name):
     elif name.startswith("294b8d"):
         # file name is SHA256 hash
         return "3db3e55b16a7b1b1afb970d5e77c5d98"
+    elif name.startswith("2bf18d"):
+        return "2bf18d0403677378adad9001b1243211"
     else:
         raise ValueError(f"unexpected sample fixture: {name}")

@@ -467,3 +467,12 @@ def test_main_dotnet4(_039a6_dotnetfile_extractor):
|
|||||||
# tests successful execution and one rendering
|
# tests successful execution and one rendering
|
||||||
path = _039a6_dotnetfile_extractor.path
|
path = _039a6_dotnetfile_extractor.path
|
||||||
assert capa.main.main([path, "-vv"]) == 0
|
assert capa.main.main([path, "-vv"]) == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_main_rd():
|
||||||
|
path = fixtures.get_data_path_by_name("pma01-01-rd")
|
||||||
|
assert capa.main.main([path, "-vv"]) == 0
|
||||||
|
assert capa.main.main([path, "-v"]) == 0
|
||||||
|
assert capa.main.main([path, "-j"]) == 0
|
||||||
|
assert capa.main.main([path, "-q"]) == 0
|
||||||
|
assert capa.main.main([path]) == 0
|
||||||
|

tests/test_elf.py:

@@ -20,6 +20,7 @@ def test_elf_sh_notes():
     # guess: sh notes: OS.LINUX
     # guess: linker: None
     # guess: ABI versions needed: None
+    # guess: symtab: None
     # guess: needed dependencies: None
     path = get_data_path_by_name("2f7f5f")
     with open(path, "rb") as f:
@@ -32,6 +33,7 @@ def test_elf_pt_notes():
     # guess: sh notes: OS.LINUX
     # guess: linker: OS.LINUX
     # guess: ABI versions needed: OS.LINUX
+    # guess: symtab: None
     # guess: needed dependencies: None
     path = get_data_path_by_name("7351f.elf")
     with open(path, "rb") as f:
@@ -44,6 +46,7 @@ def test_elf_so_needed():
     # guess: sh notes: OS.HURD
     # guess: linker: None
     # guess: ABI versions needed: OS.HURD
+    # guess: symtab: None
     # guess: needed dependencies: OS.HURD
     path = get_data_path_by_name("b5f052")
     with open(path, "rb") as f:
@@ -56,7 +59,21 @@ def test_elf_abi_version_hurd():
     # guess: sh notes: OS.HURD
     # guess: linker: None
     # guess: ABI versions needed: OS.HURD
+    # guess: symtab: None
     # guess: needed dependencies: None
     path = get_data_path_by_name("bf7a9c")
     with open(path, "rb") as f:
         assert capa.features.extractors.elf.detect_elf_os(f) == "hurd"
+
+
+def test_elf_symbol_table():
+    # guess: osabi: None
+    # guess: ph notes: None
+    # guess: sh notes: None
+    # guess: linker: None
+    # guess: ABI versions needed: None
+    # guess: symtab: OS.LINUX
+    # guess: needed dependencies: None
+    path = get_data_path_by_name("2bf18d")
+    with open(path, "rb") as f:
+        assert capa.features.extractors.elf.detect_elf_os(f) == "linux"

tests/test_result_document.py:

@@ -8,6 +8,7 @@
 import copy

 import pytest
+import fixtures
 from fixtures import *

 import capa
@@ -268,3 +269,18 @@ def assert_round_trip(rd: rdoc.ResultDocument):
 def test_round_trip(request, rd_file):
     rd: rdoc.ResultDocument = request.getfixturevalue(rd_file)
     assert_round_trip(rd)
+
+
+def test_json_to_rdoc():
+    path = fixtures.get_data_path_by_name("pma01-01-rd")
+    assert isinstance(rdoc.ResultDocument.parse_file(path), rdoc.ResultDocument)
+
+
+def test_rdoc_to_capa():
+    path = fixtures.get_data_path_by_name("pma01-01-rd")
+
+    rd = rdoc.ResultDocument.parse_file(path)
+
+    meta, capabilites = rd.to_capa()
+    assert isinstance(meta, dict)
+    assert isinstance(capabilites, dict)

tests/test_proto.py:

@@ -81,4 +81,4 @@ def test_proto_conversion(tmpdir):
     p = run_program(get_script_path("proto-to-results.py"), [pb])
     assert p.returncode == 0

-    assert p.stdout.startswith(b'{\n "meta": ')
+    assert p.stdout.startswith(b'{\n "meta": ') or p.stdout.startswith(b'{\r\n "meta": ')
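Note: the second prefix covers Windows, where a child Python writing to text-mode stdout emits "\r\n" line endings. An equivalent alternative: bytes.startswith() accepts a tuple of prefixes, which folds the `or` into one call:

    assert b'{\r\n "meta": {}'.startswith((b'{\n "meta": ', b'{\r\n "meta": '))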