mirror of https://github.com/mandiant/capa.git
synced 2025-12-05 20:40:05 -08:00
merge upstream
4 .github/workflows/build.yml vendored
@@ -51,7 +51,9 @@ jobs:
- name: Upgrade pip, setuptools
run: python -m pip install --upgrade pip setuptools
- name: Install capa with build requirements
run: pip install -e .[build]
run: |
pip install -r requirements.txt
pip install -e .[build]
- name: Build standalone executable
run: pyinstaller --log-level DEBUG .github/pyinstaller/pyinstaller.spec
- name: Does it run (PE)?
1 .github/workflows/publish.yml vendored
@@ -25,6 +25,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -e .[build]
- name: build package
run: |
20 .github/workflows/tests.yml vendored
@@ -35,7 +35,9 @@ jobs:
with:
python-version: "3.11"
- name: Install dependencies
run: pip install -e .[dev]
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
- name: Lint with ruff
run: pre-commit run ruff
- name: Lint with isort
@@ -61,7 +63,9 @@
with:
python-version: "3.11"
- name: Install capa
run: pip install -e .[dev]
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
- name: Run rule linter
run: python scripts/lint.py rules/
@@ -96,7 +100,9 @@
if: matrix.os == 'ubuntu-20.04'
run: sudo apt-get install -y libyaml-dev
- name: Install capa
run: pip install -e .[dev]
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
- name: Run tests (fast)
# this set of tests runs about 80% of the cases in 20% of the time,
# and should catch most errors quickly.
@@ -131,7 +137,9 @@
run: sudo apt-get install -y libyaml-dev
- name: Install capa
if: ${{ env.BN_SERIAL != 0 }}
run: pip install -e .[dev]
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
- name: install Binary Ninja
if: ${{ env.BN_SERIAL != 0 }}
run: |
@@ -188,7 +196,9 @@
- name: Install pyyaml
run: sudo apt-get install -y libyaml-dev
- name: Install capa
run: pip install -e .[dev]
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
- name: Run tests
run: |
mkdir ./.github/ghidra/project
@@ -110,6 +110,16 @@ repos:
always_run: true
pass_filenames: false

- repo: local
hooks:
- id: deptry
name: deptry
stages: [push, manual]
language: system
entry: deptry .
always_run: true
pass_filenames: false

- repo: local
hooks:
- id: pytest-fast
@@ -128,12 +138,3 @@ repos:
always_run: true
pass_filenames: false

- repo: local
hooks:
- id: deptry
name: deptry
stages: [push, manual]
language: system
entry: deptry .
always_run: true
pass_filenames: false
59 CHANGELOG.md
@@ -1,20 +1,55 @@
# Change Log

## master (unreleased)
- Emit "dotnet" as format to ResultDocument when processing .NET files #2024 @samadpls

### New Features

### Breaking Changes

### New Rules (0)

-

### Bug Fixes

### capa explorer IDA Pro plugin

### Development

### Raw diffs
- [capa v7.1.0...master](https://github.com/mandiant/capa/compare/v7.1.0...master)
- [capa-rules v7.1.0...master](https://github.com/mandiant/capa-rules/compare/v7.1.0...master)

## v7.1.0
The v7.1.0 release brings large performance improvements to capa's rule matching engine.
Additionally, we've fixed various bugs and added new features for people using and developing capa.

Special thanks to our repeat and new contributors:
* @sjha2048 made their first contribution in https://github.com/mandiant/capa/pull/2000
* @Rohit1123 made their first contribution in https://github.com/mandiant/capa/pull/1990
* @psahithireddy made their first contribution in https://github.com/mandiant/capa/pull/2020
* @Atlas-64 made their first contribution in https://github.com/mandiant/capa/pull/2018
* @s-ff made their first contribution in https://github.com/mandiant/capa/pull/2011
* @samadpls made their first contribution in https://github.com/mandiant/capa/pull/2024
* @acelynnzhang made their first contribution in https://github.com/mandiant/capa/pull/2044
* @RainRat made their first contribution in https://github.com/mandiant/capa/pull/2058
* @ReversingWithMe made their first contribution in https://github.com/mandiant/capa/pull/2093
* @malwarefrank made their first contribution in https://github.com/mandiant/capa/pull/2037

### New Features
- Emit "dotnet" as format to ResultDocument when processing .NET files #2024 @samadpls
- ELF: detect OS from statically-linked Go binaries #1978 @williballenthin
- add function in capa/helpers to load plain and compressed JSON reports #1883 @Rohit1123
- document Antivirus warnings and VirusTotal false positive detections #2028 @RionEV @mr-tz
- Add json to sarif conversion script @reversingwithme
- render maec/* fields #843 @s-ff
- replace Halo spinner with Rich #2086 @s-ff
- optimize rule matching #2080 @williballenthin
- add aarch64 as a valid architecture #2144 mehunhoff@google.com @williballenthin
- relax dependency version requirements for the capa library #2053 @williballenthin
- add scripts dependency group and update documentation #2145 @mr-tz

### Breaking Changes

### New Rules (17)
### New Rules (25)

- impact/wipe-disk/delete-drive-layout-via-ioctl william.ballenthin@mandiant.com
- host-interaction/driver/interact-with-driver-via-ioctl moritz.raabe@mandiant.com
@@ -33,7 +68,14 @@
- persistence/act-as-time-provider-dll jakub.jozwiak@mandiant.com
- host-interaction/gui/window/hide/hide-graphical-window-from-taskbar jakub.jozwiak@mandiant.com
- compiler/dart/compiled-with-dart jakub.jozwiak@mandiant.com
-
- nursery/bypass-hidden-api-restrictions-via-jni-on-android mehunhoff@google.com
- nursery/get-current-process-filesystem-mounts-on-linux mehunhoff@google.com
- nursery/get-current-process-memory-mapping-on-linux mehunhoff@google.com
- nursery/get-system-property-on-android mehunhoff@google.com
- nursery/hook-routines-via-lsplant mehunhoff@google.com
- nursery/load-packed-dex-via-jiagu-on-android mehunhoff@google.com
- nursery/modify-api-blacklist-or-denylist-via-jni-on-android mehunhoff@google.com
- nursery/truncate-file-on-linux mehunhoff@google.com

### Bug Fixes

@@ -43,7 +85,6 @@
- cape: support more report formats #2035 @mr-tz
- elf: extract import / export symbols from stripped binaries #2096 @ygasparis

### capa explorer IDA Pro plugin
- replace deprecated IDA API find_binary with bin_search #1606 @s-ff

@@ -58,8 +99,8 @@
- add deptry support #1497 @s-ff

### Raw diffs
- [capa v7.0.1...master](https://github.com/mandiant/capa/compare/v7.0.1...master)
- [capa-rules v7.0.1...master](https://github.com/mandiant/capa-rules/compare/v7.0.1...master)
- [capa v7.0.1...v7.1.0](https://github.com/mandiant/capa/compare/v7.0.1...v7.1.0)
- [capa-rules v7.0.1...v7.1.0](https://github.com/mandiant/capa-rules/compare/v7.0.1...v7.1.0)

## v7.0.1
@@ -409,9 +409,10 @@ class Bytes(Feature):
# other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types
ARCH_I386 = "i386"
ARCH_AMD64 = "amd64"
ARCH_AARCH64 = "aarch64"
# dotnet
ARCH_ANY = "any"
VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY)
VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_AARCH64, ARCH_ANY)


class Arch(Feature):
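A quick illustration of the new constant (a minimal sketch; it assumes these names are importable from capa.features.common, matching capa's existing layout):

from capa.features.common import ARCH_AARCH64, Arch

# construct the aarch64 architecture feature that rules can now match against
feature = Arch(ARCH_AARCH64)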
@@ -83,7 +83,7 @@ def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str
|
||||
return None
|
||||
|
||||
try:
|
||||
user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid)
|
||||
user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get(token.rid)
|
||||
except UnicodeDecodeError as e:
|
||||
logger.debug("failed to decode #US stream index 0x%08x (%s)", token.rid, e)
|
||||
return None
|
||||
@@ -119,14 +119,14 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
|
||||
access: Optional[str]
|
||||
|
||||
# assume .NET imports starting with get_/set_ are used to access a property
|
||||
if member_ref.Name.startswith("get_"):
|
||||
member_ref_name: str = str(member_ref.Name)
|
||||
if member_ref_name.startswith("get_"):
|
||||
access = FeatureAccess.READ
|
||||
elif member_ref.Name.startswith("set_"):
|
||||
elif member_ref_name.startswith("set_"):
|
||||
access = FeatureAccess.WRITE
|
||||
else:
|
||||
access = None
|
||||
|
||||
member_ref_name: str = member_ref.Name
|
||||
if member_ref_name.startswith(("get_", "set_")):
|
||||
# remove get_/set_ from MemberRef name
|
||||
member_ref_name = member_ref_name[4:]
|
||||
@@ -212,7 +212,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
|
||||
token: int = calculate_dotnet_token_value(method.table.number, method.row_index)
|
||||
access: Optional[str] = accessor_map.get(token)
|
||||
|
||||
method_name: str = method.row.Name
|
||||
method_name: str = str(method.row.Name)
|
||||
if method_name.startswith(("get_", "set_")):
|
||||
# remove get_/set_
|
||||
method_name = method_name[4:]
|
||||
@@ -289,8 +289,8 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod]
|
||||
logger.debug("ImplMap[0x%X] ImportScope row is None", rid)
|
||||
module = ""
|
||||
else:
|
||||
module = impl_map.ImportScope.row.Name
|
||||
method: str = impl_map.ImportName
|
||||
module = str(impl_map.ImportScope.row.Name)
|
||||
method: str = str(impl_map.ImportName)
|
||||
|
||||
member_forward_table: int
|
||||
if impl_map.MemberForwarded.table is None:
|
||||
@@ -320,8 +320,11 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> O
|
||||
if row_index - 1 <= 0:
|
||||
return None
|
||||
|
||||
table: Optional[dnfile.base.ClrMetaDataTable] = pe.net.mdtables.tables.get(table_index)
|
||||
if table is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
table = pe.net.mdtables.tables.get(table_index, [])
|
||||
return table[row_index - 1]
|
||||
except IndexError:
|
||||
return None
|
||||
@@ -334,7 +337,7 @@ def resolve_nested_typedef_name(
|
||||
|
||||
if index in nested_class_table:
|
||||
typedef_name = []
|
||||
name = typedef.TypeName
|
||||
name = str(typedef.TypeName)
|
||||
|
||||
# Append the current typedef name
|
||||
typedef_name.append(name)
|
||||
@@ -343,24 +346,24 @@ def resolve_nested_typedef_name(
|
||||
# Iterate through the typedef table to resolve the nested name
|
||||
table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index])
|
||||
if table_row is None:
|
||||
return typedef.TypeNamespace, tuple(typedef_name[::-1])
|
||||
return str(typedef.TypeNamespace), tuple(typedef_name[::-1])
|
||||
|
||||
name = table_row.TypeName
|
||||
name = str(table_row.TypeName)
|
||||
typedef_name.append(name)
|
||||
index = nested_class_table[index]
|
||||
|
||||
# Document the root enclosing details
|
||||
table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index])
|
||||
if table_row is None:
|
||||
return typedef.TypeNamespace, tuple(typedef_name[::-1])
|
||||
return str(typedef.TypeNamespace), tuple(typedef_name[::-1])
|
||||
|
||||
enclosing_name = table_row.TypeName
|
||||
enclosing_name = str(table_row.TypeName)
|
||||
typedef_name.append(enclosing_name)
|
||||
|
||||
return table_row.TypeNamespace, tuple(typedef_name[::-1])
|
||||
return str(table_row.TypeNamespace), tuple(typedef_name[::-1])
|
||||
|
||||
else:
|
||||
return typedef.TypeNamespace, (typedef.TypeName,)
|
||||
return str(typedef.TypeNamespace), (str(typedef.TypeName),)
|
||||
|
||||
|
||||
def resolve_nested_typeref_name(
|
||||
@@ -370,29 +373,29 @@ def resolve_nested_typeref_name(
|
||||
# If the ResolutionScope decodes to a typeRef type then it is nested
|
||||
if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef):
|
||||
typeref_name = []
|
||||
name = typeref.TypeName
|
||||
name = str(typeref.TypeName)
|
||||
# Not appending the current typeref name to avoid potential duplicate
|
||||
|
||||
# Validate index
|
||||
table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, index)
|
||||
if table_row is None:
|
||||
return typeref.TypeNamespace, (typeref.TypeName,)
|
||||
return str(typeref.TypeNamespace), (str(typeref.TypeName),)
|
||||
|
||||
while isinstance(table_row.ResolutionScope.table, dnfile.mdtable.TypeRef):
|
||||
# Iterate through the typeref table to resolve the nested name
|
||||
typeref_name.append(name)
|
||||
name = table_row.TypeName
|
||||
name = str(table_row.TypeName)
|
||||
table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, table_row.ResolutionScope.row_index)
|
||||
if table_row is None:
|
||||
return typeref.TypeNamespace, tuple(typeref_name[::-1])
|
||||
return str(typeref.TypeNamespace), tuple(typeref_name[::-1])
|
||||
|
||||
# Document the root enclosing details
|
||||
typeref_name.append(table_row.TypeName)
|
||||
typeref_name.append(str(table_row.TypeName))
|
||||
|
||||
return table_row.TypeNamespace, tuple(typeref_name[::-1])
|
||||
return str(table_row.TypeNamespace), tuple(typeref_name[::-1])
|
||||
|
||||
else:
|
||||
return typeref.TypeNamespace, (typeref.TypeName,)
|
||||
return str(typeref.TypeNamespace), (str(typeref.TypeName),)
|
||||
|
||||
|
||||
def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]:
|
||||
|
||||
@@ -78,12 +78,12 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
|
||||
for _, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
|
||||
# emit internal .NET namespaces
|
||||
assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
|
||||
namespaces.add(typedef.TypeNamespace)
|
||||
namespaces.add(str(typedef.TypeNamespace))
|
||||
|
||||
for _, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number):
|
||||
# emit external .NET namespaces
|
||||
assert isinstance(typeref, dnfile.mdtable.TypeRefRow)
|
||||
namespaces.add(typeref.TypeNamespace)
|
||||
namespaces.add(str(typeref.TypeNamespace))
|
||||
|
||||
# namespaces may be empty, discard
|
||||
namespaces.discard("")
|
||||
|
||||
@@ -58,6 +58,10 @@ class OS(str, Enum):
|
||||
SYLLABLE = "syllable"
|
||||
NACL = "nacl"
|
||||
ANDROID = "android"
|
||||
DRAGONFLYBSD = "dragonfly BSD"
|
||||
ILLUMOS = "illumos"
|
||||
ZOS = "z/os"
|
||||
UNIX = "unix"
|
||||
|
||||
|
||||
# via readelf: https://github.com/bminor/binutils-gdb/blob/c0e94211e1ac05049a4ce7c192c9d14d1764eb3e/binutils/readelf.c#L19635-L19658
|
||||
@@ -81,6 +85,8 @@ class Phdr:
|
||||
paddr: int
|
||||
filesz: int
|
||||
buf: bytes
|
||||
flags: int
|
||||
memsz: int
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -315,24 +321,23 @@ class ELF:
|
||||
phent_offset = i * self.e_phentsize
|
||||
phent = self.phbuf[phent_offset : phent_offset + self.e_phentsize]
|
||||
|
||||
(p_type,) = struct.unpack_from(self.endian + "I", phent, 0x0)
|
||||
logger.debug("ph:p_type: 0x%04x", p_type)
|
||||
|
||||
if self.bitness == 32:
|
||||
p_offset, p_vaddr, p_paddr, p_filesz = struct.unpack_from(self.endian + "IIII", phent, 0x4)
|
||||
p_type, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz, p_flags = struct.unpack_from(
|
||||
self.endian + "IIIIIII", phent, 0x0
|
||||
)
|
||||
elif self.bitness == 64:
|
||||
p_offset, p_vaddr, p_paddr, p_filesz = struct.unpack_from(self.endian + "QQQQ", phent, 0x8)
|
||||
p_type, p_flags, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz = struct.unpack_from(
|
||||
self.endian + "IIQQQQQ", phent, 0x0
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
|
||||
logger.debug("ph:p_offset: 0x%02x p_filesz: 0x%04x", p_offset, p_filesz)
|
||||
|
||||
self.f.seek(p_offset)
|
||||
buf = self.f.read(p_filesz)
|
||||
if len(buf) != p_filesz:
|
||||
raise ValueError("failed to read program header content")
|
||||
|
||||
return Phdr(p_type, p_offset, p_vaddr, p_paddr, p_filesz, buf)
|
||||
return Phdr(p_type, p_offset, p_vaddr, p_paddr, p_filesz, buf, p_flags, p_memsz)
|
||||
|
||||
@property
|
||||
def program_headers(self):
|
||||
@@ -357,8 +362,6 @@ class ELF:
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
|
||||
logger.debug("sh:sh_offset: 0x%02x sh_size: 0x%04x", sh_offset, sh_size)
|
||||
|
||||
self.f.seek(sh_offset)
|
||||
buf = self.f.read(sh_size)
|
||||
if len(buf) != sh_size:
|
||||
@@ -867,6 +870,8 @@ def guess_os_from_ident_directive(elf: ELF) -> Optional[OS]:
|
||||
return OS.LINUX
|
||||
elif "Red Hat" in comment:
|
||||
return OS.LINUX
|
||||
elif "Alpine" in comment:
|
||||
return OS.LINUX
|
||||
elif "Android" in comment:
|
||||
return OS.ANDROID
|
||||
|
||||
@@ -952,11 +957,506 @@ def guess_os_from_symtab(elf: ELF) -> Optional[OS]:
|
||||
|
||||
for os, hints in keywords.items():
|
||||
if any(hint in sym_name for hint in hints):
|
||||
logger.debug("symtab: %s looks like %s", sym_name, os)
|
||||
return os
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def is_go_binary(elf: ELF) -> bool:
|
||||
for shdr in elf.section_headers:
|
||||
if shdr.get_name(elf) == ".note.go.buildid":
|
||||
logger.debug("go buildinfo: found section .note.go.buildid")
|
||||
return True
|
||||
|
||||
# The `go version` command enumerates sections for the name `.go.buildinfo`
|
||||
# (in addition to looking for the BUILDINFO_MAGIC) to check if an executable is go or not.
|
||||
# See references to the `errNotGoExe` error here:
|
||||
# https://github.com/golang/go/blob/master/src/debug/buildinfo/buildinfo.go#L41
|
||||
for shdr in elf.section_headers:
|
||||
if shdr.get_name(elf) == ".go.buildinfo":
|
||||
logger.debug("go buildinfo: found section .go.buildinfo")
|
||||
return True
|
||||
|
||||
# other strategy used by FLOSS: search for known runtime strings.
|
||||
# https://github.com/mandiant/flare-floss/blob/b2ca8adfc5edf278861dd6bff67d73da39683b46/floss/language/identify.py#L88
|
||||
return False
|
||||
|
||||
|
||||
def get_go_buildinfo_data(elf: ELF) -> Optional[bytes]:
|
||||
for shdr in elf.section_headers:
|
||||
if shdr.get_name(elf) == ".go.buildinfo":
|
||||
logger.debug("go buildinfo: found section .go.buildinfo")
|
||||
return shdr.buf
|
||||
|
||||
PT_LOAD = 0x1
|
||||
PF_X = 1
|
||||
PF_W = 2
|
||||
for phdr in elf.program_headers:
|
||||
if phdr.type != PT_LOAD:
|
||||
continue
|
||||
|
||||
if (phdr.flags & (PF_X | PF_W)) == PF_W:
|
||||
logger.debug("go buildinfo: found data segment")
|
||||
return phdr.buf
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def read_data(elf: ELF, rva: int, size: int) -> Optional[bytes]:
|
||||
# ELF segments are for runtime data,
|
||||
# ELF sections are for link-time data.
|
||||
# So we want to read Program Headers/Segments.
|
||||
for phdr in elf.program_headers:
|
||||
if phdr.vaddr <= rva < phdr.vaddr + phdr.memsz:
|
||||
segment_data = phdr.buf
|
||||
|
||||
# pad the section with NULLs
|
||||
# assume page alignment is already handled.
|
||||
# might need more hardening here.
|
||||
if len(segment_data) < phdr.memsz:
|
||||
segment_data += b"\x00" * (phdr.memsz - len(segment_data))
|
||||
|
||||
segment_offset = rva - phdr.vaddr
|
||||
return segment_data[segment_offset : segment_offset + size]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def read_go_slice(elf: ELF, rva: int) -> Optional[bytes]:
|
||||
if elf.bitness == 32:
|
||||
struct_size = 8
|
||||
struct_format = elf.endian + "II"
|
||||
elif elf.bitness == 64:
|
||||
struct_size = 16
|
||||
struct_format = elf.endian + "QQ"
|
||||
else:
|
||||
raise ValueError("invalid psize")
|
||||
|
||||
struct_buf = read_data(elf, rva, struct_size)
|
||||
if not struct_buf:
|
||||
return None
|
||||
|
||||
addr, length = struct.unpack_from(struct_format, struct_buf, 0)
|
||||
|
||||
return read_data(elf, addr, length)
|
||||
|
||||
|
||||
def guess_os_from_go_buildinfo(elf: ELF) -> Optional[OS]:
|
||||
"""
|
||||
In a binary compiled by Go, the buildinfo structure may contain
|
||||
metadata about the build environment, including the configured
|
||||
GOOS, which specifies the target operating system.
|
||||
|
||||
Search for and parse the buildinfo structure,
|
||||
which may be found in the .go.buildinfo section,
|
||||
and often contains this metadata inline. Otherwise,
|
||||
follow a few byte slices to the relevant information.
|
||||
|
||||
This strategy is derived from GoReSym.
|
||||
"""
|
||||
buf = get_go_buildinfo_data(elf)
|
||||
if not buf:
|
||||
logger.debug("go buildinfo: no buildinfo section")
|
||||
return None
|
||||
|
||||
assert isinstance(buf, bytes)
|
||||
|
||||
# The build info blob left by the linker is identified by
|
||||
# a 16-byte header, consisting of:
|
||||
# - buildInfoMagic (14 bytes),
|
||||
# - the binary's pointer size (1 byte), and
|
||||
# - whether the binary is big endian (1 byte).
|
||||
#
|
||||
# Then:
|
||||
# - virtual address to Go string: runtime.buildVersion
|
||||
# - virtual address to Go string: runtime.modinfo
|
||||
#
|
||||
# On 32-bit platforms, the last 8 bytes are unused.
|
||||
#
|
||||
# If the endianness has the 2 bit set, then the pointers are zero,
|
||||
# and the 32-byte header is followed by varint-prefixed string data
|
||||
# for the two string values we care about.
|
||||
# https://github.com/mandiant/GoReSym/blob/0860a1b1b4f3495e9fb7e71eb4386bf3e0a7c500/buildinfo/buildinfo.go#L185-L193
|
||||
BUILDINFO_MAGIC = b"\xFF Go buildinf:"
|
||||
|
||||
try:
|
||||
index = buf.index(BUILDINFO_MAGIC)
|
||||
except ValueError:
|
||||
logger.debug("go buildinfo: no buildinfo magic")
|
||||
return None
|
||||
|
||||
psize, flags = struct.unpack_from("<bb", buf, index + len(BUILDINFO_MAGIC))
|
||||
assert psize in (4, 8)
|
||||
is_big_endian = flags & 0b01
|
||||
has_inline_strings = flags & 0b10
|
||||
logger.debug("go buildinfo: psize: %d big endian: %s inline: %s", psize, is_big_endian, has_inline_strings)
|
||||
|
||||
GOOS_TO_OS = {
|
||||
b"aix": OS.AIX,
|
||||
b"android": OS.ANDROID,
|
||||
b"dragonfly": OS.DRAGONFLYBSD,
|
||||
b"freebsd": OS.FREEBSD,
|
||||
b"hurd": OS.HURD,
|
||||
b"illumos": OS.ILLUMOS,
|
||||
b"linux": OS.LINUX,
|
||||
b"netbsd": OS.NETBSD,
|
||||
b"openbsd": OS.OPENBSD,
|
||||
b"solaris": OS.SOLARIS,
|
||||
b"zos": OS.ZOS,
|
||||
b"windows": None, # PE format
|
||||
b"plan9": None, # a.out format
|
||||
b"ios": None, # Mach-O format
|
||||
b"darwin": None, # Mach-O format
|
||||
b"nacl": None, # dropped in GO 1.14
|
||||
b"js": None,
|
||||
}
|
||||
|
||||
if has_inline_strings:
|
||||
# This is the common case/path. Most samples will have an inline GOOS string.
|
||||
#
|
||||
# To find samples on VT, use these VTGrep searches:
|
||||
#
|
||||
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 02}
|
||||
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 02}
|
||||
|
||||
# If present, the GOOS key will be found within
|
||||
# the current buildinfo data region.
|
||||
#
|
||||
# Brute force the k-v pair, like `GOOS=linux`,
|
||||
# rather than try to parse the data, which would be fragile.
|
||||
for key, os in GOOS_TO_OS.items():
|
||||
if (b"GOOS=" + key) in buf:
|
||||
logger.debug("go buildinfo: found os: %s", os)
|
||||
return os
|
||||
else:
|
||||
# This is the uncommon path. Most samples will have an inline GOOS string.
|
||||
#
|
||||
# To find samples on VT, use the referenced VTGrep content searches.
|
||||
info_format = {
|
||||
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 00}
|
||||
# like: 71e617e5cc7fda89bf67422ff60f437e9d54622382c5ed6ff31f75e601f9b22e
|
||||
# in which the modinfo doesn't have GOOS.
|
||||
(4, False): "<II",
|
||||
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 00}
|
||||
# like: 93d3b3e2a904c6c909e20f2f76c3c2e8d0c81d535eb46e5493b5701f461816c3
|
||||
# in which the modinfo doesn't have GOOS.
|
||||
(8, False): "<QQ",
|
||||
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 01}
|
||||
# (no matches on VT today)
|
||||
(4, True): ">II",
|
||||
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 01}
|
||||
# like: d44ba497964050c0e3dd2a192c511e4c3c4f17717f0322a554d64b797ee4690a
|
||||
# in which the modinfo doesn't have GOOS.
|
||||
(8, True): ">QQ",
|
||||
}
|
||||
|
||||
build_version_address, modinfo_address = struct.unpack_from(
|
||||
info_format[(psize, is_big_endian)], buf, index + 0x10
|
||||
)
|
||||
logger.debug("go buildinfo: build version address: 0x%x", build_version_address)
|
||||
logger.debug("go buildinfo: modinfo address: 0x%x", modinfo_address)
|
||||
|
||||
build_version = read_go_slice(elf, build_version_address)
|
||||
if build_version:
|
||||
logger.debug("go buildinfo: build version: %s", build_version.decode("utf-8"))
|
||||
|
||||
modinfo = read_go_slice(elf, modinfo_address)
|
||||
if modinfo:
|
||||
if modinfo[-0x11] == ord("\n"):
|
||||
# Strip module framing: sentinel strings delimiting the module info.
|
||||
# These are cmd/go/internal/modload/build.infoStart and infoEnd.
|
||||
# Which should probably be:
|
||||
# infoStart, _ = hex.DecodeString("3077af0c9274080241e1c107e6d618e6")
|
||||
# infoEnd, _ = hex.DecodeString("f932433186182072008242104116d8f2")
|
||||
modinfo = modinfo[0x10:-0x10]
|
||||
logger.debug("go buildinfo: modinfo: %s", modinfo.decode("utf-8"))
|
||||
|
||||
if not modinfo:
|
||||
return None
|
||||
|
||||
for key, os in GOOS_TO_OS.items():
|
||||
# Brute force the k-v pair, like `GOOS=linux`,
|
||||
# rather than try to parse the data, which would be fragile.
|
||||
if (b"GOOS=" + key) in modinfo:
|
||||
logger.debug("go buildinfo: found os: %s", os)
|
||||
return os
|
||||
|
||||
return None
|
||||
|
||||
|
||||
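For reference, the 16-byte header described in the comments above can be parsed in isolation like this (a minimal sketch; parse_buildinfo_header is a hypothetical helper, not part of capa):

import struct

BUILDINFO_MAGIC = b"\xFF Go buildinf:"  # 14-byte buildInfoMagic

def parse_buildinfo_header(buf: bytes, index: int):
    # pointer size (1 byte) and flags (1 byte) immediately follow the magic
    psize, flags = struct.unpack_from("<bb", buf, index + len(BUILDINFO_MAGIC))
    is_big_endian = bool(flags & 0b01)
    has_inline_strings = bool(flags & 0b10)
    return psize, is_big_endian, has_inline_strings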
def guess_os_from_go_source(elf: ELF) -> Optional[OS]:
|
||||
"""
|
||||
In a binary compiled by Go, runtime metadata may contain
|
||||
references to the source filenames, including the
|
||||
src/runtime/os_* files, whose name indicates the
|
||||
target operating system.
|
||||
|
||||
Confirm the given ELF seems to be built by Go,
|
||||
and then look for strings that look like
|
||||
Go source filenames.
|
||||
|
||||
This strategy is derived from GoReSym.
|
||||
"""
|
||||
if not is_go_binary(elf):
|
||||
return None
|
||||
|
||||
for phdr in elf.program_headers:
|
||||
buf = phdr.buf
|
||||
NEEDLE_OS = b"/src/runtime/os_"
|
||||
try:
|
||||
index = buf.index(NEEDLE_OS)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
rest = buf[index + len(NEEDLE_OS) : index + len(NEEDLE_OS) + 32]
|
||||
filename = rest.partition(b".go")[0].decode("utf-8")
|
||||
logger.debug("go source: filename: /src/runtime/os_%s.go", filename)
|
||||
|
||||
# via: https://cs.opensource.google/go/go/+/master:src/runtime/;bpv=1;bpt=0
|
||||
# candidates today:
|
||||
# - aix
|
||||
# - android
|
||||
# - darwin
|
||||
# - darwin_arm64
|
||||
# - dragonfly
|
||||
# - freebsd
|
||||
# - freebsd2
|
||||
# - freebsd_amd64
|
||||
# - freebsd_arm
|
||||
# - freebsd_arm64
|
||||
# - freebsd_noauxv
|
||||
# - freebsd_riscv64
|
||||
# - illumos
|
||||
# - js
|
||||
# - linux
|
||||
# - linux_arm
|
||||
# - linux_arm64
|
||||
# - linux_be64
|
||||
# - linux_generic
|
||||
# - linux_loong64
|
||||
# - linux_mips64x
|
||||
# - linux_mipsx
|
||||
# - linux_noauxv
|
||||
# - linux_novdso
|
||||
# - linux_ppc64x
|
||||
# - linux_riscv64
|
||||
# - linux_s390x
|
||||
# - linux_x86
|
||||
# - netbsd
|
||||
# - netbsd_386
|
||||
# - netbsd_amd64
|
||||
# - netbsd_arm
|
||||
# - netbsd_arm64
|
||||
# - nonopenbsd
|
||||
# - only_solaris
|
||||
# - openbsd
|
||||
# - openbsd_arm
|
||||
# - openbsd_arm64
|
||||
# - openbsd_libc
|
||||
# - openbsd_mips64
|
||||
# - openbsd_syscall
|
||||
# - openbsd_syscall1
|
||||
# - openbsd_syscall2
|
||||
# - plan9
|
||||
# - plan9_arm
|
||||
# - solaris
|
||||
# - unix
|
||||
# - unix_nonlinux
|
||||
# - wasip1
|
||||
# - wasm
|
||||
# - windows
|
||||
# - windows_arm
|
||||
# - windows_arm64
|
||||
|
||||
OS_FILENAME_TO_OS = {
|
||||
"aix": OS.AIX,
|
||||
"android": OS.ANDROID,
|
||||
"dragonfly": OS.DRAGONFLYBSD,
|
||||
"freebsd": OS.FREEBSD,
|
||||
"freebsd2": OS.FREEBSD,
|
||||
"freebsd_": OS.FREEBSD,
|
||||
"illumos": OS.ILLUMOS,
|
||||
"linux": OS.LINUX,
|
||||
"netbsd": OS.NETBSD,
|
||||
"only_solaris": OS.SOLARIS,
|
||||
"openbsd": OS.OPENBSD,
|
||||
"solaris": OS.SOLARIS,
|
||||
"unix_nonlinux": OS.UNIX,
|
||||
}
|
||||
|
||||
for prefix, os in OS_FILENAME_TO_OS.items():
|
||||
if filename.startswith(prefix):
|
||||
return os
|
||||
|
||||
for phdr in elf.program_headers:
|
||||
buf = phdr.buf
|
||||
NEEDLE_RT0 = b"/src/runtime/rt0_"
|
||||
try:
|
||||
index = buf.index(NEEDLE_RT0)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
rest = buf[index + len(NEEDLE_RT0) : index + len(NEEDLE_RT0) + 32]
|
||||
filename = rest.partition(b".s")[0].decode("utf-8")
|
||||
logger.debug("go source: filename: /src/runtime/rt0_%s.s", filename)
|
||||
|
||||
# via: https://cs.opensource.google/go/go/+/master:src/runtime/;bpv=1;bpt=0
|
||||
# candidates today:
|
||||
# - aix_ppc64
|
||||
# - android_386
|
||||
# - android_amd64
|
||||
# - android_arm
|
||||
# - android_arm64
|
||||
# - darwin_amd64
|
||||
# - darwin_arm64
|
||||
# - dragonfly_amd64
|
||||
# - freebsd_386
|
||||
# - freebsd_amd64
|
||||
# - freebsd_arm
|
||||
# - freebsd_arm64
|
||||
# - freebsd_riscv64
|
||||
# - illumos_amd64
|
||||
# - ios_amd64
|
||||
# - ios_arm64
|
||||
# - js_wasm
|
||||
# - linux_386
|
||||
# - linux_amd64
|
||||
# - linux_arm
|
||||
# - linux_arm64
|
||||
# - linux_loong64
|
||||
# - linux_mips64x
|
||||
# - linux_mipsx
|
||||
# - linux_ppc64
|
||||
# - linux_ppc64le
|
||||
# - linux_riscv64
|
||||
# - linux_s390x
|
||||
# - netbsd_386
|
||||
# - netbsd_amd64
|
||||
# - netbsd_arm
|
||||
# - netbsd_arm64
|
||||
# - openbsd_386
|
||||
# - openbsd_amd64
|
||||
# - openbsd_arm
|
||||
# - openbsd_arm64
|
||||
# - openbsd_mips64
|
||||
# - openbsd_ppc64
|
||||
# - openbsd_riscv64
|
||||
# - plan9_386
|
||||
# - plan9_amd64
|
||||
# - plan9_arm
|
||||
# - solaris_amd64
|
||||
# - wasip1_wasm
|
||||
# - windows_386
|
||||
# - windows_amd64
|
||||
# - windows_arm
|
||||
# - windows_arm64
|
||||
|
||||
RT0_FILENAME_TO_OS = {
|
||||
"aix": OS.AIX,
|
||||
"android": OS.ANDROID,
|
||||
"dragonfly": OS.DRAGONFLYBSD,
|
||||
"freebsd": OS.FREEBSD,
|
||||
"illumos": OS.ILLUMOS,
|
||||
"linux": OS.LINUX,
|
||||
"netbsd": OS.NETBSD,
|
||||
"openbsd": OS.OPENBSD,
|
||||
"solaris": OS.SOLARIS,
|
||||
}
|
||||
|
||||
for prefix, os in RT0_FILENAME_TO_OS.items():
|
||||
if filename.startswith(prefix):
|
||||
return os
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def guess_os_from_vdso_strings(elf: ELF) -> Optional[OS]:
|
||||
"""
|
||||
The "vDSO" (virtual dynamic shared object) is a small shared
|
||||
library that the kernel automatically maps into the address space
|
||||
of all user-space applications.
|
||||
|
||||
Some statically linked executables include small dynamic linker
|
||||
routines that find these vDSO symbols, using the ASCII
|
||||
symbol name and version. We can therefore recognize the pairs
|
||||
(symbol, version) to guess that the binary targets Linux.
|
||||
"""
|
||||
for phdr in elf.program_headers:
|
||||
buf = phdr.buf
|
||||
|
||||
# We don't really use the arch, but it's interesting for documentation.
|
||||
# I suppose we could restrict the arch here to what's in the ELF header,
|
||||
# but that's even more work. Let's see if this is sufficient.
|
||||
for arch, symbol, version in (
|
||||
# via: https://man7.org/linux/man-pages/man7/vdso.7.html
|
||||
("arm", b"__vdso_gettimeofday", b"LINUX_2.6"),
|
||||
("arm", b"__vdso_clock_gettime", b"LINUX_2.6"),
|
||||
("aarch64", b"__kernel_rt_sigreturn", b"LINUX_2.6.39"),
|
||||
("aarch64", b"__kernel_gettimeofday", b"LINUX_2.6.39"),
|
||||
("aarch64", b"__kernel_clock_gettime", b"LINUX_2.6.39"),
|
||||
("aarch64", b"__kernel_clock_getres", b"LINUX_2.6.39"),
|
||||
("mips", b"__kernel_gettimeofday", b"LINUX_2.6"),
|
||||
("mips", b"__kernel_clock_gettime", b"LINUX_2.6"),
|
||||
("ia64", b"__kernel_sigtramp", b"LINUX_2.5"),
|
||||
("ia64", b"__kernel_syscall_via_break", b"LINUX_2.5"),
|
||||
("ia64", b"__kernel_syscall_via_epc", b"LINUX_2.5"),
|
||||
("ppc/32", b"__kernel_clock_getres", b"LINUX_2.6.15"),
|
||||
("ppc/32", b"__kernel_clock_gettime", b"LINUX_2.6.15"),
|
||||
("ppc/32", b"__kernel_clock_gettime64", b"LINUX_5.11"),
|
||||
("ppc/32", b"__kernel_datapage_offset", b"LINUX_2.6.15"),
|
||||
("ppc/32", b"__kernel_get_syscall_map", b"LINUX_2.6.15"),
|
||||
("ppc/32", b"__kernel_get_tbfreq", b"LINUX_2.6.15"),
|
||||
("ppc/32", b"__kernel_getcpu", b"LINUX_2.6.15"),
|
||||
("ppc/32", b"__kernel_gettimeofday", b"LINUX_2.6.15"),
|
||||
("ppc/32", b"__kernel_sigtramp_rt32", b"LINUX_2.6.15"),
|
||||
("ppc/32", b"__kernel_sigtramp32", b"LINUX_2.6.15"),
|
||||
("ppc/32", b"__kernel_sync_dicache", b"LINUX_2.6.15"),
|
||||
("ppc/32", b"__kernel_sync_dicache_p5", b"LINUX_2.6.15"),
|
||||
("ppc/64", b"__kernel_clock_getres", b"LINUX_2.6.15"),
|
||||
("ppc/64", b"__kernel_clock_gettime", b"LINUX_2.6.15"),
|
||||
("ppc/64", b"__kernel_datapage_offset", b"LINUX_2.6.15"),
|
||||
("ppc/64", b"__kernel_get_syscall_map", b"LINUX_2.6.15"),
|
||||
("ppc/64", b"__kernel_get_tbfreq", b"LINUX_2.6.15"),
|
||||
("ppc/64", b"__kernel_getcpu", b"LINUX_2.6.15"),
|
||||
("ppc/64", b"__kernel_gettimeofday", b"LINUX_2.6.15"),
|
||||
("ppc/64", b"__kernel_sigtramp_rt64", b"LINUX_2.6.15"),
|
||||
("ppc/64", b"__kernel_sync_dicache", b"LINUX_2.6.15"),
|
||||
("ppc/64", b"__kernel_sync_dicache_p5", b"LINUX_2.6.15"),
|
||||
("riscv", b"__vdso_rt_sigreturn", b"LINUX_4.15"),
|
||||
("riscv", b"__vdso_gettimeofday", b"LINUX_4.15"),
|
||||
("riscv", b"__vdso_clock_gettime", b"LINUX_4.15"),
|
||||
("riscv", b"__vdso_clock_getres", b"LINUX_4.15"),
|
||||
("riscv", b"__vdso_getcpu", b"LINUX_4.15"),
|
||||
("riscv", b"__vdso_flush_icache", b"LINUX_4.15"),
|
||||
("s390", b"__kernel_clock_getres", b"LINUX_2.6.29"),
|
||||
("s390", b"__kernel_clock_gettime", b"LINUX_2.6.29"),
|
||||
("s390", b"__kernel_gettimeofday", b"LINUX_2.6.29"),
|
||||
("superh", b"__kernel_rt_sigreturn", b"LINUX_2.6"),
|
||||
("superh", b"__kernel_sigreturn", b"LINUX_2.6"),
|
||||
("superh", b"__kernel_vsyscall", b"LINUX_2.6"),
|
||||
("i386", b"__kernel_sigreturn", b"LINUX_2.5"),
|
||||
("i386", b"__kernel_rt_sigreturn", b"LINUX_2.5"),
|
||||
("i386", b"__kernel_vsyscall", b"LINUX_2.5"),
|
||||
("i386", b"__vdso_clock_gettime", b"LINUX_2.6"),
|
||||
("i386", b"__vdso_gettimeofday", b"LINUX_2.6"),
|
||||
("i386", b"__vdso_time", b"LINUX_2.6"),
|
||||
("x86-64", b"__vdso_clock_gettime", b"LINUX_2.6"),
|
||||
("x86-64", b"__vdso_getcpu", b"LINUX_2.6"),
|
||||
("x86-64", b"__vdso_gettimeofday", b"LINUX_2.6"),
|
||||
("x86-64", b"__vdso_time", b"LINUX_2.6"),
|
||||
("x86/32", b"__vdso_clock_gettime", b"LINUX_2.6"),
|
||||
("x86/32", b"__vdso_getcpu", b"LINUX_2.6"),
|
||||
("x86/32", b"__vdso_gettimeofday", b"LINUX_2.6"),
|
||||
("x86/32", b"__vdso_time", b"LINUX_2.6"),
|
||||
):
|
||||
if symbol in buf and version in buf:
|
||||
logger.debug("vdso string: %s %s %s", arch, symbol.decode("ascii"), version.decode("ascii"))
|
||||
return OS.LINUX
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def detect_elf_os(f) -> str:
|
||||
"""
|
||||
f: type Union[BinaryIO, IDAIO, GHIDRAIO]
|
||||
@@ -1023,6 +1523,27 @@ def detect_elf_os(f) -> str:
|
||||
logger.warning("Error guessing OS from symbol table: %s", e)
|
||||
symtab_guess = None
|
||||
|
||||
try:
|
||||
goos_guess = guess_os_from_go_buildinfo(elf)
|
||||
logger.debug("guess: Go buildinfo: %s", goos_guess)
|
||||
except Exception as e:
|
||||
logger.warning("Error guessing OS from Go buildinfo: %s", e)
|
||||
goos_guess = None
|
||||
|
||||
try:
|
||||
gosrc_guess = guess_os_from_go_source(elf)
|
||||
logger.debug("guess: Go source: %s", gosrc_guess)
|
||||
except Exception as e:
|
||||
logger.warning("Error guessing OS from Go source path: %s", e)
|
||||
gosrc_guess = None
|
||||
|
||||
try:
|
||||
vdso_guess = guess_os_from_vdso_strings(elf)
|
||||
logger.debug("guess: vdso strings: %s", vdso_guess)
|
||||
except Exception as e:
|
||||
logger.warning("Error guessing OS from vdso strings: %s", e)
|
||||
symtab_guess = None
|
||||
|
||||
ret = None
|
||||
|
||||
if osabi_guess:
|
||||
@@ -1046,11 +1567,24 @@ def detect_elf_os(f) -> str:
|
||||
elif symtab_guess:
|
||||
ret = symtab_guess
|
||||
|
||||
elif goos_guess:
|
||||
ret = goos_guess
|
||||
|
||||
elif gosrc_guess:
|
||||
# prefer goos_guess to this method,
|
||||
# which is just string interpretation.
|
||||
ret = gosrc_guess
|
||||
|
||||
elif ident_guess:
|
||||
# at the bottom because we don't trust this too much
|
||||
# due to potential for bugs with cross-compilation.
|
||||
ret = ident_guess
|
||||
|
||||
elif vdso_guess:
|
||||
# at the bottom because this is just scanning strings,
|
||||
# which isn't very authoritative.
|
||||
ret = vdso_guess
|
||||
|
||||
return ret.value if ret is not None else "unknown"
|
||||
|
||||
|
||||
|
||||
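A minimal usage sketch of the detection entry point above (the sample path is a placeholder; the import assumes capa's existing module layout):

from pathlib import Path

from capa.features.extractors.elf import detect_elf_os

with Path("/tmp/sample.elf").open("rb") as f:
    # returns a lowercase OS string such as "linux" or "android", or "unknown"
    print(detect_elf_os(f))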
@@ -8,6 +8,7 @@
import sys
import logging
import datetime
import contextlib
from typing import Set, Dict, List, Optional
from pathlib import Path
@@ -154,6 +155,18 @@ def get_workspace(path: Path, input_format: str, sigpaths: List[Path]):
viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths])

with contextlib.suppress(Exception):
# unfortunately viv raises a raw Exception (not any subclass).
# This happens when the module isn't found, such as with a viv upgrade.
#
# Remove the symbolic switch case solver.
# This is only enabled for ELF files, not PE files.
# During the following performance investigation, this analysis module
# had some terrible worst-case behavior.
# We can put up with slightly worse CFG reconstruction in order to avoid this.
# https://github.com/mandiant/capa/issues/1989#issuecomment-1948022767
vw.delFuncAnalysisModule("vivisect.analysis.generic.symswitchcase")

vw.analyze()

logger.debug("%s", get_meta_str(vw))
@@ -5,7 +5,7 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
__version__ = "7.0.1"
__version__ = "7.1.0"


def get_major_version():
@@ -91,6 +91,12 @@ For more details about creating and using virtual environments, check out the [v
##### Install development dependencies

When developing capa, please use the pinned dependencies found in `requirements.txt`.
This ensures that everyone has the exact same, reproducible environment.
Please install these dependencies before installing capa (from source or from PyPI):

`$ pip install -r requirements.txt`

We use the following tools to ensure consistent code style and formatting:
- [black](https://github.com/psf/black) code formatter
- [isort](https://pypi.org/project/isort/) code formatter
@@ -101,7 +107,8 @@ We use the following tools to ensure consistent code style and formatting:

To install these development dependencies, run:

`$ pip install -e /local/path/to/src[dev]`
- `$ pip install -e /local/path/to/src[dev]` or
- `$ pip install -e /local/path/to/src[dev,scripts]` to also install all script dependencies

We use [pre-commit](https://pre-commit.com/) so that it's trivial to run the same linters & configuration locally as in CI.
118 pyproject.toml
@@ -32,25 +32,76 @@ classifiers = [
|
||||
"Topic :: Security",
|
||||
]
|
||||
dependencies = [
|
||||
"tqdm==4.66.4",
|
||||
"pyyaml==6.0.1",
|
||||
"tabulate==0.9.0",
|
||||
"colorama==0.4.6",
|
||||
"termcolor==2.4.0",
|
||||
"wcwidth==0.2.13",
|
||||
"ida-settings==2.1.0",
|
||||
"viv-utils[flirt]==0.7.9",
|
||||
"networkx==3.1",
|
||||
"ruamel.yaml==0.18.6",
|
||||
"vivisect==1.1.1",
|
||||
"pefile==2023.2.7",
|
||||
"pyelftools==0.31",
|
||||
"dnfile==0.14.1",
|
||||
"dncil==1.0.2",
|
||||
"pydantic==2.7.1",
|
||||
"rich==13.7.1",
|
||||
"humanize==4.9.0",
|
||||
"protobuf==5.27.0",
|
||||
# ---------------------------------------
|
||||
# As a library, capa uses lower version bounds
|
||||
# when specifying its dependencies. This lets
|
||||
# other programs that use capa (and other libraries)
|
||||
# find a compatible set of dependency versions.
|
||||
#
|
||||
# We can optionally pin to specific versions or
|
||||
# limit the upper bound when there's a good reason;
|
||||
# but the default is to assume all greater versions
|
||||
# probably work with capa until proven otherwise.
|
||||
#
|
||||
# The following link provides good background:
|
||||
# https://iscinumpy.dev/post/bound-version-constraints/
|
||||
#
|
||||
# When we develop capa, and when we distribute it as
|
||||
# a standalone binary, we'll use specific versions
|
||||
# that are pinned in requirements.txt.
|
||||
# But the requirements for a library are specified here
|
||||
# and are looser.
|
||||
#
|
||||
# Related discussions:
|
||||
#
|
||||
# - https://github.com/mandiant/capa/issues/2053
|
||||
# - https://github.com/mandiant/capa/pull/2059
|
||||
# - https://github.com/mandiant/capa/pull/2079
|
||||
#
|
||||
# ---------------------------------------
|
||||
# The following dependency versions were imported
|
||||
# during June 2024 by truncating specific versions to
|
||||
# their major-most version (major version when possible,
|
||||
# or minor otherwise).
|
||||
# As specific constraints are identified, please provide
|
||||
# comments and context.
|
||||
"tqdm>=4",
|
||||
"pyyaml>=6",
|
||||
"tabulate>=0.9",
|
||||
"colorama>=0.4",
|
||||
"termcolor>=2",
|
||||
"wcwidth>=0.2",
|
||||
"ida-settings>=2",
|
||||
"ruamel.yaml>=0.18",
|
||||
"pefile>=2023.2.7",
|
||||
"pyelftools>=0.31",
|
||||
"pydantic>=2",
|
||||
"rich>=13",
|
||||
"humanize>=4",
|
||||
"protobuf>=5",
|
||||
|
||||
# ---------------------------------------
|
||||
# Dependencies that we develop
|
||||
#
|
||||
# These dependencies are often actively influenced by capa,
|
||||
# so we provide a minimum patch version that includes the
|
||||
# latest bug fixes we need here.
|
||||
"viv-utils[flirt]>=0.7.9",
|
||||
"vivisect>=1.1.1",
|
||||
"dncil>=1.0.2",
|
||||
|
||||
# ---------------------------------------
|
||||
# Dependencies with version caps
|
||||
#
|
||||
# These dependencies must not exceed the version cap,
|
||||
# typically due to dropping support for python releases
|
||||
# we still support.
|
||||
|
||||
# TODO(williballenthin): networkx 3.2 doesn't support python 3.8 while capa does.
|
||||
# https://github.com/mandiant/capa/issues/1966
|
||||
"networkx>=3,<3.2",
|
||||
|
||||
"dnfile>=0.15.0",
|
||||
]
|
||||
dynamic = ["version"]
|
||||
|
||||
@@ -63,6 +114,10 @@ namespaces = false
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
# Dev and build dependencies are not relaxed because
|
||||
# we want all developer environments to be consistent.
|
||||
# These dependencies are not used in production environments
|
||||
# and should not conflict with other libraries/tooling.
|
||||
"pre-commit==3.5.0",
|
||||
"pytest==8.0.0",
|
||||
"pytest-sugar==1.0.0",
|
||||
@@ -79,14 +134,12 @@ dev = [
|
||||
"flake8-simplify==0.21.0",
|
||||
"flake8-use-pathlib==0.3.0",
|
||||
"flake8-copyright==0.2.4",
|
||||
"ruff==0.4.7",
|
||||
"ruff==0.4.8",
|
||||
"black==24.4.2",
|
||||
"isort==5.13.2",
|
||||
"mypy==1.10.0",
|
||||
"psutil==5.9.2",
|
||||
"stix2==3.0.1",
|
||||
"requests==2.31.0",
|
||||
"mypy-protobuf==3.6.0",
|
||||
"PyGithub==2.3.0",
|
||||
# type stubs for mypy
|
||||
"types-backports==0.1.3",
|
||||
"types-colorama==0.4.15.11",
|
||||
@@ -99,10 +152,21 @@ dev = [
|
||||
"deptry==0.16.1"
|
||||
]
|
||||
build = [
|
||||
"pyinstaller==6.7.0",
|
||||
"setuptools==69.5.1",
|
||||
# Dev and build dependencies are not relaxed because
|
||||
# we want all developer environments to be consistent.
|
||||
# These dependencies are not used in production environments
|
||||
# and should not conflict with other libraries/tooling.
|
||||
"pyinstaller==6.8.0",
|
||||
"setuptools==70.0.0",
|
||||
"build==1.2.1"
|
||||
]
|
||||
scripts = [
|
||||
"jschema_to_python==1.2.3",
|
||||
"psutil==5.9.2",
|
||||
"stix2==3.0.1",
|
||||
"sarif_om==1.0.4",
|
||||
"requests==2.31.0",
|
||||
]
|
||||
|
||||
[tool.deptry]
|
||||
extend_exclude = [
|
||||
@@ -152,6 +216,7 @@ DEP002 = [
|
||||
"mypy",
|
||||
"mypy-protobuf",
|
||||
"pre-commit",
|
||||
"PyGithub",
|
||||
"pyinstaller",
|
||||
"pytest",
|
||||
"pytest-cov",
|
||||
@@ -175,6 +240,9 @@ DEP003 = [
|
||||
"typing_extensions" # TODO(s-ff): remove when Python 3.9 is deprecated, see #1699
|
||||
]
|
||||
|
||||
[tool.deptry.package_module_name_map]
|
||||
PyGithub = "github"
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/mandiant/capa"
|
||||
Repository = "https://github.com/mandiant/capa.git"
|
||||
|
||||
46 requirements.txt Normal file
@@ -0,0 +1,46 @@
# Dependencies with specific version constraints
# used during development and building the standalone executables.
# For these environments, use `pip install -r requirements.txt`
# before installing capa from source/pypi. This will ensure
# the following specific versions are used.
#
# Initially generated via: pip freeze | grep -v -- "-e"
# Kept up to date by dependabot.
annotated-types==0.7.0
colorama==0.4.6
cxxfilt==0.2.2
dncil==1.0.2
dnfile==0.15.0
funcy==2.0
humanize==4.9.0
ida-netnode==3.0
ida-settings==2.1.0
intervaltree==3.1.0
markdown-it-py==3.0.0
mdurl==0.1.2
msgpack==1.0.8
networkx==3.1
pefile==2023.2.7
pip==24.0
protobuf==5.27.1
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycparser==2.22
pydantic==2.7.3
pydantic-core==2.18.4
pyelftools==0.31
pygments==2.18.0
python-flirt==0.8.10
pyyaml==6.0.1
rich==13.7.1
ruamel-yaml==0.18.6
ruamel-yaml-clib==0.2.8
setuptools==70.0.0
six==1.16.0
sortedcontainers==2.4.0
tabulate==0.9.0
termcolor==2.4.0
tqdm==4.66.4
viv-utils==0.7.11
vivisect==1.1.1
wcwidth==0.2.13
2 rules
Submodule rules updated: 9e0ffdf7c5...e63c454fbb
358 scripts/capa2sarif.py Normal file
@@ -0,0 +1,358 @@
|
||||
# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
"""
|
||||
Convert capa json output to sarif schema
|
||||
usage: capa2sarif.py [-h] [-g] [-r] [-t TAG] [--version] capa_output
|
||||
|
||||
Capa to SARIF analysis file
|
||||
positional arguments:
|
||||
capa_output Path to capa JSON output file
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
--version show program's version number and exit
|
||||
-t TAG, --tag TAG filter on rule meta field values (ruleid)
|
||||
|
||||
Requires:
|
||||
- sarif_om 1.0.4
|
||||
- jschema_to_python 1.2.3
|
||||
"""
|
||||
import sys
|
||||
import json
|
||||
import logging
|
||||
import argparse
|
||||
from typing import List, Optional
|
||||
from pathlib import Path
|
||||
|
||||
from capa.version import __version__
|
||||
|
||||
logger = logging.getLogger("capa2sarif")
|
||||
|
||||
# Dependencies
|
||||
try:
|
||||
from sarif_om import Run, Tool, SarifLog, ToolComponent
|
||||
except ImportError as e:
|
||||
logger.error(
|
||||
"Required import `sarif_om` is not installed. This is solved by installing `python3 -m pip install sarif_om>=1.0.4`. %s",
|
||||
e,
|
||||
)
|
||||
exit(-4)
|
||||
|
||||
try:
|
||||
from jschema_to_python.to_json import to_json
|
||||
except ImportError as e:
|
||||
logger.error(
|
||||
"Required import `jschema_to_python` is not installed. This is solved by installing `python3 -m pip install jschema_to_python>=1.2.3`, %s",
|
||||
e,
|
||||
)
|
||||
exit(-4)
|
||||
|
||||
|
||||
def _parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description="Capa to SARIF analysis file")
|
||||
|
||||
# Positional argument
|
||||
parser.add_argument("capa_output", help="Path to capa JSON output file")
|
||||
|
||||
# Optional arguments
|
||||
parser.add_argument(
|
||||
"-g",
|
||||
"--ghidra-compat",
|
||||
action="store_true",
|
||||
help="Compatibility for Ghidra 11.0.X",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-r",
|
||||
"--radare-compat",
|
||||
action="store_true",
|
||||
help="Compatibility for Radare r2sarif plugin v2.0",
|
||||
)
|
||||
parser.add_argument("-t", "--tag", help="Filter on rule meta field values (ruleid)")
|
||||
parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main() -> int:
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
|
||||
args = _parse_args()
|
||||
|
||||
try:
|
||||
with Path(args.capa_output).open() as capa_output:
|
||||
json_data = json.load(capa_output)
|
||||
except ValueError:
|
||||
logger.error("Input data was not valid JSON, input should be a capa json output file.")
|
||||
return -1
|
||||
except json.JSONDecodeError:
|
||||
# An exception has occurred
|
||||
logger.error("Input data was not valid JSON, input should be a capa json output file.")
|
||||
return -2
|
||||
|
||||
# Marshall json into Sarif
|
||||
# Create baseline sarif structure to be populated from json data
|
||||
sarif_structure: Optional[dict] = _sarif_boilerplate(json_data["meta"], json_data["rules"])
|
||||
if sarif_structure is None:
|
||||
logger.error("An Error has occured creating default sarif structure.")
|
||||
return -3
|
||||
|
||||
_populate_artifact(sarif_structure, json_data["meta"])
|
||||
_populate_invocations(sarif_structure, json_data["meta"])
|
||||
_populate_results(sarif_structure, json_data["rules"], args.ghidra_compat)
|
||||
|
||||
if args.ghidra_compat:
|
||||
# Ghidra can't handle this structure as of 11.0.x
|
||||
if "invocations" in sarif_structure["runs"][0]:
|
||||
del sarif_structure["runs"][0]["invocations"]
|
||||
|
||||
# artifacts must include a description as well with a text field.
|
||||
if "artifacts" in sarif_structure["runs"][0]:
|
||||
sarif_structure["runs"][0]["artifacts"][0]["description"] = {"text": "placeholder"}
|
||||
|
||||
# For better compliance with the Ghidra table, iterate through properties['additionalProperties']
|
||||
"""
|
||||
"additionalProperties": {
|
||||
"to": "<hex addr>",
|
||||
"offset": 0,
|
||||
"primary": true,
|
||||
"index": <>"",
|
||||
"kind": "<kind column value>",
|
||||
"opIndex": 0,
|
||||
"sourceType": ""
|
||||
}
|
||||
"""
|
||||
|
||||
if args.radare_compat:
|
||||
# Add just enough for passing tests
|
||||
_add_filler_optional(json_data, sarif_structure)
|
||||
|
||||
print(json.dumps(sarif_structure, indent=4)) # noqa: T201
|
||||
return 0
|
||||
|
||||
|
||||
def _sarif_boilerplate(data_meta: dict, data_rules: dict) -> Optional[dict]:
|
||||
# Only track rules that appear in this log, not full 1k
|
||||
rules = []
|
||||
# Parse rules from parsed sarif structure
|
||||
for key in data_rules:
|
||||
# Use the ATT&CK ID as the default; if both ATT&CK and MBC exist, use only ATT&CK; if neither exists, fall back to the rule name for the ruleID
# (using the long rule name as the ruleID is not good practice)
|
||||
attack_length = len(data_rules[key]["meta"]["attack"])
|
||||
mbc_length = len(data_rules[key]["meta"]["mbc"])
|
||||
if attack_length or mbc_length:
|
||||
id = (
|
||||
data_rules[key]["meta"]["attack"][0]["id"]
|
||||
if attack_length > 0
|
||||
else data_rules[key]["meta"]["mbc"][0]["id"]
|
||||
)
|
||||
else:
|
||||
id = data_rules[key]["meta"]["name"]
|
||||
|
||||
# Append current rule
|
||||
rules.append(
|
||||
{
|
||||
# Default to attack identifier, fall back to MBC, mainly relevant if both are present
|
||||
"id": id,
|
||||
"name": data_rules[key]["meta"]["name"],
|
||||
"shortDescription": {"text": data_rules[key]["meta"]["name"]},
|
||||
"messageStrings": {"default": {"text": data_rules[key]["meta"]["name"]}},
|
||||
"properties": {
|
||||
"namespace": data_rules[key]["meta"]["namespace"] if "namespace" in data_rules[key]["meta"] else [],
|
||||
"scopes": data_rules[key]["meta"]["scopes"],
|
||||
"references": data_rules[key]["meta"]["references"],
|
||||
"lib": data_rules[key]["meta"]["lib"],
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
tool = Tool(
|
||||
driver=ToolComponent(
|
||||
name="Capa",
|
||||
version=__version__,
|
||||
information_uri="https://github.com/mandiant/capa",
|
||||
rules=rules,
|
||||
)
|
||||
)
|
||||
|
||||
# Create a SARIF Log object, populate with a single run
|
||||
sarif_log = SarifLog(
|
||||
version="2.1.0",
|
||||
schema_uri="https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json",
|
||||
runs=[Run(tool=tool, results=[], artifacts=[], invocations=[])],
|
||||
)
|
||||
|
||||
# Convert the SARIF log to a dictionary and then to a JSON string
|
||||
try:
|
||||
sarif_outline = json.loads(to_json(sarif_log))
|
||||
except json.JSONDecodeError:
|
||||
# An exception has occurred
|
||||
return None
|
||||
|
||||
return sarif_outline
|
||||
|
||||
|
||||
def _populate_artifact(sarif_log: dict, meta_data: dict) -> None:
|
||||
"""
|
||||
@param sarif_log: dict - sarif data structure including runs
|
||||
@param meta_data: dict - Capa meta output
|
||||
@returns None, updates sarif_log via side-effects
|
||||
"""
|
||||
sample = meta_data["sample"]
|
||||
artifact = {
|
||||
"location": {"uri": sample["path"]},
|
||||
"roles": ["analysisTarget"],
|
||||
"hashes": {
|
||||
"md5": sample["md5"],
|
||||
"sha-1": sample["sha1"],
|
||||
"sha-256": sample["sha256"],
|
||||
},
|
||||
}
|
||||
sarif_log["runs"][0]["artifacts"].append(artifact)
|
||||
|
||||
|
||||
def _populate_invocations(sarif_log: dict, meta_data: dict) -> None:
|
||||
"""
|
||||
@param sarif_log: dict - sarif data structure including runs
|
||||
@param meta_data: dict - Capa meta output
|
||||
@returns None, updates sarif_log via side-effects
|
||||
"""
|
||||
analysis_time = meta_data["timestamp"]
|
||||
argv = meta_data["argv"]
|
||||
analysis = meta_data["analysis"]
|
||||
invoke = {
|
||||
"commandLine": "capa " + " ".join(argv),
|
||||
"arguments": argv if len(argv) > 0 else [],
|
||||
# Format in Zulu time, this may require a conversion from local timezone
|
||||
"endTimeUtc": f"{analysis_time}Z",
|
||||
"executionSuccessful": True,
|
||||
"properties": {
|
||||
"format": analysis["format"],
|
||||
"arch": analysis["arch"],
|
||||
"os": analysis["os"],
|
||||
"extractor": analysis["extractor"],
|
||||
"rule_location": analysis["rules"],
|
||||
"base_address": analysis["base_address"],
|
||||
},
|
||||
}
|
||||
sarif_log["runs"][0]["invocations"].append(invoke)
|
||||
|
||||
|
||||
def _enumerate_evidence(node: dict, related_count: int) -> List[dict]:
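    """
    Recursively collect SARIF relatedLocation entries for the features that satisfied a match.

    @param node: dict - a single match-detail node from the capa result document
    @param related_count: int - id assigned to the next related location
    @returns list of relatedLocation dicts, one per absolute address of a matched feature
    """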
    related_locations = []
    if node.get("success") and node.get("node", {}).get("type") != "statement":
        label = ""
        if node.get("node", {}).get("type") == "feature":
            if node.get("node", {}).get("feature", {}).get("type") == "api":
                label = "api: " + node.get("node", {}).get("feature", {}).get("api")
            elif node.get("node", {}).get("feature", {}).get("type") == "match":
                label = "match: " + node.get("node", {}).get("feature", {}).get("match")
            elif node.get("node", {}).get("feature", {}).get("type") == "number":
                label = f"number: {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('number')})"
            elif node.get("node", {}).get("feature", {}).get("type") == "offset":
                label = f"offset: {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('offset')})"
            elif node.get("node", {}).get("feature", {}).get("type") == "mnemonic":
                label = f"mnemonic: {node.get('node', {}).get('feature', {}).get('mnemonic')}"
            elif node.get("node", {}).get("feature", {}).get("type") == "characteristic":
                label = f"characteristic: {node.get('node', {}).get('feature', {}).get('characteristic')}"
            elif node.get("node", {}).get("feature", {}).get("type") == "os":
                label = f"os: {node.get('node', {}).get('feature', {}).get('os')}"
            elif node.get("node", {}).get("feature", {}).get("type") == "operand number":
                label = f"operand: ({node.get('node', {}).get('feature', {}).get('index')}) {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('operand_number')})"
            else:
                logger.error(
                    "Not implemented %s",
                    node.get("node", {}).get("feature", {}).get("type"),
                )
                return []
        else:
            logger.error("Not implemented %s", node.get("node", {}).get("type"))
            return []

        for loc in node.get("locations", []):
            if loc["type"] != "absolute":
                continue

            related_locations.append(
                {
                    "id": related_count,
                    "message": {"text": label},
                    "physicalLocation": {"address": {"absoluteAddress": loc["value"]}},
                }
            )
            related_count += 1

    if node.get("success") and node.get("node", {}).get("type") == "statement":
        for child in node.get("children", []):
            related_locations += _enumerate_evidence(child, related_count)

    return related_locations


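# For reference, each relatedLocation produced above has roughly this shape
# (the label and address below are hypothetical):
#   {"id": 0, "message": {"text": "api: CreateFileW"}, "physicalLocation": {"address": {"absoluteAddress": 4198400}}}

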
def _populate_results(sarif_log: dict, data_rules: dict, ghidra_compat: bool) -> None:
    """
    @param sarif_log: dict - sarif data structure including runs
    @param data_rules: dict - rule matches from the Capa output
    @param ghidra_compat: bool - use Ghidra-compatible casing for level/kind and omit relatedLocations
    @returns None, updates sarif_log via side-effects
    """
    results = sarif_log["runs"][0]["results"]

    # Parse rules from the parsed capa result structure
    for key in data_rules:
        # Prefer the ATT&CK ID for the ruleId; when both ATT&CK and MBC are present, use only ATT&CK.
        # If neither is present, fall back to the rule name, even though a long name makes a poor ruleId.
        attack_length = len(data_rules[key]["meta"]["attack"])
        mbc_length = len(data_rules[key]["meta"]["mbc"])
        if attack_length or mbc_length:
            id = (
                data_rules[key]["meta"]["attack"][0]["id"]
                if attack_length > 0
                else data_rules[key]["meta"]["mbc"][0]["id"]
            )
        else:
            id = data_rules[key]["meta"]["name"]

        for address, details in data_rules[key]["matches"]:
            related_cnt = 0
            related_locations = _enumerate_evidence(details, related_cnt)

            res = {
                "ruleId": id,
                "level": "none" if not ghidra_compat else "NONE",
                "message": {"text": data_rules[key]["meta"]["name"]},
                "kind": "informational" if not ghidra_compat else "INFORMATIONAL",
                "locations": [
                    {
                        "physicalLocation": {
                            "address": {
                                "absoluteAddress": address["value"],
                            }
                        },
                    }
                ],
            }
            if not ghidra_compat:
                res["relatedLocations"] = related_locations

            results.append(res)


def _add_filler_optional(capa_result: dict, sarif_log: dict) -> None:
    """Update sarif file with just enough fields to pass radare tests"""
    base_address = capa_result["meta"]["analysis"]["base_address"]["value"]
    # Assume there is only one run, and one binary artifact
    artifact = sarif_log["runs"][0]["artifacts"][0]
    if "properties" not in artifact:
        artifact["properties"] = {}
    if "additionalProperties" not in artifact["properties"]:
        artifact["properties"]["additionalProperties"] = {}
    if "imageBase" not in artifact["properties"]["additionalProperties"]:
        artifact["properties"]["additionalProperties"]["imageBase"] = base_address


if __name__ == "__main__":
    sys.exit(main())
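
# Example invocation (illustrative; file names are hypothetical):
#   capa --json suspicious.exe_ > result.json
#   python scripts/capa2sarif.py result.json > result.sarif
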
Submodule tests/data updated: 3f5f77f945...3a7690178b
@@ -389,6 +389,8 @@ def get_data_path_by_name(name) -> Path:
        return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
    elif name.startswith("1038a2"):
        return CD / "data" / "1038a23daad86042c66bfe6c9d052d27048de9653bde5750dc0f240c792d9ac8.elf_"
    elif name.startswith("3da7c"):
        return CD / "data" / "3da7c2c70a2d93ac4643f20339d5c7d61388bddd77a4a5fd732311efad78e535.elf_"
    elif name.startswith("nested_typedef"):
        return CD / "data" / "dotnet" / "dd9098ff91717f4906afe9dafdfa2f52.exe_"
    elif name.startswith("nested_typeref"):

@@ -92,6 +92,12 @@ def test_elf_android_notes():
        assert capa.features.extractors.elf.detect_elf_os(f) == "android"


def test_elf_go_buildinfo():
    path = get_data_path_by_name("3da7c")
    with Path(path).open("rb") as f:
        assert capa.features.extractors.elf.detect_elf_os(f) == "linux"


def test_elf_parse_capa_pyinstaller_header():
    # error after misidentified large pydata section with address 0; fixed in #1454
    # compressed ELF header of capa-v5.1.0-linux

@@ -40,6 +40,10 @@ def get_rule_path():
    [
        pytest.param("capa2yara.py", [get_rules_path()]),
        pytest.param("capafmt.py", [get_rule_path()]),
        pytest.param(
            "capa2sarif.py",
            [Path(__file__).resolve().parent / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json"],
        ),
        # testing some variations of linter script
        pytest.param("lint.py", ["-t", "create directory", get_rules_path()]),
        # `create directory` rule has native and .NET example PEs
