Compare commits

...

26 Commits

Author SHA1 Message Date
Soufiane Fariss
413047a7e8 webui: initial mockup of capa-webui
This commit introduces capa-webui, a new web-based tool
to render capa's existing output format - the result document.

The current project structure is as follows -
- webui/index.html: the main HTML file that serves as the entry
point for the web application.
- scripts/main.js: includes event handlers and DOM manipulation code
- webui/assets/css/styles.css: contains the styles for the UI

The webui is meant to be used as a standalone static site, though for now
we are splitting it into multiple source files for ease of use.
We can release a standalone static index.html in the releases.

This initial draft is subject to major structural changes.

The webui is meant to be deployed using GitHub Pages.
2024-06-19 23:33:06 +02:00
ygasparis
1975b6455c extract import / export symbols from stripped elf binaries (#2142) 2024-06-18 12:38:02 -06:00
Capa Bot
1360e08389 Sync capa-testfiles submodule 2024-06-18 11:00:26 +00:00
dependabot[bot]
40061b3c42 build(deps): bump viv-utils from 0.7.9 to 0.7.11 (#2150) 2024-06-18 06:36:10 +02:00
dependabot[bot]
45fca7adea build(deps): bump python-flirt from 0.8.6 to 0.8.10 (#2151) 2024-06-18 06:35:50 +02:00
Moritz
482686ab81 Merge pull request #2147 from mandiant/release/v710
bump to v7.1.0
2024-06-14 12:56:46 +02:00
mr-tz
67f8c4d28c bump to v7.1.0 2024-06-14 09:06:04 +00:00
Capa Bot
3f151a342b Sync capa rules submodule 2024-06-14 09:02:02 +00:00
dependabot[bot]
e87e8484b6 build(deps): bump ruff from 0.4.7 to 0.4.8 (#2139)
Bumps [ruff](https://github.com/astral-sh/ruff) from 0.4.7 to 0.4.8.
- [Release notes](https://github.com/astral-sh/ruff/releases)
- [Changelog](https://github.com/astral-sh/ruff/blob/main/CHANGELOG.md)
- [Commits](https://github.com/astral-sh/ruff/compare/v0.4.7...v0.4.8)

---
updated-dependencies:
- dependency-name: ruff
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Willi Ballenthin <wballenthin@google.com>
2024-06-13 13:24:33 +02:00
Willi Ballenthin
8726de0d65 ELF: Detect OS from Go binaries (#1987)
* elf: read segment memory size

* elf: add routine to read mapped memory

* elf: better detect OS for binaries compiled by Go

* elf: guess OS from Go source filenames

* changelog

* elf: mypy

* merge

* elf: add OS detection based on vDSO strings

* elf: document VTGrep searches

* elf: describe further technique to identify Go binaries

* elf: search for `.go.buildinfo` section via @yelhamer

* black

* elf: detect Alpine Linux ident

* elf: log interesting symtab entries

* tests: add test for OS detection by Go buildinfo

* loader: handle missing viv modules

* pre-commit: run deptry before tests (which are slow)

* loader: describe removing viv symbolic switch solver

* pyproject: add PyGithub for deptry

* black
2024-06-13 13:23:47 +02:00
Moritz
7d1512a3de Merge pull request #2146 from mandiant/fix/2145
fix black and mypy
2024-06-13 11:49:18 +02:00
Capa Bot
73d76d7aba Sync capa-testfiles submodule 2024-06-13 09:30:44 +00:00
mr-tz
1febb224d1 add scripts dependency group 2024-06-13 07:50:58 +00:00
Moritz
e3ea60d354 Apply suggestions from code review
Co-authored-by: Willi Ballenthin <wballenthin@google.com>
2024-06-13 09:36:12 +02:00
mr-tz
93cd1dcedd add scripts to install step 2024-06-12 15:24:10 +00:00
mr-tz
7b0270980d add capa2sarif dependencies 2024-06-12 15:19:24 +00:00
mr-tz
cce7774705 add scripts section 2024-06-12 15:17:31 +00:00
mr-tz
9ec9a6f439 fix mypy issues 2024-06-12 09:32:03 +00:00
mr-tz
97a3fba2c9 fix black 2024-06-12 09:24:16 +00:00
Capa Bot
893352756f Sync capa rules submodule 2024-06-11 18:11:24 +00:00
malwarefrank
0cc06aa83d dnfile 0.15.0 changed API (#2037)
* dnfile 0.15.0 changed API

* deduplicate str() calls and isort fixes

* revert accidental change to imports ordering

* add table variable annotation

---------

Co-authored-by: Moritz <mr-tz@users.noreply.github.com>
Co-authored-by: mr-tz <moritz.raabe@mandiant.com>
2024-06-11 11:46:09 -06:00
dependabot[bot]
1888d0e7e3 build(deps): bump setuptools from 69.5.1 to 70.0.0 (#2135)
Bumps [setuptools](https://github.com/pypa/setuptools) from 69.5.1 to 70.0.0.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v69.5.1...v70.0.0)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-06-11 15:03:56 +02:00
ReWithMe
52e24e560b FEAT(capa2sarif) Add SARIF conversion script from json output (#2093)
* feat(capa2sarif): add new sarif conversion script converting json output to sarif schema, update dependencies, and update changelog

* fix(capa2sarif): removing copy and paste transcription errors

* fix(capa2sarif): move dependencies from pyproject toml to guarded import statements

* chore(capa2sarif): adding note in readme specifying dependency and applied auto formatter for styling

* style(capa2sarif): applied import sorting and fixed typo in invocations function

* test(capa2sarif): adding simple test for capa to sarif conversion script using existing result document

* style(capa2sarif): fixing typo in version string in usage

* style(capa2sarif): isort failing due to reordering of typehint imports

* style(capa2sarif): fixing import order as isort on local machine was not updating code

---------

Co-authored-by: ReversingWithMe <ryanv@rewith.me>
Co-authored-by: Willi Ballenthin <wballenthin@google.com>
2024-06-11 15:01:26 +02:00
dependabot[bot]
c97d2d7244 build(deps): bump pyinstaller from 6.7.0 to 6.8.0 (#2138)
Bumps [pyinstaller](https://github.com/pyinstaller/pyinstaller) from 6.7.0 to 6.8.0.
- [Release notes](https://github.com/pyinstaller/pyinstaller/releases)
- [Changelog](https://github.com/pyinstaller/pyinstaller/blob/develop/doc/CHANGES.rst)
- [Commits](https://github.com/pyinstaller/pyinstaller/compare/v6.7.0...v6.8.0)

---
updated-dependencies:
- dependency-name: pyinstaller
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-06-11 14:36:58 +02:00
Willi Ballenthin
833ec47170 relax pyproject dependency versions and introduce requirements.txt (#2132)
* relax pyproject dependency versions and introduce requirements.txt

closes #2053
closes #2079

* pyproject: document dev/build profile dependency policies

* changelog

* doc: installation: describe requirements.txt usage

* pyproject: don't use dnfile 0.15 yet

---------

Co-authored-by: Moritz <mr-tz@users.noreply.github.com>
2024-06-11 14:29:34 +02:00
Willi Ballenthin
07ae30875c features: add aarch64 arch (#2144)
* features: add aarch64 arch
2024-06-11 09:36:04 +02:00
25 changed files with 2098 additions and 128 deletions

View File

@@ -51,7 +51,9 @@ jobs:
- name: Upgrade pip, setuptools
run: python -m pip install --upgrade pip setuptools
- name: Install capa with build requirements
run: pip install -e .[build]
run: |
pip install -r requirements.txt
pip install -e .[build]
- name: Build standalone executable
run: pyinstaller --log-level DEBUG .github/pyinstaller/pyinstaller.spec
- name: Does it run (PE)?

View File

@@ -25,6 +25,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -e .[build]
- name: build package
run: |

View File

@@ -35,7 +35,9 @@ jobs:
with:
python-version: "3.11"
- name: Install dependencies
run: pip install -e .[dev]
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
- name: Lint with ruff
run: pre-commit run ruff
- name: Lint with isort
@@ -61,7 +63,9 @@ jobs:
with:
python-version: "3.11"
- name: Install capa
run: pip install -e .[dev]
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
- name: Run rule linter
run: python scripts/lint.py rules/
@@ -96,7 +100,9 @@ jobs:
if: matrix.os == 'ubuntu-20.04'
run: sudo apt-get install -y libyaml-dev
- name: Install capa
run: pip install -e .[dev]
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
- name: Run tests (fast)
# this set of tests runs about 80% of the cases in 20% of the time,
# and should catch most errors quickly.
@@ -131,7 +137,9 @@ jobs:
run: sudo apt-get install -y libyaml-dev
- name: Install capa
if: ${{ env.BN_SERIAL != 0 }}
run: pip install -e .[dev]
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
- name: install Binary Ninja
if: ${{ env.BN_SERIAL != 0 }}
run: |
@@ -188,7 +196,9 @@ jobs:
- name: Install pyyaml
run: sudo apt-get install -y libyaml-dev
- name: Install capa
run: pip install -e .[dev]
run: |
pip install -r requirements.txt
pip install -e .[dev,scripts]
- name: Run tests
run: |
mkdir ./.github/ghidra/project

View File

@@ -110,6 +110,16 @@ repos:
always_run: true
pass_filenames: false
- repo: local
hooks:
- id: deptry
name: deptry
stages: [push, manual]
language: system
entry: deptry .
always_run: true
pass_filenames: false
- repo: local
hooks:
- id: pytest-fast
@@ -128,12 +138,3 @@ repos:
always_run: true
pass_filenames: false
- repo: local
hooks:
- id: deptry
name: deptry
stages: [push, manual]
language: system
entry: deptry .
always_run: true
pass_filenames: false

View File

@@ -1,20 +1,57 @@
# Change Log
## master (unreleased)
- Emit "dotnet" as format to ResultDocument when processing .NET files #2024 @samadpls
### New Features
### Breaking Changes
### New Rules (0)
-
### Bug Fixes
- elf: extract import / export symbols from stripped binaries #2096 @ygasparis
### capa explorer IDA Pro plugin
### Development
### Raw diffs
- [capa v7.1.0...master](https://github.com/mandiant/capa/compare/v7.1.0...master)
- [capa-rules v7.1.0...master](https://github.com/mandiant/capa-rules/compare/v7.1.0...master)
## v7.1.0
The v7.1.0 release brings large performance improvements to capa's rule matching engine.
Additionally, we've fixed various bugs and added new features for people using and developing capa.
Special thanks to our repeat and new contributors:
* @sjha2048 made their first contribution in https://github.com/mandiant/capa/pull/2000
* @Rohit1123 made their first contribution in https://github.com/mandiant/capa/pull/1990
* @psahithireddy made their first contribution in https://github.com/mandiant/capa/pull/2020
* @Atlas-64 made their first contribution in https://github.com/mandiant/capa/pull/2018
* @s-ff made their first contribution in https://github.com/mandiant/capa/pull/2011
* @samadpls made their first contribution in https://github.com/mandiant/capa/pull/2024
* @acelynnzhang made their first contribution in https://github.com/mandiant/capa/pull/2044
* @RainRat made their first contribution in https://github.com/mandiant/capa/pull/2058
* @ReversingWithMe made their first contribution in https://github.com/mandiant/capa/pull/2093
* @malwarefrank made their first contribution in https://github.com/mandiant/capa/pull/2037
### New Features
- Emit "dotnet" as format to ResultDocument when processing .NET files #2024 @samadpls
- ELF: detect OS from statically-linked Go binaries #1978 @williballenthin
- add function in capa/helpers to load plain and compressed JSON reports #1883 @Rohit1123
- document Antivirus warnings and VirusTotal false positive detections #2028 @RionEV @mr-tz
- Add json to sarif conversion script @reversingwithme
- render maec/* fields #843 @s-ff
- replace Halo spinner with Rich #2086 @s-ff
- optimize rule matching #2080 @williballenthin
- add aarch64 as a valid architecture #2144 mehunhoff@google.com @williballenthin
- relax dependency version requirements for the capa library #2053 @williballenthin
- add scripts dependency group and update documentation #2145 @mr-tz
### Breaking Changes
### New Rules (17)
### New Rules (25)
- impact/wipe-disk/delete-drive-layout-via-ioctl william.ballenthin@mandiant.com
- host-interaction/driver/interact-with-driver-via-ioctl moritz.raabe@mandiant.com
@@ -33,7 +70,14 @@
- persistence/act-as-time-provider-dll jakub.jozwiak@mandiant.com
- host-interaction/gui/window/hide/hide-graphical-window-from-taskbar jakub.jozwiak@mandiant.com
- compiler/dart/compiled-with-dart jakub.jozwiak@mandiant.com
-
- nursery/bypass-hidden-api-restrictions-via-jni-on-android mehunhoff@google.com
- nursery/get-current-process-filesystem-mounts-on-linux mehunhoff@google.com
- nursery/get-current-process-memory-mapping-on-linux mehunhoff@google.com
- nursery/get-system-property-on-android mehunhoff@google.com
- nursery/hook-routines-via-lsplant mehunhoff@google.com
- nursery/load-packed-dex-via-jiagu-on-android mehunhoff@google.com
- nursery/modify-api-blacklist-or-denylist-via-jni-on-android mehunhoff@google.com
- nursery/truncate-file-on-linux mehunhoff@google.com
### Bug Fixes
@@ -42,7 +86,6 @@
- binja: add support for forwarded export #1646 @xusheng6
- cape: support more report formats #2035 @mr-tz
### capa explorer IDA Pro plugin
- replace deprecated IDA API find_binary with bin_search #1606 @s-ff
@@ -57,8 +100,8 @@
- add deptry support #1497 @s-ff
### Raw diffs
- [capa v7.0.1...master](https://github.com/mandiant/capa/compare/v7.0.1...master)
- [capa-rules v7.0.1...master](https://github.com/mandiant/capa-rules/compare/v7.0.1...master)
- [capa v7.0.1...v7.1.0](https://github.com/mandiant/capa/compare/v7.0.1...v7.1.0)
- [capa-rules v7.0.1...v7.1.0](https://github.com/mandiant/capa-rules/compare/v7.0.1...v7.1.0)
## v7.0.1

View File

@@ -409,9 +409,10 @@ class Bytes(Feature):
# other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types
ARCH_I386 = "i386"
ARCH_AMD64 = "amd64"
ARCH_AARCH64 = "aarch64"
# dotnet
ARCH_ANY = "any"
VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY)
VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_AARCH64, ARCH_ANY)
class Arch(Feature):
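As a quick illustration, a minimal sketch of exercising the new constant, assuming only the names visible in this hunk (capa.features.common.Arch, ARCH_AARCH64, VALID_ARCH):

import capa.features.common as common

# aarch64 is now accepted as a valid architecture value...
assert common.ARCH_AARCH64 in common.VALID_ARCH

# ...so an Arch feature can be built for it, e.g. to match rules
# declaring `arch: aarch64`.
feature = common.Arch(common.ARCH_AARCH64)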

View File

@@ -83,7 +83,7 @@ def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str
return None
try:
user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid)
user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get(token.rid)
except UnicodeDecodeError as e:
logger.debug("failed to decode #US stream index 0x%08x (%s)", token.rid, e)
return None
@@ -119,14 +119,14 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
access: Optional[str]
# assume .NET imports starting with get_/set_ are used to access a property
if member_ref.Name.startswith("get_"):
member_ref_name: str = str(member_ref.Name)
if member_ref_name.startswith("get_"):
access = FeatureAccess.READ
elif member_ref.Name.startswith("set_"):
elif member_ref_name.startswith("set_"):
access = FeatureAccess.WRITE
else:
access = None
member_ref_name: str = member_ref.Name
if member_ref_name.startswith(("get_", "set_")):
# remove get_/set_ from MemberRef name
member_ref_name = member_ref_name[4:]
@@ -212,7 +212,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
token: int = calculate_dotnet_token_value(method.table.number, method.row_index)
access: Optional[str] = accessor_map.get(token)
method_name: str = method.row.Name
method_name: str = str(method.row.Name)
if method_name.startswith(("get_", "set_")):
# remove get_/set_
method_name = method_name[4:]
@@ -289,8 +289,8 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod]
logger.debug("ImplMap[0x%X] ImportScope row is None", rid)
module = ""
else:
module = impl_map.ImportScope.row.Name
method: str = impl_map.ImportName
module = str(impl_map.ImportScope.row.Name)
method: str = str(impl_map.ImportName)
member_forward_table: int
if impl_map.MemberForwarded.table is None:
@@ -320,8 +320,11 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> O
if row_index - 1 <= 0:
return None
table: Optional[dnfile.base.ClrMetaDataTable] = pe.net.mdtables.tables.get(table_index)
if table is None:
return None
try:
table = pe.net.mdtables.tables.get(table_index, [])
return table[row_index - 1]
except IndexError:
return None
@@ -334,7 +337,7 @@ def resolve_nested_typedef_name(
if index in nested_class_table:
typedef_name = []
name = typedef.TypeName
name = str(typedef.TypeName)
# Append the current typedef name
typedef_name.append(name)
@@ -343,24 +346,24 @@ def resolve_nested_typedef_name(
# Iterate through the typedef table to resolve the nested name
table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index])
if table_row is None:
return typedef.TypeNamespace, tuple(typedef_name[::-1])
return str(typedef.TypeNamespace), tuple(typedef_name[::-1])
name = table_row.TypeName
name = str(table_row.TypeName)
typedef_name.append(name)
index = nested_class_table[index]
# Document the root enclosing details
table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index])
if table_row is None:
return typedef.TypeNamespace, tuple(typedef_name[::-1])
return str(typedef.TypeNamespace), tuple(typedef_name[::-1])
enclosing_name = table_row.TypeName
enclosing_name = str(table_row.TypeName)
typedef_name.append(enclosing_name)
return table_row.TypeNamespace, tuple(typedef_name[::-1])
return str(table_row.TypeNamespace), tuple(typedef_name[::-1])
else:
return typedef.TypeNamespace, (typedef.TypeName,)
return str(typedef.TypeNamespace), (str(typedef.TypeName),)
def resolve_nested_typeref_name(
@@ -370,29 +373,29 @@ def resolve_nested_typeref_name(
# If the ResolutionScope decodes to a typeRef type then it is nested
if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef):
typeref_name = []
name = typeref.TypeName
name = str(typeref.TypeName)
# Not appending the current typeref name to avoid potential duplicate
# Validate index
table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, index)
if table_row is None:
return typeref.TypeNamespace, (typeref.TypeName,)
return str(typeref.TypeNamespace), (str(typeref.TypeName),)
while isinstance(table_row.ResolutionScope.table, dnfile.mdtable.TypeRef):
# Iterate through the typeref table to resolve the nested name
typeref_name.append(name)
name = table_row.TypeName
name = str(table_row.TypeName)
table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, table_row.ResolutionScope.row_index)
if table_row is None:
return typeref.TypeNamespace, tuple(typeref_name[::-1])
return str(typeref.TypeNamespace), tuple(typeref_name[::-1])
# Document the root enclosing details
typeref_name.append(table_row.TypeName)
typeref_name.append(str(table_row.TypeName))
return table_row.TypeNamespace, tuple(typeref_name[::-1])
return str(table_row.TypeNamespace), tuple(typeref_name[::-1])
else:
return typeref.TypeNamespace, (typeref.TypeName,)
return str(typeref.TypeNamespace), (str(typeref.TypeName),)
def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]:

View File

@@ -78,12 +78,12 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
for _, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number):
# emit internal .NET namespaces
assert isinstance(typedef, dnfile.mdtable.TypeDefRow)
namespaces.add(typedef.TypeNamespace)
namespaces.add(str(typedef.TypeNamespace))
for _, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number):
# emit external .NET namespaces
assert isinstance(typeref, dnfile.mdtable.TypeRefRow)
namespaces.add(typeref.TypeNamespace)
namespaces.add(str(typeref.TypeNamespace))
# namespaces may be empty, discard
namespaces.discard("")

View File

@@ -58,6 +58,10 @@ class OS(str, Enum):
SYLLABLE = "syllable"
NACL = "nacl"
ANDROID = "android"
DRAGONFLYBSD = "dragonfly BSD"
ILLUMOS = "illumos"
ZOS = "z/os"
UNIX = "unix"
# via readelf: https://github.com/bminor/binutils-gdb/blob/c0e94211e1ac05049a4ce7c192c9d14d1764eb3e/binutils/readelf.c#L19635-L19658
@@ -81,6 +85,8 @@ class Phdr:
paddr: int
filesz: int
buf: bytes
flags: int
memsz: int
@dataclass
@@ -315,24 +321,23 @@ class ELF:
phent_offset = i * self.e_phentsize
phent = self.phbuf[phent_offset : phent_offset + self.e_phentsize]
(p_type,) = struct.unpack_from(self.endian + "I", phent, 0x0)
logger.debug("ph:p_type: 0x%04x", p_type)
if self.bitness == 32:
p_offset, p_vaddr, p_paddr, p_filesz = struct.unpack_from(self.endian + "IIII", phent, 0x4)
p_type, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz, p_flags = struct.unpack_from(
self.endian + "IIIIIII", phent, 0x0
)
elif self.bitness == 64:
p_offset, p_vaddr, p_paddr, p_filesz = struct.unpack_from(self.endian + "QQQQ", phent, 0x8)
p_type, p_flags, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz = struct.unpack_from(
self.endian + "IIQQQQQ", phent, 0x0
)
else:
raise NotImplementedError()
logger.debug("ph:p_offset: 0x%02x p_filesz: 0x%04x", p_offset, p_filesz)
self.f.seek(p_offset)
buf = self.f.read(p_filesz)
if len(buf) != p_filesz:
raise ValueError("failed to read program header content")
return Phdr(p_type, p_offset, p_vaddr, p_paddr, p_filesz, buf)
return Phdr(p_type, p_offset, p_vaddr, p_paddr, p_filesz, buf, p_flags, p_memsz)
@property
def program_headers(self):
@@ -357,8 +362,6 @@ class ELF:
else:
raise NotImplementedError()
logger.debug("sh:sh_offset: 0x%02x sh_size: 0x%04x", sh_offset, sh_size)
self.f.seek(sh_offset)
buf = self.f.read(sh_size)
if len(buf) != sh_size:
@@ -867,6 +870,8 @@ def guess_os_from_ident_directive(elf: ELF) -> Optional[OS]:
return OS.LINUX
elif "Red Hat" in comment:
return OS.LINUX
elif "Alpine" in comment:
return OS.LINUX
elif "Android" in comment:
return OS.ANDROID
@@ -952,11 +957,506 @@ def guess_os_from_symtab(elf: ELF) -> Optional[OS]:
for os, hints in keywords.items():
if any(hint in sym_name for hint in hints):
logger.debug("symtab: %s looks like %s", sym_name, os)
return os
return None
def is_go_binary(elf: ELF) -> bool:
for shdr in elf.section_headers:
if shdr.get_name(elf) == ".note.go.buildid":
logger.debug("go buildinfo: found section .note.go.buildid")
return True
# The `go version` command enumerates sections for the name `.go.buildinfo`
# (in addition to looking for the BUILDINFO_MAGIC) to check if an executable is go or not.
# See references to the `errNotGoExe` error here:
# https://github.com/golang/go/blob/master/src/debug/buildinfo/buildinfo.go#L41
for shdr in elf.section_headers:
if shdr.get_name(elf) == ".go.buildinfo":
logger.debug("go buildinfo: found section .go.buildinfo")
return True
# other strategy used by FLOSS: search for known runtime strings.
# https://github.com/mandiant/flare-floss/blob/b2ca8adfc5edf278861dd6bff67d73da39683b46/floss/language/identify.py#L88
return False
def get_go_buildinfo_data(elf: ELF) -> Optional[bytes]:
for shdr in elf.section_headers:
if shdr.get_name(elf) == ".go.buildinfo":
logger.debug("go buildinfo: found section .go.buildinfo")
return shdr.buf
PT_LOAD = 0x1
PF_X = 1
PF_W = 2
for phdr in elf.program_headers:
if phdr.type != PT_LOAD:
continue
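# heuristic: a loadable segment that is writable but not executable
# is treated as the data segment, where buildinfo typically lives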
if (phdr.flags & (PF_X | PF_W)) == PF_W:
logger.debug("go buildinfo: found data segment")
return phdr.buf
return None
def read_data(elf: ELF, rva: int, size: int) -> Optional[bytes]:
# ELF segments are for runtime data,
# ELF sections are for link-time data.
# So we want to read Program Headers/Segments.
for phdr in elf.program_headers:
if phdr.vaddr <= rva < phdr.vaddr + phdr.memsz:
segment_data = phdr.buf
# pad the segment with NULLs
# assume page alignment is already handled.
# might need more hardening here.
if len(segment_data) < phdr.memsz:
segment_data += b"\x00" * (phdr.memsz - len(segment_data))
segment_offset = rva - phdr.vaddr
return segment_data[segment_offset : segment_offset + size]
return None
def read_go_slice(elf: ELF, rva: int) -> Optional[bytes]:
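# read a pointer-plus-length header (two machine words, as used for
# Go string data): 8 bytes on 32-bit targets, 16 bytes on 64-bit.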
if elf.bitness == 32:
struct_size = 8
struct_format = elf.endian + "II"
elif elf.bitness == 64:
struct_size = 16
struct_format = elf.endian + "QQ"
else:
raise ValueError("invalid psize")
struct_buf = read_data(elf, rva, struct_size)
if not struct_buf:
return None
addr, length = struct.unpack_from(struct_format, struct_buf, 0)
return read_data(elf, addr, length)
def guess_os_from_go_buildinfo(elf: ELF) -> Optional[OS]:
"""
In a binary compiled by Go, the buildinfo structure may contain
metadata about the build environment, including the configured
GOOS, which specifies the target operating system.
Search for and parse the buildinfo structure,
which may be found in the .go.buildinfo section,
and often contains this metadata inline. Otherwise,
follow a few byte slices to the relevant information.
This strategy is derived from GoReSym.
"""
buf = get_go_buildinfo_data(elf)
if not buf:
logger.debug("go buildinfo: no buildinfo section")
return None
assert isinstance(buf, bytes)
# The build info blob left by the linker is identified by
# a 16-byte header, consisting of:
# - buildInfoMagic (14 bytes),
# - the binary's pointer size (1 byte), and
# - whether the binary is big endian (1 byte).
#
# Then:
# - virtual address to Go string: runtime.buildVersion
# - virtual address to Go string: runtime.modinfo
#
# On 32-bit platforms, the last 8 bytes are unused.
#
# If the endianness has the 2 bit set, then the pointers are zero,
# and the 32-byte header is followed by varint-prefixed string data
# for the two string values we care about.
# https://github.com/mandiant/GoReSym/blob/0860a1b1b4f3495e9fb7e71eb4386bf3e0a7c500/buildinfo/buildinfo.go#L185-L193
BUILDINFO_MAGIC = b"\xFF Go buildinf:"
try:
index = buf.index(BUILDINFO_MAGIC)
except ValueError:
logger.debug("go buildinfo: no buildinfo magic")
return None
psize, flags = struct.unpack_from("<bb", buf, index + len(BUILDINFO_MAGIC))
assert psize in (4, 8)
is_big_endian = flags & 0b01
has_inline_strings = flags & 0b10
logger.debug("go buildinfo: psize: %d big endian: %s inline: %s", psize, is_big_endian, has_inline_strings)
GOOS_TO_OS = {
b"aix": OS.AIX,
b"android": OS.ANDROID,
b"dragonfly": OS.DRAGONFLYBSD,
b"freebsd": OS.FREEBSD,
b"hurd": OS.HURD,
b"illumos": OS.ILLUMOS,
b"linux": OS.LINUX,
b"netbsd": OS.NETBSD,
b"openbsd": OS.OPENBSD,
b"solaris": OS.SOLARIS,
b"zos": OS.ZOS,
b"windows": None, # PE format
b"plan9": None, # a.out format
b"ios": None, # Mach-O format
b"darwin": None, # Mach-O format
b"nacl": None, # dropped in GO 1.14
b"js": None,
}
if has_inline_strings:
# This is the common case/path. Most samples will have an inline GOOS string.
#
# To find samples on VT, use these VTGrep searches:
#
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 02}
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 02}
# If present, the GOOS key will be found within
# the current buildinfo data region.
#
# Brute force the k-v pair, like `GOOS=linux`,
# rather than try to parse the data, which would be fragile.
for key, os in GOOS_TO_OS.items():
if (b"GOOS=" + key) in buf:
logger.debug("go buildinfo: found os: %s", os)
return os
else:
# This is the uncommon path. Most samples will have an inline GOOS string.
#
# To find samples on VT, use the referenced VTGrep content searches.
info_format = {
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 00}
# like: 71e617e5cc7fda89bf67422ff60f437e9d54622382c5ed6ff31f75e601f9b22e
# in which the modinfo doesn't have GOOS.
(4, False): "<II",
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 00}
# like: 93d3b3e2a904c6c909e20f2f76c3c2e8d0c81d535eb46e5493b5701f461816c3
# in which the modinfo doesn't have GOOS.
(8, False): "<QQ",
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 01}
# (no matches on VT today)
(4, True): ">II",
# content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 01}
# like: d44ba497964050c0e3dd2a192c511e4c3c4f17717f0322a554d64b797ee4690a
# in which the modinfo doesn't have GOOS.
(8, True): ">QQ",
}
build_version_address, modinfo_address = struct.unpack_from(
info_format[(psize, is_big_endian)], buf, index + 0x10
)
logger.debug("go buildinfo: build version address: 0x%x", build_version_address)
logger.debug("go buildinfo: modinfo address: 0x%x", modinfo_address)
build_version = read_go_slice(elf, build_version_address)
if build_version:
logger.debug("go buildinfo: build version: %s", build_version.decode("utf-8"))
modinfo = read_go_slice(elf, modinfo_address)
if modinfo:
if modinfo[-0x11] == ord("\n"):
# Strip module framing: sentinel strings delimiting the module info.
# These are cmd/go/internal/modload/build.infoStart and infoEnd.
# Which should probably be:
# infoStart, _ = hex.DecodeString("3077af0c9274080241e1c107e6d618e6")
# infoEnd, _ = hex.DecodeString("f932433186182072008242104116d8f2")
modinfo = modinfo[0x10:-0x10]
logger.debug("go buildinfo: modinfo: %s", modinfo.decode("utf-8"))
if not modinfo:
return None
for key, os in GOOS_TO_OS.items():
# Brute force the k-v pair, like `GOOS=linux`,
# rather than try to parse the data, which would be fragile.
if (b"GOOS=" + key) in modinfo:
logger.debug("go buildinfo: found os: %s", os)
return os
return None
def guess_os_from_go_source(elf: ELF) -> Optional[OS]:
"""
In a binary compiled by Go, runtime metadata may contain
references to the source filenames, including the
src/runtime/os_* files, whose name indicates the
target operating system.
Confirm the given ELF seems to be built by Go,
and then look for strings that look like
Go source filenames.
This strategy is derived from GoReSym.
"""
if not is_go_binary(elf):
return None
for phdr in elf.program_headers:
buf = phdr.buf
NEEDLE_OS = b"/src/runtime/os_"
try:
index = buf.index(NEEDLE_OS)
except ValueError:
continue
rest = buf[index + len(NEEDLE_OS) : index + len(NEEDLE_OS) + 32]
filename = rest.partition(b".go")[0].decode("utf-8")
logger.debug("go source: filename: /src/runtime/os_%s.go", filename)
# via: https://cs.opensource.google/go/go/+/master:src/runtime/;bpv=1;bpt=0
# candidates today:
# - aix
# - android
# - darwin
# - darwin_arm64
# - dragonfly
# - freebsd
# - freebsd2
# - freebsd_amd64
# - freebsd_arm
# - freebsd_arm64
# - freebsd_noauxv
# - freebsd_riscv64
# - illumos
# - js
# - linux
# - linux_arm
# - linux_arm64
# - linux_be64
# - linux_generic
# - linux_loong64
# - linux_mips64x
# - linux_mipsx
# - linux_noauxv
# - linux_novdso
# - linux_ppc64x
# - linux_riscv64
# - linux_s390x
# - linux_x86
# - netbsd
# - netbsd_386
# - netbsd_amd64
# - netbsd_arm
# - netbsd_arm64
# - nonopenbsd
# - only_solaris
# - openbsd
# - openbsd_arm
# - openbsd_arm64
# - openbsd_libc
# - openbsd_mips64
# - openbsd_syscall
# - openbsd_syscall1
# - openbsd_syscall2
# - plan9
# - plan9_arm
# - solaris
# - unix
# - unix_nonlinux
# - wasip1
# - wasm
# - windows
# - windows_arm
# - windows_arm64
OS_FILENAME_TO_OS = {
"aix": OS.AIX,
"android": OS.ANDROID,
"dragonfly": OS.DRAGONFLYBSD,
"freebsd": OS.FREEBSD,
"freebsd2": OS.FREEBSD,
"freebsd_": OS.FREEBSD,
"illumos": OS.ILLUMOS,
"linux": OS.LINUX,
"netbsd": OS.NETBSD,
"only_solaris": OS.SOLARIS,
"openbsd": OS.OPENBSD,
"solaris": OS.SOLARIS,
"unix_nonlinux": OS.UNIX,
}
for prefix, os in OS_FILENAME_TO_OS.items():
if filename.startswith(prefix):
return os
for phdr in elf.program_headers:
buf = phdr.buf
NEEDLE_RT0 = b"/src/runtime/rt0_"
try:
index = buf.index(NEEDLE_RT0)
except ValueError:
continue
rest = buf[index + len(NEEDLE_RT0) : index + len(NEEDLE_RT0) + 32]
filename = rest.partition(b".s")[0].decode("utf-8")
logger.debug("go source: filename: /src/runtime/rt0_%s.s", filename)
# via: https://cs.opensource.google/go/go/+/master:src/runtime/;bpv=1;bpt=0
# candidates today:
# - aix_ppc64
# - android_386
# - android_amd64
# - android_arm
# - android_arm64
# - darwin_amd64
# - darwin_arm64
# - dragonfly_amd64
# - freebsd_386
# - freebsd_amd64
# - freebsd_arm
# - freebsd_arm64
# - freebsd_riscv64
# - illumos_amd64
# - ios_amd64
# - ios_arm64
# - js_wasm
# - linux_386
# - linux_amd64
# - linux_arm
# - linux_arm64
# - linux_loong64
# - linux_mips64x
# - linux_mipsx
# - linux_ppc64
# - linux_ppc64le
# - linux_riscv64
# - linux_s390x
# - netbsd_386
# - netbsd_amd64
# - netbsd_arm
# - netbsd_arm64
# - openbsd_386
# - openbsd_amd64
# - openbsd_arm
# - openbsd_arm64
# - openbsd_mips64
# - openbsd_ppc64
# - openbsd_riscv64
# - plan9_386
# - plan9_amd64
# - plan9_arm
# - solaris_amd64
# - wasip1_wasm
# - windows_386
# - windows_amd64
# - windows_arm
# - windows_arm64
RT0_FILENAME_TO_OS = {
"aix": OS.AIX,
"android": OS.ANDROID,
"dragonfly": OS.DRAGONFLYBSD,
"freebsd": OS.FREEBSD,
"illumos": OS.ILLUMOS,
"linux": OS.LINUX,
"netbsd": OS.NETBSD,
"openbsd": OS.OPENBSD,
"solaris": OS.SOLARIS,
}
for prefix, os in RT0_FILENAME_TO_OS.items():
if filename.startswith(prefix):
return os
return None
def guess_os_from_vdso_strings(elf: ELF) -> Optional[OS]:
"""
The "vDSO" (virtual dynamic shared object) is a small shared
library that the kernel automatically maps into the address space
of all user-space applications.
Some statically linked executables include small dynamic linker
routines that find these vDSO symbols using the ASCII
symbol name and version. We can therefore recognize the
(symbol, version) pairs to guess that the binary targets Linux.
"""
for phdr in elf.program_headers:
buf = phdr.buf
# We don't really use the arch, but it's interesting for documentation.
# I suppose we could restrict the arch here to what's in the ELF header,
# but that's even more work. Let's see if this is sufficient.
for arch, symbol, version in (
# via: https://man7.org/linux/man-pages/man7/vdso.7.html
("arm", b"__vdso_gettimeofday", b"LINUX_2.6"),
("arm", b"__vdso_clock_gettime", b"LINUX_2.6"),
("aarch64", b"__kernel_rt_sigreturn", b"LINUX_2.6.39"),
("aarch64", b"__kernel_gettimeofday", b"LINUX_2.6.39"),
("aarch64", b"__kernel_clock_gettime", b"LINUX_2.6.39"),
("aarch64", b"__kernel_clock_getres", b"LINUX_2.6.39"),
("mips", b"__kernel_gettimeofday", b"LINUX_2.6"),
("mips", b"__kernel_clock_gettime", b"LINUX_2.6"),
("ia64", b"__kernel_sigtramp", b"LINUX_2.5"),
("ia64", b"__kernel_syscall_via_break", b"LINUX_2.5"),
("ia64", b"__kernel_syscall_via_epc", b"LINUX_2.5"),
("ppc/32", b"__kernel_clock_getres", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_clock_gettime", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_clock_gettime64", b"LINUX_5.11"),
("ppc/32", b"__kernel_datapage_offset", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_get_syscall_map", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_get_tbfreq", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_getcpu", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_gettimeofday", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_sigtramp_rt32", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_sigtramp32", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_sync_dicache", b"LINUX_2.6.15"),
("ppc/32", b"__kernel_sync_dicache_p5", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_clock_getres", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_clock_gettime", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_datapage_offset", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_get_syscall_map", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_get_tbfreq", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_getcpu", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_gettimeofday", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_sigtramp_rt64", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_sync_dicache", b"LINUX_2.6.15"),
("ppc/64", b"__kernel_sync_dicache_p5", b"LINUX_2.6.15"),
("riscv", b"__vdso_rt_sigreturn", b"LINUX_4.15"),
("riscv", b"__vdso_gettimeofday", b"LINUX_4.15"),
("riscv", b"__vdso_clock_gettime", b"LINUX_4.15"),
("riscv", b"__vdso_clock_getres", b"LINUX_4.15"),
("riscv", b"__vdso_getcpu", b"LINUX_4.15"),
("riscv", b"__vdso_flush_icache", b"LINUX_4.15"),
("s390", b"__kernel_clock_getres", b"LINUX_2.6.29"),
("s390", b"__kernel_clock_gettime", b"LINUX_2.6.29"),
("s390", b"__kernel_gettimeofday", b"LINUX_2.6.29"),
("superh", b"__kernel_rt_sigreturn", b"LINUX_2.6"),
("superh", b"__kernel_sigreturn", b"LINUX_2.6"),
("superh", b"__kernel_vsyscall", b"LINUX_2.6"),
("i386", b"__kernel_sigreturn", b"LINUX_2.5"),
("i386", b"__kernel_rt_sigreturn", b"LINUX_2.5"),
("i386", b"__kernel_vsyscall", b"LINUX_2.5"),
("i386", b"__vdso_clock_gettime", b"LINUX_2.6"),
("i386", b"__vdso_gettimeofday", b"LINUX_2.6"),
("i386", b"__vdso_time", b"LINUX_2.6"),
("x86-64", b"__vdso_clock_gettime", b"LINUX_2.6"),
("x86-64", b"__vdso_getcpu", b"LINUX_2.6"),
("x86-64", b"__vdso_gettimeofday", b"LINUX_2.6"),
("x86-64", b"__vdso_time", b"LINUX_2.6"),
("x86/32", b"__vdso_clock_gettime", b"LINUX_2.6"),
("x86/32", b"__vdso_getcpu", b"LINUX_2.6"),
("x86/32", b"__vdso_gettimeofday", b"LINUX_2.6"),
("x86/32", b"__vdso_time", b"LINUX_2.6"),
):
if symbol in buf and version in buf:
logger.debug("vdso string: %s %s %s", arch, symbol.decode("ascii"), version.decode("ascii"))
return OS.LINUX
return None
def detect_elf_os(f) -> str:
"""
f: type Union[BinaryIO, IDAIO, GHIDRAIO]
@@ -1023,6 +1523,27 @@ def detect_elf_os(f) -> str:
logger.warning("Error guessing OS from symbol table: %s", e)
symtab_guess = None
try:
goos_guess = guess_os_from_go_buildinfo(elf)
logger.debug("guess: Go buildinfo: %s", goos_guess)
except Exception as e:
logger.warning("Error guessing OS from Go buildinfo: %s", e)
goos_guess = None
try:
gosrc_guess = guess_os_from_go_source(elf)
logger.debug("guess: Go source: %s", gosrc_guess)
except Exception as e:
logger.warning("Error guessing OS from Go source path: %s", e)
gosrc_guess = None
try:
vdso_guess = guess_os_from_vdso_strings(elf)
logger.debug("guess: vdso strings: %s", vdso_guess)
except Exception as e:
logger.warning("Error guessing OS from vdso strings: %s", e)
vdso_guess = None
ret = None
if osabi_guess:
@@ -1046,11 +1567,24 @@ def detect_elf_os(f) -> str:
elif symtab_guess:
ret = symtab_guess
elif goos_guess:
ret = goos_guess
elif gosrc_guess:
# prefer goos_guess to this method,
# which is just string interpretation.
ret = gosrc_guess
elif ident_guess:
# at the bottom because we don't trust this too much
# due to potential for bugs with cross-compilation.
ret = ident_guess
elif vdso_guess:
# at the bottom because this is just scanning strings,
# which isn't very authoritative.
ret = vdso_guess
return ret.value if ret is not None else "unknown"
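As a usage note, a minimal sketch of invoking the updated detection chain end to end (hypothetical sample path; detect_elf_os accepts a binary file object per its docstring):

import capa.features.extractors.elf

# hypothetical path to a statically-linked Go ELF sample
with open("/tmp/go_sample.elf", "rb") as f:
    # returns e.g. "linux", or "unknown" if no heuristic matched
    print(capa.features.extractors.elf.detect_elf_os(f))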

View File

@@ -10,8 +10,7 @@ import logging
from typing import Tuple, Iterator
from pathlib import Path
from elftools.elf.elffile import ELFFile, SymbolTableSection
from elftools.elf.relocation import RelocationSection
from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection
import capa.features.extractors.common
from capa.features.file import Export, Import, Section
@@ -47,17 +46,37 @@ def extract_file_export_names(elf: ELFFile, **kwargs):
yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)
for segment in elf.iter_segments():
if not isinstance(segment, DynamicSegment):
continue
logger.debug("Dynamic Segment contains %s symbols: ", segment.num_symbols())
for symbol in segment.iter_symbols():
# The following conditions are based on the following article
# http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
if not symbol.name:
continue
if symbol.entry.st_info.type not in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]:
continue
if symbol.entry.st_value == 0:
continue
if symbol.entry.st_shndx == "SHN_UNDEF":
continue
yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)
def extract_file_import_names(elf: ELFFile, **kwargs):
# Create a dictionary to store symbol names by their index
symbol_names = {}
# Extract symbol names and store them in the dictionary
for section in elf.iter_sections():
if not isinstance(section, SymbolTableSection):
for segment in elf.iter_segments():
if not isinstance(segment, DynamicSegment):
continue
for _, symbol in enumerate(section.iter_symbols()):
for _, symbol in enumerate(segment.iter_symbols()):
# The following conditions are based on the following article
# http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
if not symbol.name:
@@ -73,21 +92,19 @@ def extract_file_import_names(elf: ELFFile, **kwargs):
symbol_names[_] = symbol.name
for section in elf.iter_sections():
if not isinstance(section, RelocationSection):
for segment in elf.iter_segments():
if not isinstance(segment, DynamicSegment):
continue
if section["sh_entsize"] == 0:
logger.debug("Symbol table '%s' has a sh_entsize of zero!", section.name)
continue
relocation_tables = segment.get_relocation_tables()
logger.debug("Dynamic Segment contains %s relocation tables:", len(relocation_tables))
logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_relocations())
for relocation in section.iter_relocations():
# Extract the symbol name from the symbol table using the symbol index in the relocation
if relocation["r_info_sym"] not in symbol_names:
continue
yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"])
for relocation_table in relocation_tables.values():
for relocation in relocation_table.iter_relocations():
# Extract the symbol name from the symbol table using the symbol index in the relocation
if relocation["r_info_sym"] not in symbol_names:
continue
yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"])
def extract_file_section_names(elf: ELFFile, **kwargs):
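For reference, a small standalone sketch of the pyelftools calls this change relies on (hypothetical file path; mirrors the DynamicSegment iteration above):

from elftools.elf.elffile import ELFFile, DynamicSegment

# hypothetical stripped ELF sample
with open("/tmp/stripped.elf", "rb") as f:
    elf = ELFFile(f)
    for segment in elf.iter_segments():
        if not isinstance(segment, DynamicSegment):
            continue
        # dynamic symbols survive stripping, unlike .symtab entries
        for symbol in segment.iter_symbols():
            if symbol.name and symbol.entry.st_value != 0:
                print(symbol.name, hex(symbol.entry.st_value))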

View File

@@ -8,6 +8,7 @@
import sys
import logging
import datetime
import contextlib
from typing import Set, Dict, List, Optional
from pathlib import Path
@@ -154,6 +155,18 @@ def get_workspace(path: Path, input_format: str, sigpaths: List[Path]):
viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths])
with contextlib.suppress(Exception):
# unfortunately viv raises a raw Exception (not any subclass).
# This happens when the module isn't found, such as with a viv upgrade.
#
# Remove the symbolic switch case solver.
# This is only enabled for ELF files, not PE files.
# During the following performance investigation, this analysis module
# had some terrible worst-case behavior.
# We can put up with slightly worse CFG reconstruction in order to avoid this.
# https://github.com/mandiant/capa/issues/1989#issuecomment-1948022767
vw.delFuncAnalysisModule("vivisect.analysis.generic.symswitchcase")
vw.analyze()
logger.debug("%s", get_meta_str(vw))

View File

@@ -5,7 +5,7 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
__version__ = "7.0.1"
__version__ = "7.1.0"
def get_major_version():

View File

@@ -91,6 +91,12 @@ For more details about creating and using virtual environments, check out the [v
##### Install development dependencies
When developing capa, please use the pinned dependencies found in `requirements.txt`.
This ensures that everyone has the exact same, reproducible environment.
Please install these dependencies before installing capa (from source or from PyPI):
`$ pip install -r requirements.txt`
We use the following tools to ensure consistent code style and formatting:
- [black](https://github.com/psf/black) code formatter
- [isort](https://pypi.org/project/isort/) code formatter
@@ -101,7 +107,8 @@ We use the following tools to ensure consistent code style and formatting:
To install these development dependencies, run:
`$ pip install -e /local/path/to/src[dev]`
- `$ pip install -e /local/path/to/src[dev]` or
- `$ pip install -e /local/path/to/src[dev,scripts]` to also install all script dependencies
We use [pre-commit](https://pre-commit.com/) so that it's trivial to run the same linters & configuration locally as in CI.

View File

@@ -32,25 +32,76 @@ classifiers = [
"Topic :: Security",
]
dependencies = [
"tqdm==4.66.4",
"pyyaml==6.0.1",
"tabulate==0.9.0",
"colorama==0.4.6",
"termcolor==2.4.0",
"wcwidth==0.2.13",
"ida-settings==2.1.0",
"viv-utils[flirt]==0.7.9",
"networkx==3.1",
"ruamel.yaml==0.18.6",
"vivisect==1.1.1",
"pefile==2023.2.7",
"pyelftools==0.31",
"dnfile==0.14.1",
"dncil==1.0.2",
"pydantic==2.7.1",
"rich==13.7.1",
"humanize==4.9.0",
"protobuf==5.27.0",
# ---------------------------------------
# As a library, capa uses lower version bounds
# when specifying its dependencies. This lets
# other programs that use capa (and other libraries)
# to find a compatible set of dependency versions.
#
# We can optionally pin to specific versions or
# limit the upper bound when there's a good reason;
# but the default is to assume all greater versions
# probably work with capa until proven otherwise.
#
# The following link provides good background:
# https://iscinumpy.dev/post/bound-version-constraints/
#
# When we develop capa, and when we distribute it as
# a standalone binary, we'll use specific versions
# that are pinned in requirements.txt.
# But the requirements for a library are specified here
# and are looser.
#
# Related discussions:
#
# - https://github.com/mandiant/capa/issues/2053
# - https://github.com/mandiant/capa/pull/2059
# - https://github.com/mandiant/capa/pull/2079
#
# ---------------------------------------
# The following dependency versions were imported
# during June 2024 by truncating specific versions to
# their major-most version (major version when possible,
# or minor otherwise).
# As specific constraints are identified, please provide
# comments and context.
"tqdm>=4",
"pyyaml>=6",
"tabulate>=0.9",
"colorama>=0.4",
"termcolor>=2",
"wcwidth>=0.2",
"ida-settings>=2",
"ruamel.yaml>=0.18",
"pefile>=2023.2.7",
"pyelftools>=0.31",
"pydantic>=2",
"rich>=13",
"humanize>=4",
"protobuf>=5",
# ---------------------------------------
# Dependencies that we develop
#
# These dependencies are often actively influenced by capa,
# so we provide a minimum patch version that includes the
# latest bug fixes we need here.
"viv-utils[flirt]>=0.7.9",
"vivisect>=1.1.1",
"dncil>=1.0.2",
# ---------------------------------------
# Dependencies with version caps
#
# These dependencies must not exceed the version cap,
# typically due to dropping support for python releases
# we still support.
# TODO(williballenthin): networkx 3.2 doesn't support python 3.8 while capa does.
# https://github.com/mandiant/capa/issues/1966
"networkx>=3,<3.2",
"dnfile>=0.15.0",
]
dynamic = ["version"]
@@ -63,6 +114,10 @@ namespaces = false
[project.optional-dependencies]
dev = [
# Dev and build dependencies are not relaxed because
# we want all developer environments to be consistent.
# These dependencies are not used in production environments
# and should not conflict with other libraries/tooling.
"pre-commit==3.5.0",
"pytest==8.0.0",
"pytest-sugar==1.0.0",
@@ -79,14 +134,12 @@ dev = [
"flake8-simplify==0.21.0",
"flake8-use-pathlib==0.3.0",
"flake8-copyright==0.2.4",
"ruff==0.4.7",
"ruff==0.4.8",
"black==24.4.2",
"isort==5.13.2",
"mypy==1.10.0",
"psutil==5.9.2",
"stix2==3.0.1",
"requests==2.31.0",
"mypy-protobuf==3.6.0",
"PyGithub==2.3.0",
# type stubs for mypy
"types-backports==0.1.3",
"types-colorama==0.4.15.11",
@@ -99,10 +152,21 @@ dev = [
"deptry==0.16.1"
]
build = [
"pyinstaller==6.7.0",
"setuptools==69.5.1",
# Dev and build dependencies are not relaxed because
# we want all developer environments to be consistent.
# These dependencies are not used in production environments
# and should not conflict with other libraries/tooling.
"pyinstaller==6.8.0",
"setuptools==70.0.0",
"build==1.2.1"
]
scripts = [
"jschema_to_python==1.2.3",
"psutil==5.9.2",
"stix2==3.0.1",
"sarif_om==1.0.4",
"requests==2.31.0",
]
[tool.deptry]
extend_exclude = [
@@ -152,6 +216,7 @@ DEP002 = [
"mypy",
"mypy-protobuf",
"pre-commit",
"PyGithub",
"pyinstaller",
"pytest",
"pytest-cov",
@@ -175,6 +240,9 @@ DEP003 = [
"typing_extensions" # TODO(s-ff): remove when Python 3.9 is deprecated, see #1699
]
[tool.deptry.package_module_name_map]
PyGithub = "github"
[project.urls]
Homepage = "https://github.com/mandiant/capa"
Repository = "https://github.com/mandiant/capa.git"

requirements.txt (new file, 46 lines)
View File

@@ -0,0 +1,46 @@
# Dependencies with specific version constraints
# used during development and building the standalone executables.
# For these environments, use `pip install -r requirements.txt`
# before installing capa from source/pypi. This will ensure
# the following specific versions are used.
#
# Initially generated via: pip freeze | grep -v -- "-e"
# Kept up to date by dependabot.
annotated-types==0.7.0
colorama==0.4.6
cxxfilt==0.2.2
dncil==1.0.2
dnfile==0.15.0
funcy==2.0
humanize==4.9.0
ida-netnode==3.0
ida-settings==2.1.0
intervaltree==3.1.0
markdown-it-py==3.0.0
mdurl==0.1.2
msgpack==1.0.8
networkx==3.1
pefile==2023.2.7
pip==24.0
protobuf==5.27.1
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycparser==2.22
pydantic==2.7.3
pydantic-core==2.18.4
pyelftools==0.31
pygments==2.18.0
python-flirt==0.8.10
pyyaml==6.0.1
rich==13.7.1
ruamel-yaml==0.18.6
ruamel-yaml-clib==0.2.8
setuptools==70.0.0
six==1.16.0
sortedcontainers==2.4.0
tabulate==0.9.0
termcolor==2.4.0
tqdm==4.66.4
viv-utils==0.7.11
vivisect==1.1.1
wcwidth==0.2.13

rules (submodule, 2 lines changed)

Submodule rules updated: 9e0ffdf7c5...e63c454fbb

scripts/capa2sarif.py (new file, 358 lines)
View File

@@ -0,0 +1,358 @@
# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
"""
Convert capa json output to sarif schema
usage: capa2sarif.py [-h] [-g] [-r] [-t TAG] [--version] capa_output
Capa to SARIF analysis file
positional arguments:
capa_output Path to capa JSON output file
optional arguments:
-h, --help show this help message and exit
--version show program's version number and exit
-t TAG, --tag TAG filter on rule meta field values (ruleid)
Requires:
- sarif_om 1.0.4
- jschema_to_python 1.2.3
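Example (hypothetical filenames; assumes capa was run with JSON output via -j):
$ capa -j suspicious.exe > capa_output.json
$ python scripts/capa2sarif.py capa_output.json > capa_output.sarif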
"""
import sys
import json
import logging
import argparse
from typing import List, Optional
from pathlib import Path
from capa.version import __version__
logger = logging.getLogger("capa2sarif")
# Dependencies
try:
from sarif_om import Run, Tool, SarifLog, ToolComponent
except ImportError as e:
logger.error(
"Required import `sarif_om` is not installed. This is solved by installing `python3 -m pip install sarif_om>=1.0.4`. %s",
e,
)
exit(-4)
try:
from jschema_to_python.to_json import to_json
except ImportError as e:
logger.error(
"Required import `jschema_to_python` is not installed. This is solved by installing `python3 -m pip install jschema_to_python>=1.2.3`, %s",
e,
)
exit(-4)
def _parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Capa to SARIF analysis file")
# Positional argument
parser.add_argument("capa_output", help="Path to capa JSON output file")
# Optional arguments
parser.add_argument(
"-g",
"--ghidra-compat",
action="store_true",
help="Compatibility for Ghidra 11.0.X",
)
parser.add_argument(
"-r",
"--radare-compat",
action="store_true",
help="Compatibility for Radare r2sarif plugin v2.0",
)
parser.add_argument("-t", "--tag", help="Filter on rule meta field values (ruleid)")
parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
return parser.parse_args()
def main() -> int:
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)
args = _parse_args()
try:
with Path(args.capa_output).open() as capa_output:
json_data = json.load(capa_output)
except json.JSONDecodeError:
# an exception has occurred; note json.JSONDecodeError subclasses
# ValueError, so it must be handled first
logger.error("Input data was not valid JSON, input should be a capa json output file.")
return -2
except ValueError:
logger.error("Input data was not valid JSON, input should be a capa json output file.")
return -1
# Marshall json into Sarif
# Create baseline sarif structure to be populated from json data
sarif_structure: Optional[dict] = _sarif_boilerplate(json_data["meta"], json_data["rules"])
if sarif_structure is None:
logger.error("An Error has occured creating default sarif structure.")
return -3
_populate_artifact(sarif_structure, json_data["meta"])
_populate_invocations(sarif_structure, json_data["meta"])
_populate_results(sarif_structure, json_data["rules"], args.ghidra_compat)
if args.ghidra_compat:
# Ghidra can't handle this structure as of 11.0.x
if "invocations" in sarif_structure["runs"][0]:
del sarif_structure["runs"][0]["invocations"]
# artifacts must also include a description with a text field.
if "artifacts" in sarif_structure["runs"][0]:
sarif_structure["runs"][0]["artifacts"][0]["description"] = {"text": "placeholder"}
# For better compliance with the Ghidra table: iterate through properties['additionalProperties']
"""
"additionalProperties": {
"to": "<hex addr>",
"offset": 0,
"primary": true,
"index": <>"",
"kind": "<kind column value>",
"opIndex": 0,
"sourceType": ""
}
"""
if args.radare_compat:
# Add just enough for passing tests
_add_filler_optional(json_data, sarif_structure)
print(json.dumps(sarif_structure, indent=4)) # noqa: T201
return 0
def _sarif_boilerplate(data_meta: dict, data_rules: dict) -> Optional[dict]:
# Only track rules that appear in this log, not full 1k
rules = []
# Parse rules from parsed sarif structure
for key in data_rules:
# Use the ATT&CK ID by default; if both ATT&CK and MBC exist, only use ATT&CK.
# If neither exists, fall back to the rule name for the ruleID
# (using a long name as a ruleID is not good practice).
attack_length = len(data_rules[key]["meta"]["attack"])
mbc_length = len(data_rules[key]["meta"]["mbc"])
if attack_length or mbc_length:
id = (
data_rules[key]["meta"]["attack"][0]["id"]
if attack_length > 0
else data_rules[key]["meta"]["mbc"][0]["id"]
)
else:
id = data_rules[key]["meta"]["name"]
# Append current rule
rules.append(
{
# Default to attack identifier, fall back to MBC, mainly relevant if both are present
"id": id,
"name": data_rules[key]["meta"]["name"],
"shortDescription": {"text": data_rules[key]["meta"]["name"]},
"messageStrings": {"default": {"text": data_rules[key]["meta"]["name"]}},
"properties": {
"namespace": data_rules[key]["meta"]["namespace"] if "namespace" in data_rules[key]["meta"] else [],
"scopes": data_rules[key]["meta"]["scopes"],
"references": data_rules[key]["meta"]["references"],
"lib": data_rules[key]["meta"]["lib"],
},
}
)
tool = Tool(
driver=ToolComponent(
name="Capa",
version=__version__,
information_uri="https://github.com/mandiant/capa",
rules=rules,
)
)
# Create a SARIF Log object, populate with a single run
sarif_log = SarifLog(
version="2.1.0",
schema_uri="https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json",
runs=[Run(tool=tool, results=[], artifacts=[], invocations=[])],
)
# Convert the SARIF log to a dictionary and then to a JSON string
try:
sarif_outline = json.loads(to_json(sarif_log))
except json.JSONDecodeError:
# An exception has occurred
return None
return sarif_outline
def _populate_artifact(sarif_log: dict, meta_data: dict) -> None:
"""
@param sarif_log: dict - sarif data structure including runs
@param meta_data: dict - Capa meta output
@returns None, updates sarif_log via side-effects
"""
sample = meta_data["sample"]
artifact = {
"location": {"uri": sample["path"]},
"roles": ["analysisTarget"],
"hashes": {
"md5": sample["md5"],
"sha-1": sample["sha1"],
"sha-256": sample["sha256"],
},
}
sarif_log["runs"][0]["artifacts"].append(artifact)
def _populate_invocations(sarif_log: dict, meta_data: dict) -> None:
"""
@param sarif_log: dict - sarif data structure including runs
@param meta_data: dict - Capa meta output
@returns None, updates sarif_log via side-effects
"""
analysis_time = meta_data["timestamp"]
argv = meta_data["argv"]
analysis = meta_data["analysis"]
invoke = {
"commandLine": "capa " + " ".join(argv),
"arguments": argv if len(argv) > 0 else [],
# Format in Zulu time, this may require a conversion from local timezone
"endTimeUtc": f"{analysis_time}Z",
"executionSuccessful": True,
"properties": {
"format": analysis["format"],
"arch": analysis["arch"],
"os": analysis["os"],
"extractor": analysis["extractor"],
"rule_location": analysis["rules"],
"base_address": analysis["base_address"],
},
}
sarif_log["runs"][0]["invocations"].append(invoke)
def _enumerate_evidence(node: dict, related_count: int) -> List[dict]:
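# recursively walk the match tree: matched feature nodes contribute
# labeled evidence locations, while matched statement nodes recurse
# into their children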
related_locations = []
if node.get("success") and node.get("node", {}).get("type") != "statement":
label = ""
if node.get("node", {}).get("type") == "feature":
if node.get("node", {}).get("feature", {}).get("type") == "api":
label = "api: " + node.get("node", {}).get("feature", {}).get("api")
elif node.get("node", {}).get("feature", {}).get("type") == "match":
label = "match: " + node.get("node", {}).get("feature", {}).get("match")
elif node.get("node", {}).get("feature", {}).get("type") == "number":
label = f"number: {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('number')})"
elif node.get("node", {}).get("feature", {}).get("type") == "offset":
label = f"offset: {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('offset')})"
elif node.get("node", {}).get("feature", {}).get("type") == "mnemonic":
label = f"mnemonic: {node.get('node', {}).get('feature', {}).get('mnemonic')}"
elif node.get("node", {}).get("feature", {}).get("type") == "characteristic":
label = f"characteristic: {node.get('node', {}).get('feature', {}).get('characteristic')}"
elif node.get("node", {}).get("feature", {}).get("type") == "os":
label = f"os: {node.get('node', {}).get('feature', {}).get('os')}"
elif node.get("node", {}).get("feature", {}).get("type") == "operand number":
label = f"operand: ({node.get('node', {}).get('feature', {}).get('index')} ) {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('operand_number')})"
else:
logger.error(
"Not implemented %s",
node.get("node", {}).get("feature", {}).get("type"),
)
return []
else:
logger.error("Not implemented %s", node.get("node", {}).get("type"))
return []
for loc in node.get("locations", []):
if loc["type"] != "absolute":
continue
related_locations.append(
{
"id": related_count,
"message": {"text": label},
"physicalLocation": {"address": {"absoluteAddress": loc["value"]}},
}
)
related_count += 1
if node.get("success") and node.get("node", {}).get("type") == "statement":
for child in node.get("children", []):
related_locations += _enumerate_evidence(child, related_count)
return related_locations
def _populate_results(sarif_log: dict, data_rules: dict, ghidra_compat: bool) -> None:
"""
@param sarif_log: dict - sarif data structure including runs
    @param data_rules: dict - rules section of Capa output
    @param ghidra_compat: bool - emit Ghidra-compatible level/kind values
@returns None, updates sarif_log via side-effects
"""
results = sarif_log["runs"][0]["results"]
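    # Each rule match becomes one SARIF result; a hypothetical example entry:
    #   {"ruleId": "T1057", "level": "none", "kind": "informational",
    #    "message": {"text": "enumerate processes"},
    #    "locations": [{"physicalLocation": {"address": {"absoluteAddress": 4198400}}}]}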
# Parse rules from parsed sarif structure
for key in data_rules:
        # Prefer the ATT&CK ID for the ruleId; when both ATT&CK and MBC are present, use ATT&CK.
        # If neither is present, fall back to the rule name (a long name is not ideal as a ruleId).
        attack_length = len(data_rules[key]["meta"]["attack"])
        mbc_length = len(data_rules[key]["meta"]["mbc"])
        if attack_length or mbc_length:
            rule_id = (
                data_rules[key]["meta"]["attack"][0]["id"]
                if attack_length > 0
                else data_rules[key]["meta"]["mbc"][0]["id"]
            )
        else:
            rule_id = data_rules[key]["meta"]["name"]
for address, details in data_rules[key]["matches"]:
related_cnt = 0
related_locations = _enumerate_evidence(details, related_cnt)
res = {
"ruleId": id,
"level": "none" if not ghidra_compat else "NONE",
"message": {"text": data_rules[key]["meta"]["name"]},
"kind": "informational" if not ghidra_compat else "INFORMATIONAL",
"locations": [
{
"physicalLocation": {
"address": {
"absoluteAddress": address["value"],
}
},
}
],
}
if not ghidra_compat:
res["relatedLocations"] = related_locations
results.append(res)
def _add_filler_optional(capa_result: dict, sarif_log: dict) -> None:
"""Update sarif file with just enough fields to pass radare tests"""
base_address = capa_result["meta"]["analysis"]["base_address"]["value"]
# Assume there is only one run, and one binary artifact
artifact = sarif_log["runs"][0]["artifacts"][0]
if "properties" not in artifact:
artifact["properties"] = {}
if "additionalProperties" not in artifact["properties"]:
artifact["properties"]["additionalProperties"] = {}
if "imageBase" not in artifact["properties"]["additionalProperties"]:
artifact["properties"]["additionalProperties"]["imageBase"] = base_address
if __name__ == "__main__":
sys.exit(main())
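
# Example usage (a sketch; assumes the capa JSON output was saved beforehand,
# and "suspicious.exe_" is a hypothetical sample name):
#   capa --json suspicious.exe_ > result.json
#   python capa2sarif.py result.json > result.sarif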


@@ -389,6 +389,8 @@ def get_data_path_by_name(name) -> Path:
return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
elif name.startswith("1038a2"):
return CD / "data" / "1038a23daad86042c66bfe6c9d052d27048de9653bde5750dc0f240c792d9ac8.elf_"
elif name.startswith("3da7c"):
return CD / "data" / "3da7c2c70a2d93ac4643f20339d5c7d61388bddd77a4a5fd732311efad78e535.elf_"
elif name.startswith("nested_typedef"):
return CD / "data" / "dotnet" / "dd9098ff91717f4906afe9dafdfa2f52.exe_"
elif name.startswith("nested_typeref"):


@@ -14,17 +14,11 @@ from capa.features.extractors.elffile import extract_file_export_names, extract_
CD = Path(__file__).resolve().parent
SAMPLE_PATH = CD / "data" / "055da8e6ccfe5a9380231ea04b850e18.elf_"
STRIPPED_SAMPLE_PATH = CD / "data" / "bb38149ff4b5c95722b83f24ca27a42b.elf_"
def test_elffile_import_features():
expected_imports = [
"memfrob",
"puts",
"__libc_start_main",
"malloc",
"__cxa_finalize",
]
path = Path(SAMPLE_PATH)
def check_import_features(sample_path, expected_imports):
path = Path(sample_path)
elf = ELFFile(io.BytesIO(path.read_bytes()))
# Extract imports
imports = list(extract_file_import_names(elf))
@@ -40,6 +34,52 @@ def test_elffile_import_features():
assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in imports."
def check_export_features(sample_path, expected_exports):
path = Path(sample_path)
elf = ELFFile(io.BytesIO(path.read_bytes()))
    # Extract exports
exports = list(extract_file_export_names(elf))
# Verify that at least one export was found
assert len(exports) > 0, "No exports were found."
    # Extract the symbol names from the extracted exports
extracted_symbol_names = [exported[0].value for exported in exports]
# Check if all expected symbol names are found
for symbol_name in expected_exports:
assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports."
def test_stripped_elffile_import_features():
expected_imports = ["__cxa_atexit", "__cxa_finalize", "__stack_chk_fail", "fclose", "fopen", "__android_log_print"]
check_import_features(STRIPPED_SAMPLE_PATH, expected_imports)
def test_stripped_elffile_export_features():
expected_exports = [
"_ZN7_JNIEnv14GetArrayLengthEP7_jarray",
"Java_o_ac_a",
"Java_o_ac_b",
"_Z6existsPKc",
"_ZN7_JNIEnv17GetStringUTFCharsEP8_jstringPh",
"_ZN7_JNIEnv21GetObjectArrayElementEP13_jobjectArrayi",
"_ZN7_JNIEnv21ReleaseStringUTFCharsEP8_jstringPKc",
]
check_export_features(STRIPPED_SAMPLE_PATH, expected_exports)
def test_elffile_import_features():
expected_imports = [
"memfrob",
"puts",
"__libc_start_main",
"malloc",
"__cxa_finalize",
]
check_import_features(SAMPLE_PATH, expected_imports)
def test_elffile_export_features():
expected_exports = [
"deregister_tm_clones",
@@ -55,17 +95,4 @@ def test_elffile_export_features():
"_IO_stdin_used",
"__libc_csu_init",
]
path = Path(SAMPLE_PATH)
elf = ELFFile(io.BytesIO(path.read_bytes()))
    # Extract exports
exports = list(extract_file_export_names(elf))
# Verify that at least one export was found
assert len(exports) > 0, "No exports were found."
    # Extract the symbol names from the extracted exports
extracted_symbol_names = [exported[0].value for exported in exports]
# Check if all expected symbol names are found
for symbol_name in expected_exports:
assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports."
check_export_features(SAMPLE_PATH, expected_exports)


@@ -92,6 +92,12 @@ def test_elf_android_notes():
assert capa.features.extractors.elf.detect_elf_os(f) == "android"
def test_elf_go_buildinfo():
path = get_data_path_by_name("3da7c")
with Path(path).open("rb") as f:
assert capa.features.extractors.elf.detect_elf_os(f) == "linux"
def test_elf_parse_capa_pyinstaller_header():
# error after misidentified large pydata section with address 0; fixed in #1454
# compressed ELF header of capa-v5.1.0-linux


@@ -40,6 +40,10 @@ def get_rule_path():
[
pytest.param("capa2yara.py", [get_rules_path()]),
pytest.param("capafmt.py", [get_rule_path()]),
pytest.param(
"capa2sarif.py",
[Path(__file__).resolve().parent / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json"],
),
# testing some variations of linter script
pytest.param("lint.py", ["-t", "create directory", get_rules_path()]),
# `create directory` rule has native and .NET example PEs

webui/assets/css/style.css Executable file

@@ -0,0 +1,355 @@
/*
* Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at: [package root]/LICENSE.txt
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
*/
/* TODO(s-ff): simplify and refactor the CSS styling */
/* TODO(s-ff): change font, do not use Product Sans */
@font-face {
font-family: 'Product Sans';
font-style: normal;
src: local('Open Sans'), local('OpenSans'), url(https://fonts.gstatic.com/s/productsans/v5/HYvgU2fE2nRJvZ5JFAumwegdm0LZdjqr5-oayXSOefg.woff2) format('woff2');
}
/* Reset and general styles */
*,
*::before,
*::after {
box-sizing: border-box;
}
input {
font: inherit;
}
fieldset {
padding: 16px;
display: grid;
gap: 16px;
}
legend {
font-weight: 700;
}
html {
font-family: 'Product Sans', sans-serif;
}
body {
max-width: 95%;
margin: 0 auto;
padding: 8px;
}
/* Tree styles */
.tree {
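    /* custom properties shared by the connector/line rules below */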
--font-size: 1rem;
--line-height: 1.75;
--spacing: calc(var(--line-height) * 1em);
--thickness: 0.35px;
--radius: 8px;
line-height: var(--line-height);
font-size: var(--font-size);
}
.tree li {
display: block;
position: relative;
padding-left: calc(2 * var(--spacing) - var(--radius) - var(--thickness));
}
.tree ul {
margin-left: calc(var(--radius) - var(--spacing));
padding-left: 0;
}
.tree ul li {
border-left: var(--thickness) solid grey;
}
.tree ul li:last-child {
border-color: transparent;
}
.tree ul li::before {
content: "";
display: block;
position: absolute;
top: calc(var(--spacing) / -2);
left: calc(-1 * var(--thickness));
width: calc(var(--spacing) + var(--thickness));
height: calc(var(--spacing) + var(--thickness) / 2);
border: solid grey;
border-width: 0 0 var(--thickness) var(--thickness);
}
.tree summary {
display: block;
cursor: pointer;
}
.tree summary::marker,
.tree summary::-webkit-details-marker {
display: none;
}
.tree summary:focus {
outline: none;
}
.tree summary:focus-visible {
outline: var(--thickness) dotted #000;
}
.tree li::after,
.tree summary::before {
content: "";
display: block;
position: absolute;
top: calc(var(--spacing) / 2 - var(--radius));
left: calc(var(--spacing) - var(--radius) - var(--thickness) / 2);
width: calc(2 * var(--radius));
height: calc(2 * var(--radius));
border-radius: 50%;
background: #ddd;
}
.tree summary::before {
content: "+";
z-index: 1;
background: #7fa0bb;
color: white;
line-height: calc(2 * var(--radius));
text-align: center;
}
.tree summary .summary-content {
display: flex;
justify-content: space-between;
align-items: center;
}
.tree summary .summary-content .statement-type,
.feature-type {
background-color: #f0f0f0;
border-radius: 4px;
color: #000;
display: inline-block;
font-size: 0.7em;
padding: 1px 6px;
cursor: pointer;
}
.tree summary .summary-content .rule-title,
.feature-value {
margin-left: 1rem;
flex-grow: 1;
}
.feature-value {
color: green;
}
.feature-location {
margin-left: 5px;
font-size: 0.8em;
color: #000;
}
.tree summary .summary-content .namespace {
background-color: #f0f0f0;
border-radius: 4px;
color: #000;
display: inline-block;
font-size: 0.75em;
padding: 2px 6px;
cursor: pointer;
content: "";
}
.tree details[open] > summary::before {
content: "";
}
/* Rule source tooltip styles */
.tree .rule-title {
position: relative;
}
.tree .rule-title:hover::after {
    content: attr(source);
    font-family: 'Courier New', monospace;
    font-size: 0.7em;
    line-height: 1.2em;
    color: black;
    width: max-content;
    padding: 8px;
    border-radius: 6px;
    background: #eee;
    position: absolute;
    left: 350px;
    z-index: 1000;
    white-space: pre-wrap;
}
.tree .rule-title:hover::before {
display: none;
}
/* Controls styles */
.controls {
display: flex;
align-items: center;
margin-bottom: 10px;
}
.hover-toggle-label {
margin-right: 20px;
}
#searchContainer {
flex-grow: 1;
}
#searchContainer input {
width: 100%;
padding: 5px;
}
/* Metadata table styles */
.metadata-container {
margin-bottom: 20px;
}
.metadata-table {
border-collapse: collapse;
table-layout: fixed;
width: 100%;
}
.metadata-table td {
border: 1px solid #ddd;
padding: 8px;
}
.metadata-table tr:nth-child(even) {
background-color: #f2f2f2;
}
.metadata-table tr:hover {
background-color: #ddd;
}
.metadata-table td:first-child {
font-weight: bold;
width: 140px;
}
/* Banner styles */
.banner {
background-color: #007bff;
margin: 7px 0;
display: flex;
justify-content: space-between;
align-items: center;
padding: 5px 15px;
border-radius: 5px;
text-align: left;
color: #fff;
font-size: 14px;
    font-family: 'Product Sans', sans-serif;
cursor: initial;
}
.bannerContent {
padding-right: 15px;
}
.bannerLinks {
display: flex;
align-items: center;
border-left: 2px solid #ffffffad;
padding-left: 15px;
cursor: initial;
}
.banner a {
display: inline;
color: #fff;
text-decoration-color: #fff;
}
.banner a:hover {
color: #343a40;
opacity: 0.8;
}
/* Matched rule styles */
.matched-rule li {
position: relative;
}
.matched-rule .feature-location {
position: absolute;
right: 580px;
top: 50%;
transform: translateY(-50%);
font-size: 0.8em;
color: #000;
}
/* Tree table styles */
.tree-table {
width: 100%;
border-collapse: collapse;
}
.tree-table th {
    padding: 8px;
    text-align: left;
    vertical-align: top;
    background-color: #f0f0f0;
    font-weight: bold;
}
.tree-table th:nth-child(1) {
width: 60%;
}
.tree-table th:nth-child(2),
.tree-table th:nth-child(3) {
width: 20%;
border-left: 1px solid #ddd;
}
.tree-table .tree {
margin-top: 0;
}
/* Button styles */
button {
background-color: #f0f0f0;
border: none;
border-radius: 4px;
color: #000;
cursor: pointer;
font-size: 0.9em;
margin-left: 10px;
padding: 5px 10px;
}

webui/index.html Executable file

@@ -0,0 +1,142 @@
<!--
Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at: [package root]/LICENSE.txt
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
-->
<!DOCTYPE html>
<html lang="en">
<head>
<!-- TODO(s-ff): include a favicon -->
<title>CAPA WebUI</title>
<meta charset="utf-8" />
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="stylesheet" href="assets/css/style.css">
    <script src="scripts/main.js" defer></script>
<script>
        // TODO(s-ff): the search currently only matches feature values; fix it
        // to also search the rule titles.
// unstable
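        // depth-first DOM walk over the rendered tree, toggling each .matched-rule's
        // visibility based on whether the keyword appears in its text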
function searchTree(node, keyword) {
if (node.nodeType === Node.ELEMENT_NODE) {
if (node.classList.contains('rule-title') || node.classList.contains('feature-value')) {
const textContent = node.textContent.toLowerCase();
if (textContent.includes(keyword.toLowerCase())) {
node.closest('.matched-rule').style.display = 'block';
expandParentDetails(node);
} else {
node.closest('.matched-rule').style.display = 'none';
}
}
for (const child of node.childNodes) {
searchTree(child, keyword);
}
}
}
function expandParentDetails(node) {
const parentDetails = node.closest('details');
if (parentDetails) {
parentDetails.open = true;
expandParentDetails(parentDetails.parentNode);
}
}
function collapseAllDetails() {
const detailsElements = document.querySelectorAll('details');
detailsElements.forEach(details => {
details.open = false;
});
}
function expandAllDetails() {
const detailsElements = document.querySelectorAll('details');
detailsElements.forEach(details => {
details.open = true;
});
}
function toggleExpandCollapse() {
const togglebutton = document.getElementById('toggleExpand');
if (togglebutton.textContent === 'Expand All') {
expandAllDetails();
togglebutton.textContent = 'Collapse All';
} else {
collapseAllDetails();
togglebutton.textContent = 'Expand All';
}
}
// unstable
function search(value) {
const tree = document.getElementById('tree');
if (value.trim() === '') {
// If the search bar is cleared, reset the tree to its initial state
const matchedRules = document.querySelectorAll('.matched-rule');
matchedRules.forEach(rule => {
rule.style.display = 'block';
});
collapseAllDetails();
} else {
searchTree(tree, value);
}
}
document.addEventListener('DOMContentLoaded', function() {
document.getElementById('toggleExpand').addEventListener('click', toggleExpandCollapse);
});
</script>
</head>
<body>
<header class="header">
<div class="banner">
            <span class="bannerContent" role="alert">This is a pre-alpha release of capa-webui. Please report any bugs, enhancements, or feature requests in the GitHub issues.</span>
<div class="bannerLinks">
<div>
                    <a href="https://github.com/mandiant/capa">CAPA v7.0.1 on GitHub</a>
</div>
</div>
</div>
</header>
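    <!-- main.js inserts the metadata table just before this (empty) heading -->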
<h1></h1>
    <!-- TODO(s-ff): these controls are not perfectly aligned -->
<div class="controls">
<button id="toggleExpand">Expand All</button>
<!-- TODO(s-ff): allow users to disable showing rule logic on hover -->
<label class="hover-toggle-label">
<input type="checkbox" id="enableHover" checked>
Show rule logic on hover
</label>
<div id="searchContainer">
<input onkeyup="search(this.value)" type="text" placeholder="Type to search... (unstable)" />
</div>
</div>
<table class="tree-table">
<thead>
<tr>
<th>Rule Title</th>
<th>Feature Address</th>
<th>Namespace</th>
</tr>
</thead>
</table>
<ul class="tree" id="tree">
<!-- This will contain the rendered JSON capa result document -->
</ul>
</body>
</html>

webui/scripts/main.js Executable file

@@ -0,0 +1,330 @@
/*
* Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at: [package root]/LICENSE.txt
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
*/
// TODO(s-ff): simplify all the functions and introduce smaller helper functions
// for creating elements, e.g. createOrStatement, isRangeStatement, etc.
/**
* Renders the JSON data representing the CAPA results into an HTML tree structure.
* @param {Object} json - The JSON data containing the CAPA results.
*/
function renderJSON(json) {
const tree = document.getElementById('tree');
// Iterate over each rule in the JSON
for (const ruleName in json.rules) {
const rule = json.rules[ruleName];
// Create a list item for the matched rule
const ruleElement = document.createElement('li');
ruleElement.className = 'matched-rule';
ruleElement.setAttribute('name', ruleName);
// Create a details element for the rule
const ruleDetails = document.createElement('details');
// Create a summary element for the rule
const ruleSummary = document.createElement('summary');
// Create a div for the summary content
const ruleSummaryContent = document.createElement('div');
ruleSummaryContent.className = 'summary-content';
// Add the statement type, rule title, and namespace to the summary content
const statementType = document.createElement('span');
statementType.className = 'statement-type';
statementType.textContent = 'rule';
ruleSummaryContent.appendChild(statementType);
const ruleTitle = document.createElement('span');
ruleTitle.className = 'rule-title';
ruleTitle.setAttribute('source', rule.source);
ruleTitle.textContent = rule.meta.name;
ruleSummaryContent.appendChild(ruleTitle);
const namespace = document.createElement('span');
namespace.className = 'namespace';
if (rule.meta.namespace) {
namespace.textContent = rule.meta.namespace;
} else {
namespace.style.display = 'none';
}
ruleSummaryContent.appendChild(namespace);
// Append the summary content to the summary element
ruleSummary.appendChild(ruleSummaryContent);
// Append the summary element to the details element
ruleDetails.appendChild(ruleSummary);
// Create a list for the rule's matches
const matchesList = document.createElement('ul');
// Iterate over each match in the rule
        for (const match of rule.matches) {
            // Render the match and its children recursively;
            // renderMatch() returns null for optional statements with no successful children
            const matchElement = renderMatch(match);
            if (matchElement !== null) {
                matchesList.appendChild(matchElement);
            }
        }
// Append the matches list to the details element
ruleDetails.appendChild(matchesList);
// Append the rule details to the rule list item
ruleElement.appendChild(ruleDetails);
// Append the rule list item to the tree
tree.appendChild(ruleElement);
}
}
/**
* Recursively renders a match and its children into an HTML structure.
 * @param {Array} match - An [address, node] pair from rule.matches; recursive calls pass [null, child].
 * @returns {HTMLElement|null} The rendered match element, or null when an optional statement has no successful children.
*/
function renderMatch(match) {
// Create a list item for the match
const matchElement = document.createElement('li');
matchElement.className = 'match-container';
// Check if the match is a range statement
if (match[1].node.type === 'statement' && match[1].node.statement.type === 'range') {
const rangeElement = document.createElement('li');
rangeElement.className = 'match-container';
const rangeType = document.createElement('span');
rangeType.className = 'feature-type';
rangeType.textContent = `count(${match[1].node.statement.child.type}(${match[1].node.statement.child[match[1].node.statement.child.type]}))`;
rangeElement.appendChild(rangeType);
const rangeValue = document.createElement('span');
rangeValue.className = 'feature-value';
        const minima = match[1].node.statement.min;
        const maxima = match[1].node.statement.max;
        if (minima === maxima) {
rangeValue.textContent = `${minima}`;
rangeType.textContent = `count(${match[1].node.statement.child.type})`;
} else {
rangeValue.textContent = `${minima} or more`;
}
rangeElement.appendChild(rangeValue);
const rangeLocation = document.createElement('span');
rangeLocation.className = 'feature-location';
        if (match[1].locations.length > 0 && match[1].locations[0].value !== undefined) {
const locations = match[1].locations.map(loc => '0x' + loc.value.toString(16).toUpperCase());
rangeLocation.textContent = locations.join(', ');
}
rangeElement.appendChild(rangeLocation);
return rangeElement;
}
// If the match is a feature, render its type and value
if (match[1].node.type === 'feature') {
const featureType = document.createElement('span');
featureType.className = 'feature-type';
featureType.textContent = match[1].node.feature.type;
matchElement.appendChild(featureType);
const featureValue = document.createElement('span');
featureValue.className = 'feature-value';
featureValue.textContent = match[1].node.feature[match[1].node.feature.type];
matchElement.appendChild(featureValue);
const featureLocation = document.createElement('span');
featureLocation.className = 'feature-location';
        if (match[1].locations.length > 0 && match[1].locations[0].value !== undefined) {
const locations = match[1].locations.map(loc => '0x' + loc.value.toString(16).toUpperCase());
featureLocation.textContent = locations.join(', ');
}
matchElement.appendChild(featureLocation);
} else {
// Check if the match is an optional statement - these always have `success: true`.
const isOptional = match[1].node.statement.type === 'optional';
// If it's an optional statement, check if any of its children have success set to true
if (isOptional) {
const hasSuccessfulChild = match[1].children.some(child => child.success);
// If none of the children have success set to true, don't render the optional statement
if (!hasSuccessfulChild) {
return null;
}
}
// Create a details element for the match
const matchDetails = document.createElement('details');
// Create a summary element for the match
const matchSummary = document.createElement('summary');
// Create a div for the summary content
const matchSummaryContent = document.createElement('div');
matchSummaryContent.className = 'summary-content';
// Add the statement type to the summary content
const statementType = document.createElement('span');
statementType.className = 'statement-type';
statementType.textContent = match[1].node.statement.type;
matchSummaryContent.appendChild(statementType);
// Append the summary content to the summary element
matchSummary.appendChild(matchSummaryContent);
// Append the summary element to the details element
matchDetails.appendChild(matchSummary);
// Create a list for the match's children
const childrenList = document.createElement('ul');
// Iterate over each child in the match
for (const child of match[1].children) {
// Recursively render the child if it is successful
if (child.success) {
const childElement = renderMatch([null, child]);
if (childElement !== null) {
childrenList.appendChild(childElement);
}
}
}
// Append the children list to the details element
matchDetails.appendChild(childrenList);
// Append the match details to the match list item
matchElement.appendChild(matchDetails);
}
return matchElement;
}
/**
* Determines the statement type based on the feature object.
* @param {Object} feature - The feature object.
* @returns {string} The statement type.
*/
function getStatementType(feature) {
if (feature.type === 'and' || feature.type === 'or' || feature.type === 'not') {
return feature.type;
} else if (feature.match) {
return 'match';
} else {
return 'feature';
}
}
/**
* Checks if the given feature is a leaf feature (i.e., not a compound feature).
* @param {Object} feature - The feature object.
* @returns {boolean} True if the feature is a leaf feature, false otherwise.
*/
function isLeafFeature(feature) {
return feature.type !== 'and' && feature.type !== 'or' && feature.type !== 'not' && !feature.match;
}
/**
* Adds a metadata table to the document body.
* @param {Object} metadata - The metadata object containing sample and analysis information.
*/
function addMetadataTable(metadata) {
const metadataContainer = document.createElement("div");
metadataContainer.className = "metadata-container";
const table = document.createElement("table");
table.className = "metadata-table";
const rows = [{
key: "MD5",
value: metadata.sample.md5
},
{
key: "SHA1",
value: metadata.sample.sha1
},
{
key: "SHA256",
value: metadata.sample.sha256
},
{
key: "Extractor",
value: metadata.analysis.extractor
},
{
key: "Analysis",
value: metadata.flavor
},
{
key: "OS",
value: metadata.analysis.os
},
{
key: "Format",
value: metadata.analysis.format
},
{
key: "Arch",
value: metadata.analysis.arch
},
{
key: "Path",
value: metadata.sample.path
},
{
key: "Base Address",
value: "0x" + metadata.analysis.base_address.value.toString(16)
},
{
key: "Version",
value: metadata.version
},
{
key: "Timestamp",
value: metadata.timestamp
},
{
key: "Function Count",
            value: metadata.analysis.feature_counts.functions.length
}
];
rows.forEach((row) => {
const tr = document.createElement("tr");
const keyTd = document.createElement("td");
keyTd.textContent = row.key;
tr.appendChild(keyTd);
const valueTd = document.createElement("td");
valueTd.textContent = row.value;
tr.appendChild(valueTd);
table.appendChild(tr);
});
metadataContainer.appendChild(table);
document.body.insertBefore(metadataContainer, document.querySelector("h1"));
}
// TODO(s-ff): introduce a "Upload from local" and "Load from URL" options
/* For now we are using a static al-khaser_x64.exe rdoc for testing */
const url = 'https://raw.githubusercontent.com/mandiant/capa-testfiles/master/rd/al-khaser_x64.exe_.json';
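// To test against a local result document instead, serve it next to these files
// and point `url` at a relative path, e.g. (hypothetical) 'data/result.json'.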
fetch(url)
.then(res => res.json())
.then(result_document => {
addMetadataTable(result_document.meta);
renderJSON(result_document);
})
    .catch(err => {
        console.error('failed to load or render the capa result document:', err);
    });