mirror of
https://github.com/mandiant/capa.git
synced 2026-02-04 19:12:01 -08:00
Merge branch 'master' into Aayush-Goel-04/Issue#322
This commit is contained in:
10
CHANGELOG.md
10
CHANGELOG.md
@@ -5,16 +5,24 @@
|
||||
### New Features
|
||||
- ghidra: add Ghidra feature extractor and supporting code #1770 @colton-gabertan
|
||||
- ghidra: add entry script helping users run capa against a loaded Ghidra database #1767 @mike-hunhoff
|
||||
- binja: add support for forwarded exports #1646 @xusheng6
|
||||
- binja: add support for symtab names #1504 @xusheng6
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
### New Rules (1)
|
||||
### New Rules (4)
|
||||
|
||||
- nursery/get-ntoskrnl-base-address @mr-tz
|
||||
- host-interaction/network/connectivity/set-tcp-connection-state @johnk3r
|
||||
- nursery/capture-process-snapshot-data @mr-tz
|
||||
- collection/network/capture-packets-using-sharppcap jakub.jozwiak@mandiant.com
|
||||
-
|
||||
|
||||
### Bug Fixes
|
||||
- ghidra: fix ints_to_bytes performance #1761 @mike-hunhoff
|
||||
- binja: improve function call site detection @xusheng6
|
||||
- binja: use binaryninja.load to open files @xusheng6
|
||||
- binja: bump binja version to 3.5 #1789 @xusheng6
|
||||
|
||||
### capa explorer IDA Pro plugin
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
[](https://pypi.org/project/flare-capa)
|
||||
[](https://github.com/mandiant/capa/releases)
|
||||
[](https://github.com/mandiant/capa-rules)
|
||||
[](https://github.com/mandiant/capa-rules)
|
||||
[](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
|
||||
[](https://github.com/mandiant/capa/releases)
|
||||
[](LICENSE.txt)
|
||||
|
||||
@@ -17,7 +17,7 @@ import capa.features.extractors.strings
|
||||
from capa.features.file import Export, Import, Section, FunctionName
|
||||
from capa.features.common import FORMAT_PE, FORMAT_ELF, Format, String, Feature, Characteristic
|
||||
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.binja.helpers import unmangle_c_name
|
||||
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name
|
||||
|
||||
|
||||
def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[int, int]]:
|
||||
@@ -82,6 +82,24 @@ def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address
|
||||
if name != unmangled_name:
|
||||
yield Export(unmangled_name), AbsoluteVirtualAddress(sym.address)
|
||||
|
||||
for sym in bv.get_symbols_of_type(SymbolType.DataSymbol):
|
||||
if sym.binding not in [SymbolBinding.GlobalBinding]:
|
||||
continue
|
||||
|
||||
name = sym.short_name
|
||||
if not name.startswith("__forwarder_name"):
|
||||
continue
|
||||
|
||||
# Due to https://github.com/Vector35/binaryninja-api/issues/4641, in binja version 3.5, the symbol's name
|
||||
# does not contain the DLL name. As a workaround, we read the C string at the symbol's address, which contains
|
||||
# both the DLL name and the function name.
|
||||
# Once the above issue is closed in the next binja stable release, we can update the code here to use the
|
||||
# symbol name directly.
|
||||
name = read_c_string(bv, sym.address, 1024)
|
||||
forwarded_name = capa.features.extractors.helpers.reformat_forwarded_export_name(name)
|
||||
yield Export(forwarded_name), AbsoluteVirtualAddress(sym.address)
|
||||
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(sym.address)
|
||||
|
||||
|
||||
def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""extract function imports
|
||||
@@ -125,15 +143,17 @@ def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Addre
|
||||
"""
|
||||
for sym_name in bv.symbols:
|
||||
for sym in bv.symbols[sym_name]:
|
||||
if sym.type == SymbolType.LibraryFunctionSymbol:
|
||||
name = sym.short_name
|
||||
yield FunctionName(name), sym.address
|
||||
if name.startswith("_"):
|
||||
# some linkers may prefix linked routines with a `_` to avoid name collisions.
|
||||
# extract features for both the mangled and un-mangled representations.
|
||||
# e.g. `_fwrite` -> `fwrite`
|
||||
# see: https://stackoverflow.com/a/2628384/87207
|
||||
yield FunctionName(name[1:]), sym.address
|
||||
if sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.FunctionSymbol]:
|
||||
continue
|
||||
|
||||
name = sym.short_name
|
||||
yield FunctionName(name), sym.address
|
||||
if name.startswith("_"):
|
||||
# some linkers may prefix linked routines with a `_` to avoid name collisions.
|
||||
# extract features for both the mangled and un-mangled representations.
|
||||
# e.g. `_fwrite` -> `fwrite`
|
||||
# see: https://stackoverflow.com/a/2628384/87207
|
||||
yield FunctionName(name[1:]), sym.address
|
||||
|
||||
|
||||
def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
|
||||
|
||||
@@ -7,8 +7,9 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
from binaryninja import Function, BinaryView, LowLevelILOperation
|
||||
from binaryninja import Function, BinaryView, SymbolType, RegisterValueType, LowLevelILOperation
|
||||
|
||||
from capa.features.file import FunctionName
|
||||
from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors import loops
|
||||
@@ -23,13 +24,27 @@ def extract_function_calls_to(fh: FunctionHandle):
|
||||
# Everything that is a code reference to the current function is considered a caller, which actually includes
|
||||
# many other references that are NOT a caller. For example, an instruction `push function_start` will also be
|
||||
# considered a caller to the function
|
||||
if caller.llil is not None and caller.llil.operation in [
|
||||
llil = caller.llil
|
||||
if (llil is None) or llil.operation not in [
|
||||
LowLevelILOperation.LLIL_CALL,
|
||||
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
||||
LowLevelILOperation.LLIL_JUMP,
|
||||
LowLevelILOperation.LLIL_TAILCALL,
|
||||
]:
|
||||
yield Characteristic("calls to"), AbsoluteVirtualAddress(caller.address)
|
||||
continue
|
||||
|
||||
if llil.dest.value.type not in [
|
||||
RegisterValueType.ImportedAddressValue,
|
||||
RegisterValueType.ConstantValue,
|
||||
RegisterValueType.ConstantPointerValue,
|
||||
]:
|
||||
continue
|
||||
|
||||
address = llil.dest.value.value
|
||||
if address != func.start:
|
||||
continue
|
||||
|
||||
yield Characteristic("calls to"), AbsoluteVirtualAddress(caller.address)
|
||||
|
||||
|
||||
def extract_function_loop(fh: FunctionHandle):
|
||||
@@ -59,10 +74,31 @@ def extract_recursive_call(fh: FunctionHandle):
|
||||
yield Characteristic("recursive call"), fh.address
|
||||
|
||||
|
||||
def extract_function_name(fh: FunctionHandle):
    """extract function names (e.g., symtab names)

    For every symbol located at the start of the function whose type is
    LibraryFunctionSymbol or FunctionSymbol, yield a FunctionName feature for
    the symbol's short name. When the name starts with an underscore, a second
    FunctionName feature is yielded with that single leading underscore removed.

    args:
      fh: handle whose `inner` attribute is the Binary Ninja function.

    yields:
      (FunctionName, AbsoluteVirtualAddress) pairs; the address is the symbol's address.
      Nothing is yielded when the function has no associated view.
    """
    func: Function = fh.inner
    bv: BinaryView = func.view
    if bv is None:
        return

    for sym in bv.get_symbols(func.start):
        if sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.FunctionSymbol]:
            continue

        name = sym.short_name
        # wrap the raw integer so this handler emits the same address type as the
        # other function/file handlers, which use AbsoluteVirtualAddress.
        address = AbsoluteVirtualAddress(sym.address)

        yield FunctionName(name), address
        if name.startswith("_"):
            # some linkers may prefix linked routines with a `_` to avoid name collisions.
            # extract features for both the mangled and un-mangled representations.
            # e.g. `_fwrite` -> `fwrite`
            # see: https://stackoverflow.com/a/2628384/87207
            yield FunctionName(name[1:]), address
|
||||
|
||||
|
||||
def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
    """run every registered function-scope handler against the given function.

    Handlers are taken from FUNCTION_HANDLERS and invoked in that order; each
    (feature, address) pair a handler produces is passed on to the caller.
    """
    for handler in FUNCTION_HANDLERS:
        produced = handler(fh)
        for pair in produced:
            # unpack explicitly so each item is checked to be a two-element pair
            feature, addr = pair
            yield feature, addr
|
||||
|
||||
|
||||
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call)
|
||||
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call, extract_function_name)
|
||||
|
||||
@@ -9,7 +9,7 @@ import re
|
||||
from typing import List, Callable
|
||||
from dataclasses import dataclass
|
||||
|
||||
from binaryninja import LowLevelILInstruction
|
||||
from binaryninja import BinaryView, LowLevelILInstruction
|
||||
from binaryninja.architecture import InstructionTextToken
|
||||
|
||||
|
||||
@@ -51,3 +51,19 @@ def unmangle_c_name(name: str) -> str:
|
||||
return match.group(1)
|
||||
|
||||
return name
|
||||
|
||||
|
||||
def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
    """read a NUL-terminated C string from the view.

    args:
      bv: view to read from; only its `read(address, length)` method is used.
      offset: address of the first byte of the string.
      max_len: maximum number of bytes to consider.

    returns:
      the bytes before the first NUL byte (or all bytes read, when no NUL is
      found within `max_len` bytes or the readable data ends first), with each
      byte mapped to the character of the same code point. An empty string is
      returned when nothing can be read or `max_len` is not positive.
    """
    if max_len <= 0:
        return ""

    try:
        # fetch the whole candidate range with a single call rather than one call per byte.
        # NOTE(review): relies on `read` returning a short buffer, not raising, when the
        # range runs past readable data - confirm against the Binary Ninja API docs.
        data = bv.read(offset, max_len)
    except Exception:
        # reading is deliberately best-effort: an unreadable address yields an empty string.
        return ""

    data = data[:max_len]
    terminator = data.find(b"\x00")
    if terminator != -1:
        data = data[:terminator]

    # latin-1 maps every byte value to the code point with the same value,
    # which is the same result as applying chr() to each byte.
    return data.decode("latin-1")
|
||||
|
||||
@@ -94,28 +94,32 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
|
||||
candidate_addrs.append(stub_addr)
|
||||
|
||||
for address in candidate_addrs:
|
||||
sym = func.view.get_symbol_at(address)
|
||||
if sym is None or sym.type not in [SymbolType.ImportAddressSymbol, SymbolType.ImportedFunctionSymbol]:
|
||||
continue
|
||||
for sym in func.view.get_symbols(address):
|
||||
if sym is None or sym.type not in [
|
||||
SymbolType.ImportAddressSymbol,
|
||||
SymbolType.ImportedFunctionSymbol,
|
||||
SymbolType.FunctionSymbol,
|
||||
]:
|
||||
continue
|
||||
|
||||
sym_name = sym.short_name
|
||||
sym_name = sym.short_name
|
||||
|
||||
lib_name = ""
|
||||
import_lib = bv.lookup_imported_object_library(sym.address)
|
||||
if import_lib is not None:
|
||||
lib_name = import_lib[0].name
|
||||
if lib_name.endswith(".dll"):
|
||||
lib_name = lib_name[:-4]
|
||||
elif lib_name.endswith(".so"):
|
||||
lib_name = lib_name[:-3]
|
||||
lib_name = ""
|
||||
import_lib = bv.lookup_imported_object_library(sym.address)
|
||||
if import_lib is not None:
|
||||
lib_name = import_lib[0].name
|
||||
if lib_name.endswith(".dll"):
|
||||
lib_name = lib_name[:-4]
|
||||
elif lib_name.endswith(".so"):
|
||||
lib_name = lib_name[:-3]
|
||||
|
||||
for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name):
|
||||
yield API(name), ih.address
|
||||
|
||||
if sym_name.startswith("_"):
|
||||
for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name[1:]):
|
||||
for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name):
|
||||
yield API(name), ih.address
|
||||
|
||||
if sym_name.startswith("_"):
|
||||
for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name[1:]):
|
||||
yield API(name), ih.address
|
||||
|
||||
|
||||
def extract_insn_number_features(
|
||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||
|
||||
@@ -558,7 +558,8 @@ def get_extractor(
|
||||
sys.path.append(str(bn_api))
|
||||
|
||||
try:
|
||||
from binaryninja import BinaryView, BinaryViewType
|
||||
import binaryninja
|
||||
from binaryninja import BinaryView
|
||||
except ImportError:
|
||||
raise RuntimeError(
|
||||
"Cannot import binaryninja module. Please install the Binary Ninja Python API first: "
|
||||
@@ -568,7 +569,7 @@ def get_extractor(
|
||||
import capa.features.extractors.binja.extractor
|
||||
|
||||
with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
|
||||
bv: BinaryView = BinaryViewType.get_view_of_file(str(path))
|
||||
bv: BinaryView = binaryninja.load(str(path))
|
||||
if bv is None:
|
||||
raise RuntimeError(f"Binary Ninja cannot open file {path}")
|
||||
|
||||
|
||||
BIN
doc/capa_quickstart.pdf
Normal file
BIN
doc/capa_quickstart.pdf
Normal file
Binary file not shown.
@@ -45,7 +45,7 @@ dependencies = [
|
||||
"ruamel.yaml==0.17.32",
|
||||
"vivisect==1.1.1",
|
||||
"pefile==2023.2.7",
|
||||
"pyelftools==0.29",
|
||||
"pyelftools==0.30",
|
||||
"dnfile==0.13.0",
|
||||
"dncil==1.0.2",
|
||||
"pydantic==2.1.1",
|
||||
@@ -62,7 +62,7 @@ packages = ["capa"]
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"pre-commit==3.4.0",
|
||||
"pytest==7.4.0",
|
||||
"pytest==7.4.2",
|
||||
"pytest-sugar==0.9.7",
|
||||
"pytest-instafail==0.5.0",
|
||||
"pytest-cov==4.1.0",
|
||||
@@ -77,7 +77,7 @@ dev = [
|
||||
"flake8-simplify==0.20.0",
|
||||
"flake8-use-pathlib==0.3.0",
|
||||
"flake8-copyright==0.2.4",
|
||||
"ruff==0.0.286",
|
||||
"ruff==0.0.291",
|
||||
"black==23.7.0",
|
||||
"isort==5.11.4",
|
||||
"mypy==1.5.1",
|
||||
@@ -98,7 +98,7 @@ dev = [
|
||||
build = [
|
||||
"pyinstaller==5.10.1",
|
||||
"setuptools==68.0.0",
|
||||
"build==0.10.0"
|
||||
"build==1.0.3"
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
||||
2
rules
2
rules
Submodule rules updated: eba332e702...a1e83cf147
Submodule tests/data updated: faf741a538...87bd888e19
@@ -159,7 +159,8 @@ def get_dnfile_extractor(path: Path):
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def get_binja_extractor(path: Path):
|
||||
from binaryninja import Settings, BinaryViewType
|
||||
import binaryninja
|
||||
from binaryninja import Settings
|
||||
|
||||
import capa.features.extractors.binja.extractor
|
||||
|
||||
@@ -168,7 +169,7 @@ def get_binja_extractor(path: Path):
|
||||
if path.name.endswith("kernel32-64.dll_"):
|
||||
old_pdb = settings.get_bool("pdb.loadGlobalSymbols")
|
||||
settings.set_bool("pdb.loadGlobalSymbols", False)
|
||||
bv = BinaryViewType.get_view_of_file(str(path))
|
||||
bv = binaryninja.load(str(path))
|
||||
if path.name.endswith("kernel32-64.dll_"):
|
||||
settings.set_bool("pdb.loadGlobalSymbols", old_pdb)
|
||||
|
||||
|
||||
@@ -36,16 +36,10 @@ except ImportError:
|
||||
@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed")
|
||||
@fixtures.parametrize(
|
||||
"sample,scope,feature,expected",
|
||||
fixtures.FEATURE_PRESENCE_TESTS,
|
||||
fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS,
|
||||
indirect=["sample", "scope"],
|
||||
)
|
||||
def test_binja_features(sample, scope, feature, expected):
|
||||
if isinstance(feature, capa.features.file.Export) and "." in str(feature.value):
|
||||
pytest.xfail("skip Binja unsupported forwarded export feature, see #1646")
|
||||
|
||||
if feature == capa.features.common.Characteristic("forwarded export"):
|
||||
pytest.xfail("skip Binja unsupported forwarded export feature, see #1646")
|
||||
|
||||
fixtures.do_test_feature_presence(fixtures.get_binja_extractor, sample, scope, feature, expected)
|
||||
|
||||
|
||||
@@ -69,4 +63,4 @@ def test_standalone_binja_backend():
|
||||
@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed")
|
||||
def test_binja_version():
|
||||
version = binaryninja.core_version_info()
|
||||
assert version.major == 3 and version.minor == 4
|
||||
assert version.major == 3 and version.minor == 5
|
||||
|
||||
Reference in New Issue
Block a user