Merge branch 'mandiant:master' into Aayush-Goel-04/Issue#1411

This commit is contained in:
Aayush Goel
2023-06-06 13:04:19 +05:30
committed by GitHub
8 changed files with 137 additions and 5 deletions

View File

@@ -1,6 +1,7 @@
# Change Log
## master (unreleased)
- extract function and API names from ELF symtab entries @yelhamer https://github.com/mandiant/capa-rules/issues/736
### New Features
- Utility script to detect feature overlap between new and existing CAPA rules [#1451](https://github.com/mandiant/capa/issues/1451) [@Aayush-Goel-04](https://github.com/aayush-goel-04)

View File

@@ -91,6 +91,20 @@ class Shdr:
entsize: int
buf: bytes
@classmethod
def from_viv(cls, section, buf: bytes) -> "Shdr":
return cls(
section.sh_name,
section.sh_type,
section.sh_flags,
section.sh_addr,
section.sh_offset,
section.sh_size,
section.sh_link,
section.sh_entsize,
buf,
)
class ELF:
def __init__(self, f: BinaryIO):
@@ -695,6 +709,29 @@ class SymTab:
for symbol in self.symbols:
yield symbol
@classmethod
def from_Elf(cls, ElfBinary) -> Optional["SymTab"]:
endian = "<" if ElfBinary.getEndian() == 0 else ">"
bitness = ElfBinary.bits
SHT_SYMTAB = 0x2
for section in ElfBinary.sections:
if section.sh_info & SHT_SYMTAB:
strtab_section = ElfBinary.sections[section.sh_link]
sh_symtab = Shdr.from_viv(section, ElfBinary.readAtOffset(section.sh_offset, section.sh_size))
sh_strtab = Shdr.from_viv(
strtab_section, ElfBinary.readAtOffset(strtab_section.sh_offset, strtab_section.sh_size)
)
try:
return cls(endian, bitness, sh_symtab, sh_strtab)
except NameError:
return None
except:
# all exceptions that could be encountered by
# cls._parse() imply a faulty symbol's table.
raise CorruptElfFile("malformed symbol's table")
def guess_os_from_osabi(elf: ELF) -> Optional[OS]:
return elf.ei_osabi

View File

@@ -6,7 +6,7 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import List, Tuple, Iterator
from typing import Any, Dict, List, Tuple, Iterator
import viv_utils
import viv_utils.flirt
@@ -49,8 +49,11 @@ class VivisectFeatureExtractor(FeatureExtractor):
yield from capa.features.extractors.viv.file.extract_features(self.vw, self.buf)
def get_functions(self) -> Iterator[FunctionHandle]:
cache: Dict[str, Any] = {}
for va in sorted(self.vw.getFunctions()):
yield FunctionHandle(address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va))
yield FunctionHandle(
address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va), ctx={"cache": cache}
)
def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
yield from capa.features.extractors.viv.function.extract_features(fh)

View File

@@ -11,9 +11,11 @@ import envi
import viv_utils
import vivisect.const
from capa.features.file import FunctionName
from capa.features.common import Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors import loops
from capa.features.extractors.elf import SymTab
from capa.features.extractors.base_extractor import FunctionHandle
@@ -30,6 +32,28 @@ def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[Tuple[Feature
raise NotImplementedError
def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
if fh.inner.vw.metadata["Format"] == "elf":
# the file's symbol table gets added to the metadata of the vivisect workspace.
# this is in order to eliminate the computational overhead of refetching symtab each time.
if "symtab" not in fh.ctx["cache"]:
try:
fh.ctx["cache"]["symtab"] = SymTab.from_Elf(fh.inner.vw.parsedbin)
except:
fh.ctx["cache"]["symtab"] = None
symtab = fh.ctx["cache"]["symtab"]
if symtab:
for symbol in symtab.get_symbols():
sym_name = symtab.get_name(symbol)
sym_value = symbol.value
sym_info = symbol.info
STT_FUNC = 0x2
if sym_value == fh.address and sym_info & STT_FUNC != 0:
yield FunctionName(sym_name), fh.address
def extract_function_calls_to(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
f: viv_utils.Function = fhandle.inner
for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE):
@@ -79,4 +103,8 @@ def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
yield feature, addr
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop)
FUNCTION_HANDLERS = (
extract_function_symtab_names,
extract_function_calls_to,
extract_function_loop,
)

View File

@@ -19,9 +19,11 @@ import envi.archs.amd64.disasm
import capa.features.extractors.helpers
import capa.features.extractors.viv.helpers
from capa.features.file import FunctionName
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors.elf import Shdr, SymTab
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call
@@ -109,6 +111,26 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato
if not target:
return
if f.vw.metadata["Format"] == "elf":
if "symtab" not in fh.ctx["cache"]:
# the symbol table gets stored as a function's attribute in order to avoid running
# this code everytime the call is made, thus preventing the computational overhead.
try:
fh.ctx["cache"]["symtab"] = SymTab.from_Elf(f.vw.parsedbin)
except:
fh.ctx["cache"]["symtab"] = None
symtab = fh.ctx["cache"]["symtab"]
if symtab:
for symbol in symtab.get_symbols():
sym_name = symtab.get_name(symbol)
sym_value = symbol.value
sym_info = symbol.info
STT_FUNC = 0x2
if sym_value == target and sym_info & STT_FUNC != 0:
yield API(sym_name), ih.address
if viv_utils.flirt.is_library_function(f.vw, target):
name = viv_utils.get_function_name(f.vw, target)
yield API(name), ih.address

View File

@@ -761,6 +761,47 @@ FEATURE_PRESENCE_TESTS = sorted(
key=lambda t: (t[0], t[1]),
)
# this list should be merged into the one above (FEATURE_PRESENSE_TESTS)
# once the debug symbol functionality has been added to all backends
FEATURE_SYMTAB_FUNC_TESTS = [
(
"2bf18d",
"function=0x4027b3,bb=0x402861,insn=0x40286d",
capa.features.insn.API("__GI_connect"),
True,
),
(
"2bf18d",
"function=0x4027b3,bb=0x402861,insn=0x40286d",
capa.features.insn.API("connect"),
True,
),
(
"2bf18d",
"function=0x4027b3,bb=0x402861,insn=0x40286d",
capa.features.insn.API("__libc_connect"),
True,
),
(
"2bf18d",
"function=0x4088a4",
capa.features.file.FunctionName("__GI_connect"),
True,
),
(
"2bf18d",
"function=0x4088a4",
capa.features.file.FunctionName("connect"),
True,
),
(
"2bf18d",
"function=0x4088a4",
capa.features.file.FunctionName("__libc_connect"),
True,
),
]
FEATURE_PRESENCE_TESTS_DOTNET = sorted(
[
("b9f5b", "file", Arch(ARCH_I386), True),

View File

@@ -11,7 +11,7 @@ from fixtures import *
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_PRESENCE_TESTS,
fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS,
indirect=["sample", "scope"],
)
def test_viv_features(sample, scope, feature, expected):