mirror of
https://github.com/mandiant/capa.git
synced 2025-12-22 15:16:22 -08:00
extractors accept Path instance
This commit is contained in:
@@ -6,6 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
# When the script gets executed as a standalone executable (via PyInstaller), `import binaryninja` does not work because
|
||||
# we have excluded the binaryninja module in `pyinstaller.spec`. The trick here is to call the system Python and try
|
||||
@@ -25,9 +26,9 @@ if spec is not None:
|
||||
"""
|
||||
|
||||
|
||||
def find_binja_path() -> str:
|
||||
def find_binja_path() -> Path:
|
||||
raw_output = subprocess.check_output(["python", "-c", code]).decode("ascii").strip()
|
||||
return bytes.fromhex(raw_output).decode("utf8")
|
||||
return Path(bytes.fromhex(raw_output).decode("utf8"))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, List, Tuple, Union, Iterator, Optional
|
||||
from pathlib import Path
|
||||
|
||||
import dnfile
|
||||
from dncil.cil.opcode import OpCodes
|
||||
@@ -68,9 +69,9 @@ class DnFileFeatureExtractorCache:
|
||||
|
||||
|
||||
class DnfileFeatureExtractor(FeatureExtractor):
|
||||
def __init__(self, path: str):
|
||||
def __init__(self, path: Path):
|
||||
super().__init__()
|
||||
self.pe: dnfile.dnPE = dnfile.dnPE(path)
|
||||
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
|
||||
|
||||
# pre-compute .NET token lookup tables; each .NET method has access to this cache for feature extraction
|
||||
# most relevant at instruction scope
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from pathlib import Path
|
||||
|
||||
import dnfile
|
||||
import pefile
|
||||
@@ -74,10 +75,10 @@ GLOBAL_HANDLERS = (
|
||||
|
||||
|
||||
class DnfileFeatureExtractor(FeatureExtractor):
|
||||
def __init__(self, path: str):
|
||||
def __init__(self, path: Path):
|
||||
super().__init__()
|
||||
self.path: str = path
|
||||
self.pe: dnfile.dnPE = dnfile.dnPE(path)
|
||||
self.path: Path = path
|
||||
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
|
||||
|
||||
def get_base_address(self) -> AbsoluteVirtualAddress:
|
||||
return AbsoluteVirtualAddress(0x0)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import logging
|
||||
from typing import Tuple, Iterator, cast
|
||||
from pathlib import Path
|
||||
|
||||
import dnfile
|
||||
import pefile
|
||||
@@ -158,10 +159,10 @@ GLOBAL_HANDLERS = (
|
||||
|
||||
|
||||
class DotnetFileFeatureExtractor(FeatureExtractor):
|
||||
def __init__(self, path: str):
|
||||
def __init__(self, path: Path):
|
||||
super().__init__()
|
||||
self.path: str = path
|
||||
self.pe: dnfile.dnPE = dnfile.dnPE(path)
|
||||
self.path: Path = path
|
||||
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
|
||||
|
||||
def get_base_address(self):
|
||||
return NO_ADDRESS
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
import io
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from pathlib import Path
|
||||
|
||||
from elftools.elf.elffile import ELFFile, SymbolTableSection
|
||||
|
||||
@@ -107,11 +108,10 @@ GLOBAL_HANDLERS = (
|
||||
|
||||
|
||||
class ElfFeatureExtractor(FeatureExtractor):
|
||||
def __init__(self, path: str):
|
||||
def __init__(self, path: Path):
|
||||
super().__init__()
|
||||
self.path = path
|
||||
with open(self.path, "rb") as f:
|
||||
self.elf = ELFFile(io.BytesIO(f.read()))
|
||||
self.path: Path = path
|
||||
self.elf = ELFFile(io.BytesIO(path.read_bytes()))
|
||||
|
||||
def get_base_address(self):
|
||||
# virtual address of the first segment with type LOAD
|
||||
@@ -120,15 +120,13 @@ class ElfFeatureExtractor(FeatureExtractor):
|
||||
return AbsoluteVirtualAddress(segment.header.p_vaddr)
|
||||
|
||||
def extract_global_features(self):
|
||||
with open(self.path, "rb") as f:
|
||||
buf = f.read()
|
||||
buf = self.path.read_bytes()
|
||||
|
||||
for feature, addr in extract_global_features(self.elf, buf):
|
||||
yield feature, addr
|
||||
|
||||
def extract_file_features(self):
|
||||
with open(self.path, "rb") as f:
|
||||
buf = f.read()
|
||||
buf = self.path.read_bytes()
|
||||
|
||||
for feature, addr in extract_file_features(self.elf, buf):
|
||||
yield feature, addr
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import pefile
|
||||
|
||||
@@ -173,23 +174,21 @@ GLOBAL_HANDLERS = (
|
||||
|
||||
|
||||
class PefileFeatureExtractor(FeatureExtractor):
|
||||
def __init__(self, path: str):
|
||||
def __init__(self, path: Path):
|
||||
super().__init__()
|
||||
self.path = path
|
||||
self.pe = pefile.PE(path)
|
||||
self.path: Path = path
|
||||
self.pe = pefile.PE(str(path))
|
||||
|
||||
def get_base_address(self):
|
||||
return AbsoluteVirtualAddress(self.pe.OPTIONAL_HEADER.ImageBase)
|
||||
|
||||
def extract_global_features(self):
|
||||
with open(self.path, "rb") as f:
|
||||
buf = f.read()
|
||||
buf = Path(self.path).read_bytes()
|
||||
|
||||
yield from extract_global_features(self.pe, buf)
|
||||
|
||||
def extract_file_features(self):
|
||||
with open(self.path, "rb") as f:
|
||||
buf = f.read()
|
||||
buf = Path(self.path).read_bytes()
|
||||
|
||||
yield from extract_file_features(self.pe, buf)
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import logging
|
||||
from typing import Any, Dict, List, Tuple, Iterator
|
||||
from pathlib import Path
|
||||
|
||||
import viv_utils
|
||||
import viv_utils.flirt
|
||||
@@ -25,12 +26,11 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class VivisectFeatureExtractor(FeatureExtractor):
|
||||
def __init__(self, vw, path, os):
|
||||
def __init__(self, vw, path: Path, os):
|
||||
super().__init__()
|
||||
self.vw = vw
|
||||
self.path = path
|
||||
with open(self.path, "rb") as f:
|
||||
self.buf = f.read()
|
||||
self.buf = path.read_bytes()
|
||||
|
||||
# pre-compute these because we'll yield them at *every* scope.
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
|
||||
@@ -78,7 +78,7 @@ def get_format(sample: Path) -> str:
|
||||
|
||||
for feature, _ in extract_format(buf):
|
||||
if feature == Format(FORMAT_PE):
|
||||
dnfile_extractor = DnfileFeatureExtractor(str(sample))
|
||||
dnfile_extractor = DnfileFeatureExtractor(sample)
|
||||
if dnfile_extractor.is_dotnet_file():
|
||||
feature = Format(FORMAT_DOTNET)
|
||||
|
||||
|
||||
14
capa/main.py
14
capa/main.py
@@ -533,7 +533,7 @@ def get_extractor(
|
||||
if format_ == FORMAT_DOTNET:
|
||||
import capa.features.extractors.dnfile.extractor
|
||||
|
||||
return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(str(path))
|
||||
return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)
|
||||
|
||||
elif backend == BACKEND_BINJA:
|
||||
from capa.features.extractors.binja.find_binja_api import find_binja_path
|
||||
@@ -542,8 +542,8 @@ def get_extractor(
|
||||
# We need to first find the binja API installation path and add it into sys.path
|
||||
if is_running_standalone():
|
||||
bn_api = find_binja_path()
|
||||
if Path(bn_api).exists():
|
||||
sys.path.append(bn_api)
|
||||
if bn_api.exists():
|
||||
sys.path.append(str(bn_api))
|
||||
|
||||
try:
|
||||
from binaryninja import BinaryView, BinaryViewType
|
||||
@@ -586,14 +586,14 @@ def get_file_extractors(sample: Path, format_: str) -> List[FeatureExtractor]:
|
||||
file_extractors: List[FeatureExtractor] = list()
|
||||
|
||||
if format_ == FORMAT_PE:
|
||||
file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(str(sample)))
|
||||
file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample))
|
||||
|
||||
elif format_ == FORMAT_DOTNET:
|
||||
file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(str(sample)))
|
||||
file_extractors.append(capa.features.extractors.dnfile_.DnfileFeatureExtractor(str(sample)))
|
||||
file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample))
|
||||
file_extractors.append(capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample))
|
||||
|
||||
elif format_ == capa.features.extractors.common.FORMAT_ELF:
|
||||
file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(str(sample)))
|
||||
file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample))
|
||||
|
||||
return file_extractors
|
||||
|
||||
|
||||
@@ -129,7 +129,7 @@ def fixup_viv(path: Path, extractor):
|
||||
def get_pefile_extractor(path: Path):
|
||||
import capa.features.extractors.pefile
|
||||
|
||||
extractor = capa.features.extractors.pefile.PefileFeatureExtractor(str(path))
|
||||
extractor = capa.features.extractors.pefile.PefileFeatureExtractor(path)
|
||||
|
||||
# overload the extractor so that the fixture exposes `extractor.path`
|
||||
setattr(extractor, "path", path.as_posix())
|
||||
@@ -140,7 +140,7 @@ def get_pefile_extractor(path: Path):
|
||||
def get_dotnetfile_extractor(path: Path):
|
||||
import capa.features.extractors.dotnetfile
|
||||
|
||||
extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(str(path))
|
||||
extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(path)
|
||||
|
||||
# overload the extractor so that the fixture exposes `extractor.path`
|
||||
setattr(extractor, "path", path.as_posix())
|
||||
@@ -152,7 +152,7 @@ def get_dotnetfile_extractor(path: Path):
|
||||
def get_dnfile_extractor(path: Path):
|
||||
import capa.features.extractors.dnfile.extractor
|
||||
|
||||
extractor = capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(str(path))
|
||||
extractor = capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)
|
||||
|
||||
# overload the extractor so that the fixture exposes `extractor.path`
|
||||
setattr(extractor, "path", path.as_posix())
|
||||
@@ -232,7 +232,7 @@ def extract_instruction_features(extractor, fh, bbh, ih) -> Dict[Feature, Set[Ad
|
||||
|
||||
|
||||
# note: to reduce the testing time it's recommended to reuse already existing test samples, if possible
|
||||
def get_data_path_by_name(name):
|
||||
def get_data_path_by_name(name) -> Path:
|
||||
if name == "mimikatz":
|
||||
return CD / "data" / "mimikatz.exe_"
|
||||
elif name == "kernel32":
|
||||
@@ -1048,7 +1048,6 @@ FEATURE_COUNT_TESTS_DOTNET = [
|
||||
|
||||
|
||||
def do_test_feature_presence(get_extractor, sample, scope, feature, expected):
|
||||
print(sample)
|
||||
extractor = get_extractor(sample)
|
||||
features = scope(extractor)
|
||||
if expected:
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from pathlib import PosixPath
|
||||
|
||||
import pytest
|
||||
import fixtures
|
||||
from fixtures import *
|
||||
@@ -27,5 +29,4 @@ def test_pefile_features(sample, scope, feature, expected):
|
||||
|
||||
if ".elf" in sample.name:
|
||||
pytest.xfail("pefile doesn't handle ELF files")
|
||||
|
||||
fixtures.do_test_feature_presence(fixtures.get_pefile_extractor, sample, scope, feature, expected)
|
||||
|
||||
Reference in New Issue
Block a user