extractors accept Path instance

This commit is contained in:
Aayush Goel
2023-07-11 00:41:36 +05:30
parent a949698b86
commit d1a1c6875b
11 changed files with 43 additions and 42 deletions

View File

@@ -6,6 +6,7 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License. # See the License for the specific language governing permissions and limitations under the License.
import subprocess import subprocess
from pathlib import Path
# When the script gets executed as a standalone executable (via PyInstaller), `import binaryninja` does not work because # When the script gets executed as a standalone executable (via PyInstaller), `import binaryninja` does not work because
# we have excluded the binaryninja module in `pyinstaller.spec`. The trick here is to call the system Python and try # we have excluded the binaryninja module in `pyinstaller.spec`. The trick here is to call the system Python and try
@@ -25,9 +26,9 @@ if spec is not None:
""" """
def find_binja_path() -> str: def find_binja_path() -> Path:
raw_output = subprocess.check_output(["python", "-c", code]).decode("ascii").strip() raw_output = subprocess.check_output(["python", "-c", code]).decode("ascii").strip()
return bytes.fromhex(raw_output).decode("utf8") return Path(bytes.fromhex(raw_output).decode("utf8"))
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -9,6 +9,7 @@
from __future__ import annotations from __future__ import annotations
from typing import Dict, List, Tuple, Union, Iterator, Optional from typing import Dict, List, Tuple, Union, Iterator, Optional
from pathlib import Path
import dnfile import dnfile
from dncil.cil.opcode import OpCodes from dncil.cil.opcode import OpCodes
@@ -68,9 +69,9 @@ class DnFileFeatureExtractorCache:
class DnfileFeatureExtractor(FeatureExtractor): class DnfileFeatureExtractor(FeatureExtractor):
def __init__(self, path: str): def __init__(self, path: Path):
super().__init__() super().__init__()
self.pe: dnfile.dnPE = dnfile.dnPE(path) self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
# pre-compute .NET token lookup tables; each .NET method has access to this cache for feature extraction # pre-compute .NET token lookup tables; each .NET method has access to this cache for feature extraction
# most relevant at instruction scope # most relevant at instruction scope

View File

@@ -1,5 +1,6 @@
import logging import logging
from typing import Tuple, Iterator from typing import Tuple, Iterator
from pathlib import Path
import dnfile import dnfile
import pefile import pefile
@@ -74,10 +75,10 @@ GLOBAL_HANDLERS = (
class DnfileFeatureExtractor(FeatureExtractor): class DnfileFeatureExtractor(FeatureExtractor):
def __init__(self, path: str): def __init__(self, path: Path):
super().__init__() super().__init__()
self.path: str = path self.path: Path = path
self.pe: dnfile.dnPE = dnfile.dnPE(path) self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
def get_base_address(self) -> AbsoluteVirtualAddress: def get_base_address(self) -> AbsoluteVirtualAddress:
return AbsoluteVirtualAddress(0x0) return AbsoluteVirtualAddress(0x0)

View File

@@ -1,5 +1,6 @@
import logging import logging
from typing import Tuple, Iterator, cast from typing import Tuple, Iterator, cast
from pathlib import Path
import dnfile import dnfile
import pefile import pefile
@@ -158,10 +159,10 @@ GLOBAL_HANDLERS = (
class DotnetFileFeatureExtractor(FeatureExtractor): class DotnetFileFeatureExtractor(FeatureExtractor):
def __init__(self, path: str): def __init__(self, path: Path):
super().__init__() super().__init__()
self.path: str = path self.path: Path = path
self.pe: dnfile.dnPE = dnfile.dnPE(path) self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
def get_base_address(self): def get_base_address(self):
return NO_ADDRESS return NO_ADDRESS

View File

@@ -8,6 +8,7 @@
import io import io
import logging import logging
from typing import Tuple, Iterator from typing import Tuple, Iterator
from pathlib import Path
from elftools.elf.elffile import ELFFile, SymbolTableSection from elftools.elf.elffile import ELFFile, SymbolTableSection
@@ -107,11 +108,10 @@ GLOBAL_HANDLERS = (
class ElfFeatureExtractor(FeatureExtractor): class ElfFeatureExtractor(FeatureExtractor):
def __init__(self, path: str): def __init__(self, path: Path):
super().__init__() super().__init__()
self.path = path self.path: Path = path
with open(self.path, "rb") as f: self.elf = ELFFile(io.BytesIO(path.read_bytes()))
self.elf = ELFFile(io.BytesIO(f.read()))
def get_base_address(self): def get_base_address(self):
# virtual address of the first segment with type LOAD # virtual address of the first segment with type LOAD
@@ -120,15 +120,13 @@ class ElfFeatureExtractor(FeatureExtractor):
return AbsoluteVirtualAddress(segment.header.p_vaddr) return AbsoluteVirtualAddress(segment.header.p_vaddr)
def extract_global_features(self): def extract_global_features(self):
with open(self.path, "rb") as f: buf = self.path.read_bytes()
buf = f.read()
for feature, addr in extract_global_features(self.elf, buf): for feature, addr in extract_global_features(self.elf, buf):
yield feature, addr yield feature, addr
def extract_file_features(self): def extract_file_features(self):
with open(self.path, "rb") as f: buf = self.path.read_bytes()
buf = f.read()
for feature, addr in extract_file_features(self.elf, buf): for feature, addr in extract_file_features(self.elf, buf):
yield feature, addr yield feature, addr

View File

@@ -7,6 +7,7 @@
# See the License for the specific language governing permissions and limitations under the License. # See the License for the specific language governing permissions and limitations under the License.
import logging import logging
from pathlib import Path
import pefile import pefile
@@ -173,23 +174,21 @@ GLOBAL_HANDLERS = (
class PefileFeatureExtractor(FeatureExtractor): class PefileFeatureExtractor(FeatureExtractor):
def __init__(self, path: str): def __init__(self, path: Path):
super().__init__() super().__init__()
self.path = path self.path: Path = path
self.pe = pefile.PE(path) self.pe = pefile.PE(str(path))
def get_base_address(self): def get_base_address(self):
return AbsoluteVirtualAddress(self.pe.OPTIONAL_HEADER.ImageBase) return AbsoluteVirtualAddress(self.pe.OPTIONAL_HEADER.ImageBase)
def extract_global_features(self): def extract_global_features(self):
with open(self.path, "rb") as f: buf = Path(self.path).read_bytes()
buf = f.read()
yield from extract_global_features(self.pe, buf) yield from extract_global_features(self.pe, buf)
def extract_file_features(self): def extract_file_features(self):
with open(self.path, "rb") as f: buf = Path(self.path).read_bytes()
buf = f.read()
yield from extract_file_features(self.pe, buf) yield from extract_file_features(self.pe, buf)

View File

@@ -7,6 +7,7 @@
# See the License for the specific language governing permissions and limitations under the License. # See the License for the specific language governing permissions and limitations under the License.
import logging import logging
from typing import Any, Dict, List, Tuple, Iterator from typing import Any, Dict, List, Tuple, Iterator
from pathlib import Path
import viv_utils import viv_utils
import viv_utils.flirt import viv_utils.flirt
@@ -25,12 +26,11 @@ logger = logging.getLogger(__name__)
class VivisectFeatureExtractor(FeatureExtractor): class VivisectFeatureExtractor(FeatureExtractor):
def __init__(self, vw, path, os): def __init__(self, vw, path: Path, os):
super().__init__() super().__init__()
self.vw = vw self.vw = vw
self.path = path self.path = path
with open(self.path, "rb") as f: self.buf = path.read_bytes()
self.buf = f.read()
# pre-compute these because we'll yield them at *every* scope. # pre-compute these because we'll yield them at *every* scope.
self.global_features: List[Tuple[Feature, Address]] = [] self.global_features: List[Tuple[Feature, Address]] = []

View File

@@ -78,7 +78,7 @@ def get_format(sample: Path) -> str:
for feature, _ in extract_format(buf): for feature, _ in extract_format(buf):
if feature == Format(FORMAT_PE): if feature == Format(FORMAT_PE):
dnfile_extractor = DnfileFeatureExtractor(str(sample)) dnfile_extractor = DnfileFeatureExtractor(sample)
if dnfile_extractor.is_dotnet_file(): if dnfile_extractor.is_dotnet_file():
feature = Format(FORMAT_DOTNET) feature = Format(FORMAT_DOTNET)

View File

@@ -533,7 +533,7 @@ def get_extractor(
if format_ == FORMAT_DOTNET: if format_ == FORMAT_DOTNET:
import capa.features.extractors.dnfile.extractor import capa.features.extractors.dnfile.extractor
return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(str(path)) return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)
elif backend == BACKEND_BINJA: elif backend == BACKEND_BINJA:
from capa.features.extractors.binja.find_binja_api import find_binja_path from capa.features.extractors.binja.find_binja_api import find_binja_path
@@ -542,8 +542,8 @@ def get_extractor(
# We need to first find the binja API installation path and add it into sys.path # We need to first find the binja API installation path and add it into sys.path
if is_running_standalone(): if is_running_standalone():
bn_api = find_binja_path() bn_api = find_binja_path()
if Path(bn_api).exists(): if bn_api.exists():
sys.path.append(bn_api) sys.path.append(str(bn_api))
try: try:
from binaryninja import BinaryView, BinaryViewType from binaryninja import BinaryView, BinaryViewType
@@ -586,14 +586,14 @@ def get_file_extractors(sample: Path, format_: str) -> List[FeatureExtractor]:
file_extractors: List[FeatureExtractor] = list() file_extractors: List[FeatureExtractor] = list()
if format_ == FORMAT_PE: if format_ == FORMAT_PE:
file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(str(sample))) file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample))
elif format_ == FORMAT_DOTNET: elif format_ == FORMAT_DOTNET:
file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(str(sample))) file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample))
file_extractors.append(capa.features.extractors.dnfile_.DnfileFeatureExtractor(str(sample))) file_extractors.append(capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample))
elif format_ == capa.features.extractors.common.FORMAT_ELF: elif format_ == capa.features.extractors.common.FORMAT_ELF:
file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(str(sample))) file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample))
return file_extractors return file_extractors

View File

@@ -129,7 +129,7 @@ def fixup_viv(path: Path, extractor):
def get_pefile_extractor(path: Path): def get_pefile_extractor(path: Path):
import capa.features.extractors.pefile import capa.features.extractors.pefile
extractor = capa.features.extractors.pefile.PefileFeatureExtractor(str(path)) extractor = capa.features.extractors.pefile.PefileFeatureExtractor(path)
# overload the extractor so that the fixture exposes `extractor.path` # overload the extractor so that the fixture exposes `extractor.path`
setattr(extractor, "path", path.as_posix()) setattr(extractor, "path", path.as_posix())
@@ -140,7 +140,7 @@ def get_pefile_extractor(path: Path):
def get_dotnetfile_extractor(path: Path): def get_dotnetfile_extractor(path: Path):
import capa.features.extractors.dotnetfile import capa.features.extractors.dotnetfile
extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(str(path)) extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(path)
# overload the extractor so that the fixture exposes `extractor.path` # overload the extractor so that the fixture exposes `extractor.path`
setattr(extractor, "path", path.as_posix()) setattr(extractor, "path", path.as_posix())
@@ -152,7 +152,7 @@ def get_dotnetfile_extractor(path: Path):
def get_dnfile_extractor(path: Path): def get_dnfile_extractor(path: Path):
import capa.features.extractors.dnfile.extractor import capa.features.extractors.dnfile.extractor
extractor = capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(str(path)) extractor = capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)
# overload the extractor so that the fixture exposes `extractor.path` # overload the extractor so that the fixture exposes `extractor.path`
setattr(extractor, "path", path.as_posix()) setattr(extractor, "path", path.as_posix())
@@ -232,7 +232,7 @@ def extract_instruction_features(extractor, fh, bbh, ih) -> Dict[Feature, Set[Ad
# note: to reduce the testing time it's recommended to reuse already existing test samples, if possible # note: to reduce the testing time it's recommended to reuse already existing test samples, if possible
def get_data_path_by_name(name): def get_data_path_by_name(name) -> Path:
if name == "mimikatz": if name == "mimikatz":
return CD / "data" / "mimikatz.exe_" return CD / "data" / "mimikatz.exe_"
elif name == "kernel32": elif name == "kernel32":
@@ -1048,7 +1048,6 @@ FEATURE_COUNT_TESTS_DOTNET = [
def do_test_feature_presence(get_extractor, sample, scope, feature, expected): def do_test_feature_presence(get_extractor, sample, scope, feature, expected):
print(sample)
extractor = get_extractor(sample) extractor = get_extractor(sample)
features = scope(extractor) features = scope(extractor)
if expected: if expected:

View File

@@ -5,6 +5,8 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License # Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License. # See the License for the specific language governing permissions and limitations under the License.
from pathlib import PosixPath
import pytest import pytest
import fixtures import fixtures
from fixtures import * from fixtures import *
@@ -27,5 +29,4 @@ def test_pefile_features(sample, scope, feature, expected):
if ".elf" in sample.name: if ".elf" in sample.name:
pytest.xfail("pefile doesn't handle ELF files") pytest.xfail("pefile doesn't handle ELF files")
fixtures.do_test_feature_presence(fixtures.get_pefile_extractor, sample, scope, feature, expected) fixtures.do_test_feature_presence(fixtures.get_pefile_extractor, sample, scope, feature, expected)