diff --git a/capa/features/extractors/binja/find_binja_api.py b/capa/features/extractors/binja/find_binja_api.py index f9001c41..5433ff47 100644 --- a/capa/features/extractors/binja/find_binja_api.py +++ b/capa/features/extractors/binja/find_binja_api.py @@ -6,6 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import subprocess +from pathlib import Path # When the script gets executed as a standalone executable (via PyInstaller), `import binaryninja` does not work because # we have excluded the binaryninja module in `pyinstaller.spec`. The trick here is to call the system Python and try @@ -25,9 +26,9 @@ if spec is not None: """ -def find_binja_path() -> str: +def find_binja_path() -> Path: raw_output = subprocess.check_output(["python", "-c", code]).decode("ascii").strip() - return bytes.fromhex(raw_output).decode("utf8") + return Path(bytes.fromhex(raw_output).decode("utf8")) if __name__ == "__main__": diff --git a/capa/features/extractors/dnfile/extractor.py b/capa/features/extractors/dnfile/extractor.py index ad180257..3863d51a 100644 --- a/capa/features/extractors/dnfile/extractor.py +++ b/capa/features/extractors/dnfile/extractor.py @@ -9,6 +9,7 @@ from __future__ import annotations from typing import Dict, List, Tuple, Union, Iterator, Optional +from pathlib import Path import dnfile from dncil.cil.opcode import OpCodes @@ -68,9 +69,9 @@ class DnFileFeatureExtractorCache: class DnfileFeatureExtractor(FeatureExtractor): - def __init__(self, path: str): + def __init__(self, path: Path): super().__init__() - self.pe: dnfile.dnPE = dnfile.dnPE(path) + self.pe: dnfile.dnPE = dnfile.dnPE(str(path)) # pre-compute .NET token lookup tables; each .NET method has access to this cache for feature extraction # most relevant at instruction scope diff --git a/capa/features/extractors/dnfile_.py b/capa/features/extractors/dnfile_.py index ef6b3999..58ae1539 100644 --- a/capa/features/extractors/dnfile_.py +++ b/capa/features/extractors/dnfile_.py @@ -1,5 +1,6 @@ import logging from typing import Tuple, Iterator +from pathlib import Path import dnfile import pefile @@ -74,10 +75,10 @@ GLOBAL_HANDLERS = ( class DnfileFeatureExtractor(FeatureExtractor): - def __init__(self, path: str): + def __init__(self, path: Path): super().__init__() - self.path: str = path - self.pe: dnfile.dnPE = dnfile.dnPE(path) + self.path: Path = path + self.pe: dnfile.dnPE = dnfile.dnPE(str(path)) def get_base_address(self) -> AbsoluteVirtualAddress: return AbsoluteVirtualAddress(0x0) diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index 7a1abb57..a64e51f3 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -1,5 +1,6 @@ import logging from typing import Tuple, Iterator, cast +from pathlib import Path import dnfile import pefile @@ -158,10 +159,10 @@ GLOBAL_HANDLERS = ( class DotnetFileFeatureExtractor(FeatureExtractor): - def __init__(self, path: str): + def __init__(self, path: Path): super().__init__() - self.path: str = path - self.pe: dnfile.dnPE = dnfile.dnPE(path) + self.path: Path = path + self.pe: dnfile.dnPE = dnfile.dnPE(str(path)) def get_base_address(self): return NO_ADDRESS diff --git a/capa/features/extractors/elffile.py b/capa/features/extractors/elffile.py index d4f61a06..1ed1aa9e 100644 --- a/capa/features/extractors/elffile.py +++ b/capa/features/extractors/elffile.py @@ -8,6 +8,7 @@ import io import logging from typing import Tuple, Iterator +from pathlib import Path from elftools.elf.elffile import ELFFile, SymbolTableSection @@ -107,11 +108,10 @@ GLOBAL_HANDLERS = ( class ElfFeatureExtractor(FeatureExtractor): - def __init__(self, path: str): + def __init__(self, path: Path): super().__init__() - self.path = path - with open(self.path, "rb") as f: - self.elf = ELFFile(io.BytesIO(f.read())) + self.path: Path = path + self.elf = ELFFile(io.BytesIO(path.read_bytes())) def get_base_address(self): # virtual address of the first segment with type LOAD @@ -120,15 +120,13 @@ class ElfFeatureExtractor(FeatureExtractor): return AbsoluteVirtualAddress(segment.header.p_vaddr) def extract_global_features(self): - with open(self.path, "rb") as f: - buf = f.read() + buf = self.path.read_bytes() for feature, addr in extract_global_features(self.elf, buf): yield feature, addr def extract_file_features(self): - with open(self.path, "rb") as f: - buf = f.read() + buf = self.path.read_bytes() for feature, addr in extract_file_features(self.elf, buf): yield feature, addr diff --git a/capa/features/extractors/pefile.py b/capa/features/extractors/pefile.py index cf4f16c4..078e6ff0 100644 --- a/capa/features/extractors/pefile.py +++ b/capa/features/extractors/pefile.py @@ -7,6 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging +from pathlib import Path import pefile @@ -173,23 +174,21 @@ GLOBAL_HANDLERS = ( class PefileFeatureExtractor(FeatureExtractor): - def __init__(self, path: str): + def __init__(self, path: Path): super().__init__() - self.path = path - self.pe = pefile.PE(path) + self.path: Path = path + self.pe = pefile.PE(str(path)) def get_base_address(self): return AbsoluteVirtualAddress(self.pe.OPTIONAL_HEADER.ImageBase) def extract_global_features(self): - with open(self.path, "rb") as f: - buf = f.read() + buf = Path(self.path).read_bytes() yield from extract_global_features(self.pe, buf) def extract_file_features(self): - with open(self.path, "rb") as f: - buf = f.read() + buf = Path(self.path).read_bytes() yield from extract_file_features(self.pe, buf) diff --git a/capa/features/extractors/viv/extractor.py b/capa/features/extractors/viv/extractor.py index 16b97ef3..588ff88c 100644 --- a/capa/features/extractors/viv/extractor.py +++ b/capa/features/extractors/viv/extractor.py @@ -7,6 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging from typing import Any, Dict, List, Tuple, Iterator +from pathlib import Path import viv_utils import viv_utils.flirt @@ -25,12 +26,11 @@ logger = logging.getLogger(__name__) class VivisectFeatureExtractor(FeatureExtractor): - def __init__(self, vw, path, os): + def __init__(self, vw, path: Path, os): super().__init__() self.vw = vw self.path = path - with open(self.path, "rb") as f: - self.buf = f.read() + self.buf = path.read_bytes() # pre-compute these because we'll yield them at *every* scope. self.global_features: List[Tuple[Feature, Address]] = [] diff --git a/capa/helpers.py b/capa/helpers.py index b6f947fc..c6b5e7ac 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -78,7 +78,7 @@ def get_format(sample: Path) -> str: for feature, _ in extract_format(buf): if feature == Format(FORMAT_PE): - dnfile_extractor = DnfileFeatureExtractor(str(sample)) + dnfile_extractor = DnfileFeatureExtractor(sample) if dnfile_extractor.is_dotnet_file(): feature = Format(FORMAT_DOTNET) diff --git a/capa/main.py b/capa/main.py index 6024c169..f9a5805c 100644 --- a/capa/main.py +++ b/capa/main.py @@ -533,7 +533,7 @@ def get_extractor( if format_ == FORMAT_DOTNET: import capa.features.extractors.dnfile.extractor - return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(str(path)) + return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path) elif backend == BACKEND_BINJA: from capa.features.extractors.binja.find_binja_api import find_binja_path @@ -542,8 +542,8 @@ def get_extractor( # We need to fist find the binja API installation path and add it into sys.path if is_running_standalone(): bn_api = find_binja_path() - if Path(bn_api).exists(): - sys.path.append(bn_api) + if bn_api.exists(): + sys.path.append(str(bn_api)) try: from binaryninja import BinaryView, BinaryViewType @@ -586,14 +586,14 @@ def get_file_extractors(sample: Path, format_: str) -> List[FeatureExtractor]: file_extractors: List[FeatureExtractor] = list() if format_ == FORMAT_PE: - file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(str(sample))) + file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample)) elif format_ == FORMAT_DOTNET: - file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(str(sample))) - file_extractors.append(capa.features.extractors.dnfile_.DnfileFeatureExtractor(str(sample))) + file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample)) + file_extractors.append(capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample)) elif format_ == capa.features.extractors.common.FORMAT_ELF: - file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(str(sample))) + file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample)) return file_extractors diff --git a/tests/fixtures.py b/tests/fixtures.py index 4e40b0c7..9950fa66 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -129,7 +129,7 @@ def fixup_viv(path: Path, extractor): def get_pefile_extractor(path: Path): import capa.features.extractors.pefile - extractor = capa.features.extractors.pefile.PefileFeatureExtractor(str(path)) + extractor = capa.features.extractors.pefile.PefileFeatureExtractor(path) # overload the extractor so that the fixture exposes `extractor.path` setattr(extractor, "path", path.as_posix()) @@ -140,7 +140,7 @@ def get_pefile_extractor(path: Path): def get_dotnetfile_extractor(path: Path): import capa.features.extractors.dotnetfile - extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(str(path)) + extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(path) # overload the extractor so that the fixture exposes `extractor.path` setattr(extractor, "path", path.as_posix()) @@ -152,7 +152,7 @@ def get_dotnetfile_extractor(path: Path): def get_dnfile_extractor(path: Path): import capa.features.extractors.dnfile.extractor - extractor = capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(str(path)) + extractor = capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path) # overload the extractor so that the fixture exposes `extractor.path` setattr(extractor, "path", path.as_posix()) @@ -232,7 +232,7 @@ def extract_instruction_features(extractor, fh, bbh, ih) -> Dict[Feature, Set[Ad # note: to reduce the testing time it's recommended to reuse already existing test samples, if possible -def get_data_path_by_name(name): +def get_data_path_by_name(name) -> Path: if name == "mimikatz": return CD / "data" / "mimikatz.exe_" elif name == "kernel32": @@ -1048,7 +1048,6 @@ FEATURE_COUNT_TESTS_DOTNET = [ def do_test_feature_presence(get_extractor, sample, scope, feature, expected): - print(sample) extractor = get_extractor(sample) features = scope(extractor) if expected: diff --git a/tests/test_pefile_features.py b/tests/test_pefile_features.py index 62c14f96..79a68088 100644 --- a/tests/test_pefile_features.py +++ b/tests/test_pefile_features.py @@ -5,6 +5,8 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. +from pathlib import PosixPath + import pytest import fixtures from fixtures import * @@ -27,5 +29,4 @@ def test_pefile_features(sample, scope, feature, expected): if ".elf" in sample.name: pytest.xfail("pefile doesn't handle ELF files") - fixtures.do_test_feature_presence(fixtures.get_pefile_extractor, sample, scope, feature, expected)