mirror of
https://github.com/mandiant/capa.git
synced 2025-12-22 15:16:22 -08:00
extractors accept Path instance
This commit is contained in:
@@ -6,6 +6,7 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import subprocess
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
# When the script gets executed as a standalone executable (via PyInstaller), `import binaryninja` does not work because
|
# When the script gets executed as a standalone executable (via PyInstaller), `import binaryninja` does not work because
|
||||||
# we have excluded the binaryninja module in `pyinstaller.spec`. The trick here is to call the system Python and try
|
# we have excluded the binaryninja module in `pyinstaller.spec`. The trick here is to call the system Python and try
|
||||||
@@ -25,9 +26,9 @@ if spec is not None:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def find_binja_path() -> str:
|
def find_binja_path() -> Path:
|
||||||
raw_output = subprocess.check_output(["python", "-c", code]).decode("ascii").strip()
|
raw_output = subprocess.check_output(["python", "-c", code]).decode("ascii").strip()
|
||||||
return bytes.fromhex(raw_output).decode("utf8")
|
return Path(bytes.fromhex(raw_output).decode("utf8"))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Dict, List, Tuple, Union, Iterator, Optional
|
from typing import Dict, List, Tuple, Union, Iterator, Optional
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import dnfile
|
import dnfile
|
||||||
from dncil.cil.opcode import OpCodes
|
from dncil.cil.opcode import OpCodes
|
||||||
@@ -68,9 +69,9 @@ class DnFileFeatureExtractorCache:
|
|||||||
|
|
||||||
|
|
||||||
class DnfileFeatureExtractor(FeatureExtractor):
|
class DnfileFeatureExtractor(FeatureExtractor):
|
||||||
def __init__(self, path: str):
|
def __init__(self, path: Path):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.pe: dnfile.dnPE = dnfile.dnPE(path)
|
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
|
||||||
|
|
||||||
# pre-compute .NET token lookup tables; each .NET method has access to this cache for feature extraction
|
# pre-compute .NET token lookup tables; each .NET method has access to this cache for feature extraction
|
||||||
# most relevant at instruction scope
|
# most relevant at instruction scope
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Tuple, Iterator
|
from typing import Tuple, Iterator
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import dnfile
|
import dnfile
|
||||||
import pefile
|
import pefile
|
||||||
@@ -74,10 +75,10 @@ GLOBAL_HANDLERS = (
|
|||||||
|
|
||||||
|
|
||||||
class DnfileFeatureExtractor(FeatureExtractor):
|
class DnfileFeatureExtractor(FeatureExtractor):
|
||||||
def __init__(self, path: str):
|
def __init__(self, path: Path):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.path: str = path
|
self.path: Path = path
|
||||||
self.pe: dnfile.dnPE = dnfile.dnPE(path)
|
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
|
||||||
|
|
||||||
def get_base_address(self) -> AbsoluteVirtualAddress:
|
def get_base_address(self) -> AbsoluteVirtualAddress:
|
||||||
return AbsoluteVirtualAddress(0x0)
|
return AbsoluteVirtualAddress(0x0)
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Tuple, Iterator, cast
|
from typing import Tuple, Iterator, cast
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import dnfile
|
import dnfile
|
||||||
import pefile
|
import pefile
|
||||||
@@ -158,10 +159,10 @@ GLOBAL_HANDLERS = (
|
|||||||
|
|
||||||
|
|
||||||
class DotnetFileFeatureExtractor(FeatureExtractor):
|
class DotnetFileFeatureExtractor(FeatureExtractor):
|
||||||
def __init__(self, path: str):
|
def __init__(self, path: Path):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.path: str = path
|
self.path: Path = path
|
||||||
self.pe: dnfile.dnPE = dnfile.dnPE(path)
|
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
|
||||||
|
|
||||||
def get_base_address(self):
|
def get_base_address(self):
|
||||||
return NO_ADDRESS
|
return NO_ADDRESS
|
||||||
|
|||||||
@@ -8,6 +8,7 @@
|
|||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
from typing import Tuple, Iterator
|
from typing import Tuple, Iterator
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from elftools.elf.elffile import ELFFile, SymbolTableSection
|
from elftools.elf.elffile import ELFFile, SymbolTableSection
|
||||||
|
|
||||||
@@ -107,11 +108,10 @@ GLOBAL_HANDLERS = (
|
|||||||
|
|
||||||
|
|
||||||
class ElfFeatureExtractor(FeatureExtractor):
|
class ElfFeatureExtractor(FeatureExtractor):
|
||||||
def __init__(self, path: str):
|
def __init__(self, path: Path):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.path = path
|
self.path: Path = path
|
||||||
with open(self.path, "rb") as f:
|
self.elf = ELFFile(io.BytesIO(path.read_bytes()))
|
||||||
self.elf = ELFFile(io.BytesIO(f.read()))
|
|
||||||
|
|
||||||
def get_base_address(self):
|
def get_base_address(self):
|
||||||
# virtual address of the first segment with type LOAD
|
# virtual address of the first segment with type LOAD
|
||||||
@@ -120,15 +120,13 @@ class ElfFeatureExtractor(FeatureExtractor):
|
|||||||
return AbsoluteVirtualAddress(segment.header.p_vaddr)
|
return AbsoluteVirtualAddress(segment.header.p_vaddr)
|
||||||
|
|
||||||
def extract_global_features(self):
|
def extract_global_features(self):
|
||||||
with open(self.path, "rb") as f:
|
buf = self.path.read_bytes()
|
||||||
buf = f.read()
|
|
||||||
|
|
||||||
for feature, addr in extract_global_features(self.elf, buf):
|
for feature, addr in extract_global_features(self.elf, buf):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|
||||||
def extract_file_features(self):
|
def extract_file_features(self):
|
||||||
with open(self.path, "rb") as f:
|
buf = self.path.read_bytes()
|
||||||
buf = f.read()
|
|
||||||
|
|
||||||
for feature, addr in extract_file_features(self.elf, buf):
|
for feature, addr in extract_file_features(self.elf, buf):
|
||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import pefile
|
import pefile
|
||||||
|
|
||||||
@@ -173,23 +174,21 @@ GLOBAL_HANDLERS = (
|
|||||||
|
|
||||||
|
|
||||||
class PefileFeatureExtractor(FeatureExtractor):
|
class PefileFeatureExtractor(FeatureExtractor):
|
||||||
def __init__(self, path: str):
|
def __init__(self, path: Path):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.path = path
|
self.path: Path = path
|
||||||
self.pe = pefile.PE(path)
|
self.pe = pefile.PE(str(path))
|
||||||
|
|
||||||
def get_base_address(self):
|
def get_base_address(self):
|
||||||
return AbsoluteVirtualAddress(self.pe.OPTIONAL_HEADER.ImageBase)
|
return AbsoluteVirtualAddress(self.pe.OPTIONAL_HEADER.ImageBase)
|
||||||
|
|
||||||
def extract_global_features(self):
|
def extract_global_features(self):
|
||||||
with open(self.path, "rb") as f:
|
buf = Path(self.path).read_bytes()
|
||||||
buf = f.read()
|
|
||||||
|
|
||||||
yield from extract_global_features(self.pe, buf)
|
yield from extract_global_features(self.pe, buf)
|
||||||
|
|
||||||
def extract_file_features(self):
|
def extract_file_features(self):
|
||||||
with open(self.path, "rb") as f:
|
buf = Path(self.path).read_bytes()
|
||||||
buf = f.read()
|
|
||||||
|
|
||||||
yield from extract_file_features(self.pe, buf)
|
yield from extract_file_features(self.pe, buf)
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Any, Dict, List, Tuple, Iterator
|
from typing import Any, Dict, List, Tuple, Iterator
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import viv_utils
|
import viv_utils
|
||||||
import viv_utils.flirt
|
import viv_utils.flirt
|
||||||
@@ -25,12 +26,11 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
class VivisectFeatureExtractor(FeatureExtractor):
|
class VivisectFeatureExtractor(FeatureExtractor):
|
||||||
def __init__(self, vw, path, os):
|
def __init__(self, vw, path: Path, os):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.vw = vw
|
self.vw = vw
|
||||||
self.path = path
|
self.path = path
|
||||||
with open(self.path, "rb") as f:
|
self.buf = path.read_bytes()
|
||||||
self.buf = f.read()
|
|
||||||
|
|
||||||
# pre-compute these because we'll yield them at *every* scope.
|
# pre-compute these because we'll yield them at *every* scope.
|
||||||
self.global_features: List[Tuple[Feature, Address]] = []
|
self.global_features: List[Tuple[Feature, Address]] = []
|
||||||
|
|||||||
@@ -78,7 +78,7 @@ def get_format(sample: Path) -> str:
|
|||||||
|
|
||||||
for feature, _ in extract_format(buf):
|
for feature, _ in extract_format(buf):
|
||||||
if feature == Format(FORMAT_PE):
|
if feature == Format(FORMAT_PE):
|
||||||
dnfile_extractor = DnfileFeatureExtractor(str(sample))
|
dnfile_extractor = DnfileFeatureExtractor(sample)
|
||||||
if dnfile_extractor.is_dotnet_file():
|
if dnfile_extractor.is_dotnet_file():
|
||||||
feature = Format(FORMAT_DOTNET)
|
feature = Format(FORMAT_DOTNET)
|
||||||
|
|
||||||
|
|||||||
14
capa/main.py
14
capa/main.py
@@ -533,7 +533,7 @@ def get_extractor(
|
|||||||
if format_ == FORMAT_DOTNET:
|
if format_ == FORMAT_DOTNET:
|
||||||
import capa.features.extractors.dnfile.extractor
|
import capa.features.extractors.dnfile.extractor
|
||||||
|
|
||||||
return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(str(path))
|
return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)
|
||||||
|
|
||||||
elif backend == BACKEND_BINJA:
|
elif backend == BACKEND_BINJA:
|
||||||
from capa.features.extractors.binja.find_binja_api import find_binja_path
|
from capa.features.extractors.binja.find_binja_api import find_binja_path
|
||||||
@@ -542,8 +542,8 @@ def get_extractor(
|
|||||||
# We need to first find the binja API installation path and add it into sys.path
|
# We need to first find the binja API installation path and add it into sys.path
|
||||||
if is_running_standalone():
|
if is_running_standalone():
|
||||||
bn_api = find_binja_path()
|
bn_api = find_binja_path()
|
||||||
if Path(bn_api).exists():
|
if bn_api.exists():
|
||||||
sys.path.append(bn_api)
|
sys.path.append(str(bn_api))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from binaryninja import BinaryView, BinaryViewType
|
from binaryninja import BinaryView, BinaryViewType
|
||||||
@@ -586,14 +586,14 @@ def get_file_extractors(sample: Path, format_: str) -> List[FeatureExtractor]:
|
|||||||
file_extractors: List[FeatureExtractor] = list()
|
file_extractors: List[FeatureExtractor] = list()
|
||||||
|
|
||||||
if format_ == FORMAT_PE:
|
if format_ == FORMAT_PE:
|
||||||
file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(str(sample)))
|
file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample))
|
||||||
|
|
||||||
elif format_ == FORMAT_DOTNET:
|
elif format_ == FORMAT_DOTNET:
|
||||||
file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(str(sample)))
|
file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample))
|
||||||
file_extractors.append(capa.features.extractors.dnfile_.DnfileFeatureExtractor(str(sample)))
|
file_extractors.append(capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample))
|
||||||
|
|
||||||
elif format_ == capa.features.extractors.common.FORMAT_ELF:
|
elif format_ == capa.features.extractors.common.FORMAT_ELF:
|
||||||
file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(str(sample)))
|
file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample))
|
||||||
|
|
||||||
return file_extractors
|
return file_extractors
|
||||||
|
|
||||||
|
|||||||
@@ -129,7 +129,7 @@ def fixup_viv(path: Path, extractor):
|
|||||||
def get_pefile_extractor(path: Path):
|
def get_pefile_extractor(path: Path):
|
||||||
import capa.features.extractors.pefile
|
import capa.features.extractors.pefile
|
||||||
|
|
||||||
extractor = capa.features.extractors.pefile.PefileFeatureExtractor(str(path))
|
extractor = capa.features.extractors.pefile.PefileFeatureExtractor(path)
|
||||||
|
|
||||||
# overload the extractor so that the fixture exposes `extractor.path`
|
# overload the extractor so that the fixture exposes `extractor.path`
|
||||||
setattr(extractor, "path", path.as_posix())
|
setattr(extractor, "path", path.as_posix())
|
||||||
@@ -140,7 +140,7 @@ def get_pefile_extractor(path: Path):
|
|||||||
def get_dotnetfile_extractor(path: Path):
|
def get_dotnetfile_extractor(path: Path):
|
||||||
import capa.features.extractors.dotnetfile
|
import capa.features.extractors.dotnetfile
|
||||||
|
|
||||||
extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(str(path))
|
extractor = capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(path)
|
||||||
|
|
||||||
# overload the extractor so that the fixture exposes `extractor.path`
|
# overload the extractor so that the fixture exposes `extractor.path`
|
||||||
setattr(extractor, "path", path.as_posix())
|
setattr(extractor, "path", path.as_posix())
|
||||||
@@ -152,7 +152,7 @@ def get_dotnetfile_extractor(path: Path):
|
|||||||
def get_dnfile_extractor(path: Path):
|
def get_dnfile_extractor(path: Path):
|
||||||
import capa.features.extractors.dnfile.extractor
|
import capa.features.extractors.dnfile.extractor
|
||||||
|
|
||||||
extractor = capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(str(path))
|
extractor = capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)
|
||||||
|
|
||||||
# overload the extractor so that the fixture exposes `extractor.path`
|
# overload the extractor so that the fixture exposes `extractor.path`
|
||||||
setattr(extractor, "path", path.as_posix())
|
setattr(extractor, "path", path.as_posix())
|
||||||
@@ -232,7 +232,7 @@ def extract_instruction_features(extractor, fh, bbh, ih) -> Dict[Feature, Set[Ad
|
|||||||
|
|
||||||
|
|
||||||
# note: to reduce the testing time it's recommended to reuse already existing test samples, if possible
|
# note: to reduce the testing time it's recommended to reuse already existing test samples, if possible
|
||||||
def get_data_path_by_name(name):
|
def get_data_path_by_name(name) -> Path:
|
||||||
if name == "mimikatz":
|
if name == "mimikatz":
|
||||||
return CD / "data" / "mimikatz.exe_"
|
return CD / "data" / "mimikatz.exe_"
|
||||||
elif name == "kernel32":
|
elif name == "kernel32":
|
||||||
@@ -1048,7 +1048,6 @@ FEATURE_COUNT_TESTS_DOTNET = [
|
|||||||
|
|
||||||
|
|
||||||
def do_test_feature_presence(get_extractor, sample, scope, feature, expected):
|
def do_test_feature_presence(get_extractor, sample, scope, feature, expected):
|
||||||
print(sample)
|
|
||||||
extractor = get_extractor(sample)
|
extractor = get_extractor(sample)
|
||||||
features = scope(extractor)
|
features = scope(extractor)
|
||||||
if expected:
|
if expected:
|
||||||
|
|||||||
@@ -5,6 +5,8 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
from pathlib import PosixPath
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import fixtures
|
import fixtures
|
||||||
from fixtures import *
|
from fixtures import *
|
||||||
@@ -27,5 +29,4 @@ def test_pefile_features(sample, scope, feature, expected):
|
|||||||
|
|
||||||
if ".elf" in sample.name:
|
if ".elf" in sample.name:
|
||||||
pytest.xfail("pefile doesn't handle ELF files")
|
pytest.xfail("pefile doesn't handle ELF files")
|
||||||
|
|
||||||
fixtures.do_test_feature_presence(fixtures.get_pefile_extractor, sample, scope, feature, expected)
|
fixtures.do_test_feature_presence(fixtures.get_pefile_extractor, sample, scope, feature, expected)
|
||||||
|
|||||||
Reference in New Issue
Block a user