com: move database into python files (#1924)

* com: move database into python files

* com: pep8 and lints

* com: fix generated string feature type

* pyinstaller: remove reference to old assets directory
This commit is contained in:
Willi Ballenthin
2024-01-11 14:06:24 +01:00
committed by GitHub
parent 9e5cc07a48
commit ad46b33bb7
7 changed files with 31982 additions and 43 deletions

View File

@@ -17,7 +17,6 @@ a = Analysis(
# when invoking pyinstaller from the project root,
# this gets invoked from the directory of the spec file,
# i.e. ./.github/pyinstaller
("../../assets", "assets"),
("../../rules", "rules"),
("../../sigs", "sigs"),
("../../cache", "cache"),

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,36 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from enum import Enum
from typing import Dict, List
from capa.helpers import assert_never
class ComType(Enum):
    """
    The kinds of COM database that can be queried.

    Each member's value is the lowercase name of the kind,
    so a member can be looked up from that string, e.g. `ComType("class")`.
    """

    # COM classes; paired with the "CLSID_" prefix in COM_PREFIXES.
    CLASS = "class"
    # COM interfaces; paired with the "IID_" prefix in COM_PREFIXES.
    INTERFACE = "interface"
# prefix prepended to a COM name to form its symbol,
# such as "CLSID_" + name for classes and "IID_" + name for interfaces.
COM_PREFIXES: Dict[ComType, str] = {
    ComType.CLASS: "CLSID_",
    ComType.INTERFACE: "IID_",
}
def load_com_database(com_type: ComType) -> Dict[str, List[str]]:
    """
    Fetch the COM database for the requested kind of COM object.

    args:
      com_type: which database to fetch: classes or interfaces.

    returns:
      the dictionary exported by the matching database module
      (`COM_CLASSES` or `COM_INTERFACES`), keyed by name with a list of strings per entry.
    """
    # lazy load these python files since they are so large.
    # that is, don't load them unless a COM feature is being handled,
    # and even then import only the one database that was actually requested.
    # Python caches imported modules, so repeated calls don't reload the data.
    if com_type == ComType.CLASS:
        import capa.features.com.classes

        return capa.features.com.classes.COM_CLASSES
    elif com_type == ComType.INTERFACE:
        import capa.features.com.interfaces

        return capa.features.com.interfaces.COM_INTERFACES
    else:
        # exhaustiveness guard: reached only if ComType gains a member not handled above.
        assert_never(com_type)

3696
capa/features/com/classes.py Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -8,8 +8,6 @@
import io
import re
import gzip
import json
import uuid
import codecs
import logging
@@ -39,11 +37,13 @@ import capa.perf
import capa.engine as ceng
import capa.features
import capa.optimizer
import capa.features.com
import capa.features.file
import capa.features.insn
import capa.features.common
import capa.features.basicblock
from capa.engine import Statement, FeatureSet
from capa.features.com import ComType
from capa.features.common import MAX_BYTES_FEATURE_SIZE, Feature
from capa.features.address import Address
@@ -328,42 +328,16 @@ def ensure_feature_valid_for_scopes(scopes: Scopes, feature: Union[Feature, Stat
raise InvalidRule(f"feature {feature} not supported for scopes {scopes}")
class ComType(Enum):
CLASS = "class"
INTERFACE = "interface"
# COM data source https://github.com/stevemk14ebr/COM-Code-Helper/tree/master
VALID_COM_TYPES = {
ComType.CLASS: {"db_path": "assets/classes.json.gz", "prefix": "CLSID_"},
ComType.INTERFACE: {"db_path": "assets/interfaces.json.gz", "prefix": "IID_"},
}
@lru_cache(maxsize=None)
def load_com_database(com_type: ComType) -> Dict[str, List[str]]:
com_db_path: Path = capa.main.get_default_root() / VALID_COM_TYPES[com_type]["db_path"]
if not com_db_path.exists():
raise IOError(f"COM database path '{com_db_path}' does not exist or cannot be accessed")
try:
with gzip.open(com_db_path, "rb") as gzfile:
return json.loads(gzfile.read().decode("utf-8"))
except Exception as e:
raise IOError(f"Error loading COM database from '{com_db_path}'") from e
def translate_com_feature(com_name: str, com_type: ComType) -> ceng.Or:
com_db = load_com_database(com_type)
guid_strings: Optional[List[str]] = com_db.get(com_name)
if guid_strings is None or len(guid_strings) == 0:
def translate_com_feature(com_name: str, com_type: ComType) -> ceng.Statement:
com_db = capa.features.com.load_com_database(com_type)
guids: Optional[List[str]] = com_db.get(com_name)
if not guids:
logger.error(" %s doesn't exist in COM %s database", com_name, com_type)
raise InvalidRule(f"'{com_name}' doesn't exist in COM {com_type} database")
com_features: List = []
for guid_string in guid_strings:
hex_chars = guid_string.replace("-", "")
com_features: List[Feature] = []
for guid in guids:
hex_chars = guid.replace("-", "")
h = [hex_chars[i : i + 2] for i in range(0, len(hex_chars), 2)]
reordered_hex_pairs = [
h[3],
@@ -384,9 +358,10 @@ def translate_com_feature(com_name: str, com_type: ComType) -> ceng.Or:
h[15],
]
guid_bytes = bytes.fromhex("".join(reordered_hex_pairs))
prefix = VALID_COM_TYPES[com_type]["prefix"]
com_features.append(capa.features.common.StringFactory(guid_string, f"{prefix+com_name} as GUID string"))
com_features.append(capa.features.common.Bytes(guid_bytes, f"{prefix+com_name} as bytes"))
prefix = capa.features.com.COM_PREFIXES[com_type]
symbol = prefix + com_name
com_features.append(capa.features.common.String(guid, f"{symbol} as GUID string"))
com_features.append(capa.features.common.Bytes(guid_bytes, f"{symbol} as bytes"))
return ceng.Or(com_features)
@@ -824,11 +799,13 @@ def build_statements(d, scopes: Scopes):
return feature
elif key.startswith("com/"):
com_type = str(key[len("com/") :]).upper()
if com_type not in [item.name for item in ComType]:
raise InvalidRule(f"unexpected COM type: {com_type}")
com_type_name = str(key[len("com/") :])
try:
com_type = ComType(com_type_name)
except ValueError:
raise InvalidRule(f"unexpected COM type: {com_type_name}")
value, description = parse_description(d[key], key, d.get("description"))
return translate_com_feature(value, ComType[com_type])
return translate_com_feature(value, com_type)
else:
Feature = parse_feature(key)