diff --git a/capa/features/extractors/vmray/call.py b/capa/features/extractors/vmray/call.py
index 45fc73ea..e0f1059a 100644
--- a/capa/features/extractors/vmray/call.py
+++ b/capa/features/extractors/vmray/call.py
@@ -9,26 +9,36 @@ import logging
 from typing import Tuple, Iterator
 
 from capa.features.insn import API, Number
-from capa.features.common import Feature
+from capa.features.common import String, Feature
 from capa.features.address import Address
-from capa.features.extractors.vmray.models import PARAM_TYPE_PTR, FunctionCall
+from capa.features.extractors.vmray.models import PARAM_TYPE_INT, PARAM_TYPE_STR, Param, FunctionCall, hexint
 from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle
 
 logger = logging.getLogger(__name__)
 
 
+def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
+    if param.deref is not None:
+        if param.deref.value is not None:
+            if param.deref.type_ in PARAM_TYPE_INT:
+                yield Number(hexint(param.deref.value)), ch.address
+            elif param.deref.type_ in PARAM_TYPE_STR:
+                yield String(param.deref.value), ch.address
+    elif param.value is not None:
+        if param.type_ in PARAM_TYPE_INT:
+            yield Number(hexint(param.value)), ch.address
+
+
 def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
     call: FunctionCall = ch.inner
 
     if call.params_in:
         for param in call.params_in.params:
-            if param.type_ not in PARAM_TYPE_PTR and param.value is not None:
-                yield Number(param.value), ch.address
+            yield from get_call_param_features(param, ch)
 
     if call.params_out:
         for param in call.params_out.params:
-            if param.type_ not in PARAM_TYPE_PTR and param.value is not None:
-                yield Number(param.value), ch.address
+            yield from get_call_param_features(param, ch)
 
     yield API(call.name), ch.address
 
diff --git a/capa/features/extractors/vmray/models.py b/capa/features/extractors/vmray/models.py
index 7fb9b8f7..9bb46c13 100644
--- a/capa/features/extractors/vmray/models.py
+++ b/capa/features/extractors/vmray/models.py
@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 
-from typing import Dict, List, Optional
+from typing import Dict, List, Union, Optional
 
 from pydantic import Field, BaseModel
 from typing_extensions import Annotated
@@ -38,15 +38,33 @@ PARAM_TYPE = (
 """
 
 PARAM_TYPE_PTR = ("void_ptr", "ptr")
+PARAM_TYPE_STR = ("str",)
+PARAM_TYPE_INT = (
+    "signed_8bit",
+    "unsigned_8bit",
+    "signed_16bit",
+    "unsigned_16bit",
+    "signed_32bit",
+    "unsigned_32bit",
+    "signed_64bit",
+    "unsigned_64bit",
+    "double",
+    "bool",
+    "unknown",
+)
 
 
-def validate_hex_int(value):
+def hexint(value: Union[str, int]) -> int:
     if isinstance(value, str):
         return int(value, 16) if value.startswith("0x") else int(value, 10)
     else:
         return value
 
 
+def validate_hex_int(value: Union[str, int]) -> int:
+    return hexint(value)
+
+
 def validate_param_list(value):
     if isinstance(value, list):
         return value
@@ -57,10 +75,16 @@ def validate_param_list(value):
 HexInt = Annotated[int, BeforeValidator(validate_hex_int)]
 
 
+class ParamDeref(BaseModel):
+    type_: str = Field(alias="type")
+    value: Optional[str] = None
+
+
 class Param(BaseModel):
     name: str
     type_: str = Field(alias="type")
-    value: Optional[HexInt] = None
+    value: Optional[str] = None
+    deref: Optional[ParamDeref] = None
 
 
 # params may be stored as a list of Param or a single Param