vmray: emit number and string call features for pointer dereference

This commit is contained in:
Mike Hunhoff
2024-07-12 18:35:50 -06:00
parent e2f5eb7d30
commit 4bbe9e1ce9
2 changed files with 43 additions and 9 deletions

View File

@@ -9,26 +9,36 @@ import logging
from typing import Tuple, Iterator
from capa.features.insn import API, Number
from capa.features.common import Feature
from capa.features.common import String, Feature
from capa.features.address import Address
from capa.features.extractors.vmray.models import PARAM_TYPE_PTR, FunctionCall
from capa.features.extractors.vmray.models import PARAM_TYPE_INT, PARAM_TYPE_STR, Param, FunctionCall, hexint
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle
logger = logging.getLogger(__name__)
def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
    """Yield the features described by a single call parameter.

    Args:
        param: the parameter to inspect.
        ch: handle of the call the parameter belongs to; every feature is
            reported at ``ch.address``.

    Yields:
        ``(feature, address)`` pairs: a ``Number`` for integer-typed values and
        a ``String`` for string-typed dereferenced values. Parameters of any
        other type, or without a value, yield nothing.
    """
    deref = param.deref

    if deref is None:
        # no dereference data: only the parameter's own value can be used,
        # and only when it is integer-typed
        if param.value is not None and param.type_ in PARAM_TYPE_INT:
            yield Number(hexint(param.value)), ch.address
        return

    # when dereference data is present, param.value is never consulted
    if deref.value is None:
        return

    if deref.type_ in PARAM_TYPE_INT:
        yield Number(hexint(deref.value)), ch.address
    elif deref.type_ in PARAM_TYPE_STR:
        yield String(deref.value), ch.address
def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
    """Yield the features of one captured function call.

    Args:
        ph: handle of the owning process (not used by this function).
        th: handle of the owning thread (not used by this function).
        ch: handle of the call; ``ch.inner`` holds the ``FunctionCall`` record
            and ``ch.address`` is the address reported for every feature.

    Yields:
        ``(feature, address)`` pairs: first the features of the input
        parameters, then those of the output parameters, and finally an
        ``API`` feature carrying the call name.
    """
    call: FunctionCall = ch.inner

    # Parameter handling lives solely in get_call_param_features(). Emitting
    # Number(param.value) here as well would duplicate features and hand
    # Number an unparsed value.
    for param_list in (call.params_in, call.params_out):
        if param_list:
            for param in param_list.params:
                yield from get_call_param_features(param, ch)

    yield API(call.name), ch.address

View File

@@ -6,7 +6,7 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from typing import Dict, List, Optional
from typing import Dict, List, Union, Optional
from pydantic import Field, BaseModel
from typing_extensions import Annotated
@@ -38,15 +38,33 @@ PARAM_TYPE = (
"""
# parameter type names that denote pointers
PARAM_TYPE_PTR = ("void_ptr", "ptr")

# parameter type names that denote strings
PARAM_TYPE_STR = ("str",)

# parameter type names whose values are handled as integers
# NOTE(review): "double", "bool" and "unknown" are grouped with the integer
# widths here; confirm their values are always integer-formatted.
PARAM_TYPE_INT = (
    "signed_8bit",
    "unsigned_8bit",
    "signed_16bit",
    "unsigned_16bit",
    "signed_32bit",
    "unsigned_32bit",
    "signed_64bit",
    "unsigned_64bit",
    "double",
    "bool",
    "unknown",
)
def hexint(value: Union[str, int]) -> int:
    """Convert a decimal or ``0x``-prefixed hexadecimal string to an int.

    Args:
        value: an int, which is returned unchanged, or a string holding a
            decimal number or a hexadecimal number with a ``0x``/``0X``
            prefix, optionally preceded by a sign.

    Returns:
        The parsed integer, or ``value`` itself when it is not a string.

    Raises:
        ValueError: if a string is not a valid number in the selected base.
    """
    if isinstance(value, str):
        # look past surrounding whitespace and a leading sign so that inputs
        # such as "-0x10" or "0XFF" are recognised as hexadecimal
        unsigned = value.strip().lstrip("+-")
        base = 16 if unsigned[:2].lower() == "0x" else 10
        return int(value, base)
    return value
def validate_hex_int(value: Union[str, int]) -> int:
    """Validator form of ``hexint``; returns whatever ``hexint`` returns for ``value``."""
    parsed = hexint(value)
    return parsed
def validate_param_list(value):
if isinstance(value, list):
return value
@@ -57,10 +75,16 @@ def validate_param_list(value):
HexInt = Annotated[int, BeforeValidator(validate_hex_int)]
class ParamDeref(BaseModel):
    """Dereference record of a parameter: a type name and an optional value."""

    # type name of the dereferenced value; populated from the "type" key
    type_: str = Field(alias="type")

    # dereferenced value as text; None when not present
    value: Optional[str] = None
class Param(BaseModel):
    """A single function-call parameter."""

    name: str

    # type name of the parameter; populated from the "type" key
    type_: str = Field(alias="type")

    # Kept as raw text and declared exactly once: a second `value` annotation
    # would silently override the first. Callers that need a number convert
    # the text with hexint().
    value: Optional[str] = None

    # dereference record, if any; None when not present
    deref: Optional[ParamDeref] = None
# params may be stored as a list of Param or a single Param