vmray: skip non-printable strings (#2551)

This commit is contained in:
Mike Hunhoff
2025-01-08 08:40:32 -07:00
committed by GitHub
parent 462e11443e
commit c3c93685e2
3 changed files with 11 additions and 5 deletions

View File

@@ -14,6 +14,7 @@
- vmray: load more analysis archives @mr-tz
- dynamic: only check file limitations for static file formats @mr-tz
- vmray: skip non-printable strings @mike-hunhoff
### capa Explorer Web

View File

@@ -9,6 +9,7 @@
# See the License for the specific language governing permissions and limitations under the License.
import re
import string
import contextlib
from collections import namedtuple
@@ -19,6 +20,7 @@ ASCII_RE_4 = re.compile(b"([%s]{%d,})" % (ASCII_BYTE, 4))
UNICODE_RE_4 = re.compile(b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, 4))
REPEATS = [b"A", b"\x00", b"\xfe", b"\xff"]
SLICE_SIZE = 4096
PRINTABLE_CHAR_SET = set(string.printable)
String = namedtuple("String", ["s", "offset"])
@@ -84,3 +86,7 @@ def extract_unicode_strings(buf, n=4):
for match in r.finditer(buf):
with contextlib.suppress(UnicodeDecodeError):
yield String(match.group().decode("utf-16"), match.start())
def is_printable_str(s: str) -> bool:
    """Return True if every character of ``s`` is in PRINTABLE_CHAR_SET.

    PRINTABLE_CHAR_SET is built from ``string.printable``; an empty string
    contains no disallowed characters and therefore yields True.
    """
    return PRINTABLE_CHAR_SET.issuperset(s)

View File

@@ -12,6 +12,7 @@ import capa.features.extractors.helpers
from capa.features.insn import API, Number
from capa.features.common import String, Feature
from capa.features.address import Address
from capa.features.extractors.strings import is_printable_str
from capa.features.extractors.vmray.models import PARAM_TYPE_INT, PARAM_TYPE_STR, Param, FunctionCall, hexint
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle
@@ -27,11 +28,9 @@ def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[tuple[Feat
if param.deref.type_ in PARAM_TYPE_INT:
yield Number(hexint(param.deref.value)), ch.address
elif param.deref.type_ in PARAM_TYPE_STR:
# TODO(mr-tz): remove FPS like " \\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\x09\\x0a\\x0b\\x0c\\x0d\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\..."
# https://github.com/mandiant/capa/issues/2432
# parsing the data up to here results in double-escaped backslashes, remove those here
yield String(param.deref.value.replace("\\\\", "\\")), ch.address
if is_printable_str(param.deref.value):
# parsing the data up to here results in double-escaped backslashes, remove those here
yield String(param.deref.value.replace("\\\\", "\\")), ch.address
else:
logger.debug("skipping deref param type %s", param.deref.type_)
elif param.value is not None: