mirror of
https://github.com/mandiant/capa.git
synced 2025-12-05 20:40:05 -08:00
vmray: skip non-printable strings (#2551)
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
|
||||
- vmray: load more analysis archives @mr-tz
|
||||
- dynamic: only check file limitations for static file formats @mr-tz
|
||||
- vmray: skip non-printable strings @mike-hunhoff
|
||||
|
||||
### capa Explorer Web
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import re
|
||||
import string
|
||||
import contextlib
|
||||
from collections import namedtuple
|
||||
|
||||
@@ -19,6 +20,7 @@ ASCII_RE_4 = re.compile(b"([%s]{%d,})" % (ASCII_BYTE, 4))
|
||||
UNICODE_RE_4 = re.compile(b"((?:[%s]\x00){%d,})" % (ASCII_BYTE, 4))
|
||||
REPEATS = [b"A", b"\x00", b"\xfe", b"\xff"]
|
||||
SLICE_SIZE = 4096
|
||||
PRINTABLE_CHAR_SET = set(string.printable)
|
||||
|
||||
String = namedtuple("String", ["s", "offset"])
|
||||
|
||||
@@ -84,3 +86,7 @@ def extract_unicode_strings(buf, n=4):
|
||||
for match in r.finditer(buf):
|
||||
with contextlib.suppress(UnicodeDecodeError):
|
||||
yield String(match.group().decode("utf-16"), match.start())
|
||||
|
||||
|
||||
def is_printable_str(s: str) -> bool:
    """Return True when every character of *s* is in PRINTABLE_CHAR_SET.

    An empty string contains no characters outside the set, so it is
    reported as printable.
    """
    return all(ch in PRINTABLE_CHAR_SET for ch in s)
|
||||
|
||||
@@ -12,6 +12,7 @@ import capa.features.extractors.helpers
|
||||
from capa.features.insn import API, Number
|
||||
from capa.features.common import String, Feature
|
||||
from capa.features.address import Address
|
||||
from capa.features.extractors.strings import is_printable_str
|
||||
from capa.features.extractors.vmray.models import PARAM_TYPE_INT, PARAM_TYPE_STR, Param, FunctionCall, hexint
|
||||
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle
|
||||
|
||||
@@ -27,11 +28,9 @@ def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[tuple[Feat
|
||||
if param.deref.type_ in PARAM_TYPE_INT:
|
||||
yield Number(hexint(param.deref.value)), ch.address
|
||||
elif param.deref.type_ in PARAM_TYPE_STR:
|
||||
# TODO(mr-tz): remove FPS like " \\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\x09\\x0a\\x0b\\x0c\\x0d\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\..."
|
||||
# https://github.com/mandiant/capa/issues/2432
|
||||
|
||||
# parsing the data up to here results in double-escaped backslashes, remove those here
|
||||
yield String(param.deref.value.replace("\\\\", "\\")), ch.address
|
||||
if is_printable_str(param.deref.value):
|
||||
# parsing the data up to here results in double-escaped backslashes, remove those here
|
||||
yield String(param.deref.value.replace("\\\\", "\\")), ch.address
|
||||
else:
|
||||
logger.debug("skipping deref param type %s", param.deref.type_)
|
||||
elif param.value is not None:
|
||||
|
||||
Reference in New Issue
Block a user