mirror of
https://github.com/mandiant/capa.git
synced 2026-03-12 21:23:12 -07:00
loader: skip PE files with unrealistically large section virtual sizes (#2905)
* loader: skip PE files with unrealistically large section virtual sizes Some malformed PE samples declare section virtual sizes orders of magnitude larger than the file itself (e.g. a ~400 KB file with a 900 MB section). vivisect attempts to map these regions, causing unbounded CPU and memory consumption (see #1989). Add _is_probably_corrupt_pe() which uses pefile (fast_load=True) to check whether any section's Misc_VirtualSize exceeds max(file_size * 128, 512 MB). If the check fires, get_workspace() raises CorruptFile before vivisect is invoked, keeping the existing exception handling path consistent. Thresholds are intentionally conservative to avoid false positives on large but legitimate binaries. When pefile is unavailable the helper returns False and behaviour is unchanged. Fixes #1989. * changelog: add entry for #1989 corrupt PE large sections * loader: apply Gemini review improvements - Extend corrupt-PE check to FORMAT_AUTO so malformed PE files cannot bypass the guard when format is auto-detected (the helper returns False for non-PE files so there is no false-positive risk). - Replace magic literals 128 and 512*1024*1024 with named constants _VSIZE_FILE_RATIO and _MAX_REASONABLE_VSIZE for clarity. - Remove redundant int() cast around getattr(Misc_VirtualSize); keep the `or 0` guard for corrupt files where pefile may return None. - Extend test to cover FORMAT_AUTO path alongside FORMAT_PE. * tests: remove mock-only corrupt PE test per maintainer request williballenthin noted the test doesn't add real value since it only exercises the mock, not the actual heuristic. Removing it per feedback. * fix: resolve flake8 NIC002 implicit string concat and add missing test Fix the implicit string concatenation across multiple lines that caused code_style CI to fail. Also add the test_corrupt_pe_with_unrealistic_section_size_short_circuits test that was described in the PR body but not committed.
This commit is contained in:
committed by
GitHub
parent
2c9e30c3e1
commit
1f4a16cbcc
@@ -44,6 +44,7 @@
|
||||
- render: use default styling for dynamic -vv API/call details so they are easier to see @devs6186 #1865
|
||||
- loader: handle struct.error from dnfile and show clear CorruptFile message @devs6186 #2442
|
||||
- address: fix TypeError when sorting locations containing mixed address types @devs6186 #2195
|
||||
- loader: skip PE files with unrealistically large section virtual sizes to prevent resource exhaustion @devs6186 #1989
|
||||
|
||||
### capa Explorer Web
|
||||
- webui: fix 404 for "View rule in capa-rules" by using encodeURIComponent for rule name in URL @devs6186 #2482
|
||||
|
||||
@@ -126,6 +126,57 @@ def get_meta_str(vw):
|
||||
return f"{', '.join(meta)}, number of functions: {len(vw.getFunctions())}"
|
||||
|
||||
|
||||
# Thresholds for the corrupt-PE heuristic (see GH-1989).
# A section is considered unrealistic only when its declared virtual size
# exceeds BOTH the file size scaled by this ratio AND the absolute floor below,
# which keeps the check conservative for large but legitimate binaries.
_VSIZE_FILE_RATIO = 128
_MAX_REASONABLE_VSIZE = 512 * 1024 * 1024  # 512 MB


def _is_probably_corrupt_pe(path: Path) -> bool:
    """
    Heuristic check for obviously malformed PE samples that provoke
    pathological behavior in vivisect (see GH-1989).

    We treat a PE as "probably corrupt" when any section declares an
    unrealistically large virtual size compared to the file size, e.g.
    hundreds of megabytes in a tiny file. Such cases lead vivisect to
    try to map enormous regions and can exhaust CPU/memory.

    Args:
        path: location of the candidate file on disk.

    Returns:
        True when at least one section's declared virtual size exceeds
        max(file_size * _VSIZE_FILE_RATIO, _MAX_REASONABLE_VSIZE).
        False otherwise, including when pefile cannot be imported, the
        file cannot be stat'ed, the file is empty, or pefile fails to
        parse it (those cases are left to the existing checks).
    """
    try:
        import pefile
    except Exception:
        # If pefile is unavailable, fall back to existing behavior.
        return False

    # Cheap filesystem check first, so nothing is opened for missing or empty files.
    try:
        file_size = path.stat().st_size
    except OSError:
        return False

    if file_size <= 0:
        return False

    try:
        pe = pefile.PE(str(path), fast_load=True)
    except Exception:
        # Not a PE file (pefile.PEFormatError), unreadable, or otherwise
        # badly formed; let existing checks handle it.
        return False

    try:
        # Flag sections whose declared virtual size is wildly disproportionate
        # to the file size (e.g. 900MB section in a ~400KB sample).
        max_reasonable = max(file_size * _VSIZE_FILE_RATIO, _MAX_REASONABLE_VSIZE)

        for section in getattr(pe, "sections", []):
            # `or 0` guards against a missing/None value in a malformed header.
            vsize = getattr(section, "Misc_VirtualSize", 0) or 0
            if vsize > max_reasonable:
                logger.debug(
                    "detected unrealistic PE section virtual size: 0x%x (file size: 0x%x), treating as corrupt",
                    vsize,
                    file_size,
                )
                return True

        return False
    finally:
        # pefile opens the file itself when given a path; release it on every
        # exit path so the handle is not left open.
        pe.close()
|
||||
|
||||
|
||||
def get_workspace(path: Path, input_format: str, sigpaths: list[Path]):
|
||||
"""
|
||||
load the program at the given path into a vivisect workspace using the given format.
|
||||
@@ -149,6 +200,12 @@ def get_workspace(path: Path, input_format: str, sigpaths: list[Path]):
|
||||
|
||||
logger.debug("generating vivisect workspace for: %s", path)
|
||||
|
||||
if input_format in (FORMAT_PE, FORMAT_AUTO) and _is_probably_corrupt_pe(path):
|
||||
raise CorruptFile(
|
||||
"PE file appears to contain unrealistically large sections and is likely corrupt"
|
||||
+ " - skipping analysis to avoid excessive resource usage."
|
||||
)
|
||||
|
||||
try:
|
||||
if input_format == FORMAT_AUTO:
|
||||
if not is_supported_format(path):
|
||||
|
||||
@@ -19,7 +19,7 @@ import pytest
|
||||
import envi.exc
|
||||
|
||||
from capa.loader import CorruptFile, get_workspace
|
||||
from capa.features.common import FORMAT_ELF
|
||||
from capa.features.common import FORMAT_PE, FORMAT_ELF
|
||||
|
||||
|
||||
def test_segmentation_violation_handling():
|
||||
@@ -38,3 +38,23 @@ def test_segmentation_violation_handling():
|
||||
|
||||
with pytest.raises(CorruptFile, match="Invalid memory access"):
|
||||
get_workspace(fake_path, FORMAT_ELF, [])
|
||||
|
||||
|
||||
def test_corrupt_pe_with_unrealistic_section_size_short_circuits():
    """
    A PE flagged by the corrupt-section heuristic must be rejected up front:
    get_workspace() raises CorruptFile and never reaches vivisect.

    The heuristic itself is patched to report "corrupt", so this only checks
    the short-circuit wiring in get_workspace().

    See #1989.
    """
    sample = Path("/tmp/fake_corrupt.exe")

    heuristic_says_corrupt = patch("capa.loader._is_probably_corrupt_pe", return_value=True)
    vivisect_loader = patch("viv_utils.getWorkspace")

    with heuristic_says_corrupt, vivisect_loader as mock_workspace:
        with pytest.raises(CorruptFile, match="unrealistically large sections"):
            get_workspace(sample, FORMAT_PE, [])

        # the guard fired before vivisect was ever asked to load the file
        mock_workspace.assert_not_called()
|
||||
|
||||
Reference in New Issue
Block a user