diff --git a/CHANGELOG.md b/CHANGELOG.md index 543db9ac..ac3ba707 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,10 +25,11 @@ ### Bug Fixes - extractor: fix vivisect loop detection corner case #1310 @mr-tz - match: extend OS characteristic to match OS_ANY to all supported OSes #1324 @mike-hunhoff -- explorer: fix exception when plugin loaded in IDA hosted under idat #1341 @mike-hunhoff - extractor: fix IDA and vivisect string and bytes features overlap and tests #1327 #1336 @xusheng6 ### capa explorer IDA Pro plugin +- fix exception when plugin loaded in IDA hosted under idat #1341 @mike-hunhoff +- improve embedded PE detection performance and reduce FP potential #1344 @mike-hunhoff ### Development diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py index 0d380a88..532d5a89 100644 --- a/capa/features/extractors/ida/file.py +++ b/capa/features/extractors/ida/file.py @@ -21,12 +21,14 @@ from capa.features.file import Export, Import, Section, FunctionName from capa.features.common import FORMAT_PE, FORMAT_ELF, Format, String, Feature, Characteristic from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress +MAX_OFFSET_PE_AFTER_MZ = 0x200 + def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]: """check segment for embedded PE adapted for IDA from: - https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19 + https://github.com/vivisect/vivisect/blob/91e8419a861f49779f18316f155311967e696836/PE/carve.py#L25 """ seg_max = seg.end_ea mz_xor = [ @@ -40,13 +42,14 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]: todo = [] for mzx, pex, i in mz_xor: + # find all segment offsets containing XOR'd "MZ" bytes for off in capa.features.extractors.ida.helpers.find_byte_sequence(seg.start_ea, seg.end_ea, mzx): todo.append((off, mzx, pex, i)) while len(todo): off, mzx, pex, i = todo.pop() - # The MZ header has 
one field we will check e_lfanew is at 0x3c + # MZ header has one field we will check e_lfanew is at 0x3c e_lfanew = off + 0x3C if seg_max < (e_lfanew + 4): @@ -54,6 +57,10 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]: newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i))[0] + # assume XOR'd "PE" bytes exist within threshold + if newoff > MAX_OFFSET_PE_AFTER_MZ: + continue + peoff = off + newoff if seg_max < (peoff + 2): continue @@ -61,9 +68,6 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]: if idc.get_bytes(peoff, 2) == pex: yield off, i - for nextres in capa.features.extractors.ida.helpers.find_byte_sequence(off + 1, seg.end_ea, mzx): - todo.append((nextres, mzx, pex, i)) - def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]: """extract embedded PE features