From aacfcaaa239db18b5c68986163bf38f6823fb18b Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 3 Mar 2023 09:52:50 -0700 Subject: [PATCH 1/4] explorer: improve embedded PE detection --- capa/features/extractors/ida/file.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py index 0d380a88..532d5a89 100644 --- a/capa/features/extractors/ida/file.py +++ b/capa/features/extractors/ida/file.py @@ -21,12 +21,14 @@ from capa.features.file import Export, Import, Section, FunctionName from capa.features.common import FORMAT_PE, FORMAT_ELF, Format, String, Feature, Characteristic from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress +MAX_OFFSET_PE_AFTER_MZ = 0x200 + def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]: """check segment for embedded PE adapted for IDA from: - https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19 + https://github.com/vivisect/vivisect/blob/91e8419a861f49779f18316f155311967e696836/PE/carve.py#L25 """ seg_max = seg.end_ea mz_xor = [ @@ -40,13 +42,14 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]: todo = [] for mzx, pex, i in mz_xor: + # find all segment offsets containing XOR'd "MZ" bytes for off in capa.features.extractors.ida.helpers.find_byte_sequence(seg.start_ea, seg.end_ea, mzx): todo.append((off, mzx, pex, i)) while len(todo): off, mzx, pex, i = todo.pop() - # The MZ header has one field we will check e_lfanew is at 0x3c + # MZ header has one field we will check e_lfanew is at 0x3c e_lfanew = off + 0x3C if seg_max < (e_lfanew + 4): @@ -54,6 +57,10 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]: newoff = struct.unpack(" MAX_OFFSET_PE_AFTER_MZ: + continue + peoff = off + newoff if seg_max < (peoff + 2): continue @@ -61,9 +68,6 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]: if idc.get_bytes(peoff, 2) == pex: yield off, i - for nextres in capa.features.extractors.ida.helpers.find_byte_sequence(off + 1, seg.end_ea, mzx): - todo.append((nextres, mzx, pex, i)) - def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]: """extract embedded PE features From 14c18727db75e6b04dfb8b588f331173ed31ef63 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Fri, 3 Mar 2023 09:55:45 -0700 Subject: [PATCH 2/4] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 543db9ac..52f6de8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ - match: extend OS characteristic to match OS_ANY to all supported OSes #1324 @mike-hunhoff - explorer: fix exception when plugin loaded in IDA hosted under idat #1341 @mike-hunhoff - extractor: fix IDA and vivisect string and bytes features overlap and tests #1327 #1336 @xusheng6 +- explorer: improve embedded PE detection #1344 @mike-hunhoff ### capa explorer IDA Pro plugin From 02dc42154bf97b71ae730e49149e95ed51bfdaf0 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Mon, 6 Mar 2023 08:53:57 -0700 Subject: [PATCH 3/4] Update CHANGELOG.md Co-authored-by: Willi Ballenthin --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52f6de8e..439b0827 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,7 @@ - match: extend OS characteristic to match OS_ANY to all supported OSes #1324 @mike-hunhoff - explorer: fix exception when plugin loaded in IDA hosted under idat #1341 @mike-hunhoff - extractor: fix IDA and vivisect string and bytes features overlap and tests #1327 #1336 @xusheng6 -- explorer: improve embedded PE detection #1344 @mike-hunhoff +- explorer: improve embedded PE detection performance and reduce FP potential #1344 @mike-hunhoff ### capa explorer IDA Pro plugin From 95f23dafe57930ffef8f8df1aed4dacce4389bc8 Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Mon, 6 Mar 2023 08:55:32 -0700 Subject: [PATCH 4/4] Update CHANGELOG.md --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 439b0827..ac3ba707 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,11 +25,11 @@ ### Bug Fixes - extractor: fix vivisect loop detection corner case #1310 @mr-tz - match: extend OS characteristic to match OS_ANY to all supported OSes #1324 @mike-hunhoff -- explorer: fix exception when plugin loaded in IDA hosted under idat #1341 @mike-hunhoff - extractor: fix IDA and vivisect string and bytes features overlap and tests #1327 #1336 @xusheng6 -- explorer: improve embedded PE detection performance and reduce FP potential #1344 @mike-hunhoff ### capa explorer IDA Pro plugin +- fix exception when plugin loaded in IDA hosted under idat #1341 @mike-hunhoff +- improve embedded PE detection performance and reduce FP potential #1344 @mike-hunhoff ### Development