mirror of
https://github.com/mandiant/capa.git
synced 2025-12-22 07:10:29 -08:00
159 lines
4.8 KiB
Python
159 lines
4.8 KiB
Python
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and limitations under the License.
|
|
|
|
import struct
|
|
|
|
import idc
|
|
import idaapi
|
|
import idautils
|
|
|
|
import capa.features.extractors.helpers
|
|
import capa.features.extractors.strings
|
|
import capa.features.extractors.ida.helpers
|
|
from capa.features import String, Characteristic
|
|
from capa.features.file import Export, Import, Section
|
|
|
|
|
|
def check_segment_for_pe(seg):
|
|
"""check segment for embedded PE
|
|
|
|
adapted for IDA from:
|
|
https://github.com/vivisect/vivisect/blob/7be4037b1cecc4551b397f840405a1fc606f9b53/PE/carve.py#L19
|
|
|
|
args:
|
|
seg (IDA segment_t)
|
|
"""
|
|
seg_max = seg.end_ea
|
|
mz_xor = [
|
|
(
|
|
capa.features.extractors.helpers.xor_static(b"MZ", i),
|
|
capa.features.extractors.helpers.xor_static(b"PE", i),
|
|
i,
|
|
)
|
|
for i in range(256)
|
|
]
|
|
todo = [
|
|
(capa.features.extractors.ida.helpers.find_byte_sequence(seg.start_ea, seg.end_ea, mzx), mzx, pex, i)
|
|
for mzx, pex, i in mz_xor
|
|
]
|
|
todo = [(off, mzx, pex, i) for (off, mzx, pex, i) in todo if off != idaapi.BADADDR]
|
|
|
|
while len(todo):
|
|
off, mzx, pex, i = todo.pop()
|
|
|
|
# The MZ header has one field we will check e_lfanew is at 0x3c
|
|
e_lfanew = off + 0x3C
|
|
|
|
if seg_max < (e_lfanew + 4):
|
|
continue
|
|
|
|
newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i))[0]
|
|
|
|
peoff = off + newoff
|
|
if seg_max < (peoff + 2):
|
|
continue
|
|
|
|
if idc.get_bytes(peoff, 2) == pex:
|
|
yield (off, i)
|
|
|
|
nextres = capa.features.extractors.ida.helpers.find_byte_sequence(off + 1, seg.end_ea, mzx)
|
|
if nextres != -1:
|
|
todo.append((nextres, mzx, pex, i))
|
|
|
|
|
|
def extract_file_embedded_pe():
|
|
"""extract embedded PE features
|
|
|
|
IDA must load resource sections for this to be complete
|
|
- '-R' from console
|
|
- Check 'Load resource sections' when opening binary in IDA manually
|
|
"""
|
|
for seg in capa.features.extractors.ida.helpers.get_segments(skip_header_segments=True):
|
|
for (ea, _) in check_segment_for_pe(seg):
|
|
yield Characteristic("embedded pe"), ea
|
|
|
|
|
|
def extract_file_export_names():
|
|
""" extract function exports """
|
|
for (_, _, ea, name) in idautils.Entries():
|
|
yield Export(name), ea
|
|
|
|
|
|
def extract_file_import_names():
|
|
"""extract function imports
|
|
|
|
1. imports by ordinal:
|
|
- modulename.#ordinal
|
|
|
|
2. imports by name, results in two features to support importname-only
|
|
matching:
|
|
- modulename.importname
|
|
- importname
|
|
"""
|
|
for (ea, info) in capa.features.extractors.ida.helpers.get_file_imports().items():
|
|
if info[1]:
|
|
yield Import("%s.%s" % (info[0], info[1])), ea
|
|
yield Import(info[1]), ea
|
|
if info[2]:
|
|
yield Import("%s.#%s" % (info[0], str(info[2]))), ea
|
|
|
|
|
|
def extract_file_section_names():
|
|
"""extract section names
|
|
|
|
IDA must load resource sections for this to be complete
|
|
- '-R' from console
|
|
- Check 'Load resource sections' when opening binary in IDA manually
|
|
"""
|
|
for seg in capa.features.extractors.ida.helpers.get_segments(skip_header_segments=True):
|
|
yield Section(idaapi.get_segm_name(seg)), seg.start_ea
|
|
|
|
|
|
def extract_file_strings():
|
|
"""extract ASCII and UTF-16 LE strings
|
|
|
|
IDA must load resource sections for this to be complete
|
|
- '-R' from console
|
|
- Check 'Load resource sections' when opening binary in IDA manually
|
|
"""
|
|
for seg in capa.features.extractors.ida.helpers.get_segments():
|
|
seg_buff = capa.features.extractors.ida.helpers.get_segment_buffer(seg)
|
|
|
|
for s in capa.features.extractors.strings.extract_ascii_strings(seg_buff):
|
|
yield String(s.s), (seg.start_ea + s.offset)
|
|
|
|
for s in capa.features.extractors.strings.extract_unicode_strings(seg_buff):
|
|
yield String(s.s), (seg.start_ea + s.offset)
|
|
|
|
|
|
def extract_features():
|
|
""" extract file features """
|
|
for file_handler in FILE_HANDLERS:
|
|
for feature, va in file_handler():
|
|
yield feature, va
|
|
|
|
|
|
FILE_HANDLERS = (
|
|
extract_file_export_names,
|
|
extract_file_import_names,
|
|
extract_file_strings,
|
|
extract_file_section_names,
|
|
extract_file_embedded_pe,
|
|
)
|
|
|
|
|
|
def main():
|
|
""" """
|
|
import pprint
|
|
|
|
pprint.pprint(list(extract_features()))
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|