mirror of
https://github.com/mandiant/capa.git
synced 2025-12-22 15:16:22 -08:00
freeze: fix schema to support overlapping functions
This commit is contained in:
@@ -13,7 +13,7 @@ import zlib
|
|||||||
import logging
|
import logging
|
||||||
import collections
|
import collections
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Any, Dict, List, Type, Tuple, Set
|
from typing import Any, Set, Dict, List, Type, Tuple
|
||||||
|
|
||||||
from pydantic import Field, BaseModel
|
from pydantic import Field, BaseModel
|
||||||
|
|
||||||
@@ -139,6 +139,7 @@ class FunctionFeature(HashableModel):
|
|||||||
function != address because, e.g., the feature may be found *within* the scope (function).
|
function != address because, e.g., the feature may be found *within* the scope (function).
|
||||||
versus right at its starting address.
|
versus right at its starting address.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
function: Address
|
function: Address
|
||||||
address: Address
|
address: Address
|
||||||
feature: Feature
|
feature: Feature
|
||||||
@@ -153,6 +154,7 @@ class BasicBlockFeature(HashableModel):
|
|||||||
basic_block != address because, e.g., the feature may be found *within* the scope (basic block).
|
basic_block != address because, e.g., the feature may be found *within* the scope (basic block).
|
||||||
versus right at its starting address.
|
versus right at its starting address.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
basic_block: Address
|
basic_block: Address
|
||||||
address: Address
|
address: Address
|
||||||
feature: Feature
|
feature: Feature
|
||||||
@@ -167,44 +169,44 @@ class InstructionFeature(HashableModel):
|
|||||||
instruction != address because, e.g., the feature may be found *within* the scope (instruction),
|
instruction != address because, e.g., the feature may be found *within* the scope (instruction),
|
||||||
versus right at its starting address.
|
versus right at its starting address.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
instruction: Address
|
instruction: Address
|
||||||
address: Address
|
address: Address
|
||||||
feature: Feature
|
feature: Feature
|
||||||
|
|
||||||
|
|
||||||
class Features(BaseModel):
|
class InstructionFeatures(BaseModel):
|
||||||
global_: List[GlobalFeature] = Field(alias="global")
|
address: Address
|
||||||
file: List[FileFeature]
|
features: List[InstructionFeature]
|
||||||
function: List[FunctionFeature]
|
|
||||||
basic_block: List[BasicBlockFeature] = Field(alias="basic block")
|
|
||||||
instruction: List[InstructionFeature]
|
class BasicBlockFeatures(BaseModel):
|
||||||
|
address: Address
|
||||||
|
features: List[BasicBlockFeature]
|
||||||
|
instructions: List[InstructionFeatures]
|
||||||
|
|
||||||
|
|
||||||
|
class FunctionFeatures(BaseModel):
|
||||||
|
address: Address
|
||||||
|
features: List[FunctionFeature]
|
||||||
|
basic_blocks: List[BasicBlockFeatures] = Field(alias="basic block")
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
allow_population_by_field_name = True
|
allow_population_by_field_name = True
|
||||||
|
|
||||||
|
|
||||||
class InstructionLayout(BaseModel):
|
class Features(BaseModel):
|
||||||
address: Address
|
global_: List[GlobalFeature] = Field(alias="global")
|
||||||
|
file: List[FileFeature]
|
||||||
|
functions: List[FunctionFeatures]
|
||||||
|
|
||||||
|
class Config:
|
||||||
class BasicBlockLayout(BaseModel):
|
allow_population_by_field_name = True
|
||||||
address: Address
|
|
||||||
instructions: List[InstructionLayout]
|
|
||||||
|
|
||||||
|
|
||||||
class FunctionLayout(BaseModel):
|
|
||||||
address: Address
|
|
||||||
basic_blocks: List[BasicBlockLayout]
|
|
||||||
|
|
||||||
|
|
||||||
class Layout(BaseModel):
|
|
||||||
functions: List[FunctionLayout]
|
|
||||||
|
|
||||||
|
|
||||||
class Freeze(BaseModel):
|
class Freeze(BaseModel):
|
||||||
version: int = 2
|
version: int = 2
|
||||||
base_address: Address = Field(alias="base address")
|
base_address: Address = Field(alias="base address")
|
||||||
layout: Layout
|
|
||||||
features: Features
|
features: Features
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
@@ -233,85 +235,73 @@ def dumps(extractor: capa.features.extractors.base_extractor.FeatureExtractor) -
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
function_features: Set[FunctionFeature] = set()
|
function_features: List[FunctionFeatures] = []
|
||||||
basic_block_features: Set[BasicBlockFeature] = set()
|
|
||||||
instruction_features: Set[InstructionFeature] = set()
|
|
||||||
function_layouts: List[FunctionLayout] = []
|
|
||||||
|
|
||||||
for f in extractor.get_functions():
|
for f in extractor.get_functions():
|
||||||
faddr = Address.from_capa(f.address)
|
faddr = Address.from_capa(f.address)
|
||||||
|
ffeatures = [
|
||||||
for feature, addr in extractor.extract_function_features(f):
|
|
||||||
function_features.add(
|
|
||||||
FunctionFeature(
|
FunctionFeature(
|
||||||
function=faddr,
|
function=faddr,
|
||||||
address=Address.from_capa(addr),
|
address=Address.from_capa(addr),
|
||||||
feature=Feature.from_capa(feature),
|
feature=Feature.from_capa(feature),
|
||||||
)
|
)
|
||||||
)
|
for feature, addr in extractor.extract_function_features(f)
|
||||||
|
]
|
||||||
|
|
||||||
basic_block_layouts: List[BasicBlockLayout] = []
|
basic_blocks = []
|
||||||
for bb in extractor.get_basic_blocks(f):
|
for bb in extractor.get_basic_blocks(f):
|
||||||
bbaddr = Address.from_capa(bb.address)
|
bbaddr = Address.from_capa(bb.address)
|
||||||
|
bbfeatures = [
|
||||||
for feature, addr in extractor.extract_basic_block_features(f, bb):
|
|
||||||
basic_block_features.add(
|
|
||||||
BasicBlockFeature(
|
BasicBlockFeature(
|
||||||
basic_block=bbaddr,
|
basic_block=bbaddr,
|
||||||
address=Address.from_capa(addr),
|
address=Address.from_capa(addr),
|
||||||
feature=Feature.from_capa(feature),
|
feature=Feature.from_capa(feature),
|
||||||
)
|
)
|
||||||
)
|
for feature, addr in extractor.extract_basic_block_features(f, bb)
|
||||||
|
]
|
||||||
|
|
||||||
instruction_layouts: List[InstructionLayout] = []
|
instructions = []
|
||||||
for insn in extractor.get_instructions(f, bb):
|
for insn in extractor.get_instructions(f, bb):
|
||||||
iaddr = Address.from_capa(insn.address)
|
iaddr = Address.from_capa(insn.address)
|
||||||
|
ifeatures = [
|
||||||
for feature, addr in extractor.extract_insn_features(f, bb, insn):
|
|
||||||
instruction_features.add(
|
|
||||||
InstructionFeature(
|
InstructionFeature(
|
||||||
instruction=iaddr,
|
instruction=iaddr,
|
||||||
address=Address.from_capa(addr),
|
address=Address.from_capa(addr),
|
||||||
feature=Feature.from_capa(feature),
|
feature=Feature.from_capa(feature),
|
||||||
)
|
)
|
||||||
)
|
for feature, addr in extractor.extract_insn_features(f, bb, insn)
|
||||||
|
]
|
||||||
instruction_layouts.append(
|
instructions.append(
|
||||||
InstructionLayout(
|
InstructionFeatures(
|
||||||
address=iaddr,
|
address=iaddr,
|
||||||
|
features=ifeatures,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
basic_block_layouts.append(
|
basic_blocks.append(
|
||||||
BasicBlockLayout(
|
BasicBlockFeatures(
|
||||||
address=bbaddr,
|
address=bbaddr,
|
||||||
instructions=instruction_layouts,
|
features=bbfeatures,
|
||||||
|
instructions=instructions,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
function_layouts.append(
|
function_features.append(
|
||||||
FunctionLayout(
|
FunctionFeatures(
|
||||||
address=faddr,
|
address=faddr,
|
||||||
basic_blocks=basic_block_layouts,
|
features=ffeatures,
|
||||||
|
basic_blocks=basic_blocks,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
layout = Layout(
|
|
||||||
functions=function_layouts,
|
|
||||||
)
|
|
||||||
|
|
||||||
features = Features(
|
features = Features(
|
||||||
global_=global_features,
|
global_=global_features,
|
||||||
file=file_features,
|
file=file_features,
|
||||||
function=list(function_features),
|
functions=function_features,
|
||||||
basic_block=list(basic_block_features),
|
|
||||||
instruction=list(instruction_features),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
freeze = Freeze(
|
freeze = Freeze(
|
||||||
version=2,
|
version=2,
|
||||||
base_address=Address.from_capa(extractor.get_base_address()),
|
base_address=Address.from_capa(extractor.get_base_address()),
|
||||||
layout=layout,
|
|
||||||
features=features,
|
features=features,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -326,37 +316,19 @@ def loads(s: str) -> capa.features.extractors.base_extractor.FeatureExtractor:
|
|||||||
if freeze.version != 2:
|
if freeze.version != 2:
|
||||||
raise ValueError("unsupported freeze format version: %d", freeze.version)
|
raise ValueError("unsupported freeze format version: %d", freeze.version)
|
||||||
|
|
||||||
function_features_by_address: Dict[
|
|
||||||
capa.features.address.Address, List[Tuple[capa.features.address.Address, capa.features.common.Feature]]
|
|
||||||
] = collections.defaultdict(list)
|
|
||||||
for f in freeze.features.function:
|
|
||||||
function_features_by_address[f.function.to_capa()].append((f.address.to_capa(), f.feature.to_capa()))
|
|
||||||
|
|
||||||
basic_block_features_by_address: Dict[
|
|
||||||
capa.features.address.Address, List[Tuple[capa.features.address.Address, capa.features.common.Feature]]
|
|
||||||
] = collections.defaultdict(list)
|
|
||||||
for bb in freeze.features.basic_block:
|
|
||||||
basic_block_features_by_address[bb.basic_block.to_capa()].append((bb.address.to_capa(), bb.feature.to_capa()))
|
|
||||||
|
|
||||||
instruction_features_by_address: Dict[
|
|
||||||
capa.features.address.Address, List[Tuple[capa.features.address.Address, capa.features.common.Feature]]
|
|
||||||
] = collections.defaultdict(list)
|
|
||||||
for i in freeze.features.instruction:
|
|
||||||
instruction_features_by_address[i.instruction.to_capa()].append((i.address.to_capa(), i.feature.to_capa()))
|
|
||||||
|
|
||||||
return null.NullFeatureExtractor(
|
return null.NullFeatureExtractor(
|
||||||
base_address=freeze.base_address.to_capa(),
|
base_address=freeze.base_address.to_capa(),
|
||||||
global_features=[f.feature.to_capa() for f in freeze.features.global_],
|
global_features=[f.feature.to_capa() for f in freeze.features.global_],
|
||||||
file_features=[(f.address.to_capa(), f.feature.to_capa()) for f in freeze.features.file],
|
file_features=[(f.address.to_capa(), f.feature.to_capa()) for f in freeze.features.file],
|
||||||
functions={
|
functions={
|
||||||
f.address.to_capa(): null.FunctionFeatures(
|
f.address.to_capa(): null.FunctionFeatures(
|
||||||
features=function_features_by_address.get(f.address.to_capa(), []),
|
features=[(fe.address.to_capa(), fe.feature.to_capa()) for fe in f.features],
|
||||||
basic_blocks={
|
basic_blocks={
|
||||||
bb.address.to_capa(): null.BasicBlockFeatures(
|
bb.address.to_capa(): null.BasicBlockFeatures(
|
||||||
features=basic_block_features_by_address.get(bb.address.to_capa(), []),
|
features=[(fe.address.to_capa(), fe.feature.to_capa()) for fe in bb.features],
|
||||||
instructions={
|
instructions={
|
||||||
i.address.to_capa(): null.InstructionFeatures(
|
i.address.to_capa(): null.InstructionFeatures(
|
||||||
features=instruction_features_by_address.get(i.address.to_capa(), []),
|
features=[(fe.address.to_capa(), fe.feature.to_capa()) for fe in i.features]
|
||||||
)
|
)
|
||||||
for i in bb.instructions
|
for i in bb.instructions
|
||||||
},
|
},
|
||||||
@@ -364,7 +336,7 @@ def loads(s: str) -> capa.features.extractors.base_extractor.FeatureExtractor:
|
|||||||
for bb in f.basic_blocks
|
for bb in f.basic_blocks
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
for f in freeze.layout.functions
|
for f in freeze.features.functions
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -106,14 +106,14 @@ def compare_extractors(a, b):
|
|||||||
|
|
||||||
for bb in a.get_basic_blocks(f):
|
for bb in a.get_basic_blocks(f):
|
||||||
assert addresses(a.get_instructions(f, bb)) == addresses(b.get_instructions(f, bb))
|
assert addresses(a.get_instructions(f, bb)) == addresses(b.get_instructions(f, bb))
|
||||||
assert list(sorted(set(a.extract_basic_block_features(f, bb)))) == list(sorted(set(b.extract_basic_block_features(f, bb))))
|
assert list(sorted(set(a.extract_basic_block_features(f, bb)))) == list(
|
||||||
|
sorted(set(b.extract_basic_block_features(f, bb)))
|
||||||
|
)
|
||||||
|
|
||||||
for insn in a.get_instructions(f, bb):
|
for insn in a.get_instructions(f, bb):
|
||||||
try:
|
assert list(sorted(set(a.extract_insn_features(f, bb, insn)))) == list(
|
||||||
assert list(sorted(set(a.extract_insn_features(f, bb, insn)))) == list(sorted(set(b.extract_insn_features(f, bb, insn))))
|
sorted(set(b.extract_insn_features(f, bb, insn)))
|
||||||
except Exception as e:
|
)
|
||||||
from IPython import embed; embed()
|
|
||||||
raise
|
|
||||||
|
|
||||||
|
|
||||||
def test_freeze_str_roundtrip():
|
def test_freeze_str_roundtrip():
|
||||||
|
|||||||
Reference in New Issue
Block a user