From d6fa832d83f90da4c507d8e24c9d46e46e0cb3fe Mon Sep 17 00:00:00 2001
From: Yacine Elhamer
Date: Mon, 19 Jun 2023 13:50:46 +0100
Subject: [PATCH] cape: move get_processes() method to file scope

---
Review notes (this section is ignored by `git am`):
 - file.py: the nested generator rec() now recurses with
   `yield from rec(child)`. A bare `rec(child)` statement only creates a
   generator object and discards it, so child processes were never yielded.
 - NOTE(review): get_processes() indexes static["processtree"] as a single
   process dict with "name", "parent_id", "pid" and "children" keys. Confirm
   this against a real report; if "processtree" is a list of root processes,
   it has to be iterated instead.
 - The blob hash on file.py's `index` line predates the rec() fix.

 capa/features/extractors/cape/extractor.py |  7 ++-----
 capa/features/extractors/cape/file.py      | 14 ++++++++++++++
 capa/features/extractors/cape/process.py   |  3 +--
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/capa/features/extractors/cape/extractor.py b/capa/features/extractors/cape/extractor.py
index fd5bcafd..01836fee 100644
--- a/capa/features/extractors/cape/extractor.py
+++ b/capa/features/extractors/cape/extractor.py
@@ -5,7 +5,6 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-
 import logging
 from typing import Dict, Tuple, Iterator
 
@@ -35,7 +34,7 @@ class CapeExtractor(DynamicExtractor):
         yield from capa.features.extractors.cape.file.extract_features(self.static)
 
     def get_processes(self) -> Iterator[ProcessHandle]:
-        yield from capa.features.extractors.cape.process.get_processes(self.behavior)
+        yield from capa.features.extractors.cape.file.get_processes(self.behavior)
 
     def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
         yield from capa.features.extractors.cape.process.extract_features(self.behavior, ph)
@@ -48,14 +47,12 @@ class CapeExtractor(DynamicExtractor):
 
     @classmethod
     def from_report(cls, report: Dict) -> "DynamicExtractor":
-        # todo:
-        # 1. make the information extraction code more elegant
-        # 2. filter out redundant cape features in an efficient way
         static = report["static"]
         format_ = list(static.keys())[0]
         static = static[format_]
         static.update(report["target"])
         static.update(report["behavior"].pop("summary"))
+        static.update({"processtree": report["behavior"]["processtree"]})
         static.update({"strings": report["strings"]})
         static.update({"format": format_})
 
diff --git a/capa/features/extractors/cape/file.py b/capa/features/extractors/cape/file.py
index b6f60b3b..12caad2b 100644
--- a/capa/features/extractors/cape/file.py
+++ b/capa/features/extractors/cape/file.py
@@ -12,10 +12,24 @@ from typing import Any, Dict, List, Tuple, Iterator
 from capa.features.file import Export, Import, Section, FunctionName
 from capa.features.common import String, Feature
 from capa.features.address import NO_ADDRESS, Address, AbsoluteVirtualAddress
+from capa.features.extractors.base_extractor import ProcessHandle
 
 logger = logging.getLogger(__name__)
 
 
+def get_processes(static: Dict) -> Iterator[ProcessHandle]:
+    """
+    get all the created processes for a sample
+    """
+    def rec(process):
+        inner: Dict[str, str] = {"name": process["name"], "ppid": process["parent_id"]}
+        yield ProcessHandle(pid=process["pid"], inner=inner)
+        for child in process["children"]:
+            yield from rec(child)
+
+    yield from rec(static["processtree"])
+
+
 def extract_import_names(static: Dict) -> Iterator[Tuple[Feature, Address]]:
     """
     extract the names of imported library files, for example: USER32.dll
diff --git a/capa/features/extractors/cape/process.py b/capa/features/extractors/cape/process.py
index d36dae40..efb11299 100644
--- a/capa/features/extractors/cape/process.py
+++ b/capa/features/extractors/cape/process.py
@@ -5,7 +5,6 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-
 import logging
 from typing import Any, Dict, List, Tuple, Iterator
 
@@ -66,4 +65,4 @@ def extract_features(behavior: Dict, ph: ProcessHandle) -> Iterator[Tuple[Featur
             yield feature, addr
 
 
-PROCESS_HANDLERS = extract_environ_strings
+PROCESS_HANDLERS = (extract_environ_strings,)