mirror of
https://github.com/mandiant/capa.git
synced 2025-12-05 20:40:05 -08:00
add lint for duplicate feature under a statement (#2573)
* add lint for duplicate feature under a statement * add support for more scopes * fix format for duplicate feature lint * fix false positives for duplicate features lint * remove unused code and comments Signed-off-by: vibhatsu <maulikbarot2915@gmail.com> * refactor duplicate feature lint to use yaml parser Signed-off-by: vibhatsu <maulikbarot2915@gmail.com> * update CHANGELOG Signed-off-by: vibhatsu <maulikbarot2915@gmail.com> * clarify for using rule definition Signed-off-by: vibhatsu <maulikbarot2915@gmail.com> * update CHANGELOG Signed-off-by: vibhatsu <maulikbarot2915@gmail.com> * refactor duplicate feature lint to improve key generation and tracking of line numbers Signed-off-by: vibhatsu <maulikbarot2915@gmail.com> --------- Signed-off-by: vibhatsu <maulikbarot2915@gmail.com>
This commit is contained in:
@@ -3,7 +3,7 @@
|
||||
## master (unreleased)
|
||||
|
||||
### New Features
|
||||
|
||||
- add lint for detecting duplicate features in capa-rules #2250 @v1bh475u
|
||||
- add span-of-calls scope to match features against a across a sliding window of API calls within a thread @williballenthin #2532
|
||||
- add lint to catch rules that depend on other rules with impossible scope @williballenthin #2124
|
||||
|
||||
|
||||
@@ -33,6 +33,7 @@ import logging
|
||||
import argparse
|
||||
import itertools
|
||||
import posixpath
|
||||
from typing import Any, Dict, List
|
||||
from pathlib import Path
|
||||
from dataclasses import field, dataclass
|
||||
|
||||
@@ -597,6 +598,88 @@ class OptionalNotUnderAnd(Lint):
|
||||
return self.violation
|
||||
|
||||
|
||||
class DuplicateFeatureUnderStatement(Lint):
|
||||
name = "rule contains a duplicate features"
|
||||
recommendation = "remove the duplicate features"
|
||||
recommendation_template = '\n\tduplicate line: "{:s}"\t: line numbers: {:s}'
|
||||
violation = False
|
||||
|
||||
def check_rule(self, ctx: Context, rule: Rule) -> bool:
|
||||
self.violation = False
|
||||
self.recommendation = ""
|
||||
STATEMENTS = frozenset(
|
||||
{
|
||||
"or",
|
||||
"and",
|
||||
"not",
|
||||
"optional",
|
||||
"some",
|
||||
"basic block",
|
||||
"function",
|
||||
"instruction",
|
||||
"call",
|
||||
" or more",
|
||||
}
|
||||
)
|
||||
# rule.statement discards the duplicate features by default so
|
||||
# need to use the rule definition to check for duplicates
|
||||
data = rule._get_ruamel_yaml_parser().load(rule.definition)
|
||||
|
||||
def get_line_number(line: Dict[str, Any]) -> int:
|
||||
lc = getattr(line, "lc", None)
|
||||
if lc and hasattr(lc, "line"):
|
||||
return lc.line + 1
|
||||
return 0
|
||||
|
||||
def is_statement(key: str) -> bool:
|
||||
# to generalize the check for 'n or more' statements
|
||||
return any(statement in key for statement in STATEMENTS)
|
||||
|
||||
def get_feature_key(feature_dict: Dict[str, Any]) -> str:
|
||||
# need this for generating key for multi-lined feature
|
||||
# for example, - string: /dbghelp\.dll/i
|
||||
# description: WindBG
|
||||
parts = []
|
||||
for key, value in list(feature_dict.items()):
|
||||
parts.append(f"{key}: {value}")
|
||||
return "- " + ", ".join(parts)
|
||||
|
||||
def find_duplicates(features: List[Any]) -> None:
|
||||
if not isinstance(features, list):
|
||||
return
|
||||
|
||||
seen_features: Dict[str, List[int]] = {}
|
||||
for item in features:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
|
||||
if any(is_statement(key) for key in item.keys()):
|
||||
for key, value in item.items():
|
||||
if is_statement(key):
|
||||
# recursively check nested features
|
||||
find_duplicates(value)
|
||||
continue
|
||||
|
||||
feature_key = get_feature_key(item)
|
||||
line_num = get_line_number(item)
|
||||
if feature_key in seen_features:
|
||||
self.violation = True
|
||||
seen_features[feature_key].append(line_num)
|
||||
else:
|
||||
seen_features[feature_key] = [line_num]
|
||||
for feature_key, line_numbers in seen_features.items():
|
||||
if len(line_numbers) > 1:
|
||||
sorted_lines = sorted(line_numbers)
|
||||
self.recommendation += self.recommendation_template.format(
|
||||
feature_key, ", ".join(str(line) for line in sorted_lines)
|
||||
)
|
||||
|
||||
features = data["rule"].get("features", [])
|
||||
find_duplicates(features)
|
||||
|
||||
return self.violation
|
||||
|
||||
|
||||
class UnusualMetaField(Lint):
|
||||
name = "unusual meta field"
|
||||
recommendation = "Remove the meta field"
|
||||
@@ -916,6 +999,7 @@ LOGIC_LINTS = (
|
||||
OrStatementWithAlwaysTrueChild(),
|
||||
NotNotUnderAnd(),
|
||||
OptionalNotUnderAnd(),
|
||||
DuplicateFeatureUnderStatement(),
|
||||
RuleDependencyScopeMismatch(),
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user