cli: link rule names to capa rules website (#2338)

* web: rules: redirect from various rule names to canonical rule URL

closes #2319

Update index.html

Co-authored-by: Moritz <mr-tz@users.noreply.github.com>

* cli: link rule names to capa rules website

* just: make `just lint` run all steps, not fail on first error

---------

Co-authored-by: Moritz <mr-tz@users.noreply.github.com>
This commit is contained in:
Willi Ballenthin
2024-08-29 08:56:14 -06:00
committed by GitHub
parent db4798aaf6
commit 729a1a85b7
10 changed files with 217 additions and 111 deletions

View File

@@ -16,5 +16,10 @@
@deptry: @deptry:
pre-commit run deptry --hook-stage manual --all-files pre-commit run deptry --hook-stage manual --all-files
lint: isort black ruff flake8 mypy deptry @lint:
-just isort
-just black
-just ruff
-just flake8
-just mypy
-just deptry

View File

@@ -6,11 +6,11 @@ Unlock powerful malware analysis with capa's new [VMRay sandbox](https://www.vmr
### New Features ### New Features
- regenerate ruleset cache automatically on source change (only in dev mode) #2133 @s-ff - regenerate ruleset cache automatically on source change (only in dev mode) #2133 @s-ff
- add landing page https://mandiant.github.io/capa/ @williballenthin #2310 - add landing page https://mandiant.github.io/capa/ @williballenthin #2310
- add rules website https://mandiant.github.io/capa/rules @DeeyaSingh #2310 - add rules website https://mandiant.github.io/capa/rules @DeeyaSingh #2310
- add .justfile @williballenthin #2325 - add .justfile @williballenthin #2325
- dynamic: add support for VMRay dynamic sandbox traces #2208 @mike-hunhoff @r-sm2024 @mr-tz - dynamic: add support for VMRay dynamic sandbox traces #2208 @mike-hunhoff @r-sm2024 @mr-tz
- cli: use modern terminal features to hyperlink to the rules website #2337 @williballenthin
### Breaking Changes ### Breaking Changes

View File

@@ -6,18 +6,43 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License. # See the License for the specific language governing permissions and limitations under the License.
import io
import collections import collections
import urllib.parse
import tabulate import rich
import rich.table
import rich.console
from rich.console import Console
import capa.render.utils as rutils import capa.render.utils as rutils
import capa.render.result_document as rd import capa.render.result_document as rd
import capa.features.freeze.features as frzf import capa.features.freeze.features as frzf
from capa.rules import RuleSet from capa.rules import RuleSet
from capa.engine import MatchResults from capa.engine import MatchResults
from capa.render.utils import StringIO
tabulate.PRESERVE_WHITESPACE = True
def bold_markup(s) -> str:
    """
    Generate Rich markup for emphasized ("bold") text.

    The emphasis is expressed with a cyan color tag: the value is wrapped
    as ``[cyan]...[/cyan]``. `s` is interpolated as-is, so it may itself
    contain Rich markup (for example, the output of a link helper).

    The resulting string should be passed to a Rich renderable
    and/or printed via Rich or the markup will be visible to the user.

    Args:
        s: the text (or already-marked-up text) to emphasize.

    Returns:
        the Rich markup string.
    """
    return f"[cyan]{s}[/cyan]"
def link_markup(s: str, href: str) -> str:
    """
    Generate Rich markup for a clickable hyperlink.

    This works in many modern terminals.
    When it doesn't work, the fallback is just to show the link name (s),
    as if it was not a link.

    Neither `s` nor `href` is escaped here: both are interpolated verbatim
    into ``[link=href]s[/link]``, so callers are responsible for passing
    values that are safe to embed in Rich markup.

    The resulting string should be passed to a Rich renderable
    and/or printed via Rich or the markup will be visible to the user.

    Args:
        s: the visible link text.
        href: the link target URL.

    Returns:
        the Rich markup string.
    """
    return f"[link={href}]{s}[/link]"
def width(s: str, character_count: int) -> str: def width(s: str, character_count: int) -> str:
@@ -28,11 +53,16 @@ def width(s: str, character_count: int) -> str:
return s return s
def render_meta(doc: rd.ResultDocument, ostream: StringIO): def render_sample_link(hash: str) -> str:
url = "https://www.virustotal.com/gui/file/" + hash
return link_markup(hash, url)
def render_meta(doc: rd.ResultDocument, console: Console):
rows = [ rows = [
(width("md5", 22), width(doc.meta.sample.md5, 82)), ("md5", render_sample_link(doc.meta.sample.md5)),
("sha1", doc.meta.sample.sha1), ("sha1", render_sample_link(doc.meta.sample.sha1)),
("sha256", doc.meta.sample.sha256), ("sha256", render_sample_link(doc.meta.sample.sha256)),
("analysis", doc.meta.flavor.value), ("analysis", doc.meta.flavor.value),
("os", doc.meta.analysis.os), ("os", doc.meta.analysis.os),
("format", doc.meta.analysis.format), ("format", doc.meta.analysis.format),
@@ -40,8 +70,14 @@ def render_meta(doc: rd.ResultDocument, ostream: StringIO):
("path", doc.meta.sample.path), ("path", doc.meta.sample.path),
] ]
ostream.write(tabulate.tabulate(rows, tablefmt="mixed_outline")) table = rich.table.Table(show_header=False, min_width=100)
ostream.write("\n") table.add_column()
table.add_column()
for row in rows:
table.add_row(*row)
console.print(table)
def find_subrule_matches(doc: rd.ResultDocument): def find_subrule_matches(doc: rd.ResultDocument):
@@ -71,7 +107,12 @@ def find_subrule_matches(doc: rd.ResultDocument):
return matches return matches
def render_capabilities(doc: rd.ResultDocument, ostream: StringIO): def render_rule_name(name: str) -> str:
url = f"https://mandiant.github.io/capa/rules/{urllib.parse.quote(name)}/"
return bold_markup(link_markup(name, url))
def render_capabilities(doc: rd.ResultDocument, console: Console):
""" """
example:: example::
@@ -95,25 +136,30 @@ def render_capabilities(doc: rd.ResultDocument, ostream: StringIO):
count = len(rule.matches) count = len(rule.matches)
if count == 1: if count == 1:
capability = rutils.bold(rule.meta.name) capability = render_rule_name(rule.meta.name)
else: else:
capability = f"{rutils.bold(rule.meta.name)} ({count} matches)" capability = render_rule_name(rule.meta.name) + f" ({count} matches)"
rows.append((capability, rule.meta.namespace)) rows.append((capability, rule.meta.namespace))
if rows: if rows:
ostream.write( table = rich.table.Table(min_width=100)
tabulate.tabulate( table.add_column(width("Capability", 20))
rows, table.add_column("Namespace")
headers=[width("Capability", 50), width("Namespace", 50)],
tablefmt="mixed_outline", for row in rows:
) table.add_row(*row)
)
ostream.write("\n") console.print(table)
else: else:
ostream.writeln(rutils.bold("no capabilities found")) console.print(bold_markup("no capabilities found"))
def render_attack(doc: rd.ResultDocument, ostream: StringIO): def render_attack_link(id: str) -> str:
url = f"https://attack.mitre.org/techniques/{id.replace('.', '/')}/"
return rf"\[{link_markup(id, url)}]"
def render_attack(doc: rd.ResultDocument, console: Console):
""" """
example:: example::
@@ -132,35 +178,36 @@ def render_attack(doc: rd.ResultDocument, ostream: StringIO):
tactics = collections.defaultdict(set) tactics = collections.defaultdict(set)
for rule in rutils.capability_rules(doc): for rule in rutils.capability_rules(doc):
for attack in rule.meta.attack: for attack in rule.meta.attack:
tactics[attack.tactic].add((attack.technique, attack.subtechnique, attack.id)) tactics[attack.tactic].add((attack.technique, attack.subtechnique, attack.id.strip("[").strip("]")))
rows = [] rows = []
for tactic, techniques in sorted(tactics.items()): for tactic, techniques in sorted(tactics.items()):
inner_rows = [] inner_rows = []
for technique, subtechnique, id in sorted(techniques): for technique, subtechnique, id in sorted(techniques):
if not subtechnique: if not subtechnique:
inner_rows.append(f"{rutils.bold(technique)} {id}") # example: File and Directory Discovery [T1083]
inner_rows.append(f"{bold_markup(technique)} {render_attack_link(id)}")
else: else:
inner_rows.append(f"{rutils.bold(technique)}::{subtechnique} {id}") # example: Code Discovery::Enumerate PE Sections [T1084.001]
rows.append( inner_rows.append(f"{bold_markup(technique)}::{subtechnique} {render_attack_link(id)}")
(
rutils.bold(tactic.upper()), tactic = bold_markup(tactic.upper())
"\n".join(inner_rows), technique = "\n".join(inner_rows)
)
) rows.append((tactic, technique))
if rows: if rows:
ostream.write( table = rich.table.Table(min_width=100)
tabulate.tabulate( table.add_column(width("ATT&CK Tactic", 20))
rows, table.add_column("ATT&CK Technique")
headers=[width("ATT&CK Tactic", 20), width("ATT&CK Technique", 80)],
tablefmt="mixed_grid", for row in rows:
) table.add_row(*row)
)
ostream.write("\n") console.print(table)
def render_maec(doc: rd.ResultDocument, ostream: StringIO): def render_maec(doc: rd.ResultDocument, console: Console):
""" """
example:: example::
@@ -193,20 +240,37 @@ def render_maec(doc: rd.ResultDocument, ostream: StringIO):
for category in sorted(maec_categories): for category in sorted(maec_categories):
values = maec_table.get(category, set()) values = maec_table.get(category, set())
if values: if values:
rows.append((rutils.bold(category.replace("_", "-")), "\n".join(sorted(values)))) rows.append((bold_markup(category.replace("_", "-")), "\n".join(sorted(values))))
if rows: if rows:
ostream.write( table = rich.table.Table(min_width=100)
tabulate.tabulate( table.add_column(width("MAEC Category", 20))
rows, table.add_column("MAEC Value")
headers=[width("MAEC Category", 25), width("MAEC Value", 75)],
tablefmt="mixed_grid", for row in rows:
) table.add_row(*row)
)
ostream.write("\n") console.print(table)
def render_mbc(doc: rd.ResultDocument, ostream: StringIO): def render_mbc_link(id: str, objective: str, behavior: str) -> str:
if id[0] in {"B", "T", "E", "F"}:
# behavior
base_url = "https://github.com/MBCProject/mbc-markdown/blob/main"
elif id[0] == "C":
# micro-behavior
base_url = "https://github.com/MBCProject/mbc-markdown/blob/main/micro-behaviors"
else:
raise ValueError("unexpected MBC prefix")
objective_fragment = objective.lower().replace(" ", "-")
behavior_fragment = behavior.lower().replace(" ", "-")
url = f"{base_url}/{objective_fragment}/{behavior_fragment}.md"
return rf"\[{link_markup(id, url)}]"
def render_mbc(doc: rd.ResultDocument, console: Console):
""" """
example:: example::
@@ -223,48 +287,48 @@ def render_mbc(doc: rd.ResultDocument, ostream: StringIO):
objectives = collections.defaultdict(set) objectives = collections.defaultdict(set)
for rule in rutils.capability_rules(doc): for rule in rutils.capability_rules(doc):
for mbc in rule.meta.mbc: for mbc in rule.meta.mbc:
objectives[mbc.objective].add((mbc.behavior, mbc.method, mbc.id)) objectives[mbc.objective].add((mbc.behavior, mbc.method, mbc.id.strip("[").strip("]")))
rows = [] rows = []
for objective, behaviors in sorted(objectives.items()): for objective, behaviors in sorted(objectives.items()):
inner_rows = [] inner_rows = []
for behavior, method, id in sorted(behaviors): for technique, subtechnique, id in sorted(behaviors):
if not method: if not subtechnique:
inner_rows.append(f"{rutils.bold(behavior)} [{id}]") # example: File and Directory Discovery [T1083]
inner_rows.append(f"{bold_markup(technique)} {render_mbc_link(id, objective, technique)}")
else: else:
inner_rows.append(f"{rutils.bold(behavior)}::{method} [{id}]") # example: Code Discovery::Enumerate PE Sections [T1084.001]
rows.append( inner_rows.append(
( f"{bold_markup(technique)}::{subtechnique} {render_mbc_link(id, objective, technique)}"
rutils.bold(objective.upper()), )
"\n".join(inner_rows),
) objective = bold_markup(objective.upper())
) technique = "\n".join(inner_rows)
rows.append((objective, technique))
if rows: if rows:
ostream.write( table = rich.table.Table(min_width=100)
tabulate.tabulate( table.add_column(width("MBC Objective", 20))
rows, table.add_column("MBC Behavior")
headers=[width("MBC Objective", 25), width("MBC Behavior", 75)],
tablefmt="mixed_grid", for row in rows:
) table.add_row(*row)
)
ostream.write("\n") console.print(table)
def render_default(doc: rd.ResultDocument): def render_default(doc: rd.ResultDocument):
ostream = rutils.StringIO() f = io.StringIO()
console = rich.console.Console()
render_meta(doc, ostream) render_meta(doc, console)
ostream.write("\n") render_attack(doc, console)
render_attack(doc, ostream) render_maec(doc, console)
ostream.write("\n") render_mbc(doc, console)
render_maec(doc, ostream) render_capabilities(doc, console)
ostream.write("\n")
render_mbc(doc, ostream)
ostream.write("\n")
render_capabilities(doc, ostream)
return ostream.getvalue() return f.getvalue()
def render(meta, rules: RuleSet, capabilities: MatchResults) -> str: def render(meta, rules: RuleSet, capabilities: MatchResults) -> str:

View File

@@ -5,10 +5,12 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License # Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License. # See the License for the specific language governing permissions and limitations under the License.
import io
import textwrap import textwrap
from unittest.mock import Mock from unittest.mock import Mock
import fixtures import fixtures
import rich.console
import capa.rules import capa.rules
import capa.render.utils import capa.render.utils
@@ -151,9 +153,10 @@ def test_render_meta_maec():
mock_rd.rules = {"test rule": rm} mock_rd.rules = {"test rule": rm}
# capture the output of render_maec # capture the output of render_maec
output_stream = capa.render.utils.StringIO() f = io.StringIO()
capa.render.default.render_maec(mock_rd, output_stream) console = rich.console.Console(file=f)
output = output_stream.getvalue() capa.render.default.render_maec(mock_rd, console)
output = f.getvalue()
assert "analysis-conclusion" in output assert "analysis-conclusion" in output
assert analysis_conclusion in output assert analysis_conclusion in output

View File

@@ -235,7 +235,7 @@ const contextMenuItems = computed(() => [
label: "View rule in capa-rules", label: "View rule in capa-rules",
icon: "pi pi-external-link", icon: "pi pi-external-link",
target: "_blank", target: "_blank",
url: createCapaRulesUrl(selectedNode.value, props.data.meta.version) url: createCapaRulesUrl(selectedNode.value)
}, },
{ {
label: "Lookup rule in VirusTotal", label: "Lookup rule in VirusTotal",

View File

@@ -62,9 +62,8 @@ export function createATTACKHref(attack) {
*/ */
export function createCapaRulesUrl(node, tag) { export function createCapaRulesUrl(node, tag) {
if (!node || !node.data || !tag) return null; if (!node || !node.data || !tag) return null;
const namespace = node.data.namespace || "lib";
const ruleName = node.data.name.toLowerCase().replace(/\s+/g, "-"); const ruleName = node.data.name.toLowerCase().replace(/\s+/g, "-");
return `https://github.com/mandiant/capa-rules/blob/v${tag}/${namespace}/${ruleName}.yml`; return `https://mandiant.github.io/capa/rules/${ruleName}/`;
} }
/** /**

View File

@@ -200,14 +200,14 @@
<!-- TODO(williballenthin): add date --> <!-- TODO(williballenthin): add date -->
<li> <li>
added: added:
<a href="./rules/overwrite-dll-text-section-to-remove-hooks.html"> <a href="./rules/overwrite DLL .text section to remove hooks/">
overwrite DLL .text section to remove hooks overwrite DLL .text section to remove hooks
</a> </a>
</li> </li>
<li> <li>
added: added:
<a href="./rules/attach-bpf-to-socket-on-linux.html"> <a href="./rules/attach BPF to socket on Linux/">
attach BPF to socket on Linux attach BPF to socket on Linux
</a> </a>
</li> </li>

View File

@@ -10,3 +10,4 @@ file_modification_dates.txt
public/*.html public/*.html
public/pagefind/ public/pagefind/
public/index.html public/index.html
public/

View File

@@ -259,7 +259,6 @@ def generate_html(categories_data, color_map):
for card in cards_data: for card in cards_data:
first_word = get_first_word(card["namespace"]) first_word = get_first_word(card["namespace"])
rectangle_color = color_map[first_word] rectangle_color = color_map[first_word]
file_name = card["filename"].rpartition(".yml")[0]
card_html = f""" card_html = f"""
<div class="card-wrapper"> <div class="card-wrapper">
@@ -267,7 +266,7 @@ def generate_html(categories_data, color_map):
<div class="thin-rectangle" style="background-color: {rectangle_color};"></div> <div class="thin-rectangle" style="background-color: {rectangle_color};"></div>
<div class="card-body"> <div class="card-body">
<div class="namespace">{card['namespace']}</div> <div class="namespace">{card['namespace']}</div>
<div class="rule-name"><a href="./{file_name}.html">{card['name']}</a></div> <div class="rule-name"><a href="./{card['name']}/">{card['name']}</a></div>
<div class="authors">{', '.join(card['authors'])}</div> <div class="authors">{', '.join(card['authors'])}</div>
</div> </div>
</div> </div>

View File

@@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations
import os import os
import sys import sys
import logging
import urllib.parse import urllib.parse
from glob import glob from glob import glob
from pathlib import Path from pathlib import Path
@@ -20,6 +21,9 @@ from pygments.formatters import HtmlFormatter
import capa.rules import capa.rules
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
input_directory = Path(sys.argv[1]) input_directory = Path(sys.argv[1])
txt_file_path = Path(sys.argv[2]) txt_file_path = Path(sys.argv[2])
output_directory = Path(sys.argv[3]) output_directory = Path(sys.argv[3])
@@ -29,13 +33,13 @@ assert txt_file_path.exists(), "file-modification txt file must exist"
assert output_directory.exists(), "output directory must exist" assert output_directory.exists(), "output directory must exist"
def convert_yaml_to_html(timestamps, yaml_file: Path, output_dir: Path): def render_rule(timestamps, path: Path) -> str:
rule_content = yaml_file.read_text(encoding="utf-8") rule_content = path.read_text(encoding="utf-8")
rule = capa.rules.Rule.from_yaml(rule_content, use_ruamel=True) rule = capa.rules.Rule.from_yaml(rule_content, use_ruamel=True)
filename = os.path.basename(yaml_file).rpartition(".yml")[0] filename = path.with_suffix("").name
namespace = rule.meta.get("namespace", "") namespace = rule.meta.get("namespace", "")
timestamp = timestamps[yaml_file.as_posix()] timestamp = timestamps[path.as_posix()]
rendered_rule = pygments.highlight( rendered_rule = pygments.highlight(
rule_content, rule_content,
@@ -53,7 +57,7 @@ def convert_yaml_to_html(timestamps, yaml_file: Path, output_dir: Path):
vt_fragment = urllib.parse.quote(urllib.parse.quote(vt_query)) vt_fragment = urllib.parse.quote(urllib.parse.quote(vt_query))
vt_link = f"https://www.virustotal.com/gui/search/{vt_fragment}/files" vt_link = f"https://www.virustotal.com/gui/search/{vt_fragment}/files"
ns_query = f'"namespace: {namespace} "' ns_query = f'"namespace: {namespace} "'
ns_link = f"./?{urllib.parse.urlencode({'q': ns_query})}" ns_link = f"../?{urllib.parse.urlencode({'q': ns_query})}"
html_content = f""" html_content = f"""
<!DOCTYPE html> <!DOCTYPE html>
@@ -62,12 +66,12 @@ def convert_yaml_to_html(timestamps, yaml_file: Path, output_dir: Path):
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{rule.name}</title> <title>{rule.name}</title>
<link rel="icon" href="./img/favicon.ico" type="image/x-icon"> <link rel="icon" href="../img/favicon.ico" type="image/x-icon">
<link rel="stylesheet" href="./css/bootstrap-5.3.3.min.css"> <link rel="stylesheet" href="../css/bootstrap-5.3.3.min.css">
<script src="./js/jquery-3.5.1.slim.min.js"></script> <script src="../js/jquery-3.5.1.slim.min.js"></script>
<script src="./js/bootstrap-5.3.3.bundle.min.js"></script> <script src="../js/bootstrap-5.3.3.bundle.min.js"></script>
<script defer src="https://cloud.umami.is/script.js" data-website-id="0bb8ff9e-fbcc-4ee2-9f9f-b337a2e8cc7f"></script> <script src="https://cloud.umami.is/script.js" defer data-website-id="0bb8ff9e-fbcc-4ee2-9f9f-b337a2e8cc7f"></script>
<link rel="stylesheet" type="text/css" href="./css/style.css"> <link rel="stylesheet" type="text/css" href="../css/style.css">
<style> <style>
.rule-content .highlight pre {{ .rule-content .highlight pre {{
overflow: visible; overflow: visible;
@@ -81,7 +85,7 @@ def convert_yaml_to_html(timestamps, yaml_file: Path, output_dir: Path):
box-shadow: 0 0.5rem 1rem rgba(0,0,0,0.05),inset 0 -1px 0 rgba(0,0,0,0.15);" box-shadow: 0 0.5rem 1rem rgba(0,0,0,0.05),inset 0 -1px 0 rgba(0,0,0,0.15);"
> >
<a href="/capa/" class="d-flex align-items-center mb-3 mb-md-0 me-md-auto"> <a href="/capa/" class="d-flex align-items-center mb-3 mb-md-0 me-md-auto">
<img src="./img/logo.png" height=48 /> <img src="../img/logo.png" height=48 />
</a> </a>
<ul class="nav nav-pills"> <ul class="nav nav-pills">
@@ -115,9 +119,7 @@ def convert_yaml_to_html(timestamps, yaml_file: Path, output_dir: Path):
</html> </html>
""" """
output_dir.mkdir(parents=True, exist_ok=True) return html_content
output_file_path = output_dir / (filename + ".html")
output_file_path.write_text(html_content, encoding="utf-8")
yaml_files = glob(os.path.join(input_directory, "**/*.yml"), recursive=True) yaml_files = glob(os.path.join(input_directory, "**/*.yml"), recursive=True)
@@ -129,8 +131,41 @@ for line in txt_file_path.read_text(encoding="utf-8").splitlines():
if line.startswith("==="): if line.startswith("==="):
continue continue
path, _, timestamp = line.partition(" ") filepath, _, timestamp = line.partition(" ")
timestamps[path] = timestamp timestamps[filepath] = timestamp
for yaml_file in yaml_files: for yaml_file in yaml_files:
convert_yaml_to_html(timestamps, Path(yaml_file), output_directory) path = Path(yaml_file)
rule_content = path.read_text(encoding="utf-8")
html_content = render_rule(timestamps, path)
rule = capa.rules.Rule.from_yaml(path.read_text(encoding="utf-8"), use_ruamel=True)
# like: rules/create file/index.html
#
# which looks like the URL fragments:
#
# rules/create%20file/index.html
# rules/create%20file/
# rules/create file/
html_path = output_directory / rule.name / "index.html"
html_path.parent.mkdir(parents=True, exist_ok=True)
html_path.write_text(html_content, encoding="utf-8")
logger.info("wrote: %s", html_path)
# like: create-file
rule_id = path.with_suffix("").name
# like: rules/create-file/index.html
#
# which looks like the URL fragments:
#
# rules/create-file/index.html
# rules/create-file/
#
# and redirects, via meta refresh, to the canonical path above.
# since we don't control the GH Pages web server, we can't use HTTP redirects.
id_path = output_directory / rule_id / "index.html"
id_path.parent.mkdir(parents=True, exist_ok=True)
redirect = f"""<html><head><meta http-equiv="refresh" content="0; url=../{rule.name}/"></head></html>"""
id_path.write_text(redirect, encoding="utf-8")
logger.info("wrote: %s", id_path)