Files
Willi Ballenthin c8e277e8b4 mapa: html-map show function neighborhood
Fix html tooltip newline escaping
2026-03-17 09:58:30 +01:00

654 lines
23 KiB
Python

from __future__ import annotations
from io import StringIO
import pytest
from rich.console import Console
from rich.theme import Theme
from mapa.assemblage import load_assemblage_records
from mapa.cli import (
build_parser,
open_html_report,
validate_output_options,
write_temp_html_report,
)
from mapa.html_renderer import render_html_map
from mapa.model import (
AssemblageRecord,
MapaCall,
MapaCaller,
MapaFunction,
MapaLibrary,
MapaMeta,
MapaProgramString,
MapaReport,
MapaSection,
MapaString,
)
from mapa.renderer import render_report
class TestAssemblageRecord:
    """Checks on AssemblageRecord construction and its ``source_path`` value."""

    def test_from_csv_row(self):
        """A CSV row is expected to yield a lowercased hash, parsed RVAs,
        addresses offset by the base address, and a source path without the
        trailing parenthesised suffix.
        """
        base = 0x400000
        csv_row = dict(
            hash="ABC123",
            name="foo",
            start="0x1000",
            end="0x1010",
            source_file="src/main.c (line 42)",
        )

        parsed = AssemblageRecord.from_csv_row(csv_row, base_address=base)

        assert parsed.sha256 == "abc123"
        assert (parsed.start_rva, parsed.end_rva) == (0x1000, 0x1010)
        assert (parsed.address, parsed.end_address) == (0x401000, 0x401010)
        assert parsed.source_path == "src/main.c"

    def test_source_path_without_suffix(self):
        """A source_file with no parenthesised suffix is returned unchanged."""
        fields = {
            "sha256": "abc123",
            "name": "bar",
            "start_rva": 0,
            "end_rva": 0,
            "address": 0x400000,
            "end_address": 0x400010,
            "source_file": "src/bar.c",
        }

        plain = AssemblageRecord(**fields)

        assert plain.source_path == "src/bar.c"
class TestAssemblageLoader:
    """Tests for load_assemblage_records reading an Assemblage CSV export."""

    def test_load_filters_by_sha256_converts_rva_and_dedupes(self, tmp_path):
        """Only rows for the requested hash are kept, exact duplicate rows are
        dropped, and results are keyed by base_address + start RVA.
        """
        csv_path = tmp_path / "assemblage.csv"
        csv_path.write_text(
            "file_name,path,hash,name,start,end,source_file\n"
            "sample.exe,01/sample.exe,abc123,foo,4096,4112,src/foo.c (MD5: 11)\n"
            "sample.exe,01/sample.exe,abc123,foo,4096,4112,src/foo.c (MD5: 11)\n"
            "sample.exe,01/sample.exe,abc123,foo_alias,4096,4112,src/foo_alias.c (MD5: 22)\n"
            "sample.exe,01/sample.exe,def456,skip,4096,4112,src/skip.c (MD5: 33)\n"
            "sample.exe,01/sample.exe,abc123,bar,8192,8208,src/bar.c (MD5: 44)\n",
            encoding="utf-8",
        )

        # The sample hash is passed upper-case while the CSV stores it
        # lower-case, so the lookup is expected to be case-insensitive.
        records = load_assemblage_records(
            csv_path, sample_sha256="ABC123", base_address=0x400000
        )

        assert sorted(records) == [0x401000, 0x402000]
        assert [record.name for record in records[0x401000]] == ["foo", "foo_alias"]
        assert [record.source_path for record in records[0x401000]] == [
            "src/foo.c",
            "src/foo_alias.c",
        ]
        assert records[0x402000][0].name == "bar"
        assert records[0x402000][0].address == 0x402000

    def test_load_requires_sha256(self, tmp_path):
        """An empty sample hash must raise a ValueError that mentions sha256."""
        csv_path = tmp_path / "assemblage.csv"
        csv_path.write_text(
            "file_name,path,hash,name,start,end,source_file\n",
            encoding="utf-8",
        )

        # pytest.raises fails the test when nothing is raised, and ``(?i)``
        # keeps the message check case-insensitive.
        with pytest.raises(ValueError, match="(?i)sha256"):
            load_assemblage_records(csv_path, sample_sha256="", base_address=0x400000)
class TestRenderer:
    """Text-renderer tests: each case renders a small MapaReport into an
    in-memory buffer and checks the emitted text for expected fragments.
    """

    @staticmethod
    def _make_console() -> tuple[Console, StringIO]:
        """Return a colourless, 120-column console and the buffer it writes to."""
        sink = StringIO()
        styles = {
            "decoration": "grey54",
            "title": "yellow",
            "key": "black",
            "value": "blue",
            "default": "black",
        }
        console = Console(
            theme=Theme(styles, inherit=False),
            markup=False,
            emoji=False,
            file=sink,
            force_terminal=False,
            width=120,
            no_color=True,
        )
        return console, sink

    @staticmethod
    def _make_assemblage_record(
        name: str, source_file: str, address: int = 0x1000
    ) -> AssemblageRecord:
        """Build a 0x10-byte record whose RVA and absolute address coincide."""
        end = address + 0x10
        return AssemblageRecord(
            sha256="abc123",
            name=name,
            start_rva=address,
            end_rva=end,
            address=address,
            end_address=end,
            source_file=source_file,
        )

    @staticmethod
    def _minimal_report(**parts) -> MapaReport:
        """Build a report with placeholder metadata plus the given sections."""
        return MapaReport(meta=MapaMeta(name="t", sha256="s"), **parts)

    def _render(self, report: MapaReport) -> str:
        """Render *report* through a fresh test console and return the text."""
        console, sink = self._make_console()
        render_report(report, console)
        return sink.getvalue()

    def test_meta_section(self):
        """Name, hash and architecture from the metadata appear in the output."""
        meta = MapaMeta(
            name="test.exe",
            sha256="abc123",
            arch="x86_64",
            timestamp="2025-01-01T00:00:00",
        )
        output = self._render(MapaReport(meta=meta))
        for expected in ("test.exe", "abc123", "x86_64"):
            assert expected in output

    def test_sections_rendered(self):
        """A section's address, permissions and size are all printed."""
        section = MapaSection(address=0x1000, size=0x2000, perms="r-x")
        output = self._render(self._minimal_report(sections=[section]))
        for expected in ("0x1000", "r-x", "0x2000"):
            assert expected in output

    def test_libraries_rendered(self):
        """An imported library's name is printed."""
        report = self._minimal_report(libraries=[MapaLibrary(name="KERNEL32.dll")])
        assert "KERNEL32.dll" in self._render(report)

    def test_empty_libraries(self):
        """A report without libraries prints the "(none)" placeholder."""
        assert "(none)" in self._render(self._minimal_report())

    def test_thunk_function_with_assemblage(self):
        """A thunk is labelled as such and still shows its assemblage data."""
        thunk = MapaFunction(
            address=0x1000,
            name="jmp_CreateFile",
            is_thunk=True,
            assemblage_records=[
                self._make_assemblage_record("source_thunk", "src/thunk.c (MD5: 11)")
            ],
        )
        output = self._render(self._minimal_report(functions=[thunk]))
        for expected in (
            "thunk",
            "jmp_CreateFile",
            "assemblage name: source_thunk",
            "assemblage file: src/thunk.c",
        ):
            assert expected in output

    def test_function_with_calls_strings_and_assemblage(self):
        """A fully populated function prints every kind of detail line."""
        main = MapaFunction(
            address=0x1000,
            name="main",
            num_basic_blocks=3,
            num_edges=4,
            num_instructions=10,
            total_instruction_bytes=42,
            callers=[MapaCaller(name="start", address=0x500, delta=-1, direction="")],
            calls=[
                MapaCall(
                    name="helper",
                    address=0x2000,
                    is_api=False,
                    delta=1,
                    direction="",
                )
            ],
            apis=[MapaCall(name="CreateFileW", address=0x3000, is_api=True)],
            strings=[MapaString(value="Hello World", address=0x4000)],
            capa_matches=["write file"],
            assemblage_records=[
                self._make_assemblage_record("source_main", "src/main.c (MD5: 11)")
            ],
        )
        output = self._render(self._minimal_report(functions=[main]))
        for expected in (
            "function",
            "main",
            "assemblage name: source_main",
            "assemblage file: src/main.c",
            "3 / 4 / 10 (42 bytes)",
            "xref:",
            "start",
            "calls:",
            "helper",
            "api:",
            "CreateFileW",
            "string:",
            "Hello World",
            "capa:",
            "write file",
        ):
            assert expected in output

    def test_ambiguous_assemblage_records_render_all(self):
        """Two records on one function both get a name line and a file line."""
        candidates = [
            self._make_assemblage_record("foo", "src/foo.c (MD5: 11)"),
            self._make_assemblage_record("foo_alias", "src/foo_alias.c (MD5: 22)"),
        ]
        ambiguous = MapaFunction(
            address=0x1000, name="ida_name", assemblage_records=candidates
        )
        output = self._render(self._minimal_report(functions=[ambiguous]))
        assert "function ida_name @ 0x1000" in output
        assert output.count("assemblage name:") == 2
        assert output.count("assemblage file:") == 2
        assert "foo_alias" in output
        assert "src/foo_alias.c" in output

    def test_source_file_separator_inserted_before_new_path(self):
        """A "[ path ]" separator precedes the first function from a new
        source file; the first file in the report gets no separator.
        """
        first = MapaFunction(
            address=0x1000,
            name="a",
            assemblage_records=[self._make_assemblage_record("a", "src/a.c (MD5: 11)")],
        )
        second = MapaFunction(
            address=0x2000,
            name="b",
            assemblage_records=[
                self._make_assemblage_record("b", "src/b.c (MD5: 22)", address=0x2000)
            ],
        )
        output = self._render(self._minimal_report(functions=[first, second]))
        assert "[ src/b.c ]" in output
        assert "[ src/a.c ]" not in output
        assert output.index("[ src/b.c ]") < output.index("function b @ 0x2000")

    def test_missing_assemblage_data_does_not_force_split(self):
        """A function without records between two from the same file must not
        cause a separator to be printed.
        """
        before = MapaFunction(
            address=0x1000,
            name="a1",
            assemblage_records=[
                self._make_assemblage_record("a1", "src/a.c (MD5: 11)")
            ],
        )
        gap = MapaFunction(address=0x2000, name="unknown")
        after = MapaFunction(
            address=0x3000,
            name="a2",
            assemblage_records=[
                self._make_assemblage_record("a2", "src/a.c (MD5: 22)", address=0x3000)
            ],
        )
        output = self._render(self._minimal_report(functions=[before, gap, after]))
        assert "[ src/a.c ]" not in output

    def test_new_source_after_missing_gap_still_splits(self):
        """A change of source file is still announced, exactly once, when
        functions without records sit between the two files.
        """
        known_a = MapaFunction(
            address=0x1000,
            name="a",
            assemblage_records=[self._make_assemblage_record("a", "src/a.c (MD5: 11)")],
        )
        gaps = [
            MapaFunction(address=0x2000, name="unknown1"),
            MapaFunction(address=0x3000, name="unknown2"),
        ]
        known_b = MapaFunction(
            address=0x4000,
            name="b",
            assemblage_records=[
                self._make_assemblage_record("b", "src/b.c (MD5: 22)", address=0x4000)
            ],
        )
        output = self._render(
            self._minimal_report(functions=[known_a, *gaps, known_b])
        )
        assert output.count("[ src/b.c ]") == 1
        assert "[ src/a.c ]" not in output
        assert output.index("[ src/b.c ]") < output.index("function b @ 0x4000")
class TestStringDedup:
    """Pins the strip-then-dedupe behaviour for string values.

    NOTE: both tests exercise the logic inline; neither calls into mapa code.
    """

    def test_strings_deduped_in_model(self):
        """Repeated values collapse to their first occurrence, order kept."""
        raw_values = ["hello", "hello", "world", "hello"]
        trimmed = (value.rstrip() for value in raw_values)
        # dict keys keep insertion order and drop repeats; empties are skipped.
        unique = list(dict.fromkeys(text for text in trimmed if text))
        assert unique == ["hello", "world"]

    def test_string_rstrip(self):
        """Trailing spaces, newlines and tabs are all removed by rstrip()."""
        padded = "hello \n\t"
        assert padded.rstrip() == "hello"
class TestHtmlMapRenderer:
    """Tests for the html-map output: CLI plumbing plus fragments expected in
    the document produced by render_html_map.
    """

    @staticmethod
    def _make_assemblage_record(
        name: str, source_file: str, address: int
    ) -> AssemblageRecord:
        """Build a 0x10-byte record whose RVA and absolute address coincide."""
        end = address + 0x10
        return AssemblageRecord(
            sha256="abc123",
            name=name,
            start_rva=address,
            end_rva=end,
            address=address,
            end_address=end,
            source_file=source_file,
        )

    @staticmethod
    def _make_report() -> MapaReport:
        """Build the shared fixture: an entry -> worker -> helper call chain
        with tagged strings, plus four program strings listed out of address
        order (two of them share the value "inflate").
        """
        make_record = TestHtmlMapRenderer._make_assemblage_record

        entry = MapaFunction(
            address=0x1000,
            name="entry",
            calls=[
                MapaCall(
                    name="worker",
                    address=0x2000,
                    is_api=False,
                    delta=1,
                    direction="",
                )
            ],
            strings=[
                MapaString(
                    value="CreateFileW", address=0x3000, tags=("#common", "#winapi")
                )
            ],
            assemblage_records=[
                make_record("entry_src", "src/core/a.c (MD5: 11)", 0x1000)
            ],
        )
        worker = MapaFunction(
            address=0x2000,
            name="worker",
            callers=[
                MapaCaller(name="entry", address=0x1000, delta=-1, direction="")
            ],
            calls=[
                MapaCall(
                    name="helper",
                    address=0x3000,
                    is_api=False,
                    delta=1,
                    direction="",
                )
            ],
            apis=[
                MapaCall(name="kernel32.dll!CreateFileW", address=0x5000, is_api=True)
            ],
            strings=[MapaString(value="inflate", address=0x2000, tags=("#zlib",))],
            capa_matches=["write file"],
            assemblage_records=[
                make_record("worker_src", "src/core/a.c (MD5: 22)", 0x2000)
            ],
        )
        helper = MapaFunction(
            address=0x3000,
            name="helper",
            callers=[
                MapaCaller(name="worker", address=0x2000, delta=-1, direction="")
            ],
            strings=[MapaString(value="normal", address=0x4000, tags=("#common",))],
            assemblage_records=[
                make_record("helper_src", "src/core/b.c (MD5: 33)", 0x3000)
            ],
        )

        program_strings = [
            MapaProgramString(
                value="inflate",
                address=0x4000,
                tags=("#zlib",),
                function_addresses=(0x2000,),
            ),
            MapaProgramString(
                value="CreateFileW",
                address=0x3000,
                tags=("#common", "#winapi"),
                function_addresses=(0x1000,),
            ),
            MapaProgramString(
                value="normal",
                address=0x5000,
                tags=("#common",),
                function_addresses=(0x3000,),
            ),
            MapaProgramString(
                value="inflate",
                address=0x6000,
                tags=("#zlib",),
                function_addresses=(0x1000, 0x2000),
            ),
        ]

        return MapaReport(
            meta=MapaMeta(name="sample.exe", sha256="abc123", arch="x86_64"),
            functions=[entry, worker, helper],
            program_strings=program_strings,
        )

    def test_cli_parser_accepts_html_map_output(self):
        """``--output html-map`` is accepted by the argument parser."""
        namespace = build_parser().parse_args(["sample.exe", "--output", "html-map"])
        assert namespace.output == "html-map"

    def test_open_requires_html_map_output(self):
        """Asking to open the report with text output is rejected."""
        with pytest.raises(ValueError, match="--open requires --output html-map"):
            validate_output_options("text", True)

    def test_write_temp_html_report(self, tmp_path):
        """The report is written as UTF-8 into the requested directory."""
        markup = "<html>ok</html>"
        written = write_temp_html_report(markup, directory=tmp_path)
        assert written.parent == tmp_path
        assert written.read_text(encoding="utf-8") == markup

    def test_open_html_report_writes_temp_file_and_uses_opener(self, tmp_path):
        """The opener is called once with the written file's ``file://`` URI."""
        seen_urls: list[str] = []

        def record_url(url: str) -> bool:
            seen_urls.append(url)
            return True

        written = open_html_report(
            "<html>ok</html>",
            opener=record_url,
            directory=tmp_path,
        )
        assert written.parent == tmp_path
        assert written.read_text(encoding="utf-8") == "<html>ok</html>"
        assert seen_urls == [written.as_uri()]

    def test_html_map_is_self_contained(self):
        """Styles and script are inlined; no external resources are linked."""
        html = render_html_map(self._make_report())
        for required in (
            "<style>",
            "<script",
            "function-grid",
            'class="split-view"',
            'role="separator"',
        ):
            assert required in html
        for forbidden in ("<link ", "<script src="):
            assert forbidden not in html

    def test_html_map_uses_split_panes_and_resizer(self):
        """Both panes, the splitter and its pointer handling are present."""
        html = render_html_map(self._make_report())
        for required in (
            'id="functions-pane"',
            'id="strings-pane"',
            'id="splitter"',
            "setPointerCapture",
            "pointerdown",
        ):
            assert required in html

    def test_html_map_shows_tag_counts_and_legend(self):
        """Tag controls carry a count, and the heat-map legend is shown."""
        html = render_html_map(self._make_report())
        assert '#zlib <span class="control-count">(2)</span>' in html
        assert '#common <span class="control-count">(1)</span>' in html
        assert 'fill = neighborhood heat · outline = seed · dim = outside neighborhood' in html

    def test_html_map_orders_tags_by_function_count_then_name(self):
        """Tag controls are expected in the order #zlib, #common, #winapi."""
        html = render_html_map(self._make_report())
        zlib_at, common_at, winapi_at = (
            html.index(f'data-tag="{tag}"') for tag in ("#zlib", "#common", "#winapi")
        )
        assert zlib_at < common_at < winapi_at

    def test_html_map_orders_program_strings_by_address_and_shows_addresses(self):
        """String rows appear in ascending address order despite fixture order."""
        html = render_html_map(self._make_report())
        first, second, third, fourth = (
            html.index(f'data-string-address="{address}"')
            for address in ("0x3000", "0x4000", "0x5000", "0x6000")
        )
        assert first < second < third < fourth
        assert '0x3000' in html
        assert '0x6000' in html

    def test_html_map_string_rows_show_visible_tags(self):
        """Each string row's address and tags attributes are present."""
        html = render_html_map(self._make_report())
        for required in (
            'data-string-address="0x3000"',
            'data-string-tags="#winapi"',
            'data-string-address="0x4000"',
            'data-string-tags="#zlib"',
            'data-string-address="0x5000"',
            'data-string-tags="#common"',
            'class="string-tags">#winapi</span>',
        ):
            assert required in html

    def test_html_map_exposes_neighborhood_controls_and_call_graph_data(self):
        """Direction and depth controls plus the index-based call graph exist."""
        html = render_html_map(self._make_report())
        for required in (
            'id="direction-controls"',
            'data-direction="callers"',
            'data-direction="callees"',
            'data-direction="both"',
            'id="depth-controls"',
            'data-depth="1"',
            'data-depth="3"',
            '"callersByIndex":[[],[0],[1]]',
            '"calleesByIndex":[[1],[2],[]]',
        ):
            assert required in html

    def test_html_map_function_boxes_support_neighborhood_hover_and_lock(self):
        """The embedded script contains the hover/lock state and handlers."""
        html = render_html_map(self._make_report())
        for required in (
            'data-function-index="0"',
            'let hoveredFunction=null;',
            'let lockedFunction=null;',
            'const getDecayScore=(distance)=>0.5**distance;',
            # The script must join tooltip lines with an escaped newline.
            "lines.join('\\n')",
            'function-box.is-seed',
            "box.addEventListener('click'",
        ):
            assert required in html

    def test_html_map_preserves_duplicate_values_at_distinct_addresses(self):
        """Two strings with the same value but different addresses both remain."""
        html = render_html_map(self._make_report())
        assert html.count('data-string-value="inflate"') == 2

    def test_html_map_tooltip_contains_function_summary_text(self):
        """The worker function's summary lines are embedded in the document."""
        html = render_html_map(self._make_report())
        for required in (
            "function worker @ 0x2000",
            "assemblage file: src/core/a.c",
            "api: kernel32.dll!CreateFileW",
            # Quotes around the string value are backslash-escaped in the page.
            'string: \\\"inflate\\\" #zlib',
            "capa: write file",
        ):
            assert required in html

    def test_html_map_visible_tag_policy_matches_text_renderer(self):
        """Both tag controls exist, and the #common control comes before the
        string rows at 0x3000 and 0x5000.
        """
        html = render_html_map(self._make_report())
        assert 'data-tag="#winapi"' in html
        assert 'data-tag="#common"' in html
        common_control = html.index('data-tag="#common"')
        createfile_row = html.index('data-string-address="0x3000"')
        normal_row = html.index('data-string-address="0x5000"')
        assert common_control < createfile_row < normal_row

    def test_html_map_hides_common_control_when_only_hidden_common_exists(self):
        """When #common only occurs next to another tag, no #common control
        is expected in the output.
        """
        only_string = MapaProgramString(
            value="CreateFileW",
            address=0x3000,
            tags=("#common", "#winapi"),
            function_addresses=(0x1000,),
        )
        report = MapaReport(
            meta=MapaMeta(name="sample.exe", sha256="abc123"),
            functions=[MapaFunction(address=0x1000, name="entry")],
            program_strings=[only_string],
        )
        html = render_html_map(report)
        assert 'data-tag="#winapi"' in html
        assert 'data-tag="#common"' not in html