From 6ec0b60e5ac1016da124836380c7e2b963fce296 Mon Sep 17 00:00:00 2001 From: vmfunc Date: Wed, 10 Jun 2026 16:37:06 -0700 Subject: [PATCH] feat: diff mode with json snapshot store re-scans become a monitor: -diff snapshots each target's normalized findings to a per-target json file and, on the next run, surfaces only the delta (+ new / - gone) against the last snapshot, then overwrites it so each run diffs against the previous one. behavior is unchanged when -diff is off. new internal/store keys the set-difference off finding.Key (already stable across runs) and uses only encoding/json + os - no new deps. snapshot files are sanitized per target (no traversal), written 0600 under 0750 dirs. -store picks the location: explicit dir, else the log dir, else <config-dir>/sif/state. a missing snapshot is a clean baseline, a corrupt one self-heals on the next save. --- README.md | 14 ++ docs/usage.md | 22 ++++ internal/config/config.go | 4 + internal/config/config_test.go | 8 ++ internal/store/store.go | 204 ++++++++++++++++++++++++++++ internal/store/store_test.go | 234 +++++++++++++++++++++++++++++++++ man/sif.1 | 10 ++ sif.go | 95 ++++++++++++- sif_test.go | 70 ++++++++++ 9 files changed, 655 insertions(+), 6 deletions(-) create mode 100644 internal/store/store.go create mode 100644 internal/store/store_test.go diff --git a/README.md b/README.md index a493d38..7aef3a6 100644 --- a/README.md +++ b/README.md @@ -220,6 +220,8 @@ write the run's findings out to a file for ci/cd or triage: | `-sarif` | write a sarif 2.1.0 report to this file | | `-markdown`, `-md` | write a markdown report to this file | | `-silent` | plain output: chrome to stderr, one finding per line to stdout (for pipelines) | +| `-diff` | surface only findings added/removed since the last snapshot of each target | +| `-store` | snapshot directory for `-diff` (default: log dir, else `<config-dir>/sif/state`) | ```bash # scan and emit both a sarif and markdown report @@ -228,6 +230,18 @@ write the run's findings out to a file
for ci/cd or triage: sarif output is ingestable by github code scanning; markdown is a readable per-target summary. +### diff mode + +`-diff` turns a re-scan into a monitor: sif snapshots each target's normalized findings to a json file, and on the next run reports only the delta (`+ new` / `- gone`) against that snapshot, then overwrites it. the first run for a target has no baseline, so everything is `+ new`. snapshots land in `-store` (one sanitized file per target); when unset they reuse the log dir, falling back to `<config-dir>/sif/state`. + +```bash +# baseline run, then re-scan later and see only what moved +./sif -u https://example.com -sh -cors -diff +./sif -u https://example.com -sh -cors -diff +``` + +the snapshot is always rewritten, so each run diffs against the previous one. the delta is chrome (it rides the normal output sink / stderr under `-silent`), not the findings stream. + ### pipe mode sif reads targets from stdin and accepts naked hosts, so it drops into a unix pipeline. `-silent` routes all banner/spinner/log chrome to stderr and prints one normalized finding per line (`[severity] target module title`) to stdout: diff --git a/docs/usage.md b/docs/usage.md index 8325e75..2416eba 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -442,6 +442,28 @@ plain output for pipelines: all banner/spinner/log chrome goes to stderr and std subfinder -d example.com | sif -silent -probe -sh | notify ``` +### -diff + +turn a re-scan into a monitor. sif snapshots each target's normalized findings to a json file under the store dir; on the next run it loads that snapshot, diffs the current findings against it by finding key, and prints only the delta (`+ new` for findings that appeared, `- gone` for findings that vanished). it always rewrites the snapshot afterwards, so each run compares against the previous one. + +the first run for a target has no snapshot, so every finding shows as `+ new`. when nothing changed, sif notes that and writes a fresh snapshot anyway.
+ +```bash +# baseline, then re-scan and see only what moved +./sif -u https://example.com -sh -cors -diff +./sif -u https://example.com -sh -cors -diff +``` + +the delta is chrome, not the findings stream: under `-silent` it rides stderr with the rest of the chrome, leaving stdout for the full findings. + +### -store + +snapshot directory for `-diff`. precedence when unset: the `-log` dir if one is given, else `<config-dir>/sif/state` (`$XDG_CONFIG_HOME/sif/state` on linux, `~/Library/Application Support/sif/state` on macos). one sanitized file per target: the directory is created `0750`, each snapshot file is written `0600`. + +```bash +./sif -u https://example.com -sh -diff -store ./snapshots +``` + ## api options ### -api diff --git a/internal/config/config.go b/internal/config/config.go index e0f7583..287e6d7 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -69,6 +69,8 @@ type Settings struct { SARIF string // path to write a sarif 2.1.0 report to ("" = off) Markdown string // path to write a markdown report to ("" = off) Silent bool // route chrome to stderr, print one finding per line to stdout + Diff bool // surface only findings added/removed vs the last snapshot + Store string // snapshot dir for diff mode ("" = default state dir) Modules string // Comma-separated list of module IDs to run ModuleTags string // Run modules matching these tags AllModules bool // Run all loaded modules @@ -174,6 +176,8 @@ func Parse() *Settings { flagSet.StringVar(&settings.SARIF, "sarif", "", "Write a SARIF 2.1.0 report to this file"), flagSet.StringVarP(&settings.Markdown, "markdown", "md", "", "Write a markdown report to this file"), flagSet.BoolVar(&settings.Silent, "silent", false, "Plain output: chrome to stderr, one finding per line to stdout (for pipelines)"), + flagSet.BoolVar(&settings.Diff, "diff", false, "Diff mode: surface only findings added/removed since the last snapshot of each target"), + flagSet.StringVar(&settings.Store, "store", "", "Snapshot directory for -diff (default: log
dir, else <config-dir>/sif/state)"), ) flagSet.CreateGroup("api", "API", diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 918cd91..96aaeba 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -61,6 +61,14 @@ func TestSettingsDefaults(t *testing.T) { if settings.Ports != "" { t.Errorf("expected Ports default to be empty, got %v", settings.Ports) } + + // diff mode is opt-in and its store dir defaults empty (resolved at runtime). + if settings.Diff { + t.Errorf("expected Diff default to be false, got %v", settings.Diff) + } + if settings.Store != "" { + t.Errorf("expected Store default to be empty, got %v", settings.Store) + } } func TestSettingsNoScanBehavior(t *testing.T) { diff --git a/internal/store/store.go b/internal/store/store.go new file mode 100644 index 0000000..dcc2d6b --- /dev/null +++ b/internal/store/store.go @@ -0,0 +1,204 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +// Package store persists a run's normalized findings as a json snapshot, one +// file per target, so a later run can diff against it and surface only what +// changed. it leans on encoding/json + os only - no new deps - and keys the +// delta off finding.Key, the identity the finding layer already guarantees is +// stable across runs. +package store + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/dropalldatabases/sif/internal/finding" +) + +// snapshotFileMode is applied to written snapshot files: owner read/write only. +// a snapshot enumerates a target's findings (urls, secrets, takeovers) and is +// not meant for other users on the box, so it stays 0600.
+const snapshotFileMode = 0o600 + +// stateDirMode is applied to directories the store creates: owner rwx, group rx, +// no world access. matches the 0o750 the bundle asks for so the state tree isn't +// world-readable. +const stateDirMode = 0o750 + +// snapshotExt is the extension every snapshot file carries; makes the state dir +// self-describing and lets Load reconstruct the path from a bare target. +const snapshotExt = ".json" + +// defaultDirName is the sif-owned subdirectory under the user's config dir when +// no explicit store dir is given. DefaultDir joins it under os.UserConfigDir(). +const defaultDirName = "sif" + +// stateSubDir separates snapshots from anything else sif might drop in its +// config dir later, so the state tree is a single sweepable directory. +const stateSubDir = "state" + +// DefaultDir returns the fallback snapshot location: <config-dir>/sif/state, where <config-dir> is os.UserConfigDir(). +// callers pass it when -store is unset and there's no logdir to reuse. the dir +// is not created here - Save does that lazily so a diff-less run touches nothing. +func DefaultDir() (string, error) { + configDir, err := os.UserConfigDir() + if err != nil { + return "", fmt.Errorf("resolving user config dir: %w", err) + } + return filepath.Join(configDir, defaultDirName, stateSubDir), nil +} + +// sanitize turns an arbitrary target (https://example.com:8443/path?q=1) into a +// single safe filename component. a target is attacker-influenced (it can come +// from a stdin pipe or a -f file), so every separator and path metacharacter is +// folded to '_' - no '/', '\\', '.', ':' survives to escape the state dir or +// collide with a parent reference. empty/degenerate input falls back to a fixed +// token rather than producing a dotfile or empty name. +func sanitize(target string) string { + var b strings.Builder + b.Grow(len(target)) + // collapse runs of separators: a scheme like "https://" is three metachars + // in a row, and one '_' reads cleaner than three. NOTE(review): the folding is lossy - distinct targets such as "a/b" and "a.b" map to the same name and would share one snapshot file.
+ prevSep := false + for i := 0; i < len(target); i++ { + c := target[i] + switch { + case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9', c == '-': + b.WriteByte(c) + prevSep = false + default: + // every other byte (path sep, dot, colon, slash, space, unicode, and a + // literal '_') is a separator; fold it so traversal and dotfiles are + // impossible and a run never balloons the filename. + if !prevSep { + b.WriteByte('_') + prevSep = true + } + } + } + name := strings.Trim(b.String(), "_") + if name == "" { + return "target" + } + return name +} + +// pathFor builds the absolute snapshot path for a target under dir. kept private +// so the sanitized-filename invariant lives in one place; Save and Load both go +// through it so a target always maps to the same file. +func pathFor(dir, target string) string { + return filepath.Join(dir, sanitize(target)+snapshotExt) +} + +// Save writes the run's findings for target as a json snapshot under dir, +// overwriting any prior snapshot. the dir (and parents) is created lazily with +// stateDirMode. an empty findings slice is still written - it records "this +// target had nothing", which a later diff reads as a clean baseline rather than +// a missing one. +func Save(dir, target string, findings []finding.Finding) error { + if dir == "" { + return fmt.Errorf("store: empty snapshot dir") + } + if err := os.MkdirAll(dir, stateDirMode); err != nil { + return fmt.Errorf("creating state dir %q: %w", dir, err) + } + + // marshal a non-nil slice so an empty run serializes to [] not null; keeps + // the on-disk shape stable and Load's decode unambiguous. 
+ if findings == nil { + findings = []finding.Finding{} + } + data, err := json.MarshalIndent(findings, "", " ") + if err != nil { + return fmt.Errorf("marshaling snapshot for %q: %w", target, err) + } + + path := pathFor(dir, target) + if err := os.WriteFile(path, data, snapshotFileMode); err != nil { + return fmt.Errorf("writing snapshot %q: %w", path, err) + } + return nil +} + +// Load reads the previously saved snapshot for target under dir. a missing +// snapshot is not an error - it's the first run for that target, so an empty +// slice comes back and the caller treats every current finding as new. a present +// but unreadable/corrupt file is a real error: silently swallowing it would make +// a broken store look like a fresh one and flag everything as added forever. +func Load(dir, target string) ([]finding.Finding, error) { + path := pathFor(dir, target) + data, err := os.ReadFile(path) + if err != nil { + if os.IsNotExist(err) { + return []finding.Finding{}, nil + } + return nil, fmt.Errorf("reading snapshot %q: %w", path, err) + } + + var findings []finding.Finding + if err := json.Unmarshal(data, &findings); err != nil { + return nil, fmt.Errorf("decoding snapshot %q: %w", path, err) + } + if findings == nil { + findings = []finding.Finding{} + } + return findings, nil +} + +// Diff computes the set-difference between two snapshots keyed on Finding.Key: +// added is everything in next whose Key isn't in old, removed is everything in +// old whose Key isn't in next. order follows the input slices (added in next's +// order, removed in old's) so output is deterministic for a given pair. a Key +// seen twice in one slice is deduped on first sight, so duplicate findings don't +// double-report. 
+func Diff(old, next []finding.Finding) (added, removed []finding.Finding) { + oldKeys := make(map[string]struct{}, len(old)) + for i := 0; i < len(old); i++ { + oldKeys[old[i].Key] = struct{}{} + } + nextKeys := make(map[string]struct{}, len(next)) + for i := 0; i < len(next); i++ { + nextKeys[next[i].Key] = struct{}{} + } + + seen := make(map[string]struct{}, len(next)) + for i := 0; i < len(next); i++ { + k := next[i].Key + if _, ok := oldKeys[k]; ok { + continue + } + if _, dup := seen[k]; dup { + continue + } + seen[k] = struct{}{} + added = append(added, next[i]) + } + + // reuse seen for the removed pass; the two key spaces don't overlap by + // construction (removed keys are absent from next) so a single map is safe. + clear(seen) + for i := 0; i < len(old); i++ { + k := old[i].Key + if _, ok := nextKeys[k]; ok { + continue + } + if _, dup := seen[k]; dup { + continue + } + seen[k] = struct{}{} + removed = append(removed, old[i]) + } + return added, removed +} diff --git a/internal/store/store_test.go b/internal/store/store_test.go new file mode 100644 index 0000000..3816948 --- /dev/null +++ b/internal/store/store_test.go @@ -0,0 +1,234 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package store + +import ( + "os" + "path/filepath" + "reflect" + "sort" + "testing" + + "github.com/dropalldatabases/sif/internal/finding" +) + +// sampleFindings is a small, stable set of findings reused across the round-trip +// and diff cases; covers two modules and two severities so marshaling exercises +// every Finding field. 
+func sampleFindings() []finding.Finding { + return []finding.Finding{ + { + Target: "https://example.com", + Module: "headers", + Severity: finding.SeverityInfo, + Key: "headers:Server", + Title: "Server", + Raw: "nginx", + }, + { + Target: "https://example.com", + Module: "cors", + Severity: finding.SeverityMedium, + Key: "cors:https://example.com:null", + Title: "null origin reflected", + Raw: "allow-origin: null", + }, + } +} + +func TestSaveLoadRoundTrip(t *testing.T) { + dir := t.TempDir() + const target = "https://example.com" + want := sampleFindings() + + if err := Save(dir, target, want); err != nil { + t.Fatalf("Save: %v", err) + } + + got, err := Load(dir, target) + if err != nil { + t.Fatalf("Load: %v", err) + } + if !reflect.DeepEqual(got, want) { + t.Fatalf("round-trip mismatch:\n got=%#v\nwant=%#v", got, want) + } +} + +func TestSaveCreatesNestedDir(t *testing.T) { + // the state dir need not exist; Save mkdir's it (and parents) lazily. + dir := filepath.Join(t.TempDir(), "nested", "state") + if err := Save(dir, "https://x.test", sampleFindings()); err != nil { + t.Fatalf("Save into missing dir: %v", err) + } + info, err := os.Stat(dir) + if err != nil { + t.Fatalf("stat created dir: %v", err) + } + if !info.IsDir() { + t.Fatalf("expected %q to be a directory", dir) + } +} + +func TestSaveEmptyDirRejected(t *testing.T) { + if err := Save("", "https://x.test", sampleFindings()); err == nil { + t.Fatal("Save with empty dir: want error, got nil") + } +} + +func TestSaveEmptyFindingsRoundTrips(t *testing.T) { + // an empty run is a valid baseline: Save writes [], Load reads back an empty + // (non-nil) slice, never an error. 
+ dir := t.TempDir() + const target = "https://empty.test" + + if err := Save(dir, target, nil); err != nil { + t.Fatalf("Save nil findings: %v", err) + } + got, err := Load(dir, target) + if err != nil { + t.Fatalf("Load: %v", err) + } + if got == nil { + t.Fatal("Load returned nil, want non-nil empty slice") + } + if len(got) != 0 { + t.Fatalf("Load returned %d findings, want 0", len(got)) + } +} + +func TestLoadMissingSnapshotIsEmpty(t *testing.T) { + // no prior run for this target: a missing file is not an error, it's an empty + // baseline so the first run treats everything as added. + dir := t.TempDir() + got, err := Load(dir, "https://never-scanned.test") + if err != nil { + t.Fatalf("Load missing snapshot: %v", err) + } + if got == nil { + t.Fatal("Load returned nil, want non-nil empty slice") + } + if len(got) != 0 { + t.Fatalf("Load missing snapshot returned %d findings, want 0", len(got)) + } +} + +func TestLoadCorruptSnapshotErrors(t *testing.T) { + // a present-but-garbage snapshot must surface loudly: treating it as empty + // would silently re-flag every finding as new on every run. + dir := t.TempDir() + const target = "https://corrupt.test" + path := filepath.Join(dir, sanitize(target)+snapshotExt) + if err := os.WriteFile(path, []byte("{not json"), snapshotFileMode); err != nil { + t.Fatalf("seeding corrupt snapshot: %v", err) + } + if _, err := Load(dir, target); err == nil { + t.Fatal("Load corrupt snapshot: want error, got nil") + } +} + +func TestDiffAddedAndRemoved(t *testing.T) { + base := sampleFindings() + + // next drops the cors finding (removed) and adds a takeover (added); the + // headers finding is unchanged and must appear in neither delta. 
+ next := []finding.Finding{ + base[0], // headers - unchanged + { + Target: "https://example.com", + Module: "subdomain_takeover", + Severity: finding.SeverityHigh, + Key: "subdomain_takeover:old.example.com", + Title: "takeover: old.example.com", + Raw: "GitHub Pages", + }, + } + + added, removed := Diff(base, next) + + if len(added) != 1 || added[0].Key != "subdomain_takeover:old.example.com" { + t.Fatalf("added = %#v, want the takeover only", added) + } + if len(removed) != 1 || removed[0].Key != "cors:https://example.com:null" { + t.Fatalf("removed = %#v, want the cors finding only", removed) + } +} + +func TestDiffNoChange(t *testing.T) { + // identical snapshots produce no delta in either direction. + base := sampleFindings() + added, removed := Diff(base, base) + if len(added) != 0 || len(removed) != 0 { + t.Fatalf("identical snapshots: added=%d removed=%d, want 0/0", len(added), len(removed)) + } +} + +func TestDiffFirstRunAllAdded(t *testing.T) { + // no prior snapshot (empty old) means every current finding is new. + next := sampleFindings() + added, removed := Diff(nil, next) + if len(removed) != 0 { + t.Fatalf("first run removed=%d, want 0", len(removed)) + } + gotKeys := keysOf(added) + wantKeys := keysOf(next) + if !reflect.DeepEqual(gotKeys, wantKeys) { + t.Fatalf("first run added keys=%v, want %v", gotKeys, wantKeys) + } +} + +func TestDiffDedupesRepeatedKey(t *testing.T) { + // a Key appearing twice in the new snapshot is reported once, not twice. + f := sampleFindings()[0] + next := []finding.Finding{f, f} + added, _ := Diff(nil, next) + if len(added) != 1 { + t.Fatalf("duplicate key reported %d times, want 1", len(added)) + } +} + +// keysOf returns the sorted Key set of a finding slice for order-independent +// comparison. 
+func keysOf(fs []finding.Finding) []string { + out := make([]string, 0, len(fs)) + for i := 0; i < len(fs); i++ { + out = append(out, fs[i].Key) + } + sort.Strings(out) + return out +} + +func TestSanitizeNoTraversal(t *testing.T) { + // sanitize is the only barrier between an attacker-influenced target and the + // state dir; assert no separator or traversal token survives. + tests := []struct { + in string + want string + }{ + {"https://example.com", "https_example_com"}, + {"../../etc/passwd", "etc_passwd"}, + {"a/b/c", "a_b_c"}, + {"....//....//x", "x"}, + {"", "target"}, + {"///", "target"}, + {"host:8443/path?q=1", "host_8443_path_q_1"}, + } + for _, tt := range tests { + got := sanitize(tt.in) + if got != tt.want { + t.Errorf("sanitize(%q) = %q, want %q", tt.in, got, tt.want) + } + if filepath.Base(got) != got { + t.Errorf("sanitize(%q) = %q escapes its component", tt.in, got) + } + } +} diff --git a/man/sif.1 b/man/sif.1 index 9799fbf..5c52fcf 100644 --- a/man/sif.1 +++ b/man/sif.1 @@ -200,6 +200,16 @@ plain output for pipelines: route all chrome to stderr and print one normalized finding per line to stdout as \fB[severity] target module title\fR. implies non\-interactive (no spinners). .TP +.B \-diff +diff mode: snapshot each target's findings to a json file and, on a re\-scan, +print only the delta against the last snapshot (\fB+ new\fR for findings that +appeared, \fB\- gone\fR for ones that vanished), then overwrite the snapshot. +the first run for a target reports everything as new. +.TP +.BR \-store " \fIdir\fR" +snapshot directory for \fB\-diff\fR. defaults to the \fB\-log\fR dir if set, +otherwise \fI<config\-dir>/sif/state\fR. one sanitized file per target. +.TP .B \-api emit json results and suppress the interactive output.
.SH MODULES diff --git a/sif.go b/sif.go index 5044f89..faec3ba 100644 --- a/sif.go +++ b/sif.go @@ -37,6 +37,7 @@ import ( "github.com/dropalldatabases/sif/internal/scan/builtin" "github.com/dropalldatabases/sif/internal/scan/frameworks" jsscan "github.com/dropalldatabases/sif/internal/scan/js" + "github.com/dropalldatabases/sif/internal/store" ) // App represents the main application structure for sif. @@ -303,10 +304,22 @@ func (app *App) Run() error { reportResults := make([]report.Result, 0, 16) // normalized findings for the whole run; the single Flatten-driven view that - // notify and diff (later) consume. collected alongside the report so both - // describe the same scanners from one pass. + // notify and diff consume. collected alongside the report so both describe the + // same scanners from one pass. allFindings := make([]finding.Finding, 0, 16) + // resolve the snapshot dir once when diff mode is on; a bad default isn't + // fatal - diff just no-ops for the run rather than killing the scan. + storeDir := "" + if app.settings.Diff { + dir, err := app.resolveStoreDir() + if err != nil { + log.Warnf("diff disabled: %v", err) + } else { + storeDir = dir + } + } + for _, url := range app.targets { output.Info("Starting scan on %s", output.Highlight.Render(url)) @@ -664,7 +677,17 @@ func (app *App) Run() error { fmt.Println(string(marshalled)) } - allFindings = append(allFindings, collectFindings(url, moduleResults)...) + targetFindings := collectFindings(url, moduleResults) + allFindings = append(allFindings, targetFindings...) + + // diff mode is per-target: load this target's last snapshot, surface only + // the delta, then overwrite the snapshot so the next run diffs against now. + // storeDir is "" when diff is off or the dir couldn't resolve, in which + // case this is a no-op and behavior is unchanged. 
+ if storeDir != "" { + app.diffTarget(storeDir, url, targetFindings) + } + // the report carries raw blobs and is only built when an export flag is // set, so the common path skips the marshalling entirely. if wantReport { @@ -709,9 +732,9 @@ func printFindings(findings []finding.Finding) { } // collectFindings normalizes one target's module results through finding.Flatten -// - the single normalization path that notify and diff (later bundles) build on. -// every scan result struct collapses to flat, severity-ranked findings here so a -// scanner is described once, not once per consumer. +// - the single normalization path that notify and diff build on. every scan +// result struct collapses to flat, severity-ranked findings here so a scanner is +// described once, not once per consumer. func collectFindings(target string, moduleResults []ModuleResult) []finding.Finding { out := make([]finding.Finding, 0, len(moduleResults)) for _, mr := range moduleResults { @@ -720,6 +743,66 @@ func collectFindings(target string, moduleResults []ModuleResult) []finding.Find return out } +// resolveStoreDir picks the snapshot directory for diff mode. precedence: an +// explicit -store wins; else the run's log dir is reused (snapshots live next to +// logs); else the per-user default under <config-dir>/sif/state. returns an +// error only when no usable location exists, so the caller can disable diff +// without failing the scan. +func (app *App) resolveStoreDir() (string, error) { + if app.settings.Store != "" { + return app.settings.Store, nil + } + if app.settings.LogDir != "" { + return app.settings.LogDir, nil + } + dir, err := store.DefaultDir() + if err != nil { + return "", fmt.Errorf("resolving snapshot dir: %w", err) + } + return dir, nil +} + +// diffTarget loads target's previous snapshot, prints the added/removed delta +// against the current findings, then overwrites the snapshot so the next run +// diffs against this one.
a load failure surfaces but doesn't abort the run - +// the new snapshot is still written so a corrupt baseline self-heals. always +// saves, even when the delta is empty, to advance the baseline. +func (app *App) diffTarget(dir, target string, current []finding.Finding) { + previous, err := store.Load(dir, target) + if err != nil { + log.Warnf("diff: reading snapshot for %s, treating as fresh: %v", target, err) + previous = nil + } + + added, removed := store.Diff(previous, current) + printDiff(target, added, removed) + + if err := store.Save(dir, target, current); err != nil { + log.Warnf("diff: saving snapshot for %s: %v", target, err) + } +} + +// printDiff renders a target's diff: each added finding marked "+ new", each +// removed one "- gone", with a one-line note when nothing changed. routed +// through the shared output sink so -silent keeps it on stderr alongside the +// other chrome. a single Builder keeps the block from interleaving. +func printDiff(target string, added, removed []finding.Finding) { + if len(added) == 0 && len(removed) == 0 { + output.Info("diff %s: no changes since last snapshot", target) + return + } + + var b strings.Builder + fmt.Fprintf(&b, "diff %s: %d new, %d gone\n", target, len(added), len(removed)) + for i := 0; i < len(added); i++ { + fmt.Fprintf(&b, " + new %s\n", added[i].Line()) + } + for i := 0; i < len(removed); i++ { + fmt.Fprintf(&b, " - gone %s\n", removed[i].Line()) + } + fmt.Fprint(output.Writer(), b.String()) +} + // collectReportResults flattens one target's module results into the report // model, carrying each finding as raw json so the report package stays free of // scan types. a result that won't marshal is skipped rather than failing the run. 
diff --git a/sif_test.go b/sif_test.go index c6455b4..36a0be2 100644 --- a/sif_test.go +++ b/sif_test.go @@ -20,6 +20,7 @@ import ( "github.com/dropalldatabases/sif/internal/config" "github.com/dropalldatabases/sif/internal/finding" + "github.com/dropalldatabases/sif/internal/store" ) // TestMain neutralizes the stdin seam for the whole package so tests that build @@ -373,3 +374,72 @@ func TestUrlResult_JSON(t *testing.T) { t.Errorf("UrlResult.Results = %d, want 1", len(ur.Results)) } } + +func TestResolveStoreDir(t *testing.T) { + // explicit -store wins over everything. + explicit := &App{settings: &config.Settings{Store: "/tmp/snaps", LogDir: "/tmp/logs"}} + if dir, err := explicit.resolveStoreDir(); err != nil || dir != "/tmp/snaps" { + t.Fatalf("explicit store: got (%q, %v), want (/tmp/snaps, nil)", dir, err) + } + + // no -store: reuse the log dir. + logged := &App{settings: &config.Settings{LogDir: "/tmp/logs"}} + if dir, err := logged.resolveStoreDir(); err != nil || dir != "/tmp/logs" { + t.Fatalf("log dir fallback: got (%q, %v), want (/tmp/logs, nil)", dir, err) + } + + // neither set: fall through to the per-user default (non-empty, no error). + bare := &App{settings: &config.Settings{}} + dir, err := bare.resolveStoreDir() + if err != nil { + t.Fatalf("default store dir: %v", err) + } + if dir == "" { + t.Fatal("default store dir resolved empty") + } +} + +func TestDiffTargetSnapshotsAndDiffs(t *testing.T) { + dir := t.TempDir() + const target = "https://diff.example.com" + app := &App{settings: &config.Settings{Diff: true, Store: dir}} + + first := []finding.Finding{ + {Target: target, Module: "headers", Severity: finding.SeverityInfo, Key: "headers:Server", Title: "Server", Raw: "nginx"}, + } + + // first run: no prior snapshot, everything is new; the snapshot must persist. 
+ app.diffTarget(dir, target, first) + + saved, err := store.Load(dir, target) + if err != nil { + t.Fatalf("load after first run: %v", err) + } + if len(saved) != 1 || saved[0].Key != "headers:Server" { + t.Fatalf("snapshot after first run = %#v, want the headers finding", saved) + } + + // second run with a different set: the snapshot must advance to the new set so + // a third run would diff against it. + second := []finding.Finding{ + {Target: target, Module: "cors", Severity: finding.SeverityMedium, Key: "cors:x", Title: "null origin", Raw: "null"}, + } + app.diffTarget(dir, target, second) + + saved, err = store.Load(dir, target) + if err != nil { + t.Fatalf("load after second run: %v", err) + } + if len(saved) != 1 || saved[0].Key != "cors:x" { + t.Fatalf("snapshot after second run = %#v, want the cors finding", saved) + } + + // the delta between the two snapshots is exactly: headers gone, cors new. + added, removed := store.Diff(first, second) + if len(added) != 1 || added[0].Key != "cors:x" { + t.Fatalf("added = %#v, want cors:x", added) + } + if len(removed) != 1 || removed[0].Key != "headers:Server" { + t.Fatalf("removed = %#v, want headers:Server", removed) + } +}