feat: diff mode with json snapshot store

re-scans become a monitor: -diff snapshots each target's normalized
findings to a per-target json file and, on the next run, surfaces only
the delta (+ new / - gone) against the last snapshot, then overwrites it
so each run diffs against the previous one. behavior is unchanged when
-diff is off.

new internal/store keys the set-difference off finding.Key (already
stable across runs) and uses only encoding/json + os - no new deps.
snapshot files are sanitized per target (no traversal), written 0600
under 0750 dirs. -store picks the location: explicit dir, else the log
dir, else <user-config>/sif/state. a missing snapshot is a clean
baseline; a corrupt one is reported as a load error and is replaced by the
next successful save.
This commit is contained in:
vmfunc
2026-06-10 16:37:06 -07:00
parent 22168611e4
commit 6ec0b60e5a
9 changed files with 655 additions and 6 deletions
+4
View File
@@ -69,6 +69,8 @@ type Settings struct {
SARIF string // path to write a sarif 2.1.0 report to ("" = off)
Markdown string // path to write a markdown report to ("" = off)
Silent bool // route chrome to stderr, print one finding per line to stdout
Diff bool // surface only findings added/removed vs the last snapshot
Store string // snapshot dir for diff mode ("" = default state dir)
Modules string // Comma-separated list of module IDs to run
ModuleTags string // Run modules matching these tags
AllModules bool // Run all loaded modules
@@ -174,6 +176,8 @@ func Parse() *Settings {
flagSet.StringVar(&settings.SARIF, "sarif", "", "Write a SARIF 2.1.0 report to this file"),
flagSet.StringVarP(&settings.Markdown, "markdown", "md", "", "Write a markdown report to this file"),
flagSet.BoolVar(&settings.Silent, "silent", false, "Plain output: chrome to stderr, one finding per line to stdout (for pipelines)"),
flagSet.BoolVar(&settings.Diff, "diff", false, "Diff mode: surface only findings added/removed since the last snapshot of each target"),
flagSet.StringVar(&settings.Store, "store", "", "Snapshot directory for -diff (default: log dir, else <user-config>/sif/state)"),
)
flagSet.CreateGroup("api", "API",
+8
View File
@@ -61,6 +61,14 @@ func TestSettingsDefaults(t *testing.T) {
if settings.Ports != "" {
t.Errorf("expected Ports default to be empty, got %v", settings.Ports)
}
// diff mode is opt-in and its store dir defaults empty (resolved at runtime).
if settings.Diff != false {
t.Errorf("expected Diff default to be false, got %v", settings.Diff)
}
if settings.Store != "" {
t.Errorf("expected Store default to be empty, got %v", settings.Store)
}
}
func TestSettingsNoScanBehavior(t *testing.T) {
+204
View File
@@ -0,0 +1,204 @@
/*
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
: :
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
: ▄█ █ █▀ · BSD 3-Clause License :
: :
: (c) 2022-2026 vmfunc, xyzeva, :
: lunchcat alumni & contributors :
: :
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
*/
// Package store persists a run's normalized findings as a json snapshot, one
// file per target, so a later run can diff against it and surface only what
// changed. it leans on encoding/json + os only - no new deps - and keys the
// delta off finding.Key, the identity the finding layer already guarantees is
// stable across runs.
package store
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/dropalldatabases/sif/internal/finding"
)
// on-disk layout and permission knobs for the snapshot store.
const (
	// snapshotFileMode is the permission set for snapshot files: owner
	// read/write only. a snapshot lists a target's findings, so it is kept
	// private to the owning user.
	snapshotFileMode = 0o600

	// stateDirMode is the permission set for directories the store creates:
	// owner rwx, group rx, nothing for others, so the state tree is never
	// world-readable.
	stateDirMode = 0o750

	// snapshotExt is the extension appended to every snapshot filename; the
	// path for a target is rebuilt from the sanitized target plus this suffix.
	snapshotExt = ".json"

	// defaultDirName is the sif-owned directory under the user's config dir
	// that DefaultDir uses when no explicit store dir is given.
	defaultDirName = "sif"

	// stateSubDir is the subdirectory of defaultDirName that holds snapshots,
	// keeping them apart from anything else placed in the config dir.
	stateSubDir = "state"
)
// DefaultDir reports the fallback snapshot directory,
// <user-config>/sif/state, built from os.UserConfigDir. it only computes the
// path and never touches the filesystem; creating the directory is left to
// Save. an error is returned when the user config dir cannot be resolved.
func DefaultDir() (string, error) {
	base, err := os.UserConfigDir()
	if err == nil {
		return filepath.Join(base, defaultDirName, stateSubDir), nil
	}
	return "", fmt.Errorf("resolving user config dir: %w", err)
}
// sanitize reduces an arbitrary target (https://example.com:8443/path?q=1) to
// one filename-safe component. only ASCII letters, digits and '-' are kept;
// every other byte - '/', '\\', '.', ':', whitespace, '_' itself, and each byte
// of a multi-byte rune - counts as a separator. a run of separators becomes a
// single '_', and leading/trailing '_' are trimmed, so the result can never
// contain a path separator, a dot, or a parent reference.
//
// the mapping is lossy: targets that differ only in folded bytes ("a/b" and
// "a.b") produce the same name. input with no keepable byte at all yields the
// fixed token "target" instead of an empty name.
func sanitize(target string) string {
	out := make([]byte, 0, len(target))
	for _, ch := range []byte(target) {
		keep := ch == '-' ||
			('0' <= ch && ch <= '9') ||
			('a' <= ch && ch <= 'z') ||
			('A' <= ch && ch <= 'Z')
		if keep {
			out = append(out, ch)
			continue
		}
		// '_' is never a kept byte, so a trailing '_' in out means the
		// previous input byte was already a separator: skip to collapse the run.
		if n := len(out); n == 0 || out[n-1] != '_' {
			out = append(out, '_')
		}
	}
	if trimmed := strings.Trim(string(out), "_"); trimmed != "" {
		return trimmed
	}
	return "target"
}
// pathFor maps a target to its snapshot file under dir: the sanitized target
// plus snapshotExt, joined onto dir (so the result is only as absolute as dir
// is). it stays private so the sanitized-filename rule lives in one place;
// Save and Load both call it, which keeps a target tied to a single file.
func pathFor(dir, target string) string {
	filename := sanitize(target) + snapshotExt
	return filepath.Join(dir, filename)
}
// Save writes the run's findings for target as a json snapshot under dir,
// replacing any prior snapshot. dir (and its parents) is created lazily with
// stateDirMode. a nil or empty findings slice is still written, as [], so a
// later Load sees "this target had nothing" rather than a missing snapshot.
//
// the snapshot is written to a temporary file in dir and then renamed over the
// final path, so a reader never observes a half-written file and an
// interrupted run leaves the previous snapshot intact. no fsync is issued, so
// durability across power loss is not promised.
//
// it returns an error when dir is empty, when the directory cannot be
// created, when marshaling fails, or when the file cannot be written or
// moved into place.
func Save(dir, target string, findings []finding.Finding) error {
	if dir == "" {
		return fmt.Errorf("store: empty snapshot dir")
	}
	if err := os.MkdirAll(dir, stateDirMode); err != nil {
		return fmt.Errorf("creating state dir %q: %w", dir, err)
	}
	// marshal a non-nil slice so an empty run serializes to [] not null; keeps
	// the on-disk shape stable and Load's decode unambiguous.
	if findings == nil {
		findings = []finding.Finding{}
	}
	data, err := json.MarshalIndent(findings, "", " ")
	if err != nil {
		return fmt.Errorf("marshaling snapshot for %q: %w", target, err)
	}
	path := pathFor(dir, target)

	// the temp file lives in dir so the rename stays on one filesystem.
	// os.CreateTemp creates it 0600, the same bits as snapshotFileMode, and
	// the rename carries that mode over even when an older snapshot had looser
	// permissions. the leading '.' and ".tmp" suffix cannot clash with a
	// snapshot name, which never starts with '.' and always ends in snapshotExt.
	tmp, err := os.CreateTemp(dir, ".snapshot-*.tmp")
	if err != nil {
		return fmt.Errorf("writing snapshot %q: %w", path, err)
	}
	tmpPath := tmp.Name()
	committed := false
	defer func() {
		if !committed {
			// best-effort cleanup; the error already being returned is the
			// one the caller needs to see.
			_ = os.Remove(tmpPath)
		}
	}()

	if _, err := tmp.Write(data); err != nil {
		_ = tmp.Close() // the write error takes precedence
		return fmt.Errorf("writing snapshot %q: %w", path, err)
	}
	if err := tmp.Close(); err != nil {
		return fmt.Errorf("writing snapshot %q: %w", path, err)
	}
	if err := os.Rename(tmpPath, path); err != nil {
		return fmt.Errorf("writing snapshot %q: %w", path, err)
	}
	committed = true
	return nil
}
// Load reads the previously saved snapshot for target under dir and always
// returns a non-nil slice on success.
//
// an empty dir is rejected with the same error Save uses; without the guard
// the path would resolve against the working directory and could pick up an
// unrelated json file. a missing snapshot is not an error - it is the first
// run for that target, so an empty slice comes back and the caller treats
// every current finding as new. a present but unreadable or undecodable file
// is a real error: swallowing it would make a broken store look like a fresh
// one and flag everything as added.
func Load(dir, target string) ([]finding.Finding, error) {
	if dir == "" {
		return nil, fmt.Errorf("store: empty snapshot dir")
	}
	path := pathFor(dir, target)
	data, err := os.ReadFile(path)
	if err != nil {
		if os.IsNotExist(err) {
			return []finding.Finding{}, nil
		}
		return nil, fmt.Errorf("reading snapshot %q: %w", path, err)
	}
	var findings []finding.Finding
	if err := json.Unmarshal(data, &findings); err != nil {
		return nil, fmt.Errorf("decoding snapshot %q: %w", path, err)
	}
	// a literal json null decodes to a nil slice; normalize it so callers get
	// the same empty-but-non-nil value as for a missing snapshot.
	if findings == nil {
		findings = []finding.Finding{}
	}
	return findings, nil
}
// Diff returns the set-difference of two snapshots keyed on Finding.Key.
// added holds the findings of next whose Key does not occur in old; removed
// holds the findings of old whose Key does not occur in next. each result
// keeps the order of the slice it was drawn from, and a Key that repeats
// within one slice is reported only for its first occurrence. a result with no
// entries is nil.
func Diff(old, next []finding.Finding) (added, removed []finding.Finding) {
	// keySet indexes a snapshot by Key for O(1) membership checks.
	keySet := func(fs []finding.Finding) map[string]struct{} {
		set := make(map[string]struct{}, len(fs))
		for idx := range fs {
			set[fs[idx].Key] = struct{}{}
		}
		return set
	}
	// onlyIn walks src in order and collects the first finding for every Key
	// that is absent from other.
	onlyIn := func(src []finding.Finding, other map[string]struct{}) []finding.Finding {
		var out []finding.Finding
		emitted := make(map[string]struct{}, len(src))
		for idx := range src {
			key := src[idx].Key
			if _, present := other[key]; present {
				continue
			}
			if _, repeat := emitted[key]; repeat {
				continue
			}
			emitted[key] = struct{}{}
			out = append(out, src[idx])
		}
		return out
	}

	inOld, inNext := keySet(old), keySet(next)
	added = onlyIn(next, inOld)
	removed = onlyIn(old, inNext)
	return added, removed
}
+234
View File
@@ -0,0 +1,234 @@
/*
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
: :
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
: ▄█ █ █▀ · BSD 3-Clause License :
: :
: (c) 2022-2026 vmfunc, xyzeva, :
: lunchcat alumni & contributors :
: :
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
*/
package store
import (
"os"
"path/filepath"
"reflect"
"sort"
"testing"
"github.com/dropalldatabases/sif/internal/finding"
)
// sampleFindings returns a fresh two-element fixture shared by the round-trip
// and diff tests: an info-severity headers finding and a medium-severity cors
// finding on the same target. presumably this populates every Finding field -
// confirm against the finding package.
func sampleFindings() []finding.Finding {
	serverHeader := finding.Finding{
		Target:   "https://example.com",
		Module:   "headers",
		Severity: finding.SeverityInfo,
		Key:      "headers:Server",
		Title:    "Server",
		Raw:      "nginx",
	}
	nullOrigin := finding.Finding{
		Target:   "https://example.com",
		Module:   "cors",
		Severity: finding.SeverityMedium,
		Key:      "cors:https://example.com:null",
		Title:    "null origin reflected",
		Raw:      "allow-origin: null",
	}
	return []finding.Finding{serverHeader, nullOrigin}
}
// TestSaveLoadRoundTrip checks that findings written by Save come back from
// Load deep-equal to what went in.
func TestSaveLoadRoundTrip(t *testing.T) {
	const target = "https://example.com"
	stateDir := t.TempDir()
	want := sampleFindings()

	saveErr := Save(stateDir, target, want)
	if saveErr != nil {
		t.Fatalf("Save: %v", saveErr)
	}

	got, loadErr := Load(stateDir, target)
	if loadErr != nil {
		t.Fatalf("Load: %v", loadErr)
	}
	if reflect.DeepEqual(got, want) {
		return
	}
	t.Fatalf("round-trip mismatch:\n got=%#v\nwant=%#v", got, want)
}
// TestSaveCreatesNestedDir checks that Save creates a state dir that does not
// exist yet, including its missing parent.
func TestSaveCreatesNestedDir(t *testing.T) {
	stateDir := filepath.Join(t.TempDir(), "nested", "state")

	saveErr := Save(stateDir, "https://x.test", sampleFindings())
	if saveErr != nil {
		t.Fatalf("Save into missing dir: %v", saveErr)
	}

	fi, statErr := os.Stat(stateDir)
	switch {
	case statErr != nil:
		t.Fatalf("stat created dir: %v", statErr)
	case !fi.IsDir():
		t.Fatalf("expected %q to be a directory", stateDir)
	}
}
// TestSaveEmptyDirRejected checks that Save refuses an empty snapshot dir.
func TestSaveEmptyDirRejected(t *testing.T) {
	err := Save("", "https://x.test", sampleFindings())
	if err != nil {
		return
	}
	t.Fatal("Save with empty dir: want error, got nil")
}
// TestSaveEmptyFindingsRoundTrips checks that a run with no findings is a
// valid baseline: saving nil succeeds and Load hands back an empty, non-nil
// slice without error.
func TestSaveEmptyFindingsRoundTrips(t *testing.T) {
	const target = "https://empty.test"
	stateDir := t.TempDir()

	if saveErr := Save(stateDir, target, nil); saveErr != nil {
		t.Fatalf("Save nil findings: %v", saveErr)
	}

	got, loadErr := Load(stateDir, target)
	switch {
	case loadErr != nil:
		t.Fatalf("Load: %v", loadErr)
	case got == nil:
		t.Fatal("Load returned nil, want non-nil empty slice")
	case len(got) != 0:
		t.Fatalf("Load returned %d findings, want 0", len(got))
	}
}
// TestLoadMissingSnapshotIsEmpty checks that a target with no snapshot file
// loads as an empty, non-nil baseline rather than an error, so a first run
// reports every finding as added.
func TestLoadMissingSnapshotIsEmpty(t *testing.T) {
	got, loadErr := Load(t.TempDir(), "https://never-scanned.test")
	switch {
	case loadErr != nil:
		t.Fatalf("Load missing snapshot: %v", loadErr)
	case got == nil:
		t.Fatal("Load returned nil, want non-nil empty slice")
	case len(got) != 0:
		t.Fatalf("Load missing snapshot returned %d findings, want 0", len(got))
	}
}
func TestLoadCorruptSnapshotErrors(t *testing.T) {
// a present-but-garbage snapshot must surface loudly: treating it as empty
// would silently re-flag every finding as new on every run.
dir := t.TempDir()
const target = "https://corrupt.test"
path := filepath.Join(dir, sanitize(target)+snapshotExt)
if err := os.WriteFile(path, []byte("{not json"), snapshotFileMode); err != nil {
t.Fatalf("seeding corrupt snapshot: %v", err)
}
if _, err := Load(dir, target); err == nil {
t.Fatal("Load corrupt snapshot: want error, got nil")
}
}
// TestDiffAddedAndRemoved checks a mixed delta: the new snapshot keeps the
// headers finding, drops the cors finding and gains a takeover, so Diff must
// report exactly one added and one removed finding.
func TestDiffAddedAndRemoved(t *testing.T) {
	const (
		takeoverKey = "subdomain_takeover:old.example.com"
		corsKey     = "cors:https://example.com:null"
	)
	prev := sampleFindings()
	takeover := finding.Finding{
		Target:   "https://example.com",
		Module:   "subdomain_takeover",
		Severity: finding.SeverityHigh,
		Key:      "subdomain_takeover:old.example.com",
		Title:    "takeover: old.example.com",
		Raw:      "GitHub Pages",
	}
	// prev[0] is the unchanged headers finding; it belongs in neither delta.
	curr := []finding.Finding{prev[0], takeover}

	added, removed := Diff(prev, curr)

	if len(added) != 1 || added[0].Key != takeoverKey {
		t.Fatalf("added = %#v, want the takeover only", added)
	}
	if len(removed) != 1 || removed[0].Key != corsKey {
		t.Fatalf("removed = %#v, want the cors finding only", removed)
	}
}
// TestDiffNoChange checks that diffing a snapshot against itself yields an
// empty delta in both directions.
func TestDiffNoChange(t *testing.T) {
	snapshot := sampleFindings()
	added, removed := Diff(snapshot, snapshot)
	if len(added) == 0 && len(removed) == 0 {
		return
	}
	t.Fatalf("identical snapshots: added=%d removed=%d, want 0/0", len(added), len(removed))
}
// TestDiffFirstRunAllAdded checks that with no previous snapshot (nil old)
// every current finding is reported as added and nothing as removed.
func TestDiffFirstRunAllAdded(t *testing.T) {
	curr := sampleFindings()
	added, removed := Diff(nil, curr)

	if n := len(removed); n != 0 {
		t.Fatalf("first run removed=%d, want 0", n)
	}
	// compare as sorted key sets; this case is about membership, not order.
	gotKeys, wantKeys := keysOf(added), keysOf(curr)
	if !reflect.DeepEqual(gotKeys, wantKeys) {
		t.Fatalf("first run added keys=%v, want %v", gotKeys, wantKeys)
	}
}
// TestDiffDedupesRepeatedKey checks that a Key occurring twice in the new
// snapshot shows up once in added.
func TestDiffDedupesRepeatedKey(t *testing.T) {
	dup := sampleFindings()[0]
	added, _ := Diff(nil, []finding.Finding{dup, dup})
	if n := len(added); n != 1 {
		t.Fatalf("duplicate key reported %d times, want 1", n)
	}
}
// keysOf extracts the Key of every finding in fs and returns them sorted, so
// two slices can be compared without regard to order. the result is always
// non-nil, even for empty input.
func keysOf(fs []finding.Finding) []string {
	keys := make([]string, len(fs))
	for idx := range fs {
		keys[idx] = fs[idx].Key
	}
	sort.Strings(keys)
	return keys
}
// TestSanitizeNoTraversal pins sanitize's output for traversal-shaped and
// degenerate targets, and checks that each result is a single path component
// (filepath.Base leaves it unchanged).
func TestSanitizeNoTraversal(t *testing.T) {
	type sanitizeCase struct {
		in   string
		want string
	}
	cases := []sanitizeCase{
		{in: "https://example.com", want: "https_example_com"},
		{in: "../../etc/passwd", want: "etc_passwd"},
		{in: "a/b/c", want: "a_b_c"},
		{in: "....//....//x", want: "x"},
		{in: "", want: "target"},
		{in: "///", want: "target"},
		{in: "host:8443/path?q=1", want: "host_8443_path_q_1"},
	}
	for idx := range cases {
		tc := cases[idx]
		got := sanitize(tc.in)
		if got != tc.want {
			t.Errorf("sanitize(%q) = %q, want %q", tc.in, got, tc.want)
		}
		// a name that survives filepath.Base untouched holds no separator.
		if base := filepath.Base(got); base != got {
			t.Errorf("sanitize(%q) = %q escapes its component", tc.in, got)
		}
	}
}