diff --git a/README.md b/README.md index 5c2a158..530d041 100644 --- a/README.md +++ b/README.md @@ -188,6 +188,7 @@ sif has a modular architecture. modules are defined in yaml and can be extended | `-crawl` | web crawler (spider same-host links/scripts/forms) | | `-crawl-depth` | max crawl recursion depth (default 2) | | `-passive` | passive subdomain/url discovery (zero traffic to target) | +| `-probe` | live-host probe (status, title, server, redirect chain) | ### http options @@ -207,6 +208,22 @@ these apply to every outbound request across all scanners: a scanner that sets a header explicitly (e.g. an api key) always wins over the global default. +### report export + +write the run's findings out to a file for ci/cd or triage: + +| flag | description | +|------|-------------| +| `-sarif` | write a sarif 2.1.0 report to this file | +| `-markdown`, `-md` | write a markdown report to this file | + +```bash +# scan and emit both a sarif and markdown report +./sif -u https://example.com -headers -cors -sarif out.sarif -md out.md +``` + +sarif output is ingestable by github code scanning; markdown is a readable per-target summary. + ### yaml modules list available modules: diff --git a/docs/usage.md b/docs/usage.md index 55410ef..5bf504e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -242,6 +242,14 @@ keyless and zero traffic to the target itself - all lookups hit third-party feed ./sif -u https://example.com -passive ``` +### live-host probe + +`-probe` - check whether the target is alive and report its final status, page title, server header, content-length and the redirect chain it walked + +```bash +./sif -u https://example.com -probe +``` + ### whois lookup `-whois` - perform whois lookups @@ -363,6 +371,26 @@ cap outbound requests per second (0 = unlimited, default 0): ./sif -u https://example.com -rate-limit 20 ``` +## output options + +write the collected findings out to a file after the scan. both formats can be requested in the same run. 
+ +### -sarif + +write a sarif 2.1.0 report (one run, tool `sif`, one result per finding). ingestable by github code scanning and other sarif consumers: + +```bash +./sif -u https://example.com -headers -cors -sarif out.sarif +``` + +### -md, --markdown + +write a readable markdown report grouped by target, then by module: + +```bash +./sif -u https://example.com -headers -cors -md report.md +``` + ## api options ### -api diff --git a/internal/config/config.go b/internal/config/config.go index 95f683c..7eeea8d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -61,6 +61,9 @@ type Settings struct { Crawl bool CrawlDepth int Passive bool + Probe bool + SARIF string // path to write a sarif 2.1.0 report to ("" = off) + Markdown string // path to write a markdown report to ("" = off) Modules string // Comma-separated list of module IDs to run ModuleTags string // Run modules matching these tags AllModules bool // Run all loaded modules @@ -140,6 +143,7 @@ func Parse() *Settings { flagSet.BoolVar(&settings.Crawl, "crawl", false, "Enable web crawling (spider same-host links/scripts/forms)"), flagSet.IntVar(&settings.CrawlDepth, "crawl-depth", defaultCrawlDepth, "Max crawl recursion depth"), flagSet.BoolVar(&settings.Passive, "passive", false, "Enable passive subdomain/url discovery (zero traffic to target)"), + flagSet.BoolVar(&settings.Probe, "probe", false, "Probe the target for liveness (status, title, server, redirect chain)"), ) flagSet.CreateGroup("runtime", "Runtime", @@ -157,6 +161,11 @@ func Parse() *Settings { flagSet.IntVar(&settings.RateLimit, "rate-limit", 0, "Max requests per second (0 = unlimited)"), ) + flagSet.CreateGroup("output", "Output", + flagSet.StringVar(&settings.SARIF, "sarif", "", "Write a SARIF 2.1.0 report to this file"), + flagSet.StringVarP(&settings.Markdown, "markdown", "md", "", "Write a markdown report to this file"), + ) + flagSet.CreateGroup("api", "API", flagSet.BoolVar(&settings.ApiMode, "api", false, "Enable 
API mode. Only useful for internal lunchcat usage"), ) diff --git a/internal/report/markdown.go b/internal/report/markdown.go new file mode 100644 index 0000000..44b1bb1 --- /dev/null +++ b/internal/report/markdown.go @@ -0,0 +1,74 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package report + +import ( + "bytes" + "encoding/json" + "sort" + "strings" +) + +// Markdown renders results as a readable report grouped by target, then by +// module, with each module's finding pretty-printed as a json code block. +func Markdown(results []Result) []byte { + var b strings.Builder + b.WriteString("# sif scan report\n\n") + + // group module results under their target so the report reads target-first + // regardless of the order results came in. 
+ byTarget := make(map[string][]Result) + order := make([]string, 0) + for i := 0; i < len(results); i++ { + t := results[i].Target + if _, seen := byTarget[t]; !seen { + order = append(order, t) + } + byTarget[t] = append(byTarget[t], results[i]) + } + + for i := 0; i < len(order); i++ { + target := order[i] + b.WriteString("## ") + b.WriteString(target) + b.WriteString("\n\n") + + mods := byTarget[target] + // sort modules so the report is deterministic across runs + sort.SliceStable(mods, func(a, c int) bool { return mods[a].Module < mods[c].Module }) + + for j := 0; j < len(mods); j++ { + b.WriteString("### ") + b.WriteString(mods[j].Module) + b.WriteString("\n\n") + b.WriteString("```json\n") + b.WriteString(prettyJSON(mods[j].Data)) + b.WriteString("\n```\n\n") + } + } + + return []byte(b.String()) +} + +// prettyJSON re-indents the raw finding for readability; if it doesn't parse as +// json (shouldn't happen, but never trust it) the raw bytes are returned as-is. +func prettyJSON(raw json.RawMessage) string { + if len(raw) == 0 { + return "null" + } + var indented bytes.Buffer + if err := json.Indent(&indented, raw, "", " "); err != nil { + return string(raw) + } + return indented.String() +} diff --git a/internal/report/report.go b/internal/report/report.go new file mode 100644 index 0000000..e2999ea --- /dev/null +++ b/internal/report/report.go @@ -0,0 +1,26 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +// Package report serializes collected scan results to sarif and markdown. it's +// deliberately decoupled from the scan package: callers map their own results +// into report.Result, so report never imports a scanner type. 
+package report + +import "encoding/json" + +// Result is one module's output for one target. Data is whatever the scanner +// returned, carried as raw json so report stays free of scan types. +type Result struct { + Target string + Module string + Data json.RawMessage +} diff --git a/internal/report/report_test.go b/internal/report/report_test.go new file mode 100644 index 0000000..7e4762a --- /dev/null +++ b/internal/report/report_test.go @@ -0,0 +1,172 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package report + +import ( + "encoding/json" + "strings" + "testing" +) + +// fakeResults are a couple of representative findings across two targets used by +// every test below. +func fakeResults() []Result { + return []Result{ + {Target: "https://a.example.com", Module: "cors", Data: json.RawMessage(`{"severity":"high"}`)}, + {Target: "https://a.example.com", Module: "probe", Data: json.RawMessage(`{"status_code":200}`)}, + {Target: "https://b.example.com", Module: "redirect", Data: json.RawMessage(`{"parameter":"next"}`)}, + } +} + +func TestSARIF_ValidAndContainsFindings(t *testing.T) { + out, err := SARIF(fakeResults()) + if err != nil { + t.Fatalf("SARIF: %v", err) + } + + // the output must parse back into the sarif shape + var doc sarifLog + if err := json.Unmarshal(out, &doc); err != nil { + t.Fatalf("sarif output is not valid json: %v", err) + } + + if doc.Version != "2.1.0" { + t.Errorf("expected sarif version 2.1.0, got %q", doc.Version) + } + if len(doc.Runs) != 1 { + t.Fatalf("expected exactly one run, got %d", len(doc.Runs)) + } + run := doc.Runs[0] + if run.Tool.Driver.Name != "sif" { + t.Errorf("expected tool name sif, got %q", run.Tool.Driver.Name) + } + 
if len(run.Results) != 3 { + t.Fatalf("expected 3 results, got %d", len(run.Results)) + } + + // each finding's module id surfaces as the ruleId and its target as the uri + tests := []struct { + ruleID string + target string + }{ + {"cors", "https://a.example.com"}, + {"probe", "https://a.example.com"}, + {"redirect", "https://b.example.com"}, + } + for _, tt := range tests { + if !sarifHasResult(run.Results, tt.ruleID, tt.target) { + t.Errorf("expected sarif result rule=%q target=%q, got %+v", tt.ruleID, tt.target, run.Results) + } + } + + // rules list each module id once, deduped across targets + if len(run.Tool.Driver.Rules) != 3 { + t.Errorf("expected 3 deduped rules, got %d: %+v", len(run.Tool.Driver.Rules), run.Tool.Driver.Rules) + } +} + +func TestSARIF_DedupesRulesAcrossTargets(t *testing.T) { + // the same module on two targets must yield one rule but two results. + results := []Result{ + {Target: "https://a.example.com", Module: "cors", Data: json.RawMessage(`{}`)}, + {Target: "https://b.example.com", Module: "cors", Data: json.RawMessage(`{}`)}, + } + out, err := SARIF(results) + if err != nil { + t.Fatalf("SARIF: %v", err) + } + var doc sarifLog + if err := json.Unmarshal(out, &doc); err != nil { + t.Fatalf("invalid json: %v", err) + } + run := doc.Runs[0] + if len(run.Tool.Driver.Rules) != 1 { + t.Errorf("expected 1 deduped rule, got %d", len(run.Tool.Driver.Rules)) + } + if len(run.Results) != 2 { + t.Errorf("expected 2 results, got %d", len(run.Results)) + } +} + +func TestSARIF_Empty(t *testing.T) { + out, err := SARIF(nil) + if err != nil { + t.Fatalf("SARIF: %v", err) + } + var doc sarifLog + if err := json.Unmarshal(out, &doc); err != nil { + t.Fatalf("empty sarif is not valid json: %v", err) + } + if len(doc.Runs) != 1 { + t.Fatalf("expected one run even when empty, got %d", len(doc.Runs)) + } + if len(doc.Runs[0].Results) != 0 { + t.Errorf("expected no results, got %d", len(doc.Runs[0].Results)) + } +} + +func 
TestMarkdown_ContainsTargetsAndModules(t *testing.T) { + out := string(Markdown(fakeResults())) + + wants := []string{ + "# sif scan report", + "## https://a.example.com", + "## https://b.example.com", + "### cors", + "### probe", + "### redirect", + `"severity": "high"`, // re-indented finding body + `"parameter": "next"`, + } + for _, want := range wants { + if !strings.Contains(out, want) { + t.Errorf("markdown report missing %q\n---\n%s", want, out) + } + } +} + +func TestMarkdown_GroupsByTarget(t *testing.T) { + // a.example.com's two modules must both appear before b.example.com's header. + out := string(Markdown(fakeResults())) + aHeader := strings.Index(out, "## https://a.example.com") + bHeader := strings.Index(out, "## https://b.example.com") + if aHeader < 0 || bHeader < 0 { + t.Fatalf("missing target headers in:\n%s", out) + } + if aHeader > bHeader { + t.Errorf("expected target a before target b, got a=%d b=%d", aHeader, bHeader) + } + // both of a's modules sit between a's header and b's header + corsIdx := strings.Index(out, "### cors") + probeIdx := strings.Index(out, "### probe") + if corsIdx < aHeader || corsIdx > bHeader || probeIdx < aHeader || probeIdx > bHeader { + t.Errorf("expected a's modules grouped under a, cors=%d probe=%d (a=%d b=%d)", corsIdx, probeIdx, aHeader, bHeader) + } +} + +// sarifHasResult reports whether any result carries the given rule id and target +// uri, the pairing that proves a finding survived serialization. 
+func sarifHasResult(results []sarifResult, ruleID, target string) bool { + for i := 0; i < len(results); i++ { + r := results[i] + if r.RuleID != ruleID { + continue + } + for j := 0; j < len(r.Locations); j++ { + if r.Locations[j].PhysicalLocation.ArtifactLocation.URI == target { + return true + } + } + } + return false +} diff --git a/internal/report/sarif.go b/internal/report/sarif.go new file mode 100644 index 0000000..fc2baee --- /dev/null +++ b/internal/report/sarif.go @@ -0,0 +1,133 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package report + +import ( + "encoding/json" + "fmt" +) + +// sarif format/version constants pinned to the 2.1.0 schema so the output is +// ingestable by github code scanning and other sarif consumers. +const ( + sarifVersion = "2.1.0" + sarifSchema = "https://json.schemastore.org/sarif-2.1.0.json" + toolName = "sif" +) + +// sarifLog is the minimal valid 2.1.0 shape: one run from one tool. 
type sarifLog struct {
	Schema  string     `json:"$schema"`
	Version string     `json:"version"`
	Runs    []sarifRun `json:"runs"`
}

// the remaining shapes nest inside sarifLog, outermost first: a run holds the
// tool and its results, a result points at a location, a location at a uri.
type (
	// sarifRun pairs the tool description with the results it reported.
	sarifRun struct {
		Tool    sarifTool     `json:"tool"`
		Results []sarifResult `json:"results"`
	}

	// sarifTool wraps the driver, the only tool component emitted here.
	sarifTool struct {
		Driver sarifDriver `json:"driver"`
	}

	// sarifDriver names the tool and lists the rules its results refer to.
	sarifDriver struct {
		Name  string      `json:"name"`
		Rules []sarifRule `json:"rules"`
	}

	// sarifRule identifies one rule by its id and nothing else.
	sarifRule struct {
		ID string `json:"id"`
	}

	// sarifResult is a single finding: which rule, how severe, what to show,
	// and where it was observed.
	sarifResult struct {
		RuleID    string          `json:"ruleId"`
		Level     string          `json:"level"`
		Message   sarifMessage    `json:"message"`
		Locations []sarifLocation `json:"locations"`
	}

	// sarifMessage carries the plain-text description of a result.
	sarifMessage struct {
		Text string `json:"text"`
	}

	// sarifLocation wraps the physical location of a result.
	sarifLocation struct {
		PhysicalLocation sarifPhysicalLocation `json:"physicalLocation"`
	}

	// sarifPhysicalLocation wraps the artifact the result was found in.
	sarifPhysicalLocation struct {
		ArtifactLocation sarifArtifactLocation `json:"artifactLocation"`
	}

	// sarifArtifactLocation holds the uri of that artifact.
	sarifArtifactLocation struct {
		URI string `json:"uri"`
	}
)

// sarifLevel is the level stamped on every result. sif results don't share a
// severity field, so the neutral "warning" is used across the board.
const sarifLevel = "warning"

// SARIF serializes results to a minimal valid sarif 2.1.0 log. Each module
// result becomes one sarif result tagged with its module id (the rule) and the
// target uri, with the raw module data inlined into the message for context.
+func SARIF(results []Result) ([]byte, error) { + sarifResults := make([]sarifResult, 0, len(results)) + ruleSet := make(map[string]struct{}, len(results)) + + for i := 0; i < len(results); i++ { + res := results[i] + ruleSet[res.Module] = struct{}{} + + sarifResults = append(sarifResults, sarifResult{ + RuleID: res.Module, + Level: sarifLevel, + Message: sarifMessage{Text: messageFor(res)}, + Locations: []sarifLocation{{ + PhysicalLocation: sarifPhysicalLocation{ + ArtifactLocation: sarifArtifactLocation{URI: res.Target}, + }, + }}, + }) + } + + // rules must list each id exactly once; build it from the set so duplicate + // modules across targets don't duplicate the rule. + rules := make([]sarifRule, 0, len(ruleSet)) + for id := range ruleSet { + rules = append(rules, sarifRule{ID: id}) + } + + doc := sarifLog{ + Schema: sarifSchema, + Version: sarifVersion, + Runs: []sarifRun{{ + Tool: sarifTool{Driver: sarifDriver{Name: toolName, Rules: rules}}, + Results: sarifResults, + }}, + } + + out, err := json.MarshalIndent(doc, "", " ") + if err != nil { + return nil, fmt.Errorf("marshal sarif: %w", err) + } + return out, nil +} + +// messageFor builds a human-readable result message: the module id plus the raw +// finding json so a sarif viewer shows what was actually found. 
+func messageFor(res Result) string { + if len(res.Data) == 0 { + return fmt.Sprintf("%s finding on %s", res.Module, res.Target) + } + return fmt.Sprintf("%s finding on %s: %s", res.Module, res.Target, string(res.Data)) +} diff --git a/internal/scan/integration_test.go b/internal/scan/integration_test.go index 0e86075..93f46b8 100644 --- a/internal/scan/integration_test.go +++ b/internal/scan/integration_test.go @@ -245,6 +245,22 @@ func TestIntegrationXSS(t *testing.T) { } } +func TestIntegrationProbe(t *testing.T) { + srv := newVulnApp() + defer srv.Close() + + result, err := Probe(srv.URL, 5*time.Second, "") + if err != nil { + t.Fatalf("Probe: %v", err) + } + if result == nil || !result.Alive { + t.Fatalf("expected the vuln app to be alive, got %+v", result) + } + if result.StatusCode != http.StatusOK { + t.Errorf("expected 200 from the homepage, got %d", result.StatusCode) + } +} + func TestIntegrationPorts(t *testing.T) { // a real listener stands in for an open port; a tiny server hands its number // to Ports via the commonPorts wordlist. diff --git a/internal/scan/probe.go b/internal/scan/probe.go new file mode 100644 index 0000000..0d78cce --- /dev/null +++ b/internal/scan/probe.go @@ -0,0 +1,148 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package scan + +import ( + "context" + "fmt" + "io" + "net/http" + "regexp" + "strings" + "time" + + "github.com/dropalldatabases/sif/internal/httpx" + "github.com/dropalldatabases/sif/internal/logger" + "github.com/dropalldatabases/sif/internal/output" +) + +// ProbeResult is the httpx-style liveness snapshot for one target: did it answer, +// where did it land, and the few fingerprint fields worth keeping. 
+type ProbeResult struct { + URL string `json:"url"` + Alive bool `json:"alive"` + StatusCode int `json:"status_code"` + Title string `json:"title,omitempty"` + Server string `json:"server,omitempty"` + ContentLength int64 `json:"content_length"` + RedirectChain []string `json:"redirect_chain,omitempty"` +} + +// probeMaxRedirects caps the chain we'll follow so a redirect loop can't run +// forever; matches httpx's default depth. +const probeMaxRedirects = 10 + +// probeMaxBody bounds the body we read to extract a