diff --git a/README.md b/README.md index 5c2a158..530d041 100644 --- a/README.md +++ b/README.md @@ -188,6 +188,7 @@ sif has a modular architecture. modules are defined in yaml and can be extended | `-crawl` | web crawler (spider same-host links/scripts/forms) | | `-crawl-depth` | max crawl recursion depth (default 2) | | `-passive` | passive subdomain/url discovery (zero traffic to target) | +| `-probe` | live-host probe (status, title, server, redirect chain) | ### http options @@ -207,6 +208,22 @@ these apply to every outbound request across all scanners: a scanner that sets a header explicitly (e.g. an api key) always wins over the global default. +### report export + +write the run's findings out to a file for ci/cd or triage: + +| flag | description | +|------|-------------| +| `-sarif` | write a sarif 2.1.0 report to this file | +| `-markdown`, `-md` | write a markdown report to this file | + +```bash +# scan and emit both a sarif and markdown report +./sif -u https://example.com -headers -cors -sarif out.sarif -md out.md +``` + +sarif output is ingestable by github code scanning; markdown is a readable per-target summary. + ### yaml modules list available modules: diff --git a/docs/usage.md b/docs/usage.md index 55410ef..5bf504e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -242,6 +242,14 @@ keyless and zero traffic to the target itself - all lookups hit third-party feed ./sif -u https://example.com -passive ``` +### live-host probe + +`-probe` - check whether the target is alive and report its final status, page title, server header, content-length and the redirect chain it walked + +```bash +./sif -u https://example.com -probe +``` + ### whois lookup `-whois` - perform whois lookups @@ -363,6 +371,26 @@ cap outbound requests per second (0 = unlimited, default 0): ./sif -u https://example.com -rate-limit 20 ``` +## output options + +write the collected findings out to a file after the scan. both formats can be requested in the same run. + +### -sarif + +write a sarif 2.1.0 report (one run, tool `sif`, one result per finding). ingestable by github code scanning and other sarif consumers: + +```bash +./sif -u https://example.com -headers -cors -sarif out.sarif +``` + +### -md, --markdown + +write a readable markdown report grouped by target, then by module: + +```bash +./sif -u https://example.com -headers -cors -md report.md +``` + ## api options ### -api diff --git a/internal/config/config.go b/internal/config/config.go index 95f683c..7eeea8d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -61,6 +61,9 @@ type Settings struct { Crawl bool CrawlDepth int Passive bool + Probe bool + SARIF string // path to write a sarif 2.1.0 report to ("" = off) + Markdown string // path to write a markdown report to ("" = off) Modules string // Comma-separated list of module IDs to run ModuleTags string // Run modules matching these tags AllModules bool // Run all loaded modules @@ -140,6 +143,7 @@ func Parse() *Settings { flagSet.BoolVar(&settings.Crawl, "crawl", false, "Enable web crawling (spider same-host links/scripts/forms)"), flagSet.IntVar(&settings.CrawlDepth, "crawl-depth", defaultCrawlDepth, "Max crawl recursion depth"), flagSet.BoolVar(&settings.Passive, "passive", false, "Enable passive subdomain/url discovery (zero traffic to target)"), + flagSet.BoolVar(&settings.Probe, "probe", false, "Probe the target for liveness (status, title, server, redirect chain)"), ) flagSet.CreateGroup("runtime", "Runtime", @@ -157,6 +161,11 @@ func Parse() *Settings { flagSet.IntVar(&settings.RateLimit, "rate-limit", 0, "Max requests per second (0 = unlimited)"), ) + flagSet.CreateGroup("output", "Output", + flagSet.StringVar(&settings.SARIF, "sarif", "", "Write a SARIF 2.1.0 report to this file"), + flagSet.StringVarP(&settings.Markdown, "markdown", "md", "", "Write a markdown report to this file"), + ) + flagSet.CreateGroup("api", "API", flagSet.BoolVar(&settings.ApiMode, "api", false, "Enable API mode. Only useful for internal lunchcat usage"), ) diff --git a/internal/report/markdown.go b/internal/report/markdown.go new file mode 100644 index 0000000..44b1bb1 --- /dev/null +++ b/internal/report/markdown.go @@ -0,0 +1,74 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package report + +import ( + "bytes" + "encoding/json" + "sort" + "strings" +) + +// Markdown renders results as a readable report grouped by target, then by +// module, with each module's finding pretty-printed as a json code block. +func Markdown(results []Result) []byte { + var b strings.Builder + b.WriteString("# sif scan report\n\n") + + // group module results under their target so the report reads target-first + // regardless of the order results came in. + byTarget := make(map[string][]Result) + order := make([]string, 0) + for i := 0; i < len(results); i++ { + t := results[i].Target + if _, seen := byTarget[t]; !seen { + order = append(order, t) + } + byTarget[t] = append(byTarget[t], results[i]) + } + + for i := 0; i < len(order); i++ { + target := order[i] + b.WriteString("## ") + b.WriteString(target) + b.WriteString("\n\n") + + mods := byTarget[target] + // sort modules so the report is deterministic across runs + sort.SliceStable(mods, func(a, c int) bool { return mods[a].Module < mods[c].Module }) + + for j := 0; j < len(mods); j++ { + b.WriteString("### ") + b.WriteString(mods[j].Module) + b.WriteString("\n\n") + b.WriteString("```json\n") + b.WriteString(prettyJSON(mods[j].Data)) + b.WriteString("\n```\n\n") + } + } + + return []byte(b.String()) +} + +// prettyJSON re-indents the raw finding for readability; if it doesn't parse as +// json (shouldn't happen, but never trust it) the raw bytes are returned as-is. +func prettyJSON(raw json.RawMessage) string { + if len(raw) == 0 { + return "null" + } + var indented bytes.Buffer + if err := json.Indent(&indented, raw, "", " "); err != nil { + return string(raw) + } + return indented.String() +} diff --git a/internal/report/report.go b/internal/report/report.go new file mode 100644 index 0000000..e2999ea --- /dev/null +++ b/internal/report/report.go @@ -0,0 +1,26 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +// Package report serializes collected scan results to sarif and markdown. it's +// deliberately decoupled from the scan package: callers map their own results +// into report.Result, so report never imports a scanner type. +package report + +import "encoding/json" + +// Result is one module's output for one target. Data is whatever the scanner +// returned, carried as raw json so report stays free of scan types. +type Result struct { + Target string + Module string + Data json.RawMessage +} diff --git a/internal/report/report_test.go b/internal/report/report_test.go new file mode 100644 index 0000000..7e4762a --- /dev/null +++ b/internal/report/report_test.go @@ -0,0 +1,172 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package report + +import ( + "encoding/json" + "strings" + "testing" +) + +// fakeResults are a couple of representative findings across two targets used by +// every test below. +func fakeResults() []Result { + return []Result{ + {Target: "https://a.example.com", Module: "cors", Data: json.RawMessage(`{"severity":"high"}`)}, + {Target: "https://a.example.com", Module: "probe", Data: json.RawMessage(`{"status_code":200}`)}, + {Target: "https://b.example.com", Module: "redirect", Data: json.RawMessage(`{"parameter":"next"}`)}, + } +} + +func TestSARIF_ValidAndContainsFindings(t *testing.T) { + out, err := SARIF(fakeResults()) + if err != nil { + t.Fatalf("SARIF: %v", err) + } + + // the output must parse back into the sarif shape + var doc sarifLog + if err := json.Unmarshal(out, &doc); err != nil { + t.Fatalf("sarif output is not valid json: %v", err) + } + + if doc.Version != "2.1.0" { + t.Errorf("expected sarif version 2.1.0, got %q", doc.Version) + } + if len(doc.Runs) != 1 { + t.Fatalf("expected exactly one run, got %d", len(doc.Runs)) + } + run := doc.Runs[0] + if run.Tool.Driver.Name != "sif" { + t.Errorf("expected tool name sif, got %q", run.Tool.Driver.Name) + } + if len(run.Results) != 3 { + t.Fatalf("expected 3 results, got %d", len(run.Results)) + } + + // each finding's module id surfaces as the ruleId and its target as the uri + tests := []struct { + ruleID string + target string + }{ + {"cors", "https://a.example.com"}, + {"probe", "https://a.example.com"}, + {"redirect", "https://b.example.com"}, + } + for _, tt := range tests { + if !sarifHasResult(run.Results, tt.ruleID, tt.target) { + t.Errorf("expected sarif result rule=%q target=%q, got %+v", tt.ruleID, tt.target, run.Results) + } + } + + // rules list each module id once, deduped across targets + if len(run.Tool.Driver.Rules) != 3 { + t.Errorf("expected 3 deduped rules, got %d: %+v", len(run.Tool.Driver.Rules), run.Tool.Driver.Rules) + } +} + +func TestSARIF_DedupesRulesAcrossTargets(t *testing.T) { + // the same module on two targets must yield one rule but two results. + results := []Result{ + {Target: "https://a.example.com", Module: "cors", Data: json.RawMessage(`{}`)}, + {Target: "https://b.example.com", Module: "cors", Data: json.RawMessage(`{}`)}, + } + out, err := SARIF(results) + if err != nil { + t.Fatalf("SARIF: %v", err) + } + var doc sarifLog + if err := json.Unmarshal(out, &doc); err != nil { + t.Fatalf("invalid json: %v", err) + } + run := doc.Runs[0] + if len(run.Tool.Driver.Rules) != 1 { + t.Errorf("expected 1 deduped rule, got %d", len(run.Tool.Driver.Rules)) + } + if len(run.Results) != 2 { + t.Errorf("expected 2 results, got %d", len(run.Results)) + } +} + +func TestSARIF_Empty(t *testing.T) { + out, err := SARIF(nil) + if err != nil { + t.Fatalf("SARIF: %v", err) + } + var doc sarifLog + if err := json.Unmarshal(out, &doc); err != nil { + t.Fatalf("empty sarif is not valid json: %v", err) + } + if len(doc.Runs) != 1 { + t.Fatalf("expected one run even when empty, got %d", len(doc.Runs)) + } + if len(doc.Runs[0].Results) != 0 { + t.Errorf("expected no results, got %d", len(doc.Runs[0].Results)) + } +} + +func TestMarkdown_ContainsTargetsAndModules(t *testing.T) { + out := string(Markdown(fakeResults())) + + wants := []string{ + "# sif scan report", + "## https://a.example.com", + "## https://b.example.com", + "### cors", + "### probe", + "### redirect", + `"severity": "high"`, // re-indented finding body + `"parameter": "next"`, + } + for _, want := range wants { + if !strings.Contains(out, want) { + t.Errorf("markdown report missing %q\n---\n%s", want, out) + } + } +} + +func TestMarkdown_GroupsByTarget(t *testing.T) { + // a.example.com's two modules must both appear before b.example.com's header. + out := string(Markdown(fakeResults())) + aHeader := strings.Index(out, "## https://a.example.com") + bHeader := strings.Index(out, "## https://b.example.com") + if aHeader < 0 || bHeader < 0 { + t.Fatalf("missing target headers in:\n%s", out) + } + if aHeader > bHeader { + t.Errorf("expected target a before target b, got a=%d b=%d", aHeader, bHeader) + } + // both of a's modules sit between a's header and b's header + corsIdx := strings.Index(out, "### cors") + probeIdx := strings.Index(out, "### probe") + if corsIdx < aHeader || corsIdx > bHeader || probeIdx < aHeader || probeIdx > bHeader { + t.Errorf("expected a's modules grouped under a, cors=%d probe=%d (a=%d b=%d)", corsIdx, probeIdx, aHeader, bHeader) + } +} + +// sarifHasResult reports whether any result carries the given rule id and target +// uri, the pairing that proves a finding survived serialization. +func sarifHasResult(results []sarifResult, ruleID, target string) bool { + for i := 0; i < len(results); i++ { + r := results[i] + if r.RuleID != ruleID { + continue + } + for j := 0; j < len(r.Locations); j++ { + if r.Locations[j].PhysicalLocation.ArtifactLocation.URI == target { + return true + } + } + } + return false +} diff --git a/internal/report/sarif.go b/internal/report/sarif.go new file mode 100644 index 0000000..fc2baee --- /dev/null +++ b/internal/report/sarif.go @@ -0,0 +1,133 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package report + +import ( + "encoding/json" + "fmt" +) + +// sarif format/version constants pinned to the 2.1.0 schema so the output is +// ingestable by github code scanning and other sarif consumers. +const ( + sarifVersion = "2.1.0" + sarifSchema = "https://json.schemastore.org/sarif-2.1.0.json" + toolName = "sif" +) + +// sarifLog is the minimal valid 2.1.0 shape: one run from one tool. +type sarifLog struct { + Schema string `json:"$schema"` + Version string `json:"version"` + Runs []sarifRun `json:"runs"` +} + +type sarifRun struct { + Tool sarifTool `json:"tool"` + Results []sarifResult `json:"results"` +} + +type sarifTool struct { + Driver sarifDriver `json:"driver"` +} + +type sarifDriver struct { + Name string `json:"name"` + Rules []sarifRule `json:"rules"` +} + +type sarifRule struct { + ID string `json:"id"` +} + +type sarifResult struct { + RuleID string `json:"ruleId"` + Level string `json:"level"` + Message sarifMessage `json:"message"` + Locations []sarifLocation `json:"locations"` +} + +type sarifMessage struct { + Text string `json:"text"` +} + +type sarifLocation struct { + PhysicalLocation sarifPhysicalLocation `json:"physicalLocation"` +} + +type sarifPhysicalLocation struct { + ArtifactLocation sarifArtifactLocation `json:"artifactLocation"` +} + +type sarifArtifactLocation struct { + URI string `json:"uri"` +} + +// sarifLevel is the default severity for findings; sif results don't carry a +// uniform severity field, so "warning" is the neutral middle ground. +const sarifLevel = "warning" + +// SARIF serializes results to a minimal valid sarif 2.1.0 log. Each module +// result becomes one sarif result tagged with its module id (the rule) and the +// target uri, with the raw module data inlined into the message for context. +func SARIF(results []Result) ([]byte, error) { + sarifResults := make([]sarifResult, 0, len(results)) + ruleSet := make(map[string]struct{}, len(results)) + + for i := 0; i < len(results); i++ { + res := results[i] + ruleSet[res.Module] = struct{}{} + + sarifResults = append(sarifResults, sarifResult{ + RuleID: res.Module, + Level: sarifLevel, + Message: sarifMessage{Text: messageFor(res)}, + Locations: []sarifLocation{{ + PhysicalLocation: sarifPhysicalLocation{ + ArtifactLocation: sarifArtifactLocation{URI: res.Target}, + }, + }}, + }) + } + + // rules must list each id exactly once; build it from the set so duplicate + // modules across targets don't duplicate the rule. + rules := make([]sarifRule, 0, len(ruleSet)) + for id := range ruleSet { + rules = append(rules, sarifRule{ID: id}) + } + + doc := sarifLog{ + Schema: sarifSchema, + Version: sarifVersion, + Runs: []sarifRun{{ + Tool: sarifTool{Driver: sarifDriver{Name: toolName, Rules: rules}}, + Results: sarifResults, + }}, + } + + out, err := json.MarshalIndent(doc, "", " ") + if err != nil { + return nil, fmt.Errorf("marshal sarif: %w", err) + } + return out, nil +} + +// messageFor builds a human-readable result message: the module id plus the raw +// finding json so a sarif viewer shows what was actually found. +func messageFor(res Result) string { + if len(res.Data) == 0 { + return fmt.Sprintf("%s finding on %s", res.Module, res.Target) + } + return fmt.Sprintf("%s finding on %s: %s", res.Module, res.Target, string(res.Data)) +} diff --git a/internal/scan/integration_test.go b/internal/scan/integration_test.go index 0e86075..93f46b8 100644 --- a/internal/scan/integration_test.go +++ b/internal/scan/integration_test.go @@ -245,6 +245,22 @@ func TestIntegrationXSS(t *testing.T) { } } +func TestIntegrationProbe(t *testing.T) { + srv := newVulnApp() + defer srv.Close() + + result, err := Probe(srv.URL, 5*time.Second, "") + if err != nil { + t.Fatalf("Probe: %v", err) + } + if result == nil || !result.Alive { + t.Fatalf("expected the vuln app to be alive, got %+v", result) + } + if result.StatusCode != http.StatusOK { + t.Errorf("expected 200 from the homepage, got %d", result.StatusCode) + } +} + func TestIntegrationPorts(t *testing.T) { // a real listener stands in for an open port; a tiny server hands its number // to Ports via the commonPorts wordlist. diff --git a/internal/scan/probe.go b/internal/scan/probe.go new file mode 100644 index 0000000..0d78cce --- /dev/null +++ b/internal/scan/probe.go @@ -0,0 +1,148 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package scan + +import ( + "context" + "fmt" + "io" + "net/http" + "regexp" + "strings" + "time" + + "github.com/dropalldatabases/sif/internal/httpx" + "github.com/dropalldatabases/sif/internal/logger" + "github.com/dropalldatabases/sif/internal/output" +) + +// ProbeResult is the httpx-style liveness snapshot for one target: did it answer, +// where did it land, and the few fingerprint fields worth keeping. +type ProbeResult struct { + URL string `json:"url"` + Alive bool `json:"alive"` + StatusCode int `json:"status_code"` + Title string `json:"title,omitempty"` + Server string `json:"server,omitempty"` + ContentLength int64 `json:"content_length"` + RedirectChain []string `json:"redirect_chain,omitempty"` +} + +// probeMaxRedirects caps the chain we'll follow so a redirect loop can't run +// forever; matches httpx's default depth. +const probeMaxRedirects = 10 + +// probeMaxBody bounds the body we read to extract a (64KB) so a hostile +// or huge response can't exhaust memory. +const probeMaxBody = 64 * 1024 + +// titleRe pulls the text out of the first <title>; DOTALL so a title spanning +// lines is still caught. +var titleRe = regexp.MustCompile(`(?is)<title[^>]*>(.*?)`) + +// Probe checks whether the target is alive and reports its final status, page +// title, Server header, content-length and the redirect chain it walked. +func Probe(targetURL string, timeout time.Duration, logdir string) (*ProbeResult, error) { + log := output.Module("PROBE") + log.Start() + + sanitizedURL := stripScheme(targetURL) + + if logdir != "" { + if err := logger.WriteHeader(sanitizedURL, logdir, "Live-host probe"); err != nil { + log.Error("error creating log file: %v", err) + return nil, fmt.Errorf("create probe log: %w", err) + } + } + + // follow redirects but record every hop; the chain is half the value of a + // probe. capping at probeMaxRedirects stops a loop from spinning forever. + chain := make([]string, 0, 4) + client := httpx.Client(timeout) + client.CheckRedirect = func(req *http.Request, via []*http.Request) error { + if len(via) >= probeMaxRedirects { + return fmt.Errorf("stopped after %d redirects", probeMaxRedirects) + } + chain = append(chain, req.URL.String()) + return nil + } + + req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, targetURL, http.NoBody) + if err != nil { + return nil, fmt.Errorf("build probe request: %w", err) + } + + resp, err := client.Do(req) + if err != nil { + // a transport error means the host didn't answer; that's a dead probe, + // not a tool failure, so report it rather than bailing. + log.Warn("%s is dead: %v", output.Highlight.Render(sanitizedURL), err) + if logdir != "" { + logger.Write(sanitizedURL, logdir, fmt.Sprintf("dead: %v\n", err)) + } + result := &ProbeResult{URL: targetURL, Alive: false, RedirectChain: chain} + log.Complete(0, "alive") + return result, nil + } + defer resp.Body.Close() + + body, err := io.ReadAll(io.LimitReader(resp.Body, probeMaxBody)) + if err != nil { + return nil, fmt.Errorf("read probe body: %w", err) + } + + result := &ProbeResult{ + URL: targetURL, + Alive: true, + StatusCode: resp.StatusCode, + Title: extractTitle(body), + Server: resp.Header.Get("Server"), + ContentLength: resp.ContentLength, + RedirectChain: chain, + } + + log.Info("%s [%s] %s", + output.Status.Render(fmt.Sprintf("%d", result.StatusCode)), + output.Highlight.Render(result.Title), + output.Muted.Render(result.Server)) + if len(chain) > 0 { + log.Info("redirect chain: %s", strings.Join(chain, " -> ")) + } + + if logdir != "" { + logger.Write(sanitizedURL, logdir, + fmt.Sprintf("alive status=%d title=%q server=%q length=%d\n", + result.StatusCode, result.Title, result.Server, result.ContentLength)) + if len(chain) > 0 { + logger.Write(sanitizedURL, logdir, "redirect chain: "+strings.Join(chain, " -> ")+"\n") + } + } + + log.Complete(1, "alive") + return result, nil +} + +// extractTitle returns the trimmed text of the first in body, or "" when +// there isn't one. +func extractTitle(body []byte) string { + m := titleRe.FindSubmatch(body) + if len(m) < 2 { + return "" + } + return strings.TrimSpace(string(m[1])) +} + +// ResultType identifies probe results for the result registry. +func (r *ProbeResult) ResultType() string { return "probe" } + +var _ ScanResult = (*ProbeResult)(nil) diff --git a/internal/scan/probe_test.go b/internal/scan/probe_test.go new file mode 100644 index 0000000..246e8db --- /dev/null +++ b/internal/scan/probe_test.go @@ -0,0 +1,133 @@ +/* +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +: : +: █▀ █ █▀▀ · Blazing-fast pentesting suite : +: ▄█ █ █▀ · BSD 3-Clause License : +: : +: (c) 2022-2026 vmfunc, xyzeva, : +: lunchcat alumni & contributors : +: : +·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━· +*/ + +package scan + +import ( + "net/http" + "net/http/httptest" + "testing" + "time" +) + +func TestProbe_TitleServerStatus(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Server", "nginx/1.25.3") + w.WriteHeader(http.StatusOK) + w.Write([]byte("<html><head><title> Welcome Home hi")) + })) + defer srv.Close() + + result, err := Probe(srv.URL, 5*time.Second, "") + if err != nil { + t.Fatalf("Probe: %v", err) + } + if !result.Alive { + t.Fatalf("expected alive, got %+v", result) + } + if result.StatusCode != http.StatusOK { + t.Errorf("expected status 200, got %d", result.StatusCode) + } + // title text is trimmed of surrounding whitespace + if result.Title != "Welcome Home" { + t.Errorf("expected trimmed title, got %q", result.Title) + } + if result.Server != "nginx/1.25.3" { + t.Errorf("expected server header, got %q", result.Server) + } +} + +func TestProbe_RedirectChain(t *testing.T) { + // /a -> /b -> /c(final); the chain should record both intermediate hops the + // client followed before landing on the final 200. + mux := http.NewServeMux() + mux.HandleFunc("/a", func(w http.ResponseWriter, r *http.Request) { + http.Redirect(w, r, "/b", http.StatusFound) + }) + mux.HandleFunc("/b", func(w http.ResponseWriter, r *http.Request) { + http.Redirect(w, r, "/c", http.StatusMovedPermanently) + }) + mux.HandleFunc("/c", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte("final")) + }) + srv := httptest.NewServer(mux) + defer srv.Close() + + result, err := Probe(srv.URL+"/a", 5*time.Second, "") + if err != nil { + t.Fatalf("Probe: %v", err) + } + if !result.Alive || result.StatusCode != http.StatusOK { + t.Fatalf("expected alive 200 after redirects, got %+v", result) + } + if result.Title != "final" { + t.Errorf("expected title of final hop, got %q", result.Title) + } + // two hops were followed (/b and /c are the urls requested after the first) + if len(result.RedirectChain) != 2 { + t.Fatalf("expected 2 redirect hops, got %d: %v", len(result.RedirectChain), result.RedirectChain) + } + if !hasSuffix(result.RedirectChain[0], "/b") || !hasSuffix(result.RedirectChain[1], "/c") { + t.Errorf("expected chain to walk /b then /c, got %v", result.RedirectChain) + } +} + +func TestProbe_DeadHost(t *testing.T) { + // a server we immediately close so the dial fails; a dead host is a reported + // result, not an error. + srv := httptest.NewServer(http.HandlerFunc(func(http.ResponseWriter, *http.Request) {})) + deadURL := srv.URL + srv.Close() + + result, err := Probe(deadURL, 2*time.Second, "") + if err != nil { + t.Fatalf("Probe should not error on a dead host: %v", err) + } + if result.Alive { + t.Errorf("expected dead host, got %+v", result) + } +} + +func TestProbe_ExtractTitle(t *testing.T) { + tests := []struct { + name string + body string + want string + }{ + {"simple", "hello", "hello"}, + {"trimmed", " spaced ", "spaced"}, + {"attrs", `attr`, "attr"}, + {"multiline", "line one\nline two", "line one\nline two"}, + {"none", "no title", ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := extractTitle([]byte(tt.body)) + if got != tt.want { + t.Errorf("extractTitle(%q) = %q, want %q", tt.body, got, tt.want) + } + }) + } +} + +func TestProbeResult_ResultType(t *testing.T) { + r := &ProbeResult{} + if r.ResultType() != "probe" { + t.Errorf("expected result type 'probe', got %q", r.ResultType()) + } +} + +// hasSuffix is a tiny local helper so the redirect-chain assertions read clearly. +func hasSuffix(s, suffix string) bool { + return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix +} diff --git a/man/sif.1 b/man/sif.1 index f763167..e2f2199 100644 --- a/man/sif.1 +++ b/man/sif.1 @@ -131,6 +131,9 @@ max crawl recursion depth (default 2). .B \-passive passive subdomain and historical url discovery from third\-party feeds (zero traffic to the target). .TP +.B \-probe +live\-host probe; reports liveness, final status, page title, server header and the redirect chain. +.TP .B \-noscan skip the base url scan (robots.txt, etc). .SH OPTIONS @@ -162,6 +165,12 @@ cookie header to send with every request. .BR \-rate\-limit " \fIn\fR" cap outbound requests per second (0 = unlimited, default 0). .TP +.BR \-sarif " \fIfile\fR" +write a sarif 2.1.0 report of the run to \fIfile\fR. +.TP +.BR \-md ", " \-\-markdown " \fIfile\fR" +write a markdown report of the run to \fIfile\fR. +.TP .B \-api emit json results and suppress the interactive output. .SH MODULES diff --git a/sif.go b/sif.go index 90d0174..3f1746c 100644 --- a/sif.go +++ b/sif.go @@ -29,6 +29,7 @@ import ( "github.com/dropalldatabases/sif/internal/logger" "github.com/dropalldatabases/sif/internal/modules" "github.com/dropalldatabases/sif/internal/output" + "github.com/dropalldatabases/sif/internal/report" "github.com/dropalldatabases/sif/internal/scan" "github.com/dropalldatabases/sif/internal/scan/builtin" "github.com/dropalldatabases/sif/internal/scan/frameworks" @@ -46,6 +47,10 @@ type App struct { // Version is set by main to the resolved build version and shown on the banner. var Version = "dev" +// reportFileMode is the permission applied to written report files: owner +// read/write, group/other read. reports aren't secret but may name targets. +const reportFileMode = 0o644 + type UrlResult struct { Url string `json:"url"` Results []ModuleResult @@ -204,6 +209,12 @@ func (app *App) Run() error { scansRun := make([]string, 0, 16) + // accumulate every module result across targets so the report writers can + // serialize the full run after the loop. only collected when an export flag + // is set, so the common path pays nothing. + wantReport := app.settings.SARIF != "" || app.settings.Markdown != "" + reportResults := make([]report.Result, 0, 16) + for _, url := range app.targets { output.Info("Starting scan on %s", output.Highlight.Render(url)) @@ -450,6 +461,16 @@ func (app *App) Run() error { } } + if app.settings.Probe { + result, err := scan.Probe(url, app.settings.Timeout, app.settings.LogDir) + if err != nil { + log.Errorf("Error while running probe: %s", err) + } else if result != nil { + moduleResults = append(moduleResults, NewModuleResult(result)) + scansRun = append(scansRun, "Probe") + } + } + // Load and run modules if app.settings.AllModules || app.settings.Modules != "" || app.settings.ModuleTags != "" { loader, err := modules.NewLoader() @@ -520,6 +541,16 @@ func (app *App) Run() error { } fmt.Println(string(marshalled)) } + + if wantReport { + reportResults = append(reportResults, collectReportResults(url, moduleResults)...) + } + } + + if wantReport { + if err := app.writeReports(reportResults); err != nil { + return err + } } if !app.settings.ApiMode { @@ -529,6 +560,48 @@ func (app *App) Run() error { return nil } +// collectReportResults flattens one target's module results into the report +// model, carrying each finding as raw json so the report package stays free of +// scan types. a result that won't marshal is skipped rather than failing the run. +func collectReportResults(target string, moduleResults []ModuleResult) []report.Result { + out := make([]report.Result, 0, len(moduleResults)) + for _, mr := range moduleResults { + data, err := json.Marshal(mr.Data) + if err != nil { + log.Warnf("report: skipping %s result for %s: %v", mr.Id, target, err) + continue + } + out = append(out, report.Result{Target: target, Module: mr.Id, Data: data}) + } + return out +} + +// writeReports serializes the collected results to the requested export files. +// each writer runs independently so a bad path for one format doesn't suppress +// the other. +func (app *App) writeReports(results []report.Result) error { + if path := app.settings.SARIF; path != "" { + data, err := report.SARIF(results) + if err != nil { + return fmt.Errorf("build sarif report: %w", err) + } + if err := os.WriteFile(path, data, reportFileMode); err != nil { + return fmt.Errorf("write sarif report %q: %w", path, err) + } + output.Success("sarif report written to %s", path) + } + + if path := app.settings.Markdown; path != "" { + data := report.Markdown(results) + if err := os.WriteFile(path, data, reportFileMode); err != nil { + return fmt.Errorf("write markdown report %q: %w", path, err) + } + output.Success("markdown report written to %s", path) + } + + return nil +} + // expandTargets queries SecurityTrails for each original target and returns // newly discovered domains (subdomains + associated) for target expansion func (app *App) expandTargets() []string {