feat: live-host probe and sarif/markdown report export

adds an httpx-style -probe scanner reporting liveness, final status, page
title, server header and the redirect chain, plus -sarif/-markdown export
flags that serialize the collected run after the scan loop. the report
serializers live in a decoupled internal/report package consuming a raw-json
result model so they never import scan types.
This commit is contained in:
vmfunc
2026-06-10 14:45:21 -07:00
parent 5050900f29
commit c3a755f934
12 changed files with 838 additions and 0 deletions
+17
View File
@@ -188,6 +188,7 @@ sif has a modular architecture. modules are defined in yaml and can be extended
| `-crawl` | web crawler (spider same-host links/scripts/forms) |
| `-crawl-depth` | max crawl recursion depth (default 2) |
| `-passive` | passive subdomain/url discovery (zero traffic to target) |
| `-probe` | live-host probe (status, title, server, redirect chain) |
### http options
@@ -207,6 +208,22 @@ these apply to every outbound request across all scanners:
a scanner that sets a header explicitly (e.g. an api key) always wins over the global default.
### report export
write the run's findings out to a file for ci/cd or triage:
| flag | description |
|------|-------------|
| `-sarif` | write a sarif 2.1.0 report to this file |
| `-markdown`, `-md` | write a markdown report to this file |
```bash
# scan and emit both a sarif and markdown report
./sif -u https://example.com -headers -cors -sarif out.sarif -md out.md
```
sarif output is ingestible by github code scanning; markdown is a readable per-target summary.
### yaml modules
list available modules:
+28
View File
@@ -242,6 +242,14 @@ keyless and zero traffic to the target itself - all lookups hit third-party feed
./sif -u https://example.com -passive
```
### live-host probe
`-probe` - check whether the target is alive and report its final status, page title, server header, content-length and the redirect chain it walked
```bash
./sif -u https://example.com -probe
```
### whois lookup
`-whois` - perform whois lookups
@@ -363,6 +371,26 @@ cap outbound requests per second (0 = unlimited, default 0):
./sif -u https://example.com -rate-limit 20
```
## output options
write the collected findings out to a file after the scan. both formats can be requested in the same run.
### -sarif
write a sarif 2.1.0 report (one run, tool `sif`, one result per module result). ingestible by github code scanning and other sarif consumers:
```bash
./sif -u https://example.com -headers -cors -sarif out.sarif
```
### -md, -markdown
write a readable markdown report grouped by target, then by module:
```bash
./sif -u https://example.com -headers -cors -md report.md
```
## api options
### -api
+9
View File
@@ -61,6 +61,9 @@ type Settings struct {
Crawl bool
CrawlDepth int
Passive bool
Probe bool
SARIF string // path to write a sarif 2.1.0 report to ("" = off)
Markdown string // path to write a markdown report to ("" = off)
Modules string // Comma-separated list of module IDs to run
ModuleTags string // Run modules matching these tags
AllModules bool // Run all loaded modules
@@ -140,6 +143,7 @@ func Parse() *Settings {
flagSet.BoolVar(&settings.Crawl, "crawl", false, "Enable web crawling (spider same-host links/scripts/forms)"),
flagSet.IntVar(&settings.CrawlDepth, "crawl-depth", defaultCrawlDepth, "Max crawl recursion depth"),
flagSet.BoolVar(&settings.Passive, "passive", false, "Enable passive subdomain/url discovery (zero traffic to target)"),
flagSet.BoolVar(&settings.Probe, "probe", false, "Probe the target for liveness (status, title, server, redirect chain)"),
)
flagSet.CreateGroup("runtime", "Runtime",
@@ -157,6 +161,11 @@ func Parse() *Settings {
flagSet.IntVar(&settings.RateLimit, "rate-limit", 0, "Max requests per second (0 = unlimited)"),
)
flagSet.CreateGroup("output", "Output",
flagSet.StringVar(&settings.SARIF, "sarif", "", "Write a SARIF 2.1.0 report to this file"),
flagSet.StringVarP(&settings.Markdown, "markdown", "md", "", "Write a markdown report to this file"),
)
flagSet.CreateGroup("api", "API",
flagSet.BoolVar(&settings.ApiMode, "api", false, "Enable API mode. Only useful for internal lunchcat usage"),
)
+74
View File
@@ -0,0 +1,74 @@
/*
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
: :
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
: ▄█ █ █▀ · BSD 3-Clause License :
: :
: (c) 2022-2026 vmfunc, xyzeva, :
: lunchcat alumni & contributors :
: :
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
*/
package report
import (
"bytes"
"encoding/json"
"sort"
"strings"
)
// Markdown builds a human-readable report from results. Output starts with a
// top-level title, then one "##" section per target (in the order each target
// is first seen in results) and, inside it, one "###" subsection per module
// result sorted by module id. Each subsection carries the result data as a
// fenced json code block.
func Markdown(results []Result) []byte {
	// bucket results per target while remembering first-seen target order, so
	// the report reads target-first no matter how the input was interleaved.
	grouped := make(map[string][]Result)
	targets := make([]string, 0)
	for _, res := range results {
		if _, known := grouped[res.Target]; !known {
			targets = append(targets, res.Target)
		}
		grouped[res.Target] = append(grouped[res.Target], res)
	}

	var doc strings.Builder
	doc.WriteString("# sif scan report\n\n")
	for _, target := range targets {
		doc.WriteString("## " + target + "\n\n")

		findings := grouped[target]
		// stable sort by module id keeps the output deterministic across runs
		// while preserving input order among results of the same module.
		sort.SliceStable(findings, func(x, y int) bool {
			return findings[x].Module < findings[y].Module
		})
		for _, f := range findings {
			doc.WriteString("### " + f.Module + "\n\n")
			doc.WriteString("```json\n" + prettyJSON(f.Data) + "\n```\n\n")
		}
	}
	return []byte(doc.String())
}
// prettyJSON returns raw re-indented for display. Empty input is rendered as
// the literal "null"; input that json.Indent rejects is handed back unchanged
// so a malformed finding is still shown rather than dropped.
func prettyJSON(raw json.RawMessage) string {
	if len(raw) == 0 {
		return "null"
	}
	var pretty bytes.Buffer
	if json.Indent(&pretty, raw, "", " ") == nil {
		return pretty.String()
	}
	return string(raw)
}
+26
View File
@@ -0,0 +1,26 @@
/*
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
: :
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
: ▄█ █ █▀ · BSD 3-Clause License :
: :
: (c) 2022-2026 vmfunc, xyzeva, :
: lunchcat alumni & contributors :
: :
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
*/
// Package report serializes collected scan results to sarif and markdown. it's
// deliberately decoupled from the scan package: callers map their own results
// into report.Result, so report never imports a scanner type.
package report
import "encoding/json"
// Result is one module's output for one target. Data is whatever the scanner
// returned, carried as raw json so report stays free of scan types.
type Result struct {
// Target is the scanned target; it becomes the "##" heading in the markdown
// report and the artifact location uri in the sarif report.
Target string
// Module is the id of the module that produced the result; it becomes the
// "###" heading in markdown and the ruleId in sarif.
Module string
// Data is the module's finding as raw json; it may be empty.
Data json.RawMessage
}
+172
View File
@@ -0,0 +1,172 @@
/*
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
: :
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
: ▄█ █ █▀ · BSD 3-Clause License :
: :
: (c) 2022-2026 vmfunc, xyzeva, :
: lunchcat alumni & contributors :
: :
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
*/
package report
import (
"encoding/json"
"strings"
"testing"
)
// fakeResults returns three representative findings spread over two targets
// (two modules on target a, one on target b). It is the shared fixture for the
// tests that need a realistic mixed run.
func fakeResults() []Result {
	const (
		targetA = "https://a.example.com"
		targetB = "https://b.example.com"
	)
	fixture := make([]Result, 0, 3)
	fixture = append(fixture,
		Result{Target: targetA, Module: "cors", Data: json.RawMessage(`{"severity":"high"}`)},
		Result{Target: targetA, Module: "probe", Data: json.RawMessage(`{"status_code":200}`)},
		Result{Target: targetB, Module: "redirect", Data: json.RawMessage(`{"parameter":"next"}`)},
	)
	return fixture
}
// TestSARIF_ValidAndContainsFindings serializes the shared fixture and checks
// that the output round-trips through the sarif structs with the expected
// version, tool name, results and deduped rule list.
func TestSARIF_ValidAndContainsFindings(t *testing.T) {
	raw, err := SARIF(fakeResults())
	if err != nil {
		t.Fatalf("SARIF: %v", err)
	}

	// the output must parse back into the sarif shape
	var parsed sarifLog
	if err := json.Unmarshal(raw, &parsed); err != nil {
		t.Fatalf("sarif output is not valid json: %v", err)
	}
	if got := parsed.Version; got != "2.1.0" {
		t.Errorf("expected sarif version 2.1.0, got %q", got)
	}
	if n := len(parsed.Runs); n != 1 {
		t.Fatalf("expected exactly one run, got %d", n)
	}

	run := parsed.Runs[0]
	if name := run.Tool.Driver.Name; name != "sif" {
		t.Errorf("expected tool name sif, got %q", name)
	}
	if n := len(run.Results); n != 3 {
		t.Fatalf("expected 3 results, got %d", n)
	}

	// each finding's module id surfaces as the ruleId and its target as the uri;
	// pairs are {ruleID, target}.
	wantPairs := [][2]string{
		{"cors", "https://a.example.com"},
		{"probe", "https://a.example.com"},
		{"redirect", "https://b.example.com"},
	}
	for _, pair := range wantPairs {
		ruleID, target := pair[0], pair[1]
		if sarifHasResult(run.Results, ruleID, target) {
			continue
		}
		t.Errorf("expected sarif result rule=%q target=%q, got %+v", ruleID, target, run.Results)
	}

	// rules list each module id once, deduped across targets
	if rules := run.Tool.Driver.Rules; len(rules) != 3 {
		t.Errorf("expected 3 deduped rules, got %d: %+v", len(rules), rules)
	}
}
// TestSARIF_DedupesRulesAcrossTargets runs the same module against two targets
// and expects a single rule entry but one result per target.
func TestSARIF_DedupesRulesAcrossTargets(t *testing.T) {
	input := make([]Result, 0, 2)
	for _, target := range []string{"https://a.example.com", "https://b.example.com"} {
		input = append(input, Result{Target: target, Module: "cors", Data: json.RawMessage(`{}`)})
	}

	raw, err := SARIF(input)
	if err != nil {
		t.Fatalf("SARIF: %v", err)
	}

	var parsed sarifLog
	if err := json.Unmarshal(raw, &parsed); err != nil {
		t.Fatalf("invalid json: %v", err)
	}

	run := parsed.Runs[0]
	if n := len(run.Tool.Driver.Rules); n != 1 {
		t.Errorf("expected 1 deduped rule, got %d", n)
	}
	if n := len(run.Results); n != 2 {
		t.Errorf("expected 2 results, got %d", n)
	}
}
// TestSARIF_Empty checks that a nil input still yields a parseable log with
// exactly one run and no results.
func TestSARIF_Empty(t *testing.T) {
	raw, err := SARIF(nil)
	if err != nil {
		t.Fatalf("SARIF: %v", err)
	}

	var parsed sarifLog
	if err := json.Unmarshal(raw, &parsed); err != nil {
		t.Fatalf("empty sarif is not valid json: %v", err)
	}

	runs := parsed.Runs
	if len(runs) != 1 {
		t.Fatalf("expected one run even when empty, got %d", len(runs))
	}
	if n := len(runs[0].Results); n != 0 {
		t.Errorf("expected no results, got %d", n)
	}
}
// TestMarkdown_ContainsTargetsAndModules renders the shared fixture and checks
// that the title, every target heading, every module heading and the
// re-indented finding bodies all appear in the report.
func TestMarkdown_ContainsTargetsAndModules(t *testing.T) {
	rendered := string(Markdown(fakeResults()))

	expected := [...]string{
		"# sif scan report",
		"## https://a.example.com",
		"## https://b.example.com",
		"### cors",
		"### probe",
		"### redirect",
		`"severity": "high"`, // re-indented finding body
		`"parameter": "next"`,
	}
	for i := range expected {
		if strings.Contains(rendered, expected[i]) {
			continue
		}
		t.Errorf("markdown report missing %q\n---\n%s", expected[i], rendered)
	}
}
// TestMarkdown_GroupsByTarget checks the report layout: target a's section
// precedes target b's, and both of a's module headings fall between the two
// target headings.
func TestMarkdown_GroupsByTarget(t *testing.T) {
	rendered := string(Markdown(fakeResults()))

	aHeader := strings.Index(rendered, "## https://a.example.com")
	bHeader := strings.Index(rendered, "## https://b.example.com")
	if aHeader < 0 || bHeader < 0 {
		t.Fatalf("missing target headers in:\n%s", rendered)
	}
	if aHeader > bHeader {
		t.Errorf("expected target a before target b, got a=%d b=%d", aHeader, bHeader)
	}

	// both of a's modules sit between a's header and b's header
	underA := func(idx int) bool { return idx >= aHeader && idx <= bHeader }
	corsIdx := strings.Index(rendered, "### cors")
	probeIdx := strings.Index(rendered, "### probe")
	if !(underA(corsIdx) && underA(probeIdx)) {
		t.Errorf("expected a's modules grouped under a, cors=%d probe=%d (a=%d b=%d)", corsIdx, probeIdx, aHeader, bHeader)
	}
}
// sarifHasResult reports whether results contains an entry whose ruleId equals
// ruleID and which has at least one location whose artifact uri equals target.
// That pairing is what proves a finding survived serialization.
func sarifHasResult(results []sarifResult, ruleID, target string) bool {
	for _, res := range results {
		if res.RuleID == ruleID {
			for _, loc := range res.Locations {
				if loc.PhysicalLocation.ArtifactLocation.URI == target {
					return true
				}
			}
		}
	}
	return false
}
+133
View File
@@ -0,0 +1,133 @@
/*
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
: :
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
: ▄█ █ █▀ · BSD 3-Clause License :
: :
: (c) 2022-2026 vmfunc, xyzeva, :
: lunchcat alumni & contributors :
: :
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
*/
package report
import (
"encoding/json"
"fmt"
)
// sarif format/version constants pinned to the 2.1.0 schema so the output is
// ingestible by github code scanning and other sarif consumers.
const (
// sarifVersion is written to the log's "version" field.
sarifVersion = "2.1.0"
// sarifSchema is written to the log's "$schema" field.
sarifSchema = "https://json.schemastore.org/sarif-2.1.0.json"
// toolName is written to tool.driver.name.
toolName = "sif"
)
// sarifLog is the minimal valid 2.1.0 shape: one run from one tool.
type sarifLog struct {
Schema string `json:"$schema"`
Version string `json:"version"`
Runs []sarifRun `json:"runs"`
}
// sarifRun pairs the tool description with the results it produced.
type sarifRun struct {
Tool sarifTool `json:"tool"`
Results []sarifResult `json:"results"`
}
// sarifTool wraps the driver, the only tool component this package emits.
type sarifTool struct {
Driver sarifDriver `json:"driver"`
}
// sarifDriver names the tool and lists the rules its results refer to.
type sarifDriver struct {
Name string `json:"name"`
Rules []sarifRule `json:"rules"`
}
// sarifRule identifies one rule; SARIF fills ID with a module id.
type sarifRule struct {
ID string `json:"id"`
}
// sarifResult is one reported result: the rule it belongs to, its level, a
// message and the locations it applies to.
type sarifResult struct {
RuleID string `json:"ruleId"`
Level string `json:"level"`
Message sarifMessage `json:"message"`
Locations []sarifLocation `json:"locations"`
}
// sarifMessage carries the plain-text message of a result.
type sarifMessage struct {
Text string `json:"text"`
}
// sarifLocation wraps the physical location of a result.
type sarifLocation struct {
PhysicalLocation sarifPhysicalLocation `json:"physicalLocation"`
}
// sarifPhysicalLocation wraps the artifact a result points at.
type sarifPhysicalLocation struct {
ArtifactLocation sarifArtifactLocation `json:"artifactLocation"`
}
// sarifArtifactLocation holds the artifact uri; SARIF fills it with the
// result's target.
type sarifArtifactLocation struct {
URI string `json:"uri"`
}
// sarifLevel is the default severity for findings; sif results don't carry a
// uniform severity field, so "warning" is the neutral middle ground.
const sarifLevel = "warning"
// SARIF serializes results to a minimal valid sarif 2.1.0 log. Each module
// result becomes one sarif result tagged with its module id (the rule) and the
// target uri, with the raw module data inlined into the message for context.
//
// The driver's rule list names each module id exactly once, in the order the
// ids first appear in results, so the same input always produces the same
// bytes. A nil or empty input yields a log with one run and no results.
//
// The returned error is non-nil only if json marshalling fails.
func SARIF(results []Result) ([]byte, error) {
	sarifResults := make([]sarifResult, 0, len(results))

	// seen dedupes module ids; rules keeps them in first-seen order. building
	// the list by ranging over the map instead would randomize the rule order
	// from run to run and make the report non-reproducible.
	seen := make(map[string]struct{}, len(results))
	rules := make([]sarifRule, 0, len(results))

	for i := range results {
		res := results[i]
		if _, dup := seen[res.Module]; !dup {
			seen[res.Module] = struct{}{}
			rules = append(rules, sarifRule{ID: res.Module})
		}
		sarifResults = append(sarifResults, sarifResult{
			RuleID:  res.Module,
			Level:   sarifLevel,
			Message: sarifMessage{Text: messageFor(res)},
			Locations: []sarifLocation{{
				PhysicalLocation: sarifPhysicalLocation{
					ArtifactLocation: sarifArtifactLocation{URI: res.Target},
				},
			}},
		})
	}

	doc := sarifLog{
		Schema:  sarifSchema,
		Version: sarifVersion,
		Runs: []sarifRun{{
			Tool:    sarifTool{Driver: sarifDriver{Name: toolName, Rules: rules}},
			Results: sarifResults,
		}},
	}
	out, err := json.MarshalIndent(doc, "", " ")
	if err != nil {
		return nil, fmt.Errorf("marshal sarif: %w", err)
	}
	return out, nil
}
// messageFor returns the text shown for a sarif result: "<module> finding on
// <target>", followed by ": " and the raw finding json when the result carries
// any data, so a sarif viewer shows what was actually found.
func messageFor(res Result) string {
	msg := fmt.Sprintf("%s finding on %s", res.Module, res.Target)
	if len(res.Data) > 0 {
		msg += ": " + string(res.Data)
	}
	return msg
}
+16
View File
@@ -245,6 +245,22 @@ func TestIntegrationXSS(t *testing.T) {
}
}
// TestIntegrationProbe probes the shared vulnerable test app and expects it to
// be reported alive with a 200 on its homepage.
func TestIntegrationProbe(t *testing.T) {
	app := newVulnApp()
	defer app.Close()

	probe, err := Probe(app.URL, 5*time.Second, "")
	switch {
	case err != nil:
		t.Fatalf("Probe: %v", err)
	case probe == nil || !probe.Alive:
		t.Fatalf("expected the vuln app to be alive, got %+v", probe)
	}
	if got := probe.StatusCode; got != http.StatusOK {
		t.Errorf("expected 200 from the homepage, got %d", got)
	}
}
func TestIntegrationPorts(t *testing.T) {
// a real listener stands in for an open port; a tiny server hands its number
// to Ports via the commonPorts wordlist.
+148
View File
@@ -0,0 +1,148 @@
/*
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
: :
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
: ▄█ █ █▀ · BSD 3-Clause License :
: :
: (c) 2022-2026 vmfunc, xyzeva, :
: lunchcat alumni & contributors :
: :
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
*/
package scan
import (
"context"
"fmt"
"io"
"net/http"
"regexp"
"strings"
"time"
"github.com/dropalldatabases/sif/internal/httpx"
"github.com/dropalldatabases/sif/internal/logger"
"github.com/dropalldatabases/sif/internal/output"
)
// ProbeResult is the httpx-style liveness snapshot for one target: did it answer,
// where did it land, and the few fingerprint fields worth keeping.
type ProbeResult struct {
// URL is the target exactly as it was handed to Probe.
URL string `json:"url"`
// Alive is false when the request failed at the transport level and true
// once a response was received.
Alive bool `json:"alive"`
// StatusCode is the status of the response Probe ended on; it stays 0 for a
// dead host.
StatusCode int `json:"status_code"`
// Title is the trimmed text of the first <title> found in the leading
// probeMaxBody bytes of the body, or "" when there is none.
Title string `json:"title,omitempty"`
// Server is the value of the response's Server header.
Server string `json:"server,omitempty"`
// ContentLength is copied from http.Response.ContentLength.
// NOTE(review): net/http documents -1 for an unknown length - confirm
// consumers of the report expect that.
ContentLength int64 `json:"content_length"`
// RedirectChain lists, in order, the url of every redirect the client
// followed after the initial request.
RedirectChain []string `json:"redirect_chain,omitempty"`
}
// probeMaxRedirects caps the chain we'll follow so a redirect loop can't run
// forever; matches httpx's default depth.
const probeMaxRedirects = 10
// probeMaxBody bounds the body we read to extract a <title> (64KB) so a hostile
// or huge response can't exhaust memory. a title that starts beyond this many
// bytes is not seen.
const probeMaxBody = 64 * 1024
// titleRe pulls the text out of the first <title>. the (?is) flags make the
// match case-insensitive and let "." cross newlines so a title spanning lines
// is still caught; [^>]* tolerates attributes on the tag and the non-greedy
// group stops at the first closing </title>.
var titleRe = regexp.MustCompile(`(?is)<title[^>]*>(.*?)</title>`)
// Probe checks whether the target is alive and reports its final status, page
// title, Server header, content-length and the redirect chain it walked.
//
// targetURL is requested with GET using a client built by httpx.Client with
// the given timeout. When logdir is non-empty the outcome is also written to
// the target's log file.
//
// A transport failure (the host never produced a response) is reported as a
// result with Alive=false and a nil error. A host that keeps redirecting past
// probeMaxRedirects is still alive: the probe stops following and reports the
// last redirect response it received. A body that fails mid-read is likewise
// not fatal; the title is extracted from whatever arrived.
//
// An error is returned only when the log file cannot be created or the request
// cannot be built.
func Probe(targetURL string, timeout time.Duration, logdir string) (*ProbeResult, error) {
	log := output.Module("PROBE")
	log.Start()

	sanitizedURL := stripScheme(targetURL)
	if logdir != "" {
		if err := logger.WriteHeader(sanitizedURL, logdir, "Live-host probe"); err != nil {
			log.Error("error creating log file: %v", err)
			return nil, fmt.Errorf("create probe log: %w", err)
		}
	}

	// follow redirects but record every hop; the chain is half the value of a
	// probe. capping at probeMaxRedirects stops a loop from spinning forever.
	chain := make([]string, 0, 4)
	capped := false
	// NOTE(review): assumes httpx.Client hands back a client this call may
	// mutate (CheckRedirect is overwritten below) - confirm it isn't shared.
	client := httpx.Client(timeout)
	client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
		if len(via) >= probeMaxRedirects {
			// returning a plain error here would make client.Do fail and the
			// host would be misreported as dead even though it answered every
			// hop. ErrUseLastResponse hands back the last 3xx instead.
			capped = true
			return http.ErrUseLastResponse
		}
		chain = append(chain, req.URL.String())
		return nil
	}

	req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, targetURL, http.NoBody)
	if err != nil {
		return nil, fmt.Errorf("build probe request: %w", err)
	}

	resp, err := client.Do(req)
	if err != nil {
		// a transport error means the host didn't answer; that's a dead probe,
		// not a tool failure, so report it rather than bailing.
		log.Warn("%s is dead: %v", output.Highlight.Render(sanitizedURL), err)
		if logdir != "" {
			logger.Write(sanitizedURL, logdir, fmt.Sprintf("dead: %v\n", err))
		}
		result := &ProbeResult{URL: targetURL, Alive: false, RedirectChain: chain}
		log.Complete(0, "alive")
		return result, nil
	}
	defer resp.Body.Close()

	// io.ReadAll returns the bytes read so far alongside the error. the headers
	// already proved the host is alive, so a truncated body only costs us (at
	// worst) the title; it must not discard the whole probe.
	body, err := io.ReadAll(io.LimitReader(resp.Body, probeMaxBody))
	if err != nil {
		log.Warn("partial body from %s: %v", output.Highlight.Render(sanitizedURL), err)
	}

	result := &ProbeResult{
		URL:           targetURL,
		Alive:         true,
		StatusCode:    resp.StatusCode,
		Title:         extractTitle(body),
		Server:        resp.Header.Get("Server"),
		ContentLength: resp.ContentLength,
		RedirectChain: chain,
	}

	log.Info("%s [%s] %s",
		output.Status.Render(fmt.Sprintf("%d", result.StatusCode)),
		output.Highlight.Render(result.Title),
		output.Muted.Render(result.Server))
	if len(chain) > 0 {
		log.Info("redirect chain: %s", strings.Join(chain, " -> "))
	}
	if capped {
		log.Warn("stopped after %d redirects", probeMaxRedirects)
	}

	if logdir != "" {
		logger.Write(sanitizedURL, logdir,
			fmt.Sprintf("alive status=%d title=%q server=%q length=%d\n",
				result.StatusCode, result.Title, result.Server, result.ContentLength))
		if len(chain) > 0 {
			logger.Write(sanitizedURL, logdir, "redirect chain: "+strings.Join(chain, " -> ")+"\n")
		}
		if capped {
			logger.Write(sanitizedURL, logdir, fmt.Sprintf("stopped after %d redirects\n", probeMaxRedirects))
		}
	}

	log.Complete(1, "alive")
	return result, nil
}
// extractTitle finds the first <title> element in body and returns its text
// with leading and trailing whitespace removed. It returns "" when body has no
// title element.
func extractTitle(body []byte) string {
	if groups := titleRe.FindSubmatch(body); len(groups) >= 2 {
		// groups[1] is the capture between the opening and closing tags
		return strings.TrimSpace(string(groups[1]))
	}
	return ""
}
// ResultType returns the fixed identifier "probe" under which probe results
// are known to the result registry.
func (r *ProbeResult) ResultType() string {
	return "probe"
}
// compile-time check that *ProbeResult satisfies the ScanResult interface.
var _ ScanResult = (*ProbeResult)(nil)
+133
View File
@@ -0,0 +1,133 @@
/*
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
: :
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
: ▄█ █ █▀ · BSD 3-Clause License :
: :
: (c) 2022-2026 vmfunc, xyzeva, :
: lunchcat alumni & contributors :
: :
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
*/
package scan
import (
"net/http"
"net/http/httptest"
"testing"
"time"
)
// TestProbe_TitleServerStatus probes a server that answers 200 with a Server
// header and a whitespace-padded title, and checks each field is captured.
func TestProbe_TitleServerStatus(t *testing.T) {
	const page = "<html><head><title> Welcome Home </title></head><body>hi</body></html>"
	handler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Server", "nginx/1.25.3")
		w.WriteHeader(http.StatusOK)
		w.Write([]byte(page))
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	got, err := Probe(srv.URL, 5*time.Second, "")
	if err != nil {
		t.Fatalf("Probe: %v", err)
	}
	if !got.Alive {
		t.Fatalf("expected alive, got %+v", got)
	}
	if code := got.StatusCode; code != http.StatusOK {
		t.Errorf("expected status 200, got %d", code)
	}
	// title text is trimmed of surrounding whitespace
	if title := got.Title; title != "Welcome Home" {
		t.Errorf("expected trimmed title, got %q", title)
	}
	if server := got.Server; server != "nginx/1.25.3" {
		t.Errorf("expected server header, got %q", server)
	}
}
// TestProbe_RedirectChain serves /a -> /b -> /c and checks the probe lands on
// the final 200, takes its title from the last hop and records both followed
// hops in order.
func TestProbe_RedirectChain(t *testing.T) {
	routes := func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/a":
			http.Redirect(w, r, "/b", http.StatusFound)
		case "/b":
			http.Redirect(w, r, "/c", http.StatusMovedPermanently)
		case "/c":
			w.WriteHeader(http.StatusOK)
			w.Write([]byte("<title>final</title>"))
		default:
			http.NotFound(w, r)
		}
	}
	srv := httptest.NewServer(http.HandlerFunc(routes))
	defer srv.Close()

	got, err := Probe(srv.URL+"/a", 5*time.Second, "")
	if err != nil {
		t.Fatalf("Probe: %v", err)
	}
	if !(got.Alive && got.StatusCode == http.StatusOK) {
		t.Fatalf("expected alive 200 after redirects, got %+v", got)
	}
	if got.Title != "final" {
		t.Errorf("expected title of final hop, got %q", got.Title)
	}

	// two hops were followed (/b and /c are the urls requested after the first)
	hops := got.RedirectChain
	if len(hops) != 2 {
		t.Fatalf("expected 2 redirect hops, got %d: %v", len(hops), hops)
	}
	if walked := hasSuffix(hops[0], "/b") && hasSuffix(hops[1], "/c"); !walked {
		t.Errorf("expected chain to walk /b then /c, got %v", hops)
	}
}
// TestProbe_DeadHost probes the address of a server that has already been
// closed, so the dial fails. A dead host must come back as a result with
// Alive=false, not as an error.
func TestProbe_DeadHost(t *testing.T) {
	noop := http.HandlerFunc(func(http.ResponseWriter, *http.Request) {})
	srv := httptest.NewServer(noop)
	addr := srv.URL
	srv.Close()

	switch got, err := Probe(addr, 2*time.Second, ""); {
	case err != nil:
		t.Fatalf("Probe should not error on a dead host: %v", err)
	case got.Alive:
		t.Errorf("expected dead host, got %+v", got)
	}
}
// TestProbe_ExtractTitle table-tests extractTitle over plain, padded,
// attribute-bearing, multi-line and missing titles.
func TestProbe_ExtractTitle(t *testing.T) {
	type titleCase struct {
		name string
		body string
		want string
	}
	cases := []titleCase{
		{name: "simple", body: "<title>hello</title>", want: "hello"},
		{name: "trimmed", body: "<title> spaced </title>", want: "spaced"},
		{name: "attrs", body: `<title lang="en">attr</title>`, want: "attr"},
		{name: "multiline", body: "<title>line one\nline two</title>", want: "line one\nline two"},
		{name: "none", body: "<html><body>no title</body></html>", want: ""},
	}
	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			if got := extractTitle([]byte(tc.body)); got != tc.want {
				t.Errorf("extractTitle(%q) = %q, want %q", tc.body, got, tc.want)
			}
		})
	}
}
// TestProbeResult_ResultType pins the registry identifier of probe results.
func TestProbeResult_ResultType(t *testing.T) {
	var zero ProbeResult
	if got := (&zero).ResultType(); got != "probe" {
		t.Errorf("expected result type 'probe', got %q", got)
	}
}
// hasSuffix reports whether s ends with suffix. It is a small local helper
// that keeps the redirect-chain assertions readable.
func hasSuffix(s, suffix string) bool {
	start := len(s) - len(suffix)
	if start < 0 {
		// suffix is longer than s, so it cannot match
		return false
	}
	return s[start:] == suffix
}
+9
View File
@@ -131,6 +131,9 @@ max crawl recursion depth (default 2).
.B \-passive
passive subdomain and historical url discovery from third\-party feeds (zero traffic to the target).
.TP
.B \-probe
live\-host probe; reports liveness, final status, page title, server header and the redirect chain.
.TP
.B \-noscan
skip the base url scan (robots.txt, etc).
.SH OPTIONS
@@ -162,6 +165,12 @@ cookie header to send with every request.
.BR \-rate\-limit " \fIn\fR"
cap outbound requests per second (0 = unlimited, default 0).
.TP
.BR \-sarif " \fIfile\fR"
write a sarif 2.1.0 report of the run to \fIfile\fR.
.TP
.BR \-md ", " \-\-markdown " \fIfile\fR"
write a markdown report of the run to \fIfile\fR.
.TP
.B \-api
emit json results and suppress the interactive output.
.SH MODULES
+73
View File
@@ -29,6 +29,7 @@ import (
"github.com/dropalldatabases/sif/internal/logger"
"github.com/dropalldatabases/sif/internal/modules"
"github.com/dropalldatabases/sif/internal/output"
"github.com/dropalldatabases/sif/internal/report"
"github.com/dropalldatabases/sif/internal/scan"
"github.com/dropalldatabases/sif/internal/scan/builtin"
"github.com/dropalldatabases/sif/internal/scan/frameworks"
@@ -46,6 +47,10 @@ type App struct {
// Version is set by main to the resolved build version and shown on the banner.
var Version = "dev"
// reportFileMode is the permission applied to written report files: owner
// read/write, group/other read. reports aren't secret but may name targets.
const reportFileMode = 0o644
type UrlResult struct {
Url string `json:"url"`
Results []ModuleResult
@@ -204,6 +209,12 @@ func (app *App) Run() error {
scansRun := make([]string, 0, 16)
// accumulate every module result across targets so the report writers can
// serialize the full run after the loop. only collected when an export flag
// is set, so the common path pays nothing.
wantReport := app.settings.SARIF != "" || app.settings.Markdown != ""
reportResults := make([]report.Result, 0, 16)
for _, url := range app.targets {
output.Info("Starting scan on %s", output.Highlight.Render(url))
@@ -450,6 +461,16 @@ func (app *App) Run() error {
}
}
if app.settings.Probe {
result, err := scan.Probe(url, app.settings.Timeout, app.settings.LogDir)
if err != nil {
log.Errorf("Error while running probe: %s", err)
} else if result != nil {
moduleResults = append(moduleResults, NewModuleResult(result))
scansRun = append(scansRun, "Probe")
}
}
// Load and run modules
if app.settings.AllModules || app.settings.Modules != "" || app.settings.ModuleTags != "" {
loader, err := modules.NewLoader()
@@ -520,6 +541,16 @@ func (app *App) Run() error {
}
fmt.Println(string(marshalled))
}
if wantReport {
reportResults = append(reportResults, collectReportResults(url, moduleResults)...)
}
}
if wantReport {
if err := app.writeReports(reportResults); err != nil {
return err
}
}
if !app.settings.ApiMode {
@@ -529,6 +560,48 @@ func (app *App) Run() error {
return nil
}
// collectReportResults converts one target's module results into report.Result
// values. Each result's Data is marshalled to json so the report package never
// sees a scan type. A result that fails to marshal is logged and left out; it
// does not fail the run.
func collectReportResults(target string, moduleResults []ModuleResult) []report.Result {
	flattened := make([]report.Result, 0, len(moduleResults))
	for _, mr := range moduleResults {
		raw, err := json.Marshal(mr.Data)
		if err == nil {
			flattened = append(flattened, report.Result{
				Target: target,
				Module: mr.Id,
				Data:   raw,
			})
			continue
		}
		log.Warnf("report: skipping %s result for %s: %v", mr.Id, target, err)
	}
	return flattened
}
// writeReports serializes the collected results to the requested export files:
// a sarif report when settings.SARIF is set and a markdown report when
// settings.Markdown is set.
//
// Each writer runs independently, so a failure for one format (for example a
// bad path) doesn't suppress the other: both are always attempted. The
// returned error is nil when every requested report was written; otherwise it
// describes each failure, wrapping the first one.
func (app *App) writeReports(results []report.Result) error {
	var sarifErr, markdownErr error

	if path := app.settings.SARIF; path != "" {
		data, err := report.SARIF(results)
		switch {
		case err != nil:
			sarifErr = fmt.Errorf("build sarif report: %w", err)
		default:
			if err := os.WriteFile(path, data, reportFileMode); err != nil {
				sarifErr = fmt.Errorf("write sarif report %q: %w", path, err)
			} else {
				output.Success("sarif report written to %s", path)
			}
		}
	}

	// deliberately not gated on sarifErr: the markdown report is still useful
	// when the sarif one could not be produced.
	if path := app.settings.Markdown; path != "" {
		data := report.Markdown(results)
		if err := os.WriteFile(path, data, reportFileMode); err != nil {
			markdownErr = fmt.Errorf("write markdown report %q: %w", path, err)
		} else {
			output.Success("markdown report written to %s", path)
		}
	}

	switch {
	case sarifErr != nil && markdownErr != nil:
		return fmt.Errorf("%w; %v", sarifErr, markdownErr)
	case sarifErr != nil:
		return sarifErr
	default:
		return markdownErr
	}
}
// expandTargets queries SecurityTrails for each original target and returns
// newly discovered domains (subdomains + associated) for target expansion
func (app *App) expandTargets() []string {