mirror of
https://github.com/lunchcat/sif.git
synced 2026-06-12 11:01:24 -07:00
feat(dirlist): response filters, wildcard calibration and custom wordlists
the old scanner surfaced every response that wasn't 404/403, so modern SPA catch-all 200s flooded the output and made -dirlist near-useless. add ffuf-style matching:

- -mc/-fc/-fr and -fs/-fw match or filter by status, regex, body size and word count; bodies are read through a capped io.LimitReader so size/word counts are deterministic and memory stays flat. filters win over matches.
- -ac auto-calibrates the soft-404 baseline from a few deterministic non-existent paths and drops responses matching that wildcard shape.
- -w overrides the size switch with a local file or remote list (fetched through the shared client so proxy/rate-limit apply); -e appends extensions per word.

size and words are added to DirectoryResult for the json output.
This commit is contained in:
@@ -157,6 +157,14 @@ sif has a modular architecture. modules are defined in yaml and can be extended
|
||||
| flag | description |
|
||||
|------|-------------|
|
||||
| `-dirlist` | directory and file fuzzing (small/medium/large) |
|
||||
| `-mc` | dirlist: match these status codes (comma list, e.g. 200,301) |
|
||||
| `-fc` | dirlist: filter out these status codes (comma list) |
|
||||
| `-fs` | dirlist: filter out responses of these body sizes (comma list) |
|
||||
| `-fw` | dirlist: filter out responses with these word counts (comma list) |
|
||||
| `-fr` | dirlist: filter out responses whose body matches this regex |
|
||||
| `-ac` | dirlist: auto-calibrate the soft-404 wildcard baseline |
|
||||
| `-w` | dirlist: custom wordlist (local file or url; overrides `-dirlist` size) |
|
||||
| `-e` | dirlist: extensions appended to each word (comma list, e.g. php,bak,env) |
|
||||
| `-dnslist` | subdomain enumeration (small/medium/large) |
|
||||
| `-ports` | port scanning (common/full) |
|
||||
| `-nuclei` | vulnerability scanning with nuclei templates |
|
||||
|
||||
@@ -33,6 +33,42 @@ sizes: `small`, `medium`, `large`
|
||||
./sif -u https://example.com -dirlist medium
|
||||
```
|
||||
|
||||
#### response filters
|
||||
|
||||
modern apps serve a catch-all 200 for unknown routes, so a naive scan reports
|
||||
every path. these ffuf-style filters cut the noise (a filter always wins over a
|
||||
match):
|
||||
|
||||
- `-mc <codes>` - match only these status codes (comma list, e.g. `200,301`)
|
||||
- `-fc <codes>` - filter out these status codes
|
||||
- `-fs <sizes>` - filter out responses of these body sizes
|
||||
- `-fw <counts>` - filter out responses with these word counts
|
||||
- `-fr <regex>` - filter out responses whose body matches this regex
|
||||
|
||||
```bash
|
||||
./sif -u https://example.com -dirlist medium -mc 200,301 -fs 1234
|
||||
```
|
||||
|
||||
#### wildcard calibration
|
||||
|
||||
`-ac` probes a few paths that cannot exist, learns the soft-404 baseline
|
||||
(status + size + words), and auto-drops any response matching it - so SPA
|
||||
catch-all 200s stop flooding the output:
|
||||
|
||||
```bash
|
||||
./sif -u https://example.com -dirlist medium -ac
|
||||
```
|
||||
|
||||
#### custom wordlists and extensions
|
||||
|
||||
`-w <path|url>` overrides the size switch with your own list (local file or
|
||||
remote url); `-e <exts>` appends each extension to every word, keeping the bare
|
||||
word too:
|
||||
|
||||
```bash
|
||||
./sif -u https://example.com -w /path/to/words.txt -e php,bak,env
|
||||
```
|
||||
|
||||
### subdomain enumeration
|
||||
|
||||
`-dnslist <size>` - enumerate subdomains
|
||||
|
||||
@@ -21,6 +21,14 @@ import (
|
||||
|
||||
type Settings struct {
|
||||
Dirlist string
|
||||
DirMatchCodes string // -mc dirlist: status codes to keep
|
||||
DirFilterCodes string // -fc dirlist: status codes to drop
|
||||
DirFilterSizes string // -fs dirlist: body sizes to drop
|
||||
DirFilterWords string // -fw dirlist: word counts to drop
|
||||
DirFilterRegex string // -fr dirlist: regex; body match drops response
|
||||
DirCalibrate bool // -ac dirlist: auto-calibrate soft-404 baseline
|
||||
DirWordlist string // -w dirlist: custom wordlist (file path or url)
|
||||
DirExtensions string // -e dirlist: extensions appended to each word
|
||||
Dnslist string
|
||||
Debug bool
|
||||
LogDir string
|
||||
@@ -100,6 +108,14 @@ func Parse() *Settings {
|
||||
portScopes := goflags.AllowdTypes{"common": Common, "full": Full, "none": Nil}
|
||||
flagSet.CreateGroup("scans", "Scans",
|
||||
flagSet.EnumVar(&settings.Dirlist, "dirlist", Nil, "Directory fuzzing scan size (small/medium/large)", listSizes),
|
||||
flagSet.StringVar(&settings.DirMatchCodes, "mc", "", "Dirlist: match these status codes (comma list, e.g. 200,301)"),
|
||||
flagSet.StringVar(&settings.DirFilterCodes, "fc", "", "Dirlist: filter out these status codes (comma list)"),
|
||||
flagSet.StringVar(&settings.DirFilterSizes, "fs", "", "Dirlist: filter out responses of these body sizes (comma list)"),
|
||||
flagSet.StringVar(&settings.DirFilterWords, "fw", "", "Dirlist: filter out responses with these word counts (comma list)"),
|
||||
flagSet.StringVar(&settings.DirFilterRegex, "fr", "", "Dirlist: filter out responses whose body matches this regex"),
|
||||
flagSet.BoolVar(&settings.DirCalibrate, "ac", false, "Dirlist: auto-calibrate the soft-404 wildcard baseline"),
|
||||
flagSet.StringVar(&settings.DirWordlist, "w", "", "Dirlist: custom wordlist (local file path or url; overrides -dirlist size)"),
|
||||
flagSet.StringVar(&settings.DirExtensions, "e", "", "Dirlist: extensions appended to each word (comma list, e.g. php,bak,env)"),
|
||||
flagSet.EnumVar(&settings.Dnslist, "dnslist", Nil, "DNS fuzzing scan size (small/medium/large)", listSizes),
|
||||
flagSet.EnumVar(&settings.Ports, "ports", Nil, "Port scanning scope (common/full)", portScopes),
|
||||
flagSet.BoolVar(&settings.Dorking, "dork", false, "Enable Google dorking"),
|
||||
|
||||
+377
-41
@@ -16,8 +16,12 @@ import (
|
||||
"bufio"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -36,13 +40,342 @@ const (
|
||||
bigFile = "directory-list-2.3-big.txt"
|
||||
)
|
||||
|
||||
// dirlistBodyCap bounds how many bytes we read per response before computing
|
||||
// size/word counts. modern apps stream large html; capping keeps memory flat
|
||||
// and makes size/word matching deterministic against arbitrarily large bodies.
|
||||
const dirlistBodyCap = 512 * 1024
|
||||
|
||||
// soft-404 calibration probes. we ask for a handful of deterministic paths that
|
||||
// cannot exist, then treat any response shape they share as the wildcard
|
||||
// baseline. deterministic (no rng) so the workflow stays reproducible.
|
||||
const (
|
||||
calibrationProbes = 3
|
||||
calibrationPrefix = "/sif-cal-"
|
||||
)
|
||||
|
||||
// statusNotFound / statusForbidden are the historical default "not interesting"
|
||||
// codes; they seed the filter set when no explicit -mc/-fc is given.
|
||||
const (
|
||||
statusNotFound = 404
|
||||
statusForbidden = 403
|
||||
)
|
||||
|
||||
// DirectoryResult is one dirlist finding, in the shape written to the json
// output.
type DirectoryResult struct {
	// Url is the URL of the request that produced the finding.
	Url string `json:"url"`
	// StatusCode is the HTTP status of the response.
	StatusCode int `json:"status_code"`
	// Size is the number of body bytes read (bounded by the body cap).
	Size int `json:"size"`
	// Words is the whitespace-separated token count of the body that was read.
	Words int `json:"words"`
}
|
||||
|
||||
// Dirlist performs directory fuzzing on the target URL.
|
||||
func Dirlist(size string, url string, timeout time.Duration, threads int, logdir string) ([]DirectoryResult, error) {
|
||||
// DirlistOptions bundles the ffuf-style matcher knobs plus the wordlist
// overrides. every field is the raw flag value; parsing happens when the
// matcher is built. the zero value keeps the legacy behavior (report
// everything that isn't 404/403), so callers that set nothing see the old
// defaults.
type DirlistOptions struct {
	// MatchCodes (-mc) is a comma list of status codes to keep.
	MatchCodes string
	// FilterCodes (-fc) is a comma list of status codes to drop.
	FilterCodes string
	// FilterSizes (-fs) is a comma list of body sizes to drop.
	FilterSizes string
	// FilterWords (-fw) is a comma list of word counts to drop.
	FilterWords string
	// FilterRegex (-fr) is a regex; a body match drops the response.
	FilterRegex string
	// Calibrate (-ac) turns on auto-calibration of the soft-404 wildcard baseline.
	Calibrate bool
	// Wordlist (-w) is a local path or url; when set it overrides the size switch.
	Wordlist string
	// Extensions (-e) is a comma list appended to each word (php,bak,env).
	Extensions string
}
|
||||
|
||||
// responseMeta is the comparable shape of a response used for matching:
// status code, body size in bytes and body word count. it carries just enough
// to decide keep/drop without holding on to the whole body.
type responseMeta struct {
	status, size, words int
}
|
||||
|
||||
// matcher decides whether a response is "interesting" using the same precedence
|
||||
// as ffuf/feroxbuster: an explicit filter (-fc/-fs/-fw/-fr or a calibrated
|
||||
// baseline) drops the response, otherwise the match-code set decides.
|
||||
type matcher struct {
|
||||
matchCodes map[int]struct{}
|
||||
filterCodes map[int]struct{}
|
||||
filterSizes map[int]struct{}
|
||||
filterWords map[int]struct{}
|
||||
filterRe *regexp.Regexp
|
||||
baselines []responseMeta // calibrated soft-404 shapes to suppress
|
||||
}
|
||||
|
||||
// newMatcher builds the matcher from raw flag strings. when -mc is empty the
|
||||
// match set is left nil, which Matches reads as "keep anything not explicitly
|
||||
// filtered" - i.e. the legacy behavior minus the hardcoded 404/403, which move
|
||||
// into the filter set instead.
|
||||
func newMatcher(opts *DirlistOptions) (*matcher, error) {
|
||||
m := &matcher{
|
||||
filterSizes: make(map[int]struct{}),
|
||||
filterWords: make(map[int]struct{}),
|
||||
}
|
||||
|
||||
codes, err := parseIntSet(opts.MatchCodes)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse -mc: %w", err)
|
||||
}
|
||||
m.matchCodes = codes
|
||||
|
||||
m.filterCodes, err = parseIntSet(opts.FilterCodes)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse -fc: %w", err)
|
||||
}
|
||||
// no explicit match set means we fall back to the historical "drop 404/403"
|
||||
// behavior; encode it as filters so the rest of the logic is uniform.
|
||||
if len(m.matchCodes) == 0 && len(m.filterCodes) == 0 {
|
||||
m.filterCodes[statusNotFound] = struct{}{}
|
||||
m.filterCodes[statusForbidden] = struct{}{}
|
||||
}
|
||||
|
||||
m.filterSizes, err = parseIntSet(opts.FilterSizes)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse -fs: %w", err)
|
||||
}
|
||||
|
||||
m.filterWords, err = parseIntSet(opts.FilterWords)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse -fw: %w", err)
|
||||
}
|
||||
|
||||
if opts.FilterRegex != "" {
|
||||
re, err := regexp.Compile(opts.FilterRegex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse -fr: %w", err)
|
||||
}
|
||||
m.filterRe = re
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Matches reports whether the response should surface as a finding. filters win
|
||||
// over matches: a calibrated baseline, an -fc/-fs/-fw hit, or an -fr body match
|
||||
// always drops the response; otherwise the -mc set (when set) gates it.
|
||||
func (m *matcher) Matches(meta responseMeta, body []byte) bool {
|
||||
// a calibrated soft-404 shape is the same response the catch-all hands every
|
||||
// bogus path, so drop anything that matches a baseline exactly.
|
||||
for i := 0; i < len(m.baselines); i++ {
|
||||
b := m.baselines[i]
|
||||
if b.status == meta.status && b.size == meta.size && b.words == meta.words {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
if _, drop := m.filterCodes[meta.status]; drop {
|
||||
return false
|
||||
}
|
||||
if _, drop := m.filterSizes[meta.size]; drop {
|
||||
return false
|
||||
}
|
||||
if _, drop := m.filterWords[meta.words]; drop {
|
||||
return false
|
||||
}
|
||||
if m.filterRe != nil && m.filterRe.Match(body) {
|
||||
return false
|
||||
}
|
||||
|
||||
// an explicit -mc set is allow-list semantics; without it we keep whatever
|
||||
// survived the filters above.
|
||||
if len(m.matchCodes) > 0 {
|
||||
_, keep := m.matchCodes[meta.status]
|
||||
return keep
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// parseIntSet turns a comma list like "200,301,500" into a set. entries are
// trimmed of surrounding whitespace and blank entries are skipped, so empty
// input yields an empty (non-nil) set rather than an error and unset flags are
// a no-op. a non-integer entry returns a nil set and an error naming it.
func parseIntSet(raw string) (map[int]struct{}, error) {
	result := map[int]struct{}{}
	for _, field := range strings.Split(raw, ",") {
		token := strings.TrimSpace(field)
		if token == "" {
			continue
		}
		value, err := strconv.Atoi(token)
		if err != nil {
			return nil, fmt.Errorf("invalid integer %q: %w", token, err)
		}
		result[value] = struct{}{}
	}
	return result, nil
}
|
||||
|
||||
// readMeta drains the response (capped) and returns its match shape plus the
|
||||
// body bytes the regex filter needs. it never returns the raw resp; callers
|
||||
// close the body before this returns.
|
||||
func readMeta(resp *http.Response) (responseMeta, []byte) {
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, dirlistBodyCap))
|
||||
if err != nil {
|
||||
// a truncated/aborted body still has a usable status; treat what we read
|
||||
// as the body rather than dropping the whole response.
|
||||
charmlog.Debugf("dirlist: read body: %v", err)
|
||||
}
|
||||
return responseMeta{
|
||||
status: resp.StatusCode,
|
||||
size: len(body),
|
||||
words: countWords(body),
|
||||
}, body
|
||||
}
|
||||
|
||||
// countWords returns the number of whitespace-separated tokens in body. it is
// the cheap proxy ffuf uses to tell a soft-404 stub apart from a real page of
// the same byte size.
func countWords(body []byte) int {
	tokens := strings.Fields(string(body))
	return len(tokens)
}
|
||||
|
||||
// expandWords appends each extension to every base word, keeping the bare word
|
||||
// too. an empty extensions list returns the words unchanged.
|
||||
func expandWords(words []string, extensions string) []string {
|
||||
exts := splitExtensions(extensions)
|
||||
if len(exts) == 0 {
|
||||
return words
|
||||
}
|
||||
// each word yields itself plus one entry per extension.
|
||||
expanded := make([]string, 0, len(words)*(len(exts)+1))
|
||||
for i := 0; i < len(words); i++ {
|
||||
expanded = append(expanded, words[i])
|
||||
for j := 0; j < len(exts); j++ {
|
||||
expanded = append(expanded, words[i]+"."+exts[j])
|
||||
}
|
||||
}
|
||||
return expanded
|
||||
}
|
||||
|
||||
// splitExtensions normalizes a comma list such as "php, .bak ,env" into
// ["php","bak","env"]. each entry is trimmed of whitespace and of one leading
// dot, so both "php" and ".php" work; entries that end up empty are dropped.
// empty input returns nil.
func splitExtensions(raw string) []string {
	if raw == "" {
		return nil
	}

	fields := strings.Split(raw, ",")
	cleaned := make([]string, 0, len(fields))
	for _, field := range fields {
		name := strings.TrimPrefix(strings.TrimSpace(field), ".")
		if name == "" {
			continue
		}
		cleaned = append(cleaned, name)
	}
	return cleaned
}
|
||||
|
||||
// loadWordlist reads the fuzzing words. a custom -w overrides the size switch:
|
||||
// an http(s) value is fetched through the shared client, anything else is a
|
||||
// local file. with no -w it downloads the size-selected sif-runtime list.
|
||||
func loadWordlist(opts *DirlistOptions, size string, client *http.Client) ([]string, error) {
|
||||
if opts.Wordlist != "" {
|
||||
if strings.HasPrefix(opts.Wordlist, "http://") || strings.HasPrefix(opts.Wordlist, "https://") {
|
||||
return fetchWordlist(opts.Wordlist, client)
|
||||
}
|
||||
return readWordlistFile(opts.Wordlist)
|
||||
}
|
||||
|
||||
var file string
|
||||
switch size {
|
||||
case "small":
|
||||
file = smallFile
|
||||
case "medium":
|
||||
file = mediumFile
|
||||
case "large":
|
||||
file = bigFile
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown dirlist size %q", size)
|
||||
}
|
||||
return fetchWordlist(directoryURL+file, client)
|
||||
}
|
||||
|
||||
// fetchWordlist downloads a remote wordlist through the shared client so proxy
|
||||
// and rate-limit settings apply to the fetch too.
|
||||
func fetchWordlist(listURL string, client *http.Client) ([]string, error) {
|
||||
req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, listURL, http.NoBody)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build wordlist request: %w", err)
|
||||
}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("download wordlist %q: %w", listURL, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
return scanLines(resp.Body), nil
|
||||
}
|
||||
|
||||
// readWordlistFile loads a local wordlist file.
|
||||
func readWordlistFile(path string) ([]string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open wordlist %q: %w", path, err)
|
||||
}
|
||||
defer f.Close()
|
||||
return scanLines(f), nil
|
||||
}
|
||||
|
||||
// scanLines reads r line by line and returns every non-empty line in order;
// the result is nil when there are none. scanner errors are not surfaced:
// whatever was read before a failure is returned.
func scanLines(r io.Reader) []string {
	var collected []string
	// bufio.Scanner splits on lines by default.
	sc := bufio.NewScanner(r)
	for sc.Scan() {
		if text := sc.Text(); text != "" {
			collected = append(collected, text)
		}
	}
	return collected
}
|
||||
|
||||
// calibrate probes a few paths that cannot exist and records the response shapes
|
||||
// the catch-all hands them. those baselines feed the matcher so a soft-404 200
|
||||
// (the SPA wildcard) is suppressed before the real run. deterministic by design:
|
||||
// the probe paths come from the loop index, never a random source.
|
||||
func calibrate(m *matcher, baseURL string, client *http.Client) {
|
||||
for i := 0; i < calibrationProbes; i++ {
|
||||
probe := baseURL + calibrationPrefix + strconv.Itoa(i)
|
||||
req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, probe, http.NoBody)
|
||||
if err != nil {
|
||||
charmlog.Debugf("dirlist: build calibration request: %v", err)
|
||||
continue
|
||||
}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
charmlog.Debugf("dirlist: calibration probe %s: %v", probe, err)
|
||||
continue
|
||||
}
|
||||
meta, _ := readMeta(resp)
|
||||
resp.Body.Close()
|
||||
|
||||
// a genuine hard 404 already gets filtered by code; only soft responses
|
||||
// (a 200/30x catch-all) need a size/word baseline to suppress them.
|
||||
if meta.status == statusNotFound {
|
||||
continue
|
||||
}
|
||||
if !containsBaseline(m.baselines, meta) {
|
||||
m.baselines = append(m.baselines, meta)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// containsBaseline reports whether the shape is already recorded, so repeated
|
||||
// probes returning the same soft-404 don't bloat the baseline set.
|
||||
func containsBaseline(baselines []responseMeta, meta responseMeta) bool {
|
||||
for i := 0; i < len(baselines); i++ {
|
||||
if baselines[i] == meta {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Dirlist performs directory fuzzing on the target URL with ffuf-style response
|
||||
// filtering, soft-404 calibration and custom wordlists.
|
||||
//
|
||||
//nolint:gocritic // opts is the scanner's stable public config; passed by value to match the other scanners' entry points.
|
||||
func Dirlist(size string, url string, timeout time.Duration, threads int, logdir string, opts DirlistOptions) (DirectoryResults, error) {
|
||||
log := output.Module("DIRLIST")
|
||||
log.Start()
|
||||
|
||||
@@ -55,35 +388,27 @@ func Dirlist(size string, url string, timeout time.Duration, threads int, logdir
|
||||
}
|
||||
}
|
||||
|
||||
var list string
|
||||
switch size {
|
||||
case "small":
|
||||
list = directoryURL + smallFile
|
||||
case "medium":
|
||||
list = directoryURL + mediumFile
|
||||
case "large":
|
||||
list = directoryURL + bigFile
|
||||
matcher, err := newMatcher(&opts)
|
||||
if err != nil {
|
||||
log.Error("invalid matcher flags: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
client := httpx.Client(timeout)
|
||||
|
||||
req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, list, http.NoBody)
|
||||
directories, err := loadWordlist(&opts, size, client)
|
||||
if err != nil {
|
||||
log.Error("Error creating directory list request: %s", err)
|
||||
log.Error("Error loading directory list: %s", err)
|
||||
return nil, err
|
||||
}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
log.Error("Error downloading directory list: %s", err)
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
directories = expandWords(directories, opts.Extensions)
|
||||
|
||||
var directories []string
|
||||
scanner := bufio.NewScanner(resp.Body)
|
||||
scanner.Split(bufio.ScanLines)
|
||||
for scanner.Scan() {
|
||||
directories = append(directories, scanner.Text())
|
||||
// -ac learns the wildcard baseline before the run so catch-all 200s drop.
|
||||
if opts.Calibrate {
|
||||
calibrate(matcher, url, client)
|
||||
if len(matcher.baselines) > 0 {
|
||||
log.Info("calibrated %d soft-404 baseline(s)", len(matcher.baselines))
|
||||
}
|
||||
}
|
||||
|
||||
progress := output.NewProgress(len(directories), "fuzzing")
|
||||
@@ -92,7 +417,7 @@ func Dirlist(size string, url string, timeout time.Duration, threads int, logdir
|
||||
var mu sync.Mutex
|
||||
wg.Add(threads)
|
||||
|
||||
results := make([]DirectoryResult, 0, 64)
|
||||
results := make(DirectoryResults, 0, 64)
|
||||
for thread := 0; thread < threads; thread++ {
|
||||
go func(thread int) {
|
||||
defer wg.Done()
|
||||
@@ -116,24 +441,35 @@ func Dirlist(size string, url string, timeout time.Duration, threads int, logdir
|
||||
continue
|
||||
}
|
||||
|
||||
if resp.StatusCode != 404 && resp.StatusCode != 403 {
|
||||
progress.Pause()
|
||||
log.Success("found: %s [%s]", output.Highlight.Render(directory), output.Status.Render(strconv.Itoa(resp.StatusCode)))
|
||||
progress.Resume()
|
||||
|
||||
if logdir != "" {
|
||||
_ = logger.Write(sanitizedURL, logdir, fmt.Sprintf("%s [%s]\n", strconv.Itoa(resp.StatusCode), directory))
|
||||
}
|
||||
|
||||
result := DirectoryResult{
|
||||
Url: resp.Request.URL.String(),
|
||||
StatusCode: resp.StatusCode,
|
||||
}
|
||||
mu.Lock()
|
||||
results = append(results, result)
|
||||
mu.Unlock()
|
||||
}
|
||||
meta, body := readMeta(resp)
|
||||
reqURL := resp.Request.URL.String()
|
||||
resp.Body.Close()
|
||||
|
||||
if !matcher.Matches(meta, body) {
|
||||
continue
|
||||
}
|
||||
|
||||
progress.Pause()
|
||||
log.Success("found: %s [%s] (size=%d words=%d)",
|
||||
output.Highlight.Render(directory),
|
||||
output.Status.Render(strconv.Itoa(meta.status)),
|
||||
meta.size, meta.words)
|
||||
progress.Resume()
|
||||
|
||||
if logdir != "" {
|
||||
_ = logger.Write(sanitizedURL, logdir,
|
||||
fmt.Sprintf("%s [%s] size=%d words=%d\n", strconv.Itoa(meta.status), directory, meta.size, meta.words))
|
||||
}
|
||||
|
||||
result := DirectoryResult{
|
||||
Url: reqURL,
|
||||
StatusCode: meta.status,
|
||||
Size: meta.size,
|
||||
Words: meta.words,
|
||||
}
|
||||
mu.Lock()
|
||||
results = append(results, result)
|
||||
mu.Unlock()
|
||||
}
|
||||
}(thread)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,360 @@
|
||||
/*
|
||||
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
|
||||
: :
|
||||
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
|
||||
: ▄█ █ █▀ · BSD 3-Clause License :
|
||||
: :
|
||||
: (c) 2022-2026 vmfunc, xyzeva, :
|
||||
: lunchcat alumni & contributors :
|
||||
: :
|
||||
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
|
||||
*/
|
||||
|
||||
package scan
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestMatcher_Matches(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
opts DirlistOptions
|
||||
meta responseMeta
|
||||
body string
|
||||
want bool
|
||||
}{
|
||||
{
|
||||
// default behavior: 404/403 drop, everything else surfaces
|
||||
name: "default keeps 200",
|
||||
opts: DirlistOptions{},
|
||||
meta: responseMeta{status: 200, size: 10, words: 2},
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "default drops 404",
|
||||
opts: DirlistOptions{},
|
||||
meta: responseMeta{status: 404, size: 9, words: 1},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "default drops 403",
|
||||
opts: DirlistOptions{},
|
||||
meta: responseMeta{status: 403, size: 9, words: 1},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
// -mc is allow-list: only listed codes survive
|
||||
name: "mc allowlist keeps listed",
|
||||
opts: DirlistOptions{MatchCodes: "200,301"},
|
||||
meta: responseMeta{status: 301, size: 0, words: 0},
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "mc allowlist drops unlisted 200 already excluded",
|
||||
opts: DirlistOptions{MatchCodes: "301"},
|
||||
meta: responseMeta{status: 200, size: 5, words: 1},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "fc drops listed code",
|
||||
opts: DirlistOptions{FilterCodes: "500"},
|
||||
meta: responseMeta{status: 500, size: 5, words: 1},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
// with an explicit -fc and no -mc, the implicit 404/403 filter is not
|
||||
// added, so a 200 still surfaces
|
||||
name: "fc leaves others",
|
||||
opts: DirlistOptions{FilterCodes: "500"},
|
||||
meta: responseMeta{status: 200, size: 5, words: 1},
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "fs drops listed size",
|
||||
opts: DirlistOptions{FilterSizes: "1024"},
|
||||
meta: responseMeta{status: 200, size: 1024, words: 50},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "fw drops listed word count",
|
||||
opts: DirlistOptions{FilterWords: "7"},
|
||||
meta: responseMeta{status: 200, size: 40, words: 7},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "fr drops body match",
|
||||
opts: DirlistOptions{FilterRegex: "not found"},
|
||||
meta: responseMeta{status: 200, size: 9, words: 2},
|
||||
body: "page not found",
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "fr keeps non-match",
|
||||
opts: DirlistOptions{FilterRegex: "not found"},
|
||||
meta: responseMeta{status: 200, size: 5, words: 1},
|
||||
body: "welcome",
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
// filter precedence: -mc would keep it, but a size filter drops it
|
||||
name: "filter wins over match",
|
||||
opts: DirlistOptions{MatchCodes: "200", FilterSizes: "12"},
|
||||
meta: responseMeta{status: 200, size: 12, words: 3},
|
||||
want: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
m, err := newMatcher(&tt.opts)
|
||||
if err != nil {
|
||||
t.Fatalf("newMatcher: %v", err)
|
||||
}
|
||||
if got := m.Matches(tt.meta, []byte(tt.body)); got != tt.want {
|
||||
t.Errorf("Matches(%+v, %q) = %v, want %v", tt.meta, tt.body, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatcher_BaselineSuppresses(t *testing.T) {
|
||||
m, err := newMatcher(&DirlistOptions{})
|
||||
if err != nil {
|
||||
t.Fatalf("newMatcher: %v", err)
|
||||
}
|
||||
// a calibrated soft-404 shape drops an identical response
|
||||
m.baselines = []responseMeta{{status: 200, size: 42, words: 5}}
|
||||
|
||||
soft := responseMeta{status: 200, size: 42, words: 5}
|
||||
if m.Matches(soft, nil) {
|
||||
t.Error("baseline-matching response should be suppressed")
|
||||
}
|
||||
// a real page with a different size must still surface
|
||||
livePage := responseMeta{status: 200, size: 99, words: 12}
|
||||
if !m.Matches(livePage, nil) {
|
||||
t.Error("distinct response should not be suppressed by baseline")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewMatcher_InvalidFlags(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
opts DirlistOptions
|
||||
}{
|
||||
{"bad mc", DirlistOptions{MatchCodes: "abc"}},
|
||||
{"bad fc", DirlistOptions{FilterCodes: "20x"}},
|
||||
{"bad fs", DirlistOptions{FilterSizes: "big"}},
|
||||
{"bad fw", DirlistOptions{FilterWords: "-"}},
|
||||
{"bad regex", DirlistOptions{FilterRegex: "("}},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if _, err := newMatcher(&tt.opts); err == nil {
|
||||
t.Errorf("newMatcher(%+v) expected error, got nil", tt.opts)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpandWords(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
words []string
|
||||
exts string
|
||||
want []string
|
||||
}{
|
||||
{
|
||||
name: "no extensions unchanged",
|
||||
words: []string{"admin", "login"},
|
||||
exts: "",
|
||||
want: []string{"admin", "login"},
|
||||
},
|
||||
{
|
||||
name: "appends each extension and keeps bare",
|
||||
words: []string{"config"},
|
||||
exts: "php,bak,env",
|
||||
want: []string{"config", "config.php", "config.bak", "config.env"},
|
||||
},
|
||||
{
|
||||
name: "tolerates leading dot and spaces",
|
||||
words: []string{"db"},
|
||||
exts: " .sql , bak ",
|
||||
want: []string{"db", "db.sql", "db.bak"},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := expandWords(tt.words, tt.exts)
|
||||
if !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("expandWords(%v, %q) = %v, want %v", tt.words, tt.exts, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// softWildcardApp starts a test server with two real paths (/admin, /login)
// and a catch-all soft-404: every other path gets the same fixed 200 body,
// which is the SPA pattern that floods dirlist. the caller must Close the
// returned server.
func softWildcardApp() *httptest.Server {
	const softBody = "<html><body>app shell - route handled client side</body></html>"

	// static builds a handler that always answers with the given page.
	static := func(page string) http.HandlerFunc {
		return func(w http.ResponseWriter, _ *http.Request) {
			w.Write([]byte(page))
		}
	}

	mux := http.NewServeMux()
	mux.HandleFunc("/admin", static("<html><body>admin control panel dashboard here</body></html>"))
	mux.HandleFunc("/login", static("<html><body>please sign in with your account credentials now</body></html>"))
	// catch-all: anything else gets the identical soft-404 shell
	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/admin", "/login":
			return
		}
		w.Write([]byte(softBody))
	})
	return httptest.NewServer(mux)
}
|
||||
|
||||
func TestDirlist_CalibrationSuppressesWildcard(t *testing.T) {
|
||||
srv := softWildcardApp()
|
||||
defer srv.Close()
|
||||
|
||||
// the wordlist mixes the two real paths with several bogus ones the catch-all
|
||||
// answers with the soft-404 shell.
|
||||
dir := t.TempDir()
|
||||
wordlist := filepath.Join(dir, "words.txt")
|
||||
if err := os.WriteFile(wordlist, []byte("admin\nlogin\nnope\nbogus\nmissing\n"), 0o600); err != nil {
|
||||
t.Fatalf("write wordlist: %v", err)
|
||||
}
|
||||
|
||||
// without calibration every bogus path is a soft-404 200 and floods output
|
||||
noAC, err := Dirlist("small", srv.URL, 5*time.Second, 3, "", DirlistOptions{Wordlist: wordlist})
|
||||
if err != nil {
|
||||
t.Fatalf("Dirlist (no -ac): %v", err)
|
||||
}
|
||||
if len(noAC) < 5 {
|
||||
t.Fatalf("expected the wildcard to flood all 5 paths without -ac, got %d", len(noAC))
|
||||
}
|
||||
|
||||
// with -ac the soft-404 baseline is learned and the bogus paths drop
|
||||
withAC, err := Dirlist("small", srv.URL, 5*time.Second, 3, "", DirlistOptions{
|
||||
Wordlist: wordlist,
|
||||
Calibrate: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Dirlist (-ac): %v", err)
|
||||
}
|
||||
|
||||
got := pathSet(withAC)
|
||||
if !has(got, "/admin") || !has(got, "/login") {
|
||||
t.Errorf("real paths admin/login must still surface with -ac, got %v", sortedKeys(got))
|
||||
}
|
||||
for _, bogus := range []string{"/nope", "/bogus", "/missing"} {
|
||||
if has(got, bogus) {
|
||||
t.Errorf("soft-404 path %s should be suppressed by -ac, got %v", bogus, sortedKeys(got))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDirlist_ExtensionExpansion(t *testing.T) {
|
||||
// the server only answers config.php; the bare word and other extensions hit
|
||||
// the catch-all soft-404, so -e must be what surfaces config.php.
|
||||
const realBody = "<?php // database connection settings live here ?>"
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/config.php", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Write([]byte(realBody))
|
||||
})
|
||||
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||
http.NotFound(w, r) // hard 404 for everything but config.php
|
||||
})
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
|
||||
dir := t.TempDir()
|
||||
wordlist := filepath.Join(dir, "words.txt")
|
||||
if err := os.WriteFile(wordlist, []byte("config\n"), 0o600); err != nil {
|
||||
t.Fatalf("write wordlist: %v", err)
|
||||
}
|
||||
|
||||
results, err := Dirlist("small", srv.URL, 5*time.Second, 2, "", DirlistOptions{
|
||||
Wordlist: wordlist,
|
||||
Extensions: "php,bak",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Dirlist: %v", err)
|
||||
}
|
||||
|
||||
got := pathSet(results)
|
||||
if !has(got, "/config.php") {
|
||||
t.Errorf("expected -e to surface config.php, got %v", sortedKeys(got))
|
||||
}
|
||||
if has(got, "/config") || has(got, "/config.bak") {
|
||||
t.Errorf("only config.php exists; bare word and .bak are 404s, got %v", sortedKeys(got))
|
||||
}
|
||||
}
|
||||
|
||||
func TestDirlist_LocalWordlistOverridesSize(t *testing.T) {
|
||||
// a local -w must be used verbatim and never touch directoryURL; point the
|
||||
// remote at a sink that fails the test if it's ever hit.
|
||||
orig := directoryURL
|
||||
directoryURL = "http://127.0.0.1:0/should-not-be-fetched/"
|
||||
defer func() { directoryURL = orig }()
|
||||
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/secret", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Write([]byte("<html>top secret area found</html>"))
|
||||
})
|
||||
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||
http.NotFound(w, r)
|
||||
})
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
|
||||
dir := t.TempDir()
|
||||
wordlist := filepath.Join(dir, "custom.txt")
|
||||
if err := os.WriteFile(wordlist, []byte("secret\nabsent\n"), 0o600); err != nil {
|
||||
t.Fatalf("write wordlist: %v", err)
|
||||
}
|
||||
|
||||
results, err := Dirlist("large", srv.URL, 5*time.Second, 2, "", DirlistOptions{Wordlist: wordlist})
|
||||
if err != nil {
|
||||
t.Fatalf("Dirlist: %v", err)
|
||||
}
|
||||
|
||||
got := pathSet(results)
|
||||
if !has(got, "/secret") {
|
||||
t.Errorf("expected the custom wordlist to find /secret, got %v", sortedKeys(got))
|
||||
}
|
||||
if has(got, "/absent") {
|
||||
t.Errorf("/absent is a 404 and should not surface, got %v", sortedKeys(got))
|
||||
}
|
||||
}
|
||||
|
||||
// pathSet collects each result's url path for membership checks. it reuses the
|
||||
// package-level sortedKeys (crawl.go) for deterministic failure output.
|
||||
func pathSet(results DirectoryResults) map[string]struct{} {
|
||||
set := make(map[string]struct{}, len(results))
|
||||
for i := 0; i < len(results); i++ {
|
||||
if idx := strings.Index(results[i].Url, "://"); idx >= 0 {
|
||||
rest := results[i].Url[idx+len("://"):]
|
||||
if slash := strings.Index(rest, "/"); slash >= 0 {
|
||||
set[rest[slash:]] = struct{}{}
|
||||
continue
|
||||
}
|
||||
}
|
||||
set[results[i].Url] = struct{}{}
|
||||
}
|
||||
return set
|
||||
}
|
||||
|
||||
// has reports whether key is a member of set; it exists only to keep the
// assertions in the tests readable.
func has(set map[string]struct{}, key string) bool {
	if _, present := set[key]; present {
		return true
	}
	return false
}
|
||||
@@ -134,7 +134,7 @@ func TestIntegrationDirlist(t *testing.T) {
|
||||
directoryURL = srv.URL + "/"
|
||||
defer func() { directoryURL = orig }()
|
||||
|
||||
results, err := Dirlist("small", srv.URL, 5*time.Second, 3, "")
|
||||
results, err := Dirlist("small", srv.URL, 5*time.Second, 3, "", DirlistOptions{})
|
||||
if err != nil {
|
||||
t.Fatalf("Dirlist: %v", err)
|
||||
}
|
||||
|
||||
@@ -38,6 +38,30 @@ file with one url per line.
|
||||
.BR \-dirlist " \fIsize\fR"
|
||||
directory and file fuzzing (small/medium/large).
|
||||
.TP
|
||||
.BR \-mc " \fIcodes\fR"
|
||||
dirlist: match only these status codes (comma list, e.g. 200,301).
|
||||
.TP
|
||||
.BR \-fc " \fIcodes\fR"
|
||||
dirlist: filter out these status codes (comma list).
|
||||
.TP
|
||||
.BR \-fs " \fIsizes\fR"
|
||||
dirlist: filter out responses of these body sizes (comma list).
|
||||
.TP
|
||||
.BR \-fw " \fIcounts\fR"
|
||||
dirlist: filter out responses with these word counts (comma list).
|
||||
.TP
|
||||
.BR \-fr " \fIregex\fR"
|
||||
dirlist: filter out responses whose body matches this regex.
|
||||
.TP
|
||||
.B \-ac
|
||||
dirlist: auto\-calibrate the soft\-404 wildcard baseline so catch\-all 200s are dropped.
|
||||
.TP
|
||||
.BR \-w " \fIpath|url\fR"
|
||||
dirlist: custom wordlist (local file or url); overrides the \fB\-dirlist\fR size.
|
||||
.TP
|
||||
.BR \-e " \fIexts\fR"
|
||||
dirlist: extensions appended to each word (comma list, e.g. php,bak,env).
|
||||
.TP
|
||||
.BR \-dnslist " \fIsize\fR"
|
||||
subdomain enumeration (small/medium/large).
|
||||
.TP
|
||||
|
||||
@@ -231,11 +231,20 @@ func (app *App) Run() error {
|
||||
}
|
||||
|
||||
if app.settings.Dirlist != "none" {
|
||||
result, err := scan.Dirlist(app.settings.Dirlist, url, app.settings.Timeout, app.settings.Threads, app.settings.LogDir)
|
||||
result, err := scan.Dirlist(app.settings.Dirlist, url, app.settings.Timeout, app.settings.Threads, app.settings.LogDir, scan.DirlistOptions{
|
||||
MatchCodes: app.settings.DirMatchCodes,
|
||||
FilterCodes: app.settings.DirFilterCodes,
|
||||
FilterSizes: app.settings.DirFilterSizes,
|
||||
FilterWords: app.settings.DirFilterWords,
|
||||
FilterRegex: app.settings.DirFilterRegex,
|
||||
Calibrate: app.settings.DirCalibrate,
|
||||
Wordlist: app.settings.DirWordlist,
|
||||
Extensions: app.settings.DirExtensions,
|
||||
})
|
||||
if err != nil {
|
||||
log.Errorf("Error while running directory scan: %s", err)
|
||||
} else {
|
||||
moduleResults = append(moduleResults, ModuleResult{"dirlist", result})
|
||||
moduleResults = append(moduleResults, NewModuleResult(result))
|
||||
scansRun = append(scansRun, "Directory Listing")
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user