feat(dirlist): response filters, wildcard calibration and custom wordlists

the old scanner surfaced every response that wasn't 404/403, so modern SPA
catch-all 200s flooded the output and made -dirlist near-useless. add ffuf-style
matching:

- -mc/-fc/-fr and -fs/-fw filter by status, regex, body size and word count;
  bodies are read through a capped io.LimitReader so size/word counts are
  deterministic and memory stays flat. filters win over matches.
- -ac auto-calibrates the soft-404 baseline from a few deterministic
  non-existent paths and drops responses matching that wildcard shape.
- -w overrides the size switch with a local file or remote list (fetched through
  the shared client so proxy/rate-limit apply); -e appends extensions per word.

size and words are added to DirectoryResult for the json output.
This commit is contained in:
vmfunc
2026-06-10 14:45:32 -07:00
parent 320fc3d4e7
commit 5050900f29
8 changed files with 833 additions and 44 deletions
+8
View File
@@ -157,6 +157,14 @@ sif has a modular architecture. modules are defined in yaml and can be extended
| flag | description |
|------|-------------|
| `-dirlist` | directory and file fuzzing (small/medium/large) |
| `-mc` | dirlist: match these status codes (comma list, e.g. 200,301) |
| `-fc` | dirlist: filter out these status codes (comma list) |
| `-fs` | dirlist: filter out responses of these body sizes (comma list) |
| `-fw` | dirlist: filter out responses with these word counts (comma list) |
| `-fr` | dirlist: filter out responses whose body matches this regex |
| `-ac` | dirlist: auto-calibrate the soft-404 wildcard baseline |
| `-w` | dirlist: custom wordlist (local file or url; overrides `-dirlist` size) |
| `-e` | dirlist: extensions appended to each word (comma list, e.g. php,bak,env) |
| `-dnslist` | subdomain enumeration (small/medium/large) |
| `-ports` | port scanning (common/full) |
| `-nuclei` | vulnerability scanning with nuclei templates |
+36
View File
@@ -33,6 +33,42 @@ sizes: `small`, `medium`, `large`
./sif -u https://example.com -dirlist medium
```
#### response filters
modern apps serve a catch-all 200 for unknown routes, so a naive scan reports
every path. these ffuf-style filters cut the noise (a filter always wins over a
match):
- `-mc <codes>` - match only these status codes (comma list, e.g. `200,301`)
- `-fc <codes>` - filter out these status codes
- `-fs <sizes>` - filter out responses of these body sizes
- `-fw <counts>` - filter out responses with these word counts
- `-fr <regex>` - filter out responses whose body matches this regex
```bash
./sif -u https://example.com -dirlist medium -mc 200,301 -fs 1234
```
#### wildcard calibration
`-ac` probes a few paths that cannot exist, learns the soft-404 baseline
(status + size + words), and auto-drops any response matching it - so SPA
catch-all 200s stop flooding the output:
```bash
./sif -u https://example.com -dirlist medium -ac
```
#### custom wordlists and extensions
`-w <path|url>` overrides the size switch with your own list (local file or
remote url); `-e <exts>` appends each extension to every word, keeping the bare
word too:
```bash
./sif -u https://example.com -w /path/to/words.txt -e php,bak,env
```
### subdomain enumeration
`-dnslist <size>` - enumerate subdomains
+16
View File
@@ -21,6 +21,14 @@ import (
type Settings struct {
Dirlist string
DirMatchCodes string // -mc dirlist: status codes to keep
DirFilterCodes string // -fc dirlist: status codes to drop
DirFilterSizes string // -fs dirlist: body sizes to drop
DirFilterWords string // -fw dirlist: word counts to drop
DirFilterRegex string // -fr dirlist: regex; body match drops response
DirCalibrate bool // -ac dirlist: auto-calibrate soft-404 baseline
DirWordlist string // -w dirlist: custom wordlist (file path or url)
DirExtensions string // -e dirlist: extensions appended to each word
Dnslist string
Debug bool
LogDir string
@@ -100,6 +108,14 @@ func Parse() *Settings {
portScopes := goflags.AllowdTypes{"common": Common, "full": Full, "none": Nil}
flagSet.CreateGroup("scans", "Scans",
flagSet.EnumVar(&settings.Dirlist, "dirlist", Nil, "Directory fuzzing scan size (small/medium/large)", listSizes),
flagSet.StringVar(&settings.DirMatchCodes, "mc", "", "Dirlist: match these status codes (comma list, e.g. 200,301)"),
flagSet.StringVar(&settings.DirFilterCodes, "fc", "", "Dirlist: filter out these status codes (comma list)"),
flagSet.StringVar(&settings.DirFilterSizes, "fs", "", "Dirlist: filter out responses of these body sizes (comma list)"),
flagSet.StringVar(&settings.DirFilterWords, "fw", "", "Dirlist: filter out responses with these word counts (comma list)"),
flagSet.StringVar(&settings.DirFilterRegex, "fr", "", "Dirlist: filter out responses whose body matches this regex"),
flagSet.BoolVar(&settings.DirCalibrate, "ac", false, "Dirlist: auto-calibrate the soft-404 wildcard baseline"),
flagSet.StringVar(&settings.DirWordlist, "w", "", "Dirlist: custom wordlist (local file path or url; overrides -dirlist size)"),
flagSet.StringVar(&settings.DirExtensions, "e", "", "Dirlist: extensions appended to each word (comma list, e.g. php,bak,env)"),
flagSet.EnumVar(&settings.Dnslist, "dnslist", Nil, "DNS fuzzing scan size (small/medium/large)", listSizes),
flagSet.EnumVar(&settings.Ports, "ports", Nil, "Port scanning scope (common/full)", portScopes),
flagSet.BoolVar(&settings.Dorking, "dork", false, "Enable Google dorking"),
+377 -41
View File
@@ -16,8 +16,12 @@ import (
"bufio"
"context"
"fmt"
"io"
"net/http"
"os"
"regexp"
"strconv"
"strings"
"sync"
"time"
@@ -36,13 +40,342 @@ const (
bigFile = "directory-list-2.3-big.txt"
)
// dirlistBodyCap bounds how many bytes we read per response before computing
// size/word counts. modern apps stream large html; capping keeps memory flat
// and makes size/word matching deterministic against arbitrarily large bodies.
const dirlistBodyCap = 512 * 1024
// soft-404 calibration probes. we ask for a handful of deterministic paths that
// cannot exist, then treat any response shape they share as the wildcard
// baseline. deterministic (no rng) so the workflow stays reproducible.
const (
calibrationProbes = 3
calibrationPrefix = "/sif-cal-"
)
// statusNotFound / statusForbidden are the historical default "not interesting"
// codes; they seed the filter set when no explicit -mc/-fc is given.
const (
statusNotFound = 404
statusForbidden = 403
)
type DirectoryResult struct {
Url string `json:"url"`
StatusCode int `json:"status_code"`
Size int `json:"size"`
Words int `json:"words"`
}
// Dirlist performs directory fuzzing on the target URL.
func Dirlist(size string, url string, timeout time.Duration, threads int, logdir string) ([]DirectoryResult, error) {
// DirlistOptions carries the ffuf-style matcher knobs. the zero value reproduces
// the legacy behavior (report everything that isn't 404/403), so callers that
// don't set anything keep the old defaults.
type DirlistOptions struct {
MatchCodes string // -mc comma list of status codes to keep
FilterCodes string // -fc comma list of status codes to drop
FilterSizes string // -fs comma list of body sizes to drop
FilterWords string // -fw comma list of word counts to drop
FilterRegex string // -fr regex; a body match drops the response
Calibrate bool // -ac auto-calibrate the soft-404 wildcard baseline
Wordlist string // -w local path or url; overrides the size switch
Extensions string // -e comma list appended to each word (php,bak,env)
}
// responseMeta is the shape we match on: just enough of the response to decide
// keep/drop without holding the whole body.
type responseMeta struct {
status int
size int
words int
}
// matcher decides whether a response is "interesting" using the same precedence
// as ffuf/feroxbuster: an explicit filter (-fc/-fs/-fw/-fr or a calibrated
// baseline) drops the response, otherwise the match-code set decides.
type matcher struct {
matchCodes map[int]struct{}
filterCodes map[int]struct{}
filterSizes map[int]struct{}
filterWords map[int]struct{}
filterRe *regexp.Regexp
baselines []responseMeta // calibrated soft-404 shapes to suppress
}
// newMatcher builds the matcher from raw flag strings. when -mc is empty the
// match set is left nil, which Matches reads as "keep anything not explicitly
// filtered" - i.e. the legacy behavior minus the hardcoded 404/403, which move
// into the filter set instead.
func newMatcher(opts *DirlistOptions) (*matcher, error) {
m := &matcher{
filterSizes: make(map[int]struct{}),
filterWords: make(map[int]struct{}),
}
codes, err := parseIntSet(opts.MatchCodes)
if err != nil {
return nil, fmt.Errorf("parse -mc: %w", err)
}
m.matchCodes = codes
m.filterCodes, err = parseIntSet(opts.FilterCodes)
if err != nil {
return nil, fmt.Errorf("parse -fc: %w", err)
}
// no explicit match set means we fall back to the historical "drop 404/403"
// behavior; encode it as filters so the rest of the logic is uniform.
if len(m.matchCodes) == 0 && len(m.filterCodes) == 0 {
m.filterCodes[statusNotFound] = struct{}{}
m.filterCodes[statusForbidden] = struct{}{}
}
m.filterSizes, err = parseIntSet(opts.FilterSizes)
if err != nil {
return nil, fmt.Errorf("parse -fs: %w", err)
}
m.filterWords, err = parseIntSet(opts.FilterWords)
if err != nil {
return nil, fmt.Errorf("parse -fw: %w", err)
}
if opts.FilterRegex != "" {
re, err := regexp.Compile(opts.FilterRegex)
if err != nil {
return nil, fmt.Errorf("parse -fr: %w", err)
}
m.filterRe = re
}
return m, nil
}
// Matches reports whether the response should surface as a finding. filters win
// over matches: a calibrated baseline, an -fc/-fs/-fw hit, or an -fr body match
// always drops the response; otherwise the -mc set (when set) gates it.
func (m *matcher) Matches(meta responseMeta, body []byte) bool {
// a calibrated soft-404 shape is the same response the catch-all hands every
// bogus path, so drop anything that matches a baseline exactly.
for i := 0; i < len(m.baselines); i++ {
b := m.baselines[i]
if b.status == meta.status && b.size == meta.size && b.words == meta.words {
return false
}
}
if _, drop := m.filterCodes[meta.status]; drop {
return false
}
if _, drop := m.filterSizes[meta.size]; drop {
return false
}
if _, drop := m.filterWords[meta.words]; drop {
return false
}
if m.filterRe != nil && m.filterRe.Match(body) {
return false
}
// an explicit -mc set is allow-list semantics; without it we keep whatever
// survived the filters above.
if len(m.matchCodes) > 0 {
_, keep := m.matchCodes[meta.status]
return keep
}
return true
}
// parseIntSet turns a comma list like "200,301,500" into a set. empty input is a
// nil set, not an error, so unset flags are a no-op.
func parseIntSet(raw string) (map[int]struct{}, error) {
set := make(map[int]struct{})
if raw == "" {
return set, nil
}
for _, part := range strings.Split(raw, ",") {
part = strings.TrimSpace(part)
if part == "" {
continue
}
n, err := strconv.Atoi(part)
if err != nil {
return nil, fmt.Errorf("invalid integer %q: %w", part, err)
}
set[n] = struct{}{}
}
return set, nil
}
// readMeta drains the response (capped) and returns its match shape plus the
// body bytes the regex filter needs. it never returns the raw resp; callers
// close the body before this returns.
func readMeta(resp *http.Response) (responseMeta, []byte) {
body, err := io.ReadAll(io.LimitReader(resp.Body, dirlistBodyCap))
if err != nil {
// a truncated/aborted body still has a usable status; treat what we read
// as the body rather than dropping the whole response.
charmlog.Debugf("dirlist: read body: %v", err)
}
return responseMeta{
status: resp.StatusCode,
size: len(body),
words: countWords(body),
}, body
}
// countWords counts whitespace-separated tokens; the cheap proxy ffuf uses to
// tell a soft-404 stub apart from a real page of the same byte size.
func countWords(body []byte) int {
return len(strings.Fields(string(body)))
}
// expandWords appends each extension to every base word, keeping the bare word
// too. an empty extensions list returns the words unchanged.
func expandWords(words []string, extensions string) []string {
exts := splitExtensions(extensions)
if len(exts) == 0 {
return words
}
// each word yields itself plus one entry per extension.
expanded := make([]string, 0, len(words)*(len(exts)+1))
for i := 0; i < len(words); i++ {
expanded = append(expanded, words[i])
for j := 0; j < len(exts); j++ {
expanded = append(expanded, words[i]+"."+exts[j])
}
}
return expanded
}
// splitExtensions normalizes "php, .bak ,env" into ["php","bak","env"]; a
// leading dot is tolerated so both "php" and ".php" work.
func splitExtensions(raw string) []string {
if raw == "" {
return nil
}
parts := strings.Split(raw, ",")
exts := make([]string, 0, len(parts))
for i := 0; i < len(parts); i++ {
ext := strings.TrimSpace(parts[i])
ext = strings.TrimPrefix(ext, ".")
if ext != "" {
exts = append(exts, ext)
}
}
return exts
}
// loadWordlist reads the fuzzing words. a custom -w overrides the size switch:
// an http(s) value is fetched through the shared client, anything else is a
// local file. with no -w it downloads the size-selected sif-runtime list.
func loadWordlist(opts *DirlistOptions, size string, client *http.Client) ([]string, error) {
if opts.Wordlist != "" {
if strings.HasPrefix(opts.Wordlist, "http://") || strings.HasPrefix(opts.Wordlist, "https://") {
return fetchWordlist(opts.Wordlist, client)
}
return readWordlistFile(opts.Wordlist)
}
var file string
switch size {
case "small":
file = smallFile
case "medium":
file = mediumFile
case "large":
file = bigFile
default:
return nil, fmt.Errorf("unknown dirlist size %q", size)
}
return fetchWordlist(directoryURL+file, client)
}
// fetchWordlist downloads a remote wordlist through the shared client so proxy
// and rate-limit settings apply to the fetch too.
func fetchWordlist(listURL string, client *http.Client) ([]string, error) {
req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, listURL, http.NoBody)
if err != nil {
return nil, fmt.Errorf("build wordlist request: %w", err)
}
resp, err := client.Do(req)
if err != nil {
return nil, fmt.Errorf("download wordlist %q: %w", listURL, err)
}
defer resp.Body.Close()
return scanLines(resp.Body), nil
}
// readWordlistFile loads a local wordlist file.
func readWordlistFile(path string) ([]string, error) {
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("open wordlist %q: %w", path, err)
}
defer f.Close()
return scanLines(f), nil
}
// scanLines reads non-empty lines into a slice.
func scanLines(r io.Reader) []string {
var lines []string
scanner := bufio.NewScanner(r)
scanner.Split(bufio.ScanLines)
for scanner.Scan() {
line := scanner.Text()
if line != "" {
lines = append(lines, line)
}
}
return lines
}
// calibrate probes a few paths that cannot exist and records the response shapes
// the catch-all hands them. those baselines feed the matcher so a soft-404 200
// (the SPA wildcard) is suppressed before the real run. deterministic by design:
// the probe paths come from the loop index, never a random source.
func calibrate(m *matcher, baseURL string, client *http.Client) {
for i := 0; i < calibrationProbes; i++ {
probe := baseURL + calibrationPrefix + strconv.Itoa(i)
req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, probe, http.NoBody)
if err != nil {
charmlog.Debugf("dirlist: build calibration request: %v", err)
continue
}
resp, err := client.Do(req)
if err != nil {
charmlog.Debugf("dirlist: calibration probe %s: %v", probe, err)
continue
}
meta, _ := readMeta(resp)
resp.Body.Close()
// a genuine hard 404 already gets filtered by code; only soft responses
// (a 200/30x catch-all) need a size/word baseline to suppress them.
if meta.status == statusNotFound {
continue
}
if !containsBaseline(m.baselines, meta) {
m.baselines = append(m.baselines, meta)
}
}
}
// containsBaseline reports whether the shape is already recorded, so repeated
// probes returning the same soft-404 don't bloat the baseline set.
func containsBaseline(baselines []responseMeta, meta responseMeta) bool {
for i := 0; i < len(baselines); i++ {
if baselines[i] == meta {
return true
}
}
return false
}
// Dirlist performs directory fuzzing on the target URL with ffuf-style response
// filtering, soft-404 calibration and custom wordlists.
//
//nolint:gocritic // opts is the scanner's stable public config; passed by value to match the other scanners' entry points.
func Dirlist(size string, url string, timeout time.Duration, threads int, logdir string, opts DirlistOptions) (DirectoryResults, error) {
log := output.Module("DIRLIST")
log.Start()
@@ -55,35 +388,27 @@ func Dirlist(size string, url string, timeout time.Duration, threads int, logdir
}
}
var list string
switch size {
case "small":
list = directoryURL + smallFile
case "medium":
list = directoryURL + mediumFile
case "large":
list = directoryURL + bigFile
matcher, err := newMatcher(&opts)
if err != nil {
log.Error("invalid matcher flags: %v", err)
return nil, err
}
client := httpx.Client(timeout)
req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, list, http.NoBody)
directories, err := loadWordlist(&opts, size, client)
if err != nil {
log.Error("Error creating directory list request: %s", err)
log.Error("Error loading directory list: %s", err)
return nil, err
}
resp, err := client.Do(req)
if err != nil {
log.Error("Error downloading directory list: %s", err)
return nil, err
}
defer resp.Body.Close()
directories = expandWords(directories, opts.Extensions)
var directories []string
scanner := bufio.NewScanner(resp.Body)
scanner.Split(bufio.ScanLines)
for scanner.Scan() {
directories = append(directories, scanner.Text())
// -ac learns the wildcard baseline before the run so catch-all 200s drop.
if opts.Calibrate {
calibrate(matcher, url, client)
if len(matcher.baselines) > 0 {
log.Info("calibrated %d soft-404 baseline(s)", len(matcher.baselines))
}
}
progress := output.NewProgress(len(directories), "fuzzing")
@@ -92,7 +417,7 @@ func Dirlist(size string, url string, timeout time.Duration, threads int, logdir
var mu sync.Mutex
wg.Add(threads)
results := make([]DirectoryResult, 0, 64)
results := make(DirectoryResults, 0, 64)
for thread := 0; thread < threads; thread++ {
go func(thread int) {
defer wg.Done()
@@ -116,24 +441,35 @@ func Dirlist(size string, url string, timeout time.Duration, threads int, logdir
continue
}
if resp.StatusCode != 404 && resp.StatusCode != 403 {
progress.Pause()
log.Success("found: %s [%s]", output.Highlight.Render(directory), output.Status.Render(strconv.Itoa(resp.StatusCode)))
progress.Resume()
if logdir != "" {
_ = logger.Write(sanitizedURL, logdir, fmt.Sprintf("%s [%s]\n", strconv.Itoa(resp.StatusCode), directory))
}
result := DirectoryResult{
Url: resp.Request.URL.String(),
StatusCode: resp.StatusCode,
}
mu.Lock()
results = append(results, result)
mu.Unlock()
}
meta, body := readMeta(resp)
reqURL := resp.Request.URL.String()
resp.Body.Close()
if !matcher.Matches(meta, body) {
continue
}
progress.Pause()
log.Success("found: %s [%s] (size=%d words=%d)",
output.Highlight.Render(directory),
output.Status.Render(strconv.Itoa(meta.status)),
meta.size, meta.words)
progress.Resume()
if logdir != "" {
_ = logger.Write(sanitizedURL, logdir,
fmt.Sprintf("%s [%s] size=%d words=%d\n", strconv.Itoa(meta.status), directory, meta.size, meta.words))
}
result := DirectoryResult{
Url: reqURL,
StatusCode: meta.status,
Size: meta.size,
Words: meta.words,
}
mu.Lock()
results = append(results, result)
mu.Unlock()
}
}(thread)
}
+360
View File
@@ -0,0 +1,360 @@
/*
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
: :
: █▀ █ █▀▀ · Blazing-fast pentesting suite :
: ▄█ █ █▀ · BSD 3-Clause License :
: :
: (c) 2022-2026 vmfunc, xyzeva, :
: lunchcat alumni & contributors :
: :
·━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━·
*/
package scan
import (
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"reflect"
"strings"
"testing"
"time"
)
func TestMatcher_Matches(t *testing.T) {
tests := []struct {
name string
opts DirlistOptions
meta responseMeta
body string
want bool
}{
{
// default behavior: 404/403 drop, everything else surfaces
name: "default keeps 200",
opts: DirlistOptions{},
meta: responseMeta{status: 200, size: 10, words: 2},
want: true,
},
{
name: "default drops 404",
opts: DirlistOptions{},
meta: responseMeta{status: 404, size: 9, words: 1},
want: false,
},
{
name: "default drops 403",
opts: DirlistOptions{},
meta: responseMeta{status: 403, size: 9, words: 1},
want: false,
},
{
// -mc is allow-list: only listed codes survive
name: "mc allowlist keeps listed",
opts: DirlistOptions{MatchCodes: "200,301"},
meta: responseMeta{status: 301, size: 0, words: 0},
want: true,
},
{
name: "mc allowlist drops unlisted 200 already excluded",
opts: DirlistOptions{MatchCodes: "301"},
meta: responseMeta{status: 200, size: 5, words: 1},
want: false,
},
{
name: "fc drops listed code",
opts: DirlistOptions{FilterCodes: "500"},
meta: responseMeta{status: 500, size: 5, words: 1},
want: false,
},
{
// with an explicit -fc and no -mc, the implicit 404/403 filter is not
// added, so a 200 still surfaces
name: "fc leaves others",
opts: DirlistOptions{FilterCodes: "500"},
meta: responseMeta{status: 200, size: 5, words: 1},
want: true,
},
{
name: "fs drops listed size",
opts: DirlistOptions{FilterSizes: "1024"},
meta: responseMeta{status: 200, size: 1024, words: 50},
want: false,
},
{
name: "fw drops listed word count",
opts: DirlistOptions{FilterWords: "7"},
meta: responseMeta{status: 200, size: 40, words: 7},
want: false,
},
{
name: "fr drops body match",
opts: DirlistOptions{FilterRegex: "not found"},
meta: responseMeta{status: 200, size: 9, words: 2},
body: "page not found",
want: false,
},
{
name: "fr keeps non-match",
opts: DirlistOptions{FilterRegex: "not found"},
meta: responseMeta{status: 200, size: 5, words: 1},
body: "welcome",
want: true,
},
{
// filter precedence: -mc would keep it, but a size filter drops it
name: "filter wins over match",
opts: DirlistOptions{MatchCodes: "200", FilterSizes: "12"},
meta: responseMeta{status: 200, size: 12, words: 3},
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
m, err := newMatcher(&tt.opts)
if err != nil {
t.Fatalf("newMatcher: %v", err)
}
if got := m.Matches(tt.meta, []byte(tt.body)); got != tt.want {
t.Errorf("Matches(%+v, %q) = %v, want %v", tt.meta, tt.body, got, tt.want)
}
})
}
}
func TestMatcher_BaselineSuppresses(t *testing.T) {
m, err := newMatcher(&DirlistOptions{})
if err != nil {
t.Fatalf("newMatcher: %v", err)
}
// a calibrated soft-404 shape drops an identical response
m.baselines = []responseMeta{{status: 200, size: 42, words: 5}}
soft := responseMeta{status: 200, size: 42, words: 5}
if m.Matches(soft, nil) {
t.Error("baseline-matching response should be suppressed")
}
// a real page with a different size must still surface
livePage := responseMeta{status: 200, size: 99, words: 12}
if !m.Matches(livePage, nil) {
t.Error("distinct response should not be suppressed by baseline")
}
}
func TestNewMatcher_InvalidFlags(t *testing.T) {
tests := []struct {
name string
opts DirlistOptions
}{
{"bad mc", DirlistOptions{MatchCodes: "abc"}},
{"bad fc", DirlistOptions{FilterCodes: "20x"}},
{"bad fs", DirlistOptions{FilterSizes: "big"}},
{"bad fw", DirlistOptions{FilterWords: "-"}},
{"bad regex", DirlistOptions{FilterRegex: "("}},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if _, err := newMatcher(&tt.opts); err == nil {
t.Errorf("newMatcher(%+v) expected error, got nil", tt.opts)
}
})
}
}
func TestExpandWords(t *testing.T) {
tests := []struct {
name string
words []string
exts string
want []string
}{
{
name: "no extensions unchanged",
words: []string{"admin", "login"},
exts: "",
want: []string{"admin", "login"},
},
{
name: "appends each extension and keeps bare",
words: []string{"config"},
exts: "php,bak,env",
want: []string{"config", "config.php", "config.bak", "config.env"},
},
{
name: "tolerates leading dot and spaces",
words: []string{"db"},
exts: " .sql , bak ",
want: []string{"db", "db.sql", "db.bak"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := expandWords(tt.words, tt.exts)
if !reflect.DeepEqual(got, tt.want) {
t.Errorf("expandWords(%v, %q) = %v, want %v", tt.words, tt.exts, got, tt.want)
}
})
}
}
// softWildcardApp serves a couple of real paths and a catch-all soft-404: every
// unknown path returns a fixed 200 body, the SPA pattern that floods dirlist.
func softWildcardApp() *httptest.Server {
const softBody = "<html><body>app shell - route handled client side</body></html>"
mux := http.NewServeMux()
mux.HandleFunc("/admin", func(w http.ResponseWriter, r *http.Request) {
w.Write([]byte("<html><body>admin control panel dashboard here</body></html>"))
})
mux.HandleFunc("/login", func(w http.ResponseWriter, r *http.Request) {
w.Write([]byte("<html><body>please sign in with your account credentials now</body></html>"))
})
// catch-all: anything else gets the identical soft-404 shell
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/admin" || r.URL.Path == "/login" {
return
}
w.Write([]byte(softBody))
})
return httptest.NewServer(mux)
}
func TestDirlist_CalibrationSuppressesWildcard(t *testing.T) {
srv := softWildcardApp()
defer srv.Close()
// the wordlist mixes the two real paths with several bogus ones the catch-all
// answers with the soft-404 shell.
dir := t.TempDir()
wordlist := filepath.Join(dir, "words.txt")
if err := os.WriteFile(wordlist, []byte("admin\nlogin\nnope\nbogus\nmissing\n"), 0o600); err != nil {
t.Fatalf("write wordlist: %v", err)
}
// without calibration every bogus path is a soft-404 200 and floods output
noAC, err := Dirlist("small", srv.URL, 5*time.Second, 3, "", DirlistOptions{Wordlist: wordlist})
if err != nil {
t.Fatalf("Dirlist (no -ac): %v", err)
}
if len(noAC) < 5 {
t.Fatalf("expected the wildcard to flood all 5 paths without -ac, got %d", len(noAC))
}
// with -ac the soft-404 baseline is learned and the bogus paths drop
withAC, err := Dirlist("small", srv.URL, 5*time.Second, 3, "", DirlistOptions{
Wordlist: wordlist,
Calibrate: true,
})
if err != nil {
t.Fatalf("Dirlist (-ac): %v", err)
}
got := pathSet(withAC)
if !has(got, "/admin") || !has(got, "/login") {
t.Errorf("real paths admin/login must still surface with -ac, got %v", sortedKeys(got))
}
for _, bogus := range []string{"/nope", "/bogus", "/missing"} {
if has(got, bogus) {
t.Errorf("soft-404 path %s should be suppressed by -ac, got %v", bogus, sortedKeys(got))
}
}
}
func TestDirlist_ExtensionExpansion(t *testing.T) {
// the server only answers config.php; the bare word and other extensions hit
// the catch-all soft-404, so -e must be what surfaces config.php.
const realBody = "<?php // database connection settings live here ?>"
mux := http.NewServeMux()
mux.HandleFunc("/config.php", func(w http.ResponseWriter, r *http.Request) {
w.Write([]byte(realBody))
})
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
http.NotFound(w, r) // hard 404 for everything but config.php
})
srv := httptest.NewServer(mux)
defer srv.Close()
dir := t.TempDir()
wordlist := filepath.Join(dir, "words.txt")
if err := os.WriteFile(wordlist, []byte("config\n"), 0o600); err != nil {
t.Fatalf("write wordlist: %v", err)
}
results, err := Dirlist("small", srv.URL, 5*time.Second, 2, "", DirlistOptions{
Wordlist: wordlist,
Extensions: "php,bak",
})
if err != nil {
t.Fatalf("Dirlist: %v", err)
}
got := pathSet(results)
if !has(got, "/config.php") {
t.Errorf("expected -e to surface config.php, got %v", sortedKeys(got))
}
if has(got, "/config") || has(got, "/config.bak") {
t.Errorf("only config.php exists; bare word and .bak are 404s, got %v", sortedKeys(got))
}
}
func TestDirlist_LocalWordlistOverridesSize(t *testing.T) {
// a local -w must be used verbatim and never touch directoryURL; point the
// remote at a sink that fails the test if it's ever hit.
orig := directoryURL
directoryURL = "http://127.0.0.1:0/should-not-be-fetched/"
defer func() { directoryURL = orig }()
mux := http.NewServeMux()
mux.HandleFunc("/secret", func(w http.ResponseWriter, r *http.Request) {
w.Write([]byte("<html>top secret area found</html>"))
})
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
http.NotFound(w, r)
})
srv := httptest.NewServer(mux)
defer srv.Close()
dir := t.TempDir()
wordlist := filepath.Join(dir, "custom.txt")
if err := os.WriteFile(wordlist, []byte("secret\nabsent\n"), 0o600); err != nil {
t.Fatalf("write wordlist: %v", err)
}
results, err := Dirlist("large", srv.URL, 5*time.Second, 2, "", DirlistOptions{Wordlist: wordlist})
if err != nil {
t.Fatalf("Dirlist: %v", err)
}
got := pathSet(results)
if !has(got, "/secret") {
t.Errorf("expected the custom wordlist to find /secret, got %v", sortedKeys(got))
}
if has(got, "/absent") {
t.Errorf("/absent is a 404 and should not surface, got %v", sortedKeys(got))
}
}
// pathSet collects each result's url path for membership checks. it reuses the
// package-level sortedKeys (crawl.go) for deterministic failure output.
func pathSet(results DirectoryResults) map[string]struct{} {
set := make(map[string]struct{}, len(results))
for i := 0; i < len(results); i++ {
if idx := strings.Index(results[i].Url, "://"); idx >= 0 {
rest := results[i].Url[idx+len("://"):]
if slash := strings.Index(rest, "/"); slash >= 0 {
set[rest[slash:]] = struct{}{}
continue
}
}
set[results[i].Url] = struct{}{}
}
return set
}
// has is a tiny readability helper for set membership in assertions.
func has(set map[string]struct{}, key string) bool {
_, ok := set[key]
return ok
}
+1 -1
View File
@@ -134,7 +134,7 @@ func TestIntegrationDirlist(t *testing.T) {
directoryURL = srv.URL + "/"
defer func() { directoryURL = orig }()
results, err := Dirlist("small", srv.URL, 5*time.Second, 3, "")
results, err := Dirlist("small", srv.URL, 5*time.Second, 3, "", DirlistOptions{})
if err != nil {
t.Fatalf("Dirlist: %v", err)
}
+24
View File
@@ -38,6 +38,30 @@ file with one url per line.
.BR \-dirlist " \fIsize\fR"
directory and file fuzzing (small/medium/large).
.TP
.BR \-mc " \fIcodes\fR"
dirlist: match only these status codes (comma list, e.g. 200,301).
.TP
.BR \-fc " \fIcodes\fR"
dirlist: filter out these status codes (comma list).
.TP
.BR \-fs " \fIsizes\fR"
dirlist: filter out responses of these body sizes (comma list).
.TP
.BR \-fw " \fIcounts\fR"
dirlist: filter out responses with these word counts (comma list).
.TP
.BR \-fr " \fIregex\fR"
dirlist: filter out responses whose body matches this regex.
.TP
.B \-ac
dirlist: auto\-calibrate the soft\-404 wildcard baseline so catch\-all 200s are dropped.
.TP
.BR \-w " \fIpath|url\fR"
dirlist: custom wordlist (local file or url); overrides the \fB\-dirlist\fR size.
.TP
.BR \-e " \fIexts\fR"
dirlist: extensions appended to each word (comma list, e.g. php,bak,env).
.TP
.BR \-dnslist " \fIsize\fR"
subdomain enumeration (small/medium/large).
.TP
+11 -2
View File
@@ -231,11 +231,20 @@ func (app *App) Run() error {
}
if app.settings.Dirlist != "none" {
result, err := scan.Dirlist(app.settings.Dirlist, url, app.settings.Timeout, app.settings.Threads, app.settings.LogDir)
result, err := scan.Dirlist(app.settings.Dirlist, url, app.settings.Timeout, app.settings.Threads, app.settings.LogDir, scan.DirlistOptions{
MatchCodes: app.settings.DirMatchCodes,
FilterCodes: app.settings.DirFilterCodes,
FilterSizes: app.settings.DirFilterSizes,
FilterWords: app.settings.DirFilterWords,
FilterRegex: app.settings.DirFilterRegex,
Calibrate: app.settings.DirCalibrate,
Wordlist: app.settings.DirWordlist,
Extensions: app.settings.DirExtensions,
})
if err != nil {
log.Errorf("Error while running directory scan: %s", err)
} else {
moduleResults = append(moduleResults, ModuleResult{"dirlist", result})
moduleResults = append(moduleResults, NewModuleResult(result))
scansRun = append(scansRun, "Directory Listing")
}
}