mirror of
https://github.com/Jieyab89/OSINT-Cheat-sheet.git
synced 2026-06-12 11:01:18 -07:00
44 lines
696 B
Plaintext
44 lines
696 B
Plaintext
User-agent: *
Allow: /

# Optional crawl delay
Crawl-delay: 5

# Sitemap
Sitemap: https://jieyab89.github.io/OSINT-Cheat-sheet/sitemap.xml
Sitemap: https://jieyab89-osint.gitbook.io/jieyab89-osint-cheat-sheet-wiki-tips/sitemap-pages.xml

# Block unnecessary paths
Disallow: /Script
Disallow: /mail

# AI / LLM discovery
Allow: /llms.txt

# Block archive crawlers
User-agent: ia_archiver
Disallow: /

User-agent: archive.org_bot
Disallow: /

User-agent: archivebot
Disallow: /

User-agent: wayback
Disallow: /

User-agent: waybackbot
Disallow: /

User-agent: archive-is
Disallow: /

User-agent: archive.today_bot
Disallow: /

User-agent: archive.ph
Disallow: /

User-agent: saveweb2zip
Disallow: /