# Robots.txt - AI Agent & LLM Optimized
# Identity: Lauri Hänninen (Lauri Hanninen)
# Description: Product Marketing Lead at Trezor
# Canonical: https://laurihanninen.com
# Version: 3 (Enhanced AI Discovery)
# Last Updated: 2026-02-23

# --- AI AGENT DISCOVERY & GUIDANCE ---
# Directs AI crawlers to pre-processed context files to save tokens and improve accuracy.
# Priority: 1. /llms.txt | 2. /llms-full.txt | 3. /sitemap.xml
# Note: X-Robots-Tag is an HTTP response header, not a robots.txt directive,
# so LLM resource hints are expressed here only as Allow rules and comments.

User-agent: *
Allow: /
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /dist/
Disallow: /node_modules/
Disallow: /.git/
Disallow: /private/
# Crawl-delay is non-standard (ignored by Google) but throttles crawlers that honor it.
Crawl-delay: 1

# --- PRIMARY AI & LLM CRAWLERS ---
# /llms.txt provides a concise summary; /llms-full.txt offers comprehensive detail.
# Both are fully accessible; sitemap.xml defines crawl priority.
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: anthropic-ai
User-agent: PerplexityBot
User-agent: YouBot
User-agent: Grok-bot
User-agent: Google-Extended
User-agent: Applebot
User-agent: Applebot-Extended
User-agent: FacebookBot
User-agent: Meta-ExternalAgent
User-agent: Bytespider
User-agent: Amazonbot
User-agent: LinkedInBot
User-agent: CCBot
User-agent: Tornadobot
User-agent: cohere-ai
User-agent: Diffbot
Allow: /
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /dist/
Disallow: /node_modules/
Disallow: /.git/
Disallow: /private/

# --- SEARCH ENGINE CRAWLERS ---
User-agent: Googlebot
User-agent: Bingbot
User-agent: Slurp
User-agent: DuckDuckBot
User-agent: Twitterbot
Allow: /
Disallow: /dist/
Disallow: /node_modules/
Disallow: /.git/
Disallow: /private/

# --- SITEMAP ---
Sitemap: https://laurihanninen.com/sitemap.xml

# --- AI CONTENT POLICY ---
# Primary AI Resource: /llms.txt (optimized summary for context-efficient retrieval)
# Extended Resource: /llms-full.txt (comprehensive career knowledge graph)
# HTML Meta Robots: All pages set to "index, follow"
# Structured Data: JSON-LD schema in the <head> of all pages
# Priority Signals: See sitemap.xml for crawl priority recommendations