# Content Signals — declare AI content usage preferences
# Spec: https://contentsignals.org/
#   ai-train  — allow content to be used for AI model training
#   search    — allow content to be indexed by traditional search engines
#   ai-input  — allow content to be used as input to AI applications (e.g. RAG)
#
# Default group: applies to every crawler that has no more specific User-agent
# group elsewhere in this file. A crawler that does match a more specific group
# follows only that group and does not inherit the rules listed here.
#
# Content-Signal sits inside the group, directly after User-agent, because
# robots.txt directives are scoped to the group they appear in; a line placed
# before the first User-agent belongs to no group.
User-agent: *
Content-Signal: ai-train=no, search=yes, ai-input=yes
Allow: /
Disallow: /review-card.html
Disallow: /review-card.pdf
Disallow: /review-qr.png

# Training-oriented AI crawlers — blocked (matches ai-train=no). Content-Signal
# communicates the same intent declaratively; explicit User-agent rules also
# cover crawlers that honour robots.txt but not yet Content-Signal. Compliance
# with robots.txt is voluntary, so this is a request, not an enforcement
# mechanism.
#   GPTBot, CCBot, Bytespider          — training / bulk-collection crawlers
#   ClaudeBot                          — Anthropic's training crawler (its
#                                        user-facing fetchers are allowed below)
#   anthropic-ai, cohere-ai            — older tokens, kept for crawlers that
#                                        may still identify with them
#   Google-Extended, Applebot-Extended — control tokens, not separate crawlers:
#                                        they opt content fetched by Googlebot /
#                                        Applebot out of AI-training use
User-agent: GPTBot
User-agent: CCBot
User-agent: ClaudeBot
User-agent: anthropic-ai
User-agent: Bytespider
User-agent: cohere-ai
User-agent: Google-Extended
User-agent: Applebot-Extended
Disallow: /

# Search / retrieval / live-browsing AI crawlers — allowed (matches ai-input=yes,
# search=yes). Listed explicitly so future tooling that audits robots.txt can
# confirm intent without parsing Content-Signal.
#
# A crawler obeys only the most specific group that matches it, so the private
# paths from the default group are repeated here; without them these crawlers
# would be free to fetch the review-card files.
# Content-Signal is repeated for the same reason — presumably a crawler reads
# the signal from its own matching group; confirm against the spec.
# NOTE(review): GoogleOther is Google's general-purpose (non-Search) fetcher —
# confirm it is meant to stay in the allowed list.
User-agent: OAI-SearchBot
User-agent: ChatGPT-User
User-agent: Claude-SearchBot
User-agent: Claude-User
User-agent: PerplexityBot
User-agent: Perplexity-User
User-agent: GoogleOther
Content-Signal: ai-train=no, search=yes, ai-input=yes
Allow: /
Disallow: /review-card.html
Disallow: /review-card.pdf
Disallow: /review-qr.png

# Sitemap location. This directive is not tied to any User-agent group.
Sitemap: https://virginiamitchell.ca/sitemap-index.xml