# Canonical host: https://aideazz.xyz (apex). www → apex 301 in DNS/hosting + GSC property on apex.
# GEO: explicit Allow for known AI/search crawlers (each can be tuned independently; training vs citations).

# --- Classic search & previews ---
User-agent: Googlebot
Allow: /

User-agent: Bingbot
Allow: /

User-agent: Applebot
Allow: /

User-agent: Twitterbot
Allow: /

User-agent: facebookexternalhit
Allow: /

User-agent: LinkedInBot
Allow: /

User-agent: Slackbot
Allow: /

User-agent: Discordbot
Allow: /

# --- Google: Gemini / Vertex AI training signal (token — explicit opt-in to crawl use for AI) ---
User-agent: Google-Extended
Allow: /

# --- Apple: Apple Intelligence training signal (token — explicit opt-in) ---
User-agent: Applebot-Extended
Allow: /

# --- OpenAI: training, ChatGPT browsing, ChatGPT search / citations ---
User-agent: GPTBot
Allow: /

User-agent: ChatGPT-User
Allow: /

User-agent: OAI-SearchBot
Allow: /

# --- Anthropic ---
User-agent: ClaudeBot
Allow: /

# --- Perplexity ---
User-agent: PerplexityBot
Allow: /

# --- Meta (AI + link previews; UA strings vary — list common tokens) ---
User-agent: Meta-ExternalAgent
Allow: /

User-agent: FacebookBot
Allow: /

# --- Common Crawl (feeds many downstream models) ---
User-agent: CCBot
Allow: /

# --- Other AI / ML crawlers often cited in GEO guides ---
User-agent: Amazonbot
Allow: /

User-agent: cohere-ai
Allow: /

User-agent: Diffbot
Allow: /

User-agent: Bytespider
Allow: /

# --- Fallback: everyone else (must remain Allow for crawlers not listed above) ---
User-agent: *
Allow: /

# Hard deny only junk paths (never index secrets-as-filenames at origin)
Disallow: /.gitignore
Disallow: /gitignore

Sitemap: https://aideazz.xyz/sitemap.txt
Sitemap: https://aideazz.xyz/sitemap.xml
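
# --- Example (commented out, illustrative only): tuning one crawler independently ---
# Crawlers obey the most specific matching User-agent group, so a single bot
# can be opted out later without touching any other group. A hypothetical
# sketch, e.g. to deny Gemini/Vertex AI training while keeping classic search:
#   User-agent: Google-Extended
#   Disallow: /
# All other bots would continue to match their own Allow groups (or the
# fallback "User-agent: *" group) unchanged.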