Sitemap: https://wageindicator.fi/sitemap.xml.gz

# Define access restrictions for robots/spiders
# http://www.robotstxt.org/wc/norobots.html

# By default we allow robots to access all areas of our site
# that are already accessible to anonymous users
User-agent: *
Disallow:
Crawl-delay: 4

User-agent: www.deadlinkchecker.com
Crawl-delay: 1

# Use the Googlebot-specific syntax extension to exclude forms that are
# repeated for each piece of content on the site; the wildcard is only
# supported by Googlebot (see the illustrative note at the end of this file)
# http://www.google.com/support/webmasters/bin/answer.py?answer=40367&ctx=sibling
User-agent: Googlebot
# We want pages like our landing pages (?job-id=7412100000000) to be indexed
# Disallow: /*?
Disallow: /*atct_album_view$
Disallow: /*folder_factories$
Disallow: /*folder_summary_view$
Disallow: /*login_form$
Disallow: /*mail_password_form$
Disallow: /@@search
Disallow: /*search_rss$
Disallow: /*sendto_form$
Disallow: /*summary_view$
Disallow: /*thumbnail_view$
Disallow: /*?job-id=*
Disallow: /google-search-result?q=*

# It probably does no harm to index this "view" suffix at the end of the URL?
# Disallow: /*view$

# Do not index archive folders with this ID
Disallow: /*archive-no-index$
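
# Illustrative note (hypothetical URLs, not additional rules): in the
# Googlebot pattern syntax used above, "*" matches any sequence of
# characters and "$" anchors the end of the URL. A rule such as
# Disallow: /*login_form$ therefore blocks /some/page/login_form but
# not /some/page/login_form?expired=1, because the latter URL does not
# end with "login_form".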