################################################################
# To those that want to pinch this file for themselves, know
# this, it is a hints file, not a Holy Law writ large and
# carved in stone.
# Ergo, should you really want to secure something, refer to an
# Internet Security professional, or at least someone who can
# demonstrate both competency and skill.
# Remember this, the robots.txt will often be entirely ignored
# and IS NOT a security measure, it's just a recommended
# suggestion list for attentive Internet Spiders, on how to
# behave, while at this site.
#
# Author: John [dot] Harris [at] NCHD [dot] NET
#
# Copyright: Creative Commons License
#            creativecommons [dot] org
#
# Revision: 2.5
#
# Change-log: File order further revised, for seriously impaired
#             bots, who can't parse a robots text file.
#
# Last Modified: 2007-09-02
#
# Format reminder: robots.txt is line-oriented. Every directive
# must sit on its own line, and anything after a '#' on a line
# is a comment. Do not put blank lines inside a record; older
# parsers treat a blank line as the end of the record.
################################################################
# The following are allowed in, assuming they behave
#
# These named agents and the '*' agent further down form ONE
# record, so they all share the Disallow list that follows.
#####################################
# InfoSeek
User-agent: Sidewinder
# Altavista
User-agent: Scooter
# Lycos
User-agent: T-Rex
# Excite
User-agent: Architext
# Google
User-agent: Backrub
# Google
User-agent: GoogleBot
# Thumbnail the Internet?
User-agent: girafabot
# Whois source
User-agent: SurveyBot
# Wayback machine
User-agent: ia_archiver
# Thunderstone / Webinator
User-agent: Webinator
# Thunderstone / Webinator
User-agent: T-H-U-N-D-E-R-S-T-O-N-E
#
#
#####################################
# List exclusions first - All spiders
#####################################
User-agent: *
#
Disallow: /_borders/
Disallow: /_derived/
Disallow: /_fpclass/
Disallow: /_overlay/
Disallow: /_private/
Disallow: /_themes/
Disallow: /_vti_bin/
Disallow: /_vti_cnf/
Disallow: /_vti_log/
Disallow: /_vti_map/
Disallow: /_vti_pvt/
Disallow: /_vti_txt/
#
###############################
# This is what we DON'T want
# you to play with, or even
# look at
###############################
#
# This is NOT to be indexed;
# No point, no usable content.
# As we use PHP Templates
Disallow: /styles/
#
# This is a virtual URL space.
# No point, no usable content.
Disallow: /central/
# these are copyright - not
# to be indexed
Disallow: /images/
#
# these are not to be indexed
# No point, no usable content.
Disallow: /code/
#
# This is NOT to be indexed, nor
# should it even be visible
Disallow: /Zend/
# This is NOT to be indexed, nor
# should it even be visible
Disallow: /usage/
# This is NOT to be indexed, nor
# should it even be visible
Disallow: /mrtg/
#
# This is not to be indexed
# No point, no usable content.
Disallow: /main/
#
# This is NOT to be indexed, nor
# should it even be visible
Disallow: /bin/
# This is NOT to be indexed, nor
# should it even be visible
Disallow: /cgi-bin/
# This is NOT to be indexed, nor
# should it even be visible
Disallow: /search/
# This is NOT to be indexed, nor
# should it even be visible
Disallow: /query/
# This is NOT to be indexed, nor
# should it even be visible
Disallow: /help/
#
# This is NOT to be indexed.
# No point, no usable content.
# just scripts.
Disallow: /jsp/
#
# This is NOT to be indexed.
# No point, no usable content.
# Mostly Just PHP stuff.
Disallow: /templates/
#
#

###############################
# List of Search Bots we
# seriously don't like
###############################
User-agent: msnbot
Disallow: /
#
###############################
# In Theory, all other
# unknown search bots are
# unwelcome.
#
# So, SOD OFF
#
# NOTE: the catch-all below is deliberately left commented out.
# An empty "User-agent:" value is not a valid agent name, and
# parsers that match agent names by substring treat it as
# matching EVERY robot, which would shut out the welcome
# crawlers listed at the top of this file as well.
# Unwelcome bots have to be named one by one, like msnbot above.
#
# User-agent:
# Can't use wild cards, it confuses the idiots
#
# so we have to identify them individually
#
# Disallow: /
################################################################