Ver Mensaje Individual
  #2 (permalink)  
Antiguo 04/04/2008, 20:14
usermax
(Desactivado)
 
Fecha de Ingreso: diciembre-2006
Mensajes: 529
Antigüedad: 17 años, 5 meses
Puntos: 11
Re: Lista de nombres de spider-bots

Al final... encontré una lista muy buena.
Por si a alguien alguna vez le sirve, aquí va:

Son los nombres de los spider-bots: si los cargas en algún sistema de estadísticas web, te van a permitir saber si un robot ha indexado tu sitio web.

Código:
 
(1, 'acme.spider', 'Acme Spider'),
(2, 'ahoythehomepagefinder', 'Ahoy! The Homepage Finder'),
(3, 'alkaline', 'Alkaline'),
(4, 'appie', 'Walhello appie'),
(5, 'arachnophilia', 'Arachnophilia'),
(6, 'architext', 'ArchitextSpider'),
(7, 'aretha', 'Aretha'),
(8, 'ariadne', 'ARIADNE'),
(9, 'arks', 'arks'),
(10, 'aspider', 'ASpider (Associative Spider)'),
(11, 'atn.txt', 'ATN Worldwide'),
(12, 'atomz', 'Atomz.com Search Robot'),
(13, 'auresys', 'AURESYS'),
(14, 'backrub', 'BackRub'),
(15, 'bigbrother', 'Big Brother'),
(16, 'bjaaland', 'Bjaaland'),
(17, 'blackwidow', 'BlackWidow'),
(18, 'blindekuh', 'Die Blinde Kuh'),
(19, 'bloodhound', 'Bloodhound'),
(20, 'brightnet', 'bright.net caching robot'),
(21, 'bspider', 'BSpider'),
(22, 'cactvschemistryspider', 'CACTVS Chemistry Spider'),
(23, 'calif[^r]', 'Calif'),
(24, 'cassandra', 'Cassandra'),
(25, 'cgireader', 'Digimarc Marcspider/CGI'),
(26, 'checkbot', 'Checkbot'),
(27, 'churl', 'churl'),
(28, 'cmc', 'CMC/0.01'),
(29, 'collective', 'Collective'),
(30, 'combine', 'Combine System'),
(31, 'conceptbot', 'Conceptbot'),
(32, 'coolbot', 'CoolBot'),
(33, 'core', 'Web Core / Roots'),
(34, 'cosmos', 'XYLEME Robot'),
(35, 'cruiser', 'Internet Cruiser Robot'),
(36, 'cusco', 'Cusco'),
(37, 'cyberspyder', 'CyberSpyder Link Test'),
(38, 'deweb', 'DeWeb(c) Katalog/Index'),
(39, 'dienstspider', 'DienstSpider'),
(40, 'digger', 'Digger'),
(41, 'diibot', 'Digital Integrity Robot'),
(42, 'directhit', 'Direct Hit Grabber'),
(43, 'dnabot', 'DNAbot'),
(44, 'download_express', 'DownLoad Express'),
(45, 'dragonbot', 'DragonBot'),
(46, 'dwcp', 'DWCP (Dridus Web Cataloging Project)'),
(47, 'e-collector', 'e-collector'),
(48, 'ebiness', 'EbiNess'),
(49, 'eit', 'EIT Link Verifier Robot'),
(50, 'elfinbot', 'ELFINBOT'),
(51, 'emacs', 'Emacs-w3 Search Engine'),
(52, 'emcspider', 'ananzi'),
(53, 'esther', 'Esther'),
(54, 'evliyacelebi', 'Evliya Celebi'),
(55, 'nzexplorer', 'nzexplorer'),
(56, 'fdse', 'Fluid Dynamics Search Engine robot'),
(57, 'felix', 'Felix IDE'),
(58, 'ferret', 'Wild Ferret Web Hopper #1, #2, #3'),
(59, 'fetchrover', 'FetchRover'),
(60, 'fido', 'fido'),
(61, 'finnish', 'Hämähäkki'),
(62, 'fireball', 'KIT-Fireball'),
(63, '[^a]fish', 'Fish search'),
(64, 'fouineur', 'Fouineur'),
(65, 'francoroute', 'Robot Francoroute'),
(66, 'freecrawl', 'Freecrawl'),
(67, 'funnelweb', 'FunnelWeb'),
(68, 'gama', 'gammaSpider, FocusedCrawler'),
(69, 'gazz', 'gazz'),
(70, 'gcreep', 'GCreep'),
(71, 'getbot', 'GetBot'),
(72, 'geturl', 'GetURL'),
(73, 'golem', 'Golem'),
(74, 'googlebot', 'Googlebot (Google)'),
(75, 'grapnel', 'Grapnel/0.01 Experiment'),
(76, 'griffon', 'Griffon'),
(77, 'gromit', 'Gromit'),
(78, 'gulliver', 'Northern Light Gulliver'),
(79, 'hambot', 'HamBot'),
(80, 'harvest', 'Harvest'),
(81, 'havindex', 'havIndex'),
(82, 'hometown', 'Hometown Spider Pro'),
(83, 'htdig', 'ht://Dig'),
(84, 'htmlgobble', 'HTMLgobble'),
(85, 'hyperdecontextualizer', 'Hyper-Decontextualizer'),
(86, 'iajabot', 'iajaBot'),
(87, 'ibm', 'IBM_Planetwide'),
(88, 'iconoclast', 'Popular Iconoclast'),
(89, 'ilse', 'Ingrid'),
(90, 'imagelock', 'Imagelock'),
(91, 'incywincy', 'IncyWincy'),
(92, 'informant', 'Informant'),
(93, 'infoseek', 'InfoSeek Robot 1.0'),
(94, 'infoseeksidewinder', 'Infoseek Sidewinder'),
(95, 'infospider', 'InfoSpiders'),
(96, 'inspectorwww', 'Inspector Web'),
(97, 'intelliagent', 'IntelliAgent'),
(98, 'irobot', 'I, Robot'),
(99, 'iron33', 'Iron33'),
(100, 'israelisearch', 'Israeli-search'),
(101, 'javabee', 'JavaBee'),
(102, 'jbot', 'JBot Java Web Robot'),
(103, 'jcrawler', 'JCrawler'),
(104, 'jeeves', 'Jeeves'),
(105, 'jobo', 'JoBo Java Web Robot'),
(106, 'jobot', 'Jobot'),
(107, 'joebot', 'JoeBot'),
(108, 'jubii', 'The Jubii Indexing Robot'),
(109, 'jumpstation', 'JumpStation'),
(110, 'katipo', 'Katipo'),
(111, 'kdd', 'KDD-Explorer'),
(112, 'kilroy', 'Kilroy'),
(113, 'ko_yappo_robot', 'KO_Yappo_Robot'),
(114, 'labelgrabber.txt', 'LabelGrabber'),
(115, 'larbin', 'larbin'),
(116, 'legs', 'legs'),
(117, 'linkidator', 'Link Validator'),
(118, 'linkscan', 'LinkScan'),
(119, 'linkwalker', 'LinkWalker'),
(120, 'lockon', 'Lockon'),
(121, 'logo_gif', 'logo.gif Crawler'),
(122, 'lycos', 'Lycos'),
(123, 'macworm', 'Mac WWWWorm'),
(124, 'magpie', 'Magpie'),
(125, 'marvin', 'marvin/infoseek'),
(126, 'mattie', 'Mattie'),
(127, 'mediafox', 'MediaFox'),
(128, 'merzscope', 'MerzScope'),
(129, 'meshexplorer', 'NEC-MeshExplorer'),
(130, 'mindcrawler', 'MindCrawler'),
(131, 'moget', 'moget'),
(132, 'momspider', 'MOMspider'),
(133, 'monster', 'Monster'),
(134, 'motor', 'Motor'),
(135, 'muscatferret', 'Muscat Ferret'),
(136, 'mwdsearch', 'Mwd.Search'),
(137, 'myweb', 'Internet Shinchakubin'),
(138, 'netcarta', 'NetCarta WebMap Engine'),
(139, 'netcraft', 'Netcraft Web Server Survey'),
(140, 'netmechanic', 'NetMechanic'),
(141, 'netscoop', 'NetScoop'),
(142, 'newscan-online', 'newscan-online'),
(143, 'nhse', 'NHSE Web Forager'),
(144, 'nomad', 'Nomad'),
(145, 'northstar', 'The NorthStar Robot'),
(146, 'occam', 'Occam'),
(147, 'octopus', 'HKU WWW Octopus'),
(148, 'openfind', 'Openfind data gatherer'),
(149, 'orb_search', 'Orb Search'),
(150, 'packrat', 'Pack Rat'),
(151, 'pageboy', 'PageBoy'),
(152, 'parasite', 'ParaSite'),
(153, 'patric', 'Patric'),
(154, 'pegasus', 'pegasus'),
(155, 'perignator', 'The Peregrinator'),
(156, 'perlcrawler', 'PerlCrawler 1.0'),
(157, 'phantom', 'Phantom'),
(158, 'piltdownman', 'PiltdownMan'),
(159, 'pimptrain', 'Pimptrain.com\'s robot'),
(160, 'pioneer', 'Pioneer'),
(161, 'pitkow', 'html_analyzer'),
(162, 'pjspider', 'Portal Juice Spider'),
(163, 'pka', 'PGP Key Agent'),
(164, 'plumtreewebaccessor', 'PlumtreeWebAccessor'),
(165, 'poppi', 'Poppi'),
(166, 'portalb', 'PortalB Spider'),
(167, 'puu', 'GetterroboPlus Puu'),
(168, 'python', 'The Python Robot'),
(169, 'raven', 'Raven Search'),
(170, 'rbse', 'RBSE Spider'),
(171, 'resumerobot', 'Resume Robot'),
(172, 'rhcs', 'RoadHouse Crawling System'),
(173, 'roadrunner', 'Road Runner: The ImageScape Robot'),
(174, 'robbie', 'Robbie the Robot'),
(175, 'robi', 'ComputingSite Robi/1.0'),
(176, 'robofox', 'RoboFox'),
(177, 'robozilla', 'Robozilla'),
(178, 'roverbot', 'Roverbot'),
(179, 'rules', 'RuLeS'),
(180, 'safetynetrobot', 'SafetyNet Robot'),
(181, 'scooter', 'Scooter (AltaVista)'),
(182, 'search_au', 'Search.Aus-AU.COM'),
(183, 'searchprocess', 'SearchProcess'),
(184, 'senrigan', 'Senrigan'),
(185, 'sgscout', 'SG-Scout'),
(186, 'shaggy', 'ShagSeeker'),
(187, 'shaihulud', 'Shai\'Hulud'),
(188, 'sift', 'Sift'),
(189, 'simbot', 'Simmany Robot Ver1.0'),
(190, 'site-valet', 'Site Valet'),
(191, 'sitegrabber', 'Open Text Index Robot'),
(192, 'sitetech', 'SiteTech-Rover'),
(193, 'slcrawler', 'SLCrawler'),
(194, 'slurp', 'Inktomi Slurp'),
(195, 'smartspider', 'Smart Spider'),
(196, 'snooper', 'Snooper'),
(197, 'solbot', 'Solbot'),
(198, 'spanner', 'Spanner'),
(199, 'speedy', 'Speedy Spider'),
(200, 'spider_monkey', 'spider_monkey'),
(201, 'spiderbot', 'SpiderBot'),
(202, 'spiderline', 'Spiderline Crawler'),
(203, 'spiderman', 'SpiderMan'),
(204, 'spiderview', 'SpiderView(tm)'),
(205, 'spry', 'Spry Wizard Robot'),
(206, 'ssearcher', 'Site Searcher'),
(207, 'suke', 'Suke'),
(208, 'suntek', 'suntek search engine'),
(209, 'sven', 'Sven'),
(210, 'tach_bw', 'TACH Black Widow'),
(211, 'tarantula', 'Tarantula'),
(212, 'tarspider', 'tarspider'),
(213, 'techbot', 'TechBOT'),
(214, 'templeton', 'Templeton'),
(215, 'teoma_agent1', 'TeomaTechnologies'),
(216, 'titin', 'TitIn'),
(217, 'titan', 'TITAN'),
(218, 'tkwww', 'The TkWWW Robot'),
(219, 'tlspider', 'TLSpider'),
(220, 'ucsd', 'UCSD Crawl'),
(221, 'udmsearch', 'UdmSearch'),
(222, 'urlck', 'URL Check'),
(223, 'valkyrie', 'Valkyrie'),
(224, 'victoria', 'Victoria'),
(225, 'visionsearch', 'vision-search'),
(226, 'voyager', 'Voyager'),
(227, 'vwbot', 'VWbot'),
(228, 'w3index', 'The NWI Robot'),
(229, 'w3m2', 'W3M2'),
(230, 'wallpaper', 'WallPaper'),
(231, 'wanderer', 'the World Wide Web Wanderer'),
(232, 'wapspider', 'w@pSpider by wap4.com'),
(233, 'webbandit', 'WebBandit Web Spider'),
(234, 'webcatcher', 'WebCatcher'),
(235, 'webcopy', 'WebCopy'),
(236, 'webfetcher', 'Webfetcher'),
(237, 'webfoot', 'The Webfoot Robot'),
(238, 'weblayers', 'Weblayers'),
(239, 'weblinker', 'WebLinker'),
(240, 'webmirror', 'WebMirror'),
(241, 'webmoose', 'The Web Moose'),
(242, 'webquest', 'WebQuest'),
(243, 'webreader', 'Digimarc MarcSpider'),
(244, 'webreaper', 'WebReaper'),
(245, 'websnarf', 'Websnarf'),
(246, 'webspider', 'WebSpider'),
(247, 'webvac', 'WebVac'),
(248, 'webwalk', 'webwalk'),
(249, 'webwalker', 'WebWalker'),
(250, 'webwatch', 'WebWatch'),
(251, 'wget', 'Wget'),
(252, 'whatuseek', 'whatUseek Winona'),
(253, 'whowhere', 'WhoWhere Robot'),
(254, 'wired-digital', 'Wired Digital'),
(255, 'wmir', 'w3mir'),
(256, 'wolp', 'WebStolperer'),
(257, 'wombat', 'The Web Wombat'),
(258, 'worm', 'The World Wide Web Worm'),
(259, 'wwwc', 'WWWC Ver 0.2.5'),
(260, 'wz101', 'WebZinger'),
(261, 'xget', 'XGET'),
(262, 'nederland.zoek', 'Nederland.zoek'),
(263, 'antibot', 'Antibot'),
(264, 'awbot', 'AWBot'),
(265, 'baiduspider', 'BaiDuSpider'),
(266, 'bobby', 'Bobby'),
(267, 'boris', 'Boris'),
(268, 'bumblebee', 'Bumblebee (relevare.com)'),
(269, 'cscrawler', 'CsCrawler'),
(270, 'daviesbot', 'DaviesBot'),
(271, 'digout4u', 'Digout4u'),
(272, 'echo', 'EchO!'),
(273, 'exactseek', 'ExactSeek Crawler'),
(274, 'ezresult', 'Ezresult'),
(275, 'fast-webcrawler', 'Fast-Webcrawler (AllTheWeb)'),
(276, 'gigabot', 'GigaBot'),
(277, 'gnodspider', 'GNOD Spider'),
(278, 'ia_archiver', 'Alexa (IA Archiver)'),
(279, 'internetseer', 'InternetSeer'),
(280, 'jennybot', 'JennyBot'),
Hasta pronto