From 764e17785b647211b8398bb76f70665e10dc2729 Mon Sep 17 00:00:00 2001 From: Graham Plumb Date: Sat, 12 Jan 2019 12:33:43 +0000 Subject: [PATCH] Performance and general clean up (https://github.com/JayBizzle/Crawler-Detect/pull/312) --- .../NetCrawlerDetect.Tests.csproj | 2 +- .../NetCrawlerDetect.Tests/UnitTest1.cs | 2 +- .../NetCrawlerDetect.Tests/crawlers.txt | 6 +- NetCrawlerDetect/NetCrawlerDetect.sln | 2 +- .../NetCrawlerDetect/Fixtures/Crawlers.cs | 277 ++++++++---------- .../NetCrawlerDetect/NetCrawlerDetect.csproj | 6 +- 6 files changed, 127 insertions(+), 168 deletions(-) diff --git a/NetCrawlerDetect/NetCrawlerDetect.Tests/NetCrawlerDetect.Tests.csproj b/NetCrawlerDetect/NetCrawlerDetect.Tests/NetCrawlerDetect.Tests.csproj index 47d49de..58f4e6e 100644 --- a/NetCrawlerDetect/NetCrawlerDetect.Tests/NetCrawlerDetect.Tests.csproj +++ b/NetCrawlerDetect/NetCrawlerDetect.Tests/NetCrawlerDetect.Tests.csproj @@ -4,7 +4,7 @@ netcoreapp2.0 false - 0.1.8 + 0.1.9 diff --git a/NetCrawlerDetect/NetCrawlerDetect.Tests/UnitTest1.cs b/NetCrawlerDetect/NetCrawlerDetect.Tests/UnitTest1.cs index dcfdcff..81191a6 100644 --- a/NetCrawlerDetect/NetCrawlerDetect.Tests/UnitTest1.cs +++ b/NetCrawlerDetect/NetCrawlerDetect.Tests/UnitTest1.cs @@ -66,7 +66,7 @@ public void ReturnCorrectlyMatchedBotName() { var result = _detector.IsCrawler("Mozilla/5.0 (iPhone; CPU iPhone OS 7_1 like Mac OS X) AppleWebKit (KHTML, like Gecko) Mobile (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html)"); Assert.True(result, "Yahoo Ad monitoring IS a bot!"); - Assert.Equal("Yahoo Ad monitoring", _detector.Matches[0].Value); + Assert.Equal("monitoring", _detector.Matches[0].Value); } diff --git a/NetCrawlerDetect/NetCrawlerDetect.Tests/crawlers.txt b/NetCrawlerDetect/NetCrawlerDetect.Tests/crawlers.txt index 799c311..113b75f 100644 --- a/NetCrawlerDetect/NetCrawlerDetect.Tests/crawlers.txt +++ b/NetCrawlerDetect/NetCrawlerDetect.Tests/crawlers.txt @@ -3406,4 +3406,8 @@ Canary%20Mail/397 CFNetwork/893.13.1 Darwin/17.4.0 (x86_64) Sendsay.Ru/1.0; https://Sendsay.Ru/; ask@sendsay.ru Mozilla/5.0 (Zoom.Mac 10.8.5 x86) Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36 TryJsoup/1.0 (+http://try.jsoup.org/) -Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0 ; BacklinkHttpStatus) \ No newline at end of file +Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0 ; BacklinkHttpStatus) +SilverReader/1.0; http://silverreader.com +ExtractorPro +WebsiteExtractor +Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1 google_partner_monitoring FWSzVTDDBz14547302713138T \ No newline at end of file diff --git a/NetCrawlerDetect/NetCrawlerDetect.sln b/NetCrawlerDetect/NetCrawlerDetect.sln index 3e09af5..ca3966d 100644 --- a/NetCrawlerDetect/NetCrawlerDetect.sln +++ b/NetCrawlerDetect/NetCrawlerDetect.sln @@ -22,7 +22,7 @@ Global EndGlobalSection GlobalSection(MonoDevelopProperties) = preSolution description = A .net standard port of JayBizzle's CrawlerDetect project (https://github.com/JayBizzle/Crawler-Detect). - version = 0.1.8 + version = 0.1.9 Policies = $0 $0.VersionControlPolicy = $1 EndGlobalSection diff --git a/NetCrawlerDetect/NetCrawlerDetect/Fixtures/Crawlers.cs b/NetCrawlerDetect/NetCrawlerDetect/Fixtures/Crawlers.cs index f1728f0..dd10c59 100644 --- a/NetCrawlerDetect/NetCrawlerDetect/Fixtures/Crawlers.cs +++ b/NetCrawlerDetect/NetCrawlerDetect/Fixtures/Crawlers.cs @@ -15,7 +15,7 @@ public Crawlers() // Collection of user-agent regex snippets _data = new List() { - @".*Java.*outbrain", + @"outbrain", @" YLT", @"^b0t$", @"^bluefish ", @@ -47,7 +47,6 @@ public Crawlers() @"13TABS", @"192\.comAgent", @"2ip\.ru", - @"404checker", @"404enemy", @"7Siters", @"80legs", @@ -66,41 +65,39 @@ public Crawlers() @"AddThis", @"ADmantX", @"adressendeutschland", - @"adscanner\/", - @"Advanced Email Extractor v", + @"adscanner", @"agentslug", @"AHC", @"aihit", @"aiohttp\/", @"Airmail", - @"Akamai_Site_Analyzer", @"akka-http\/", @"akula\/", @"alertra", @"alexa site audit", @"Alibaba\.Security\.Heimdall", @"Alligator", - @"allloadin\.com", + @"allloadin", @"AllSubmitter", @"alyze\.info", @"amagit", @"Anarchie", @"AndroidDownloadManager", @"Anemone", - @"AngleSharp\/", + @"AngleSharp", @"annotate_google", @"Ant\.com", @"Anturis Agent", @"AnyEvent-HTTP\/", @"Apache Droid", @"Apache OpenOffice", - @"Apache-HttpAsyncClient\/", - @"Apache-HttpClient\/", - @"ApacheBench\/", + @"Apache-HttpAsyncClient", + @"Apache-HttpClient", + @"ApacheBench", @"Apexoo", @"APIs-Google", - @"AportWorm\/[0-9]", - @"AppBeat\/[0-9]", + @"AportWorm\/", + @"AppBeat\/", @"AppEngine-Google", @"AppStoreScraperZ", @"Aprc\/[0-9]", @@ -109,7 +106,7 @@ public Crawlers() @"Arachnophilia", @"aria2", @"Arukereso", - @"asafaweb\.com", + @"asafaweb", @"AskQuickly", @"Ask Jeeves", @"ASPSeek", @@ -132,15 +129,14 @@ public Crawlers() @"Bandit", @"basicstate", @"BatchFTP", - @"Battleztar\ Bazinga", - @"baypup\/[0-9]", - @"baypup\/colbert", + @"Battleztar Bazinga", + @"baypup\/", @"BazQux", @"BBBike", @"BCKLINKS", @"BDFetch", - @"BegunAdvertising\/", - @"Bidtellect\/", + @"BegunAdvertising", + @"Bidtellect", @"BigBozz", @"Bigfoot", @"biglotron", @@ -150,24 +146,23 @@ public Crawlers() @"biNu image cacher", @"Bitacle", @"biz_Directory", - @"Black\ Hole", + @"Black Hole", @"Blackboard Safeassign", @"BlackWidow", @"BlockNote\.Net", - @"Bloglines\/", + @"Bloglines", @"Bloglovin", @"BlogPulseLive", @"BlogSearch", @"Blogtrottr", @"BlowFish", - @"Boardreader", @"boitho\.com-dc", @"BPImageWalker", @"Braintree-Webhooks", @"Branch Metrics API", @"Branch-Passthrough", @"Brandprotect", - @"BrandVerity\/[0-9]", + @"BrandVerity", @"Brandwatch", @"Brodie\/", @"Browsershots", @@ -184,7 +179,6 @@ public Crawlers() @"CakePHP", @"Calculon", @"Canary%20Mail", - @"CapsuleChecker", @"CaretNail", @"catexplorador", @"CC Metadata Scaper", @@ -194,37 +188,36 @@ public Crawlers() @"CERT\.at-Statistics-Survey", @"cg-eye", @"changedetection", - @"ChangesMeter\/", + @"ChangesMeter", @"Charlotte", @"CheckHost", @"checkprivacy", @"CherryPicker", @"ChinaClaw", - @"Chirp\/[0-9]", + @"Chirp\/", @"chkme\.com", @"Chlooe", @"Chromaxa", - @"CirrusExplorer\/", + @"CirrusExplorer", @"CISPA Vulnerability Notification", @"Citoid", @"CJNetworkQuality", @"Clarsentia", @"clips\.ua\.ac\.be", - @"Cloud\ mapping", + @"Cloud mapping", @"CloudEndure", @"CloudFlare-AlwaysOnline", - @"Cloudinary\/[0-9]", + @"Cloudinary", @"cmcm\.com", @"coccoc", @"cognitiveseo", @"colly -", @"CommaFeed", @"Commons-HttpClient", - @"Comodo SSL Checker", @"contactbigdatafr", @"contentkingapp", @"convera", - @"CookieReports\.com", + @"CookieReports", @"copyright sheriff", @"CopyRightCheck", @"Copyscape", @@ -237,7 +230,7 @@ public Crawlers() @"curb", @"Curious George", @"curl", - @"cuwhois\/[0-9]", + @"cuwhois\/", @"cybo\.com", @"DAP\/NetHTTP", @"DareBoost", @@ -249,7 +242,6 @@ public Crawlers() @"dataprovider", @"DataXu", @"Daum(oa)?[ \/][0-9]", - @"DemandbasePublisherAnalyzer\/", @"Demon", @"DeuSu", @"developers\.google\.com\/\+\/web\/snippet\/", @@ -263,23 +255,21 @@ public Crawlers() @"DittoSpyder", @"dlvr", @"DMBrowser", - @"DNS-Tools Header-Analyzer", @"DNSPod-reporting", @"docoloc", - @"Dolphin http client\/", + @"Dolphin http client", @"DomainAppender", @"Donuts Content Explorer", @"dotMailer content retrieval", @"dotSemantic", @"downforeveryoneorjustme", - @"Download\ Wonder", - @"downnotifier\.com", + @"Download Wonder", + @"downnotifier", @"DowntimeDetector", - @"Dragonfly File Reader", @"Drip", @"drupact", @"Drupal \(\+http:\/\/drupal\.org\/\)", - @"DTS\ Agent", + @"DTS Agent", @"dubaiindex", @"EARTHCOM", @"Easy-Thumb", @@ -295,8 +285,7 @@ public Crawlers() @"ElectricMonk", @"elefent", @"EMail Exractor", - @"EMail\ Wolf", - @"Email%20Extractor", + @"EMail Wolf", @"EmailWolf", @"Embarcadero", @"Embed PHP Library", @@ -314,8 +303,7 @@ public Crawlers() @"exif", @"Exploratodo", @"Express WebPictures", - @"ExtractorPro", - @"Extreme\ Picture\ Finder", + @"Extreme Picture Finder", @"EyeNetIE", @"ezooms", @"facebookexternalhit", @@ -332,14 +320,12 @@ public Crawlers() @"Feedbin", @"FeedBooster", @"FeedBucket", - @"FeedBunch\/[0-9]", + @"FeedBunch\/", @"FeedBurner", - @"FeedChecker", @"Feedly", - @"Feedreader", @"FeedshowOnline", @"Feedspot", - @"Feedwind\/[0-9]", + @"Feedwind\/", @"FeedZcollector", @"feeltiptop", @"Fetch API", @@ -358,35 +344,32 @@ public Crawlers() @"Flunky", @"flynxapp", @"forensiq", - @"FoundSeoTool\/[0-9]", + @"FoundSeoTool", // 'Francis [Bot]' @"http:\/\/www.neomo.de\/", @"free thumbnails", @"Freeuploader", - @"FreeWebMonitoring SiteChecker", @"Funnelback", @"G-i-g-a-b-o-t", @"g00g1e\.net", - @"GAChecker", - @"ganarvisitas\/[0-9]", + @"ganarvisitas", @"geek-tools", - @"Genderanalyzer", @"Genieo", @"GentleSource", @"GetCode", @"Getintent", @"GetLinkInfo", - @"getprismatic\.com", + @"getprismatic", @"GetRight", @"getroot", - @"GetURLInfo\/[0-9]", + @"GetURLInfo\/", @"GetWeb", @"Ghost Inspector", @"GigablastOpenSource", @"GIS-LABS", @"github-camo", - @"github\.com\/", + @"github\.com", @"Go [\d\.]* package http", @"Go http package", @"Go-Ahead-Got-It", @@ -396,7 +379,7 @@ public Crawlers() @"gofetch", @"GomezAgent", @"gooblog", - @"Goodzer\/[0-9]", + @"Goodzer\/", @"Google AppsViewer", @"Google Desktop", @"Google favicon", @@ -406,7 +389,6 @@ public Crawlers() @"Google PP Default", @"Google Search Console", @"Google Web Preview", - @"google_partner_monitoring", @"Google-Adwords", @"Google-Apps-Script", @"Google-Calendar-Importer", @@ -418,7 +400,6 @@ public Crawlers() @"Google-Structured-Data-Testing-Tool", @"Google-Youtube-Links", @"google-xrawler", - @"GoogleCloudMonitoring", @"GoogleDocs", @"GoogleHC\/", @"GoogleProducer", @@ -427,7 +408,6 @@ public Crawlers() @"Gookey", @"GoScraper", @"GoSpotCheck", - @"GoSquared-Status-Checker", @"gosquared-thumbnailer", @"Gotit", @"GoZilla", @@ -452,27 +432,26 @@ public Crawlers() @"Hadi Agent", @"Hatena", @"Havij", - @"hawkReader", @"HeadlessChrome", @"HEADMasterSEO", @"HeartRails_Capture", @"help@dataminr\.com", @"heritrix", - @"historious\/", + @"historious", @"hkedcity", - @"hledejLevne\.cz\/[0-9]", + @"hledejLevne\.cz", @"Hloader", @"HMView", @"Holmes", - @"HonesoSearchEngine\/", + @"HonesoSearchEngine", @"HootSuite Image proxy", - @"Hootsuite-WebFeed\/[0-9]", + @"Hootsuite-WebFeed", @"hosterstats", @"HostTracker", @"ht:\/\/check", @"htdig", @"HTMLparser", - @"htmlyse\.com", + @"htmlyse", @"HTTP Banner Detection", @"HTTP_Compression_Test", @"http_request2", @@ -504,12 +483,12 @@ public Crawlers() @"Id-search", @"IdeelaborPlagiaat", @"IDG Twitter Links Resolver", - @"IDwhois\/[0-9]", + @"IDwhois\/", @"Iframely", @"igdeSpyder", @"IlTrovatore", - @"Image\ Fetch", - @"Image\ Sucker", + @"Image Fetch", + @"Image Sucker", @"ImageEngine\/", @"ImageVisu\/", @"Imagga", @@ -518,12 +497,12 @@ public Crawlers() @"InAGist", @"inbound\.li parser", @"InDesign%20CC", - @"Indy\ Library", + @"Indy Library", @"InetURL", @"infegy", @"infohelfer", @"InfoTekies", - @"InfoWizards Reciprocal Link System PRO", + @"InfoWizards Reciprocal Link", @"inpwrd\.com", @"instabid", @"Instapaper", @@ -532,7 +511,7 @@ public Crawlers() @"Intelliseek", @"InterGET", @"internet_archive", - @"Internet\ Ninja", + @"Internet Ninja", @"InternetSeer", @"internetVista monitor", @"intraVnews", @@ -548,7 +527,7 @@ public Crawlers() @"isitup\.org", @"iskanie", @"isUp\.li", - @"iThemes Sync\/[0-9]", + @"iThemes Sync\/", @"iZSearch", @"JAHHO", @"janforman", @@ -570,12 +549,10 @@ public Crawlers() @"JS-Kit", @"JustView", @"Kaspersky Lab CFR link resolver", - @"KeepRight OpenStreetMap Checker", @"Kelny\/", @"Kerrigan\/", @"KeyCDN", - @"Keyword Extractor", - @"Keyword\ Density", + @"Keyword Density", @"Keywords Research", @"KickFire", @"KimonoLabs\/", @@ -589,7 +566,6 @@ public Crawlers() @"L\.webis", @"Larbin", @"Lavf\/", - @"LayeredExtractor", @"LeechFTP", @"LeechGet", @"letsencrypt", @@ -602,7 +578,6 @@ public Crawlers() @"Liferea\/", @"Lightspeedsystems", @"Likse", - @"link checker", @"Link Valet", @"link_thumbnailer", @"LinkAlarm\/", @@ -642,15 +617,14 @@ public Crawlers() @"marketinggrader", @"MarkMonitor", @"MarkWatch", - @"Mass\ Downloader", - @"masscan\/[0-9]", - @"Mata\ Hari", + @"Mass Downloader", + @"masscan\/", + @"Mata Hari", @"Mediapartners-Google", @"mediawords", @"MegaIndex\.ru", @"MeltwaterNews", - @"Melvil Rawi\/", - @"MergeFlow-PageReader", + @"Melvil Rawi", @"Metaspinner", @"MetaURI", @"MFC_Tear_Sample", @@ -659,13 +633,13 @@ public Crawlers() @"Microsoft Outlook", @"Microsoft Windows Network Diagnostics", @"Microsoft-WebDAV-MiniRedir", - @"Microsoft\ Data\ Access", - @"MIDown\ tool", + @"Microsoft Data Access", + @"MIDown tool", @"MIIxpc", @"Mindjet", - @"Miniature\.io\/", + @"Miniature\.io", @"Miniflux", - @"Mister\ PiX", + @"Mister PiX", @"mixdata dot com", @"mixed-content-scan", @"Mixmax-LinkPreview", @@ -676,16 +650,16 @@ public Crawlers() @"Mojolicious \(Perl\)", @"Monit\/", @"monitis", - @"Monitority\/[0-9]", + @"Monitority\/", @"montastic", @"MonTools", @"Moreover", - @"Morfeus\ Fucking\ Scanner", + @"Morfeus Fucking Scanner", @"Morning Paper", @"MovableType", @"mowser", @"Mrcgiguy", - @"MS\ Web\ Services\ Client\ Protocol", + @"MS Web Services Client Protocol", @"MSFrontPage", @"mShots", @"MuckRack\/", @@ -693,14 +667,14 @@ public Crawlers() @"MVAClient", @"MxToolbox\/", @"nagios", - @"Najdi\.si\/", - @"Name\ Intelligence", + @"Najdi\.si", + @"Name Intelligence", @"Nameprotect", @"Navroad", @"NearSite", @"Needle", @"Nessus", - @"Net\ Vampire", + @"Net Vampire", @"NetAnts", @"NETCRAFT", @"NetLyzer", @@ -725,11 +699,11 @@ public Crawlers() @"Nibbler", @"NICErsPRO", @"Nikto", - @"nineconnections\.com", + @"nineconnections", @"NLNZ_IAHarvester", @"Nmap Scripting Engine", @"node-superagent", - @"node-urllib\/", + @"node-urllib", @"node\.io", @"Nodemeter", @"NodePing", @@ -743,14 +717,13 @@ public Crawlers() @"nWormFeedFinder", @"Nymesis", @"NYU", - @"Ocelli\/[0-9]", + @"Ocelli\/", @"Octopus", @"oegp", @"Offline Explorer", - @"Offline\ Navigator", - @"og-scraper\/", + @"Offline Navigator", + @"og-scraper", @"okhttp", - @"Omea Reader", @"omgili", @"OMSC", @"Online Domain Tools", @@ -761,26 +734,24 @@ public Crawlers() @"OpenVAS", @"Optimizer", @"Orbiter", - @"OrgProbe\/[0-9]", + @"OrgProbe\/", @"orion-semantics", @"Outlook-Express", @"ow\.ly", @"Owler", @"ownCloud News", - @"OxfordCloudService\/[0-9]", - @"Page Analyzer", + @"OxfordCloudService", @"Page Valet", @"page_verifier", - @"page\ scorer", + @"page scorer", @"page2rss", - @"PageAnalyzer", @"PageGrabber", @"PagePeeker", @"PageScorer", - @"Pagespeed\/[0-9]", + @"Pagespeed\/", @"Panopta", @"panscient", - @"Papa\ Foto", + @"Papa Foto", @"parsijoo", @"Pavuk", @"PayPal IPN", @@ -801,7 +772,7 @@ public Crawlers() @"Picsearch", @"PictureFinder", @"Pimonster", - @"ping\.blo\.gs\/", + @"ping\.blo\.gs", @"Pingability", @"PingAdmin\.Ru", @"Pingdom", @@ -822,17 +793,16 @@ public Crawlers() @"Porkbun", @"Port Monitor", @"postano", - @"PostmanRuntime\/", + @"PostmanRuntime", @"PostPost", @"postrank", @"PowerPoint\/", @"Priceonomics Analysis Engine", - @"PrintFriendly\.com", - @"PritTorrent\/[0-9]", + @"PrintFriendly", + @"PritTorrent", @"Prlog", @"probethenet", @"Project 25499", - @"Promotion_Tools_www\.searchenginepromotionhelp\.com", @"prospectb2b", @"Protopage", @"ProWebWalker", @@ -850,8 +820,8 @@ public Crawlers() @"QQDownload", @"QrafterPro", @"Qseero", - @"Qualidator\.com SiteAnalyzer", - @"QueryN\ Metasearch", + @"Qualidator", + @"QueryN Metasearch", @"queuedriver", @"Quora Link Preview", @"Qwantify", @@ -867,22 +837,19 @@ public Crawlers() @"Recorder", @"RecurPost\/", @"redback\/", - @"Redirect Checker Tool", @"ReederForMac", @"ReGet", @"RepoMonkey", @"request\.js", - @"ResponseCodeTest\/[0-9]", + @"ResponseCodeTest", @"RestSharp", @"Riddler", @"Rival IQ", @"Robosourcer", - @"Robozilla\/[0-9]", + @"Robozilla", @"ROI Hunter", @"RPT-HTTPClient", - @"rss reader", @"RSSOwl", - @"RssReader\/", @"safe-agent-scanner", @"SalesIntelligent", @"Saleslift", @@ -900,11 +867,12 @@ public Crawlers() @"SimpleScraper", @"Scrapy", @"Screaming", - @"ScreenShotService\/[0-9]", + @"ScreenShotService", @"Scrubby", @"Scrutiny\/", @"search\.thunderstone", - @"Search37\/", + @"Search37", + @"searchenginepromotionhelp", @"Searchestate", @"SearchExpress", @"SearchSight", @@ -924,29 +892,25 @@ public Crawlers() @"SEOkicks", @"Seomoz", @"SEOprofiler", - @"SEOsearch\/", - @"SeopultContentAnalyzer", + @"SEOsearch", @"seoscanners", @"SEOstats", - @"Server Density Service Monitoring", - @"servernfo\.com", + @"servernfo", @"sexsearcher", @"Seznam", @"Shelob", @"Shodan", - @"Shoppimon Analyzer", - @"ShoppimonAgent\/[0-9]", + @"Shoppimon", @"ShopWiki", @"ShortLinkTranslate", @"shrinktheweb", @"Sideqik", - @"SilverReader", @"SimplePie", @"SimplyFast", @"Siphon", @"SISTRIX", @"Site-Shot\/", - @"Site\ Sucker", + @"Site Sucker", @"Site24x7", @"SiteBar", @"Sitebeam", @@ -1014,7 +978,6 @@ public Crawlers() @"suchen", @"Sucuri", @"summify", - @"Super Monitoring", @"SuperHTTP", @"Surphace Scout", @"Suzuran", @@ -1036,16 +999,15 @@ public Crawlers() @"Telesphorep", @"Tenon\.io", @"teoma", - @"terrainformatica\.com", + @"terrainformatica", @"Test Certificate Info", - @"Tetrahedron\/[0-9]", + @"Tetrahedron", @"The Drop Reaper", @"The Expert HTML Source Viewer", @"The Knowledge AI", - @"The\ Intraformant", + @"The Intraformant", @"theinternetrules", @"TheNomad", - @"theoldreader\.com", @"Thinklab", @"Thumbshots", @"ThumbSniper", @@ -1061,13 +1023,13 @@ public Crawlers() @"TrapitAgent", @"Trendiction", @"Trendsmap", - @"trendspottr\.com", + @"trendspottr", @"truwoGPS", @"TryJsoup", @"TulipChain", @"Turingos", @"Turnitin", - @"tweetedtimes\.com", + @"tweetedtimes", @"Tweetminster", @"Tweezler\/", @"twibble", @@ -1089,16 +1051,15 @@ public Crawlers() @"Upflow", @"Uptimia", @"URL Verifier", - @"URLChecker", - @"URLitor\.com", + @"URLitor", @"urlresolver", @"Urlstat", @"UrlTrends Ranking Updater", - @"URLy\ Warning", + @"URLy Warning", @"URLy\.Warning", @"Vacuum", @"Vagabondo", - @"VB\ Project", + @"VB Project", @"vBSEO", @"VCI", @"via ggpht\.com GoogleImageProxy", @@ -1110,12 +1071,11 @@ public Crawlers() @"Voil", @"voltron", @"voyager\/", - @"VSAgent\/[0-9]", - @"VSB-TUO\/[0-9]", + @"VSAgent\/", + @"VSB-TUO\/", @"Vulnbusters Meter", @"VYU2", @"w3af\.org", - @"W3C_I18n-Checker", @"W3C_Unicorn", @"W3C-checklink", @"W3C-mobileOK", @@ -1128,16 +1088,15 @@ public Crawlers() @"WbSrch\/", @"WDT\.io", @"web-capture\.net", - @"Web-Monitoring", @"Web-sniffer", - @"Web\ Auto", - @"Web\ Collage", - @"Web\ Enhancer", - @"Web\ Fetch", - @"Web\ Fuck", - @"Web\ Pix", - @"Web\ Sauger", - @"Web\ Sucker", + @"Web Auto", + @"Web Collage", + @"Web Enhancer", + @"Web Fetch", + @"Web Fuck", + @"Web Pix", + @"Web Sauger", + @"Web Sucker", @"Webalta", @"Webauskunft", @"WebAuto", @@ -1147,12 +1106,12 @@ public Crawlers() @"WebCookies", @"WebCopier", @"WebCorp", - @"WebDataStats\/[0-9]", + @"WebDataStats", @"WebDoc", @"WebEnhancer", @"WebFetch", @"WebFuck", - @"WebGo\ IS", + @"WebGo IS", @"WebImageCollector", @"WebImages", @"WebIndex", @@ -1166,17 +1125,14 @@ public Crawlers() @"webscreenie", @"Webshag", @"Webshot", - @"Website Analyzer\/", - @"Website\ Quester", - @"WebsiteExtractor", + @"Website Quester", @"websitepulse agent", - @"websitepulse[+ ]checker", @"WebsiteQuester", - @"Websnapr\/", + @"Websnapr", @"Webster", @"WebStripper", @"WebSucker", - @"Webthumb\/[0-9]", + @"Webthumb\/", @"WebThumbnail", @"WebWhacker", @"WebZIP", @@ -1218,13 +1174,12 @@ public Crawlers() @"x22Mozilla", @"XaxisSemanticsClassifier", @"Xenu Link Sleuth", - @"XING-contenttabreceiver\/[0-9]", + @"XING-contenttabreceiver", @"xpymep([0-9]?)\.exe", @"Y!J-(ASR|BSC)", @"Y\!J-BRW", @"Yaanb", @"yacy", - @"Yahoo Ad monitoring", @"Yahoo Link Preview", @"YahooCacheSystem", @"YahooYSMcm", @@ -1236,7 +1191,7 @@ public Crawlers() @"Yoleo Consumer", @"yoogliFetchAgent", @"YottaaMonitor", - @"Your-Website-Sucks\/[0-9]", + @"Your-Website-Sucks", @"yourls\.org", @"YoYs\.net", @"YP\.PL", @@ -1254,7 +1209,7 @@ public Crawlers() @"Zombie\.js", @"Zoom\.Mac", @"ZyBorg", - @"[a-z0-9\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron)", + @"[a-z0-9\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer)" }; } } diff --git a/NetCrawlerDetect/NetCrawlerDetect/NetCrawlerDetect.csproj b/NetCrawlerDetect/NetCrawlerDetect/NetCrawlerDetect.csproj index 19dae3b..bf0be32 100644 --- a/NetCrawlerDetect/NetCrawlerDetect/NetCrawlerDetect.csproj +++ b/NetCrawlerDetect/NetCrawlerDetect/NetCrawlerDetect.csproj @@ -2,8 +2,8 @@ netstandard2.0 - 0.1.8 - 0.1.8 + 0.1.9 + 0.1.9 Graham "Gee" Plumb https://github.com/gplumb/NetCrawlerDetect/blob/master/LICENSE Graham "Gee" Plumb @@ -12,7 +12,7 @@ A .net standard port of JayBizzle's CrawlerDetect project (https://github.com/JayBizzle/Crawler-Detect). true NetCrawlerDetect - Bumper update (over 50 new bots and 10 new user agents) + Performance and general clean up (https://github.com/JayBizzle/Crawler-Detect/pull/312) A .net standard port of JayBizzle's CrawlerDetect project (https://github.com/JayBizzle/Crawler-Detect).