diff --git a/NetCrawlerDetect/NetCrawlerDetect.Tests/NetCrawlerDetect.Tests.csproj b/NetCrawlerDetect/NetCrawlerDetect.Tests/NetCrawlerDetect.Tests.csproj
index 58f4e6e..ab31da3 100644
--- a/NetCrawlerDetect/NetCrawlerDetect.Tests/NetCrawlerDetect.Tests.csproj
+++ b/NetCrawlerDetect/NetCrawlerDetect.Tests/NetCrawlerDetect.Tests.csproj
@@ -4,7 +4,7 @@
netcoreapp2.0
false
- 0.1.9
+ 0.2.0
diff --git a/NetCrawlerDetect/NetCrawlerDetect.Tests/crawlers.txt b/NetCrawlerDetect/NetCrawlerDetect.Tests/crawlers.txt
index 113b75f..4670322 100644
--- a/NetCrawlerDetect/NetCrawlerDetect.Tests/crawlers.txt
+++ b/NetCrawlerDetect/NetCrawlerDetect.Tests/crawlers.txt
@@ -3410,4 +3410,37 @@ Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0 ; BacklinkHttpStatus)
SilverReader/1.0; http://silverreader.com
ExtractorPro
WebsiteExtractor
-Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1 google_partner_monitoring FWSzVTDDBz14547302713138T
\ No newline at end of file
+Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1 google_partner_monitoring FWSzVTDDBz14547302713138T
+Outlook-iOS/696.1188109.prod.iphone (2.102.0)
+Polymail/1.47 (ctrlla.Polymail; build:64; OS X 10.12.6) Alamofire/4.5.1
+nyawc/1.8.1 CPython/3.6.5 Linux/3.10.0-862.9.1.el7.x86_64
+commonscan.org cralwer v1.01
+AdminLabs
+WebGazer/1.0 (+https://www.webgazer.io)
+HappyApps-WebCheck/1.0
+Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6; +feeder.co) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36
+Mozilla/5.0 (compatible; TestURI; +http://testuri.org/)
+WebSniffer/1.0 (+http://websniffer.cc/)
+Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1 SeoSiteCheckup (https://seositecheckup.co
+Adstxtaggregator.com/1.0
+Discourse Forum Onebox v2.0.0.beta10
+faviconarchive.org
+GreatNews/1.0
+MemGator:1.0-rc2 <@WebSciDL>
+URLTester/1 CFNetwork/974.1 Darwin/18.0.0 (x86_64)
+URLTester/1 CFNetwork/975.0.3 Darwin/18.2.0 (x86_64)
+Urlcheckr/2.0
+SimpleChecker
+reqwest/0.9.9
+ReactorNetty/0.7.10.RELEASE
+Mozilla/5.0 (compatible; woorankreview/2.0; +https://www.woorank.com/)
+https://www.nominet.uk/privacy-notice
+ UnChaos From Chaos To Order Hybrid Web Search Engine.(vadim_gonchar@unchaos.com)
+Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/69.0.3464.0 Safari/537.36 Chrome-Lighthouse
+Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5 Build/MRA58N) AppleWebKit/537.36(KHTML, like Gecko) Chrome/61.0.3116.0 Mobile Safari/537.36 Chrome-Lighthouse
+Trackuity (+http://trackuity.com)
+deeris/1.0 (+http://www.deeris.com)
+Mediametric
+TextRazor Downloader (https://www.textrazor.com)
+ddline.cn rank history
+Web spyder
\ No newline at end of file
diff --git a/NetCrawlerDetect/NetCrawlerDetect.Tests/devices.txt b/NetCrawlerDetect/NetCrawlerDetect.Tests/devices.txt
index c82550b..0fae60b 100644
--- a/NetCrawlerDetect/NetCrawlerDetect.Tests/devices.txt
+++ b/NetCrawlerDetect/NetCrawlerDetect.Tests/devices.txt
@@ -30260,7 +30260,6 @@ Mozilla/5.0 (Linux; Android 4.4.4; GT-I9300 Build/KTU84Q) AppleWebKit/537.36 (KH
Mozilla/5.0 (Linux; Android 4.4.4; G7-L01 Build/HuaweiG7-L01) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.89 Mobile Safari/537.36
Mozilla/5.0 (iPhone; CPU iPhone OS 10_2 like Mac OS X) AppleWebKit/602.3.12 (KHTML, like Gecko) Mobile/14C92 (5298492416)
Mozilla/5.0 (Linux; Android 6.0.1; Nexus 6 Build/MMB29V) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.91 Mobile Safari/537.36
-Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36 Mediametric
Mozilla/5.0 (Linux; U; Android 5.1; en-US; SLIDE2 Build/LMY47I) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 UCBrowser/10.9.0.731 U3/0.8.0 Mobile Safari/534.30
Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; Sony Tablet S Build/TISU0143) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30
Opera/9.80 (SpreadTrum; Opera Mini/4.4.31492/37.8678; U; en) Presto/2.12.423 Version/12.16
@@ -122332,7 +122331,6 @@ Mozilla/5.0 (iPhone; CPU iPhone OS 11_1_2 like Mac OS X) AppleWebKit/604.3.5 (KH
Mozilla/5.0 (iPhone; CPU iPhone OS 11_1 like Mac OS X) AppleWebKit/604.3.5 (KHTML, like Gecko) Mobile/15B93 (4347460352)
Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Mobile/15A402 (4310753168)
Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_2 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Mobile/15C202 (4695841712)
-Polymail/1.47 (ctrlla.Polymail; build:64; OS X 10.12.6) Alamofire/4.5.1
Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Mobile/14G60 (4300266224)
Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_3 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Mobile/15A432 (4761665200)
Mozilla/5.0 (Linux; Android 6.0.1; SM-G900V Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/52.0.2743.98 Mobile Safari/537.36
@@ -156257,7 +156255,6 @@ Mozilla/5.0 (X11; CrOS x86_64 10032.86.0) AppleWebKit/537.36 (KHTML, like Gecko)
Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_6 like Mac OS X) AppleWebKit/604.5.6 (KHTML, like Gecko) Mobile/15D100 [FBAN/FBIOS;FBAV/170.1.0.80.91;FBBV/106613464;FBDV/iPhone10,3;FBMD/iPhone;FBSN/iOS;FBSV/11.2.6;FBSS/3;FBCR/giffgaff;FBID/phone;FBLC/en_GB;FBOP/5;FBRV/107080238]
Mozilla/5.0 (Linux; Android 7.1.2; Swift 2 X Build/N2G47Z) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.109 Mobile Safari/537.36
Mozilla/5.0 (Linux; Android 6.0; XT1700 Build/MRA58K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.109 Mobile Safari/537.36
-Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5 Build/MRA58N) AppleWebKit/537.36(KHTML, like Gecko) Chrome/61.0.3116.0 Mobile Safari/537.36 Chrome-Lighthouse
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3278.0 Safari/537.36
Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_1 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Mobile/15C153 [FBAN/FBIOS;FBAV/164.0.0.56.96;FBBV/98434650;FBDV/iPhone9,3;FBMD/iPhone;FBSN/iOS;FBSV/11.2.1;FBSS/2;FBCR/O2;FBID/phone;FBLC/en_GB;FBOP/5;FBRV/100018292]
Mozilla/5.0 (X11; Linux i686) AppleWebKit/538.1 (KHTML, like Gecko) QupZilla/1.8.6 Safari/538.1
@@ -161339,7 +161336,6 @@ Mozilla/5.0 (Linux; Android 5.0.2; A0001 Build/LRX22G) AppleWebKit/537.36 (KHTML
Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_6 like Mac OS X) AppleWebKit/604.5.6 (KHTML, like Gecko) Mobile/15D100 [FBAN/FBIOS;FBAV/162.0.0.47.94;FBBV/95649710;FBDV/iPhone8,4;FBMD/iPhone;FBSN/iOS;FBSV/11.2.6;FBSS/2;FBCR/Virgin;FBID/phone;FBLC/en_GB;FBOP/5;FBRV/97201607]
Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_6 like Mac OS X) AppleWebKit/604.5.6 (KHTML, like Gecko) Mobile/15D100 [FBAN/FBIOS;FBAV/164.0.0.56.96;FBBV/98434650;FBDV/iPhone8,4;FBMD/iPhone;FBSN/iOS;FBSV/11.2.6;FBSS/2;FBCR/giffgaff;FBID/phone;FBLC/en_GB;FBOP/5;FBRV/100018292]
Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US; rv:1.9.1b3) Gecko/20090305 Firefox/3.1b3
-Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/61.0.3116.0 Safari/537.36 Chrome-Lighthouse
Mozilla/5.0 (Linux; Android 7.1.1; ONEPLUS A3003 Build/NMF26F; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/65.0.3325.109 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/164.0.0.44.95;]
Mozilla/5.0 (Linux; Android 5.1.1; SM-G361F Build/LMY48B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Mobile Safari/537.36
Mozilla/5.0 (Linux; Android 8.0.0; F8331 Build/41.3.A.0.401; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/64.0.3282.137 Mobile Safari/537.36
@@ -165981,3 +165977,5 @@ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/604.4.7 (KHTML, like
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36
Mozilla/5.0 (Linux; Android 8.1.0; Pixel Build/OPM4.171019.016.B1; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/66.0.3359.158 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/171.0.0.49.92;]
Mozilla/5.0 (iPhone; CPU iPhone OS 11_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E302 [FBAN/FBIOS;FBAV/171.0.0.49.95;FBBV/107251038;FBDV/iPhone7,2;FBMD/iPhone;FBSN/iOS;FBSV/11.3.1;FBSS/2;FBCR/EE;FBID/phone;FBLC/en_GB;FBOP/5;FBRV/108659124]
+Mozilla/5.0 (Linux; Android 5.1; CRONO 54 Build/LMY47I) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.111 Mobile Safari/537.36
+Mozilla/5.0 (Linux; Android 4.2.1; MAJESTIC Zeus21 Build/JOP40D) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.72 Mobile Safari/537.36
\ No newline at end of file
diff --git a/NetCrawlerDetect/NetCrawlerDetect.sln b/NetCrawlerDetect/NetCrawlerDetect.sln
index ca3966d..791d203 100644
--- a/NetCrawlerDetect/NetCrawlerDetect.sln
+++ b/NetCrawlerDetect/NetCrawlerDetect.sln
@@ -22,8 +22,11 @@ Global
EndGlobalSection
GlobalSection(MonoDevelopProperties) = preSolution
description = A .net standard port of JayBizzle's CrawlerDetect project (https://github.com/JayBizzle/Crawler-Detect).
- version = 0.1.9
+ version = 0.2.0
Policies = $0
$0.VersionControlPolicy = $1
+ $0.DotNetNamingPolicy = $2
+ $2.DirectoryNamespaceAssociation = PrefixedHierarchical
+ $0.StandardHeader = $3
EndGlobalSection
EndGlobal
diff --git a/NetCrawlerDetect/NetCrawlerDetect/Fixtures/Crawlers.cs b/NetCrawlerDetect/NetCrawlerDetect/Fixtures/Crawlers.cs
index dd10c59..3ae017b 100644
--- a/NetCrawlerDetect/NetCrawlerDetect/Fixtures/Crawlers.cs
+++ b/NetCrawlerDetect/NetCrawlerDetect/Fixtures/Crawlers.cs
@@ -15,7 +15,7 @@ public Crawlers()
// Collection of user-agent regex snippets
_data = new List()
{
- @"outbrain",
+ @".*Java.*outbrain",
@" YLT",
@"^b0t$",
@"^bluefish ",
@@ -64,8 +64,10 @@ public Crawlers()
@"adbeat",
@"AddThis",
@"ADmantX",
+ @"AdminLabs",
@"adressendeutschland",
@"adscanner",
+ @"Adstxtaggregator",
@"agentslug",
@"AHC",
@"aihit",
@@ -214,6 +216,7 @@ public Crawlers()
@"colly -",
@"CommaFeed",
@"Commons-HttpClient",
+ @"commonscan",
@"contactbigdatafr",
@"contentkingapp",
@"convera",
@@ -242,6 +245,8 @@ public Crawlers()
@"dataprovider",
@"DataXu",
@"Daum(oa)?[ \/][0-9]",
+ @"ddline",
+ @"deeris",
@"Demon",
@"DeuSu",
@"developers\.google\.com\/\+\/web\/snippet\/",
@@ -250,6 +255,7 @@ public Crawlers()
@"Digincore",
@"DigitalPebble",
@"Dirbuster",
+ @"Discourse Forum Onebox",
@"Disqus\/",
@"Dispatch\/",
@"DittoSpyder",
@@ -314,6 +320,7 @@ public Crawlers()
@"Faveeo",
@"Favicon downloader",
@"faviconkit",
+ @"faviconarchive",
@"FavOrg",
@"Feed Wrangler",
@"Feedable\/",
@@ -322,6 +329,7 @@ public Crawlers()
@"FeedBucket",
@"FeedBunch\/",
@"FeedBurner",
+ @"feeder",
@"Feedly",
@"FeedshowOnline",
@"Feedspot",
@@ -346,7 +354,7 @@ public Crawlers()
@"forensiq",
@"FoundSeoTool",
- // 'Francis [Bot]'
+ //'Francis [Bot]'
@"http:\/\/www.neomo.de\/",
@"free thumbnails",
@"Freeuploader",
@@ -416,6 +424,7 @@ public Crawlers()
@"Grafula",
@"Grammarly",
@"GrapeFX",
+ @"GreatNews",
@"Gregarius",
@"GRequests",
@"grokkit",
@@ -430,6 +439,7 @@ public Crawlers()
@"Haansoft",
@"hackney\/",
@"Hadi Agent",
+ @"HappyApps-WebCheck",
@"Hatena",
@"Havij",
@"HeadlessChrome",
@@ -476,7 +486,6 @@ public Crawlers()
@"huaweisymantec",
@"HubSpot ",
@"Humanlinks",
- @"HyperZbozi\.cz Feeder",
@"i2kconnect\/",
@"Iblog",
@"ichiro",
@@ -577,6 +586,7 @@ public Crawlers()
@"Licorne",
@"Liferea\/",
@"Lightspeedsystems",
+ @"Lighthouse",
@"Likse",
@"Link Valet",
@"link_thumbnailer",
@@ -620,11 +630,13 @@ public Crawlers()
@"Mass Downloader",
@"masscan\/",
@"Mata Hari",
+ @"Mediametric",
@"Mediapartners-Google",
@"mediawords",
@"MegaIndex\.ru",
@"MeltwaterNews",
@"Melvil Rawi",
+ @"MemGator",
@"Metaspinner",
@"MetaURI",
@"MFC_Tear_Sample",
@@ -708,6 +720,7 @@ public Crawlers()
@"Nodemeter",
@"NodePing",
@"nominet\.org\.uk",
+ @"nominet\.uk",
@"Norton-Safeweb",
@"Notifixious",
@"notifyninja",
@@ -715,6 +728,7 @@ public Crawlers()
@"nutch",
@"Nuzzel",
@"nWormFeedFinder",
+ @"nyawc\/",
@"Nymesis",
@"NYU",
@"Ocelli\/",
@@ -737,6 +751,7 @@ public Crawlers()
@"OrgProbe\/",
@"orion-semantics",
@"Outlook-Express",
+ @"Outlook-iOS",
@"ow\.ly",
@"Owler",
@"ownCloud News",
@@ -789,6 +804,7 @@ public Crawlers()
@"PocketParser",
@"Pockey",
@"POE-Component-Client-HTTP",
+ @"Polymail\/",
@"Pompos",
@"Porkbun",
@"Port Monitor",
@@ -830,6 +846,7 @@ public Crawlers()
@"RankFlex",
@"RankSonicSiteAuditor",
@"Re-re Studio",
+ @"ReactorNetty",
@"Readability",
@"RealDownload",
@"RealPlayer%20Downloader",
@@ -841,6 +858,7 @@ public Crawlers()
@"ReGet",
@"RepoMonkey",
@"request\.js",
+ @"reqwest\/",
@"ResponseCodeTest",
@"RestSharp",
@"Riddler",
@@ -894,6 +912,7 @@ public Crawlers()
@"SEOprofiler",
@"SEOsearch",
@"seoscanners",
+ @"seositecheckup",
@"SEOstats",
@"servernfo",
@"sexsearcher",
@@ -1001,7 +1020,9 @@ public Crawlers()
@"teoma",
@"terrainformatica",
@"Test Certificate Info",
+ @"testuri",
@"Tetrahedron",
+ @"TextRazor Downloader",
@"The Drop Reaper",
@"The Expert HTML Source Viewer",
@"The Knowledge AI",
@@ -1020,6 +1041,7 @@ public Crawlers()
@"touche\.com",
@"Traackr\.com",
@"tracemyfile",
+ @"Trackuity",
@"TrapitAgent",
@"Trendiction",
@"Trendsmap",
@@ -1041,6 +1063,7 @@ public Crawlers()
@"ubermetrics-technologies",
@"uclassify",
@"UdmSearch",
+ @"unchaos",
@"unirest-java",
@"UniversalFeedParser",
@"Unshorten\.It",
@@ -1050,10 +1073,12 @@ public Crawlers()
@"updown\.io daemon",
@"Upflow",
@"Uptimia",
+ @"Urlcheckr",
@"URL Verifier",
@"URLitor",
@"urlresolver",
@"Urlstat",
+ @"URLTester",
@"UrlTrends Ranking Updater",
@"URLy Warning",
@"URLy\.Warning",
@@ -1096,6 +1121,7 @@ public Crawlers()
@"Web Fuck",
@"Web Pix",
@"Web Sauger",
+ @"Web spyder",
@"Web Sucker",
@"Webalta",
@"Webauskunft",
@@ -1111,6 +1137,7 @@ public Crawlers()
@"WebEnhancer",
@"WebFetch",
@"WebFuck",
+ @"WebGazer",
@"WebGo IS",
@"WebImageCollector",
@"WebImages",
@@ -1129,6 +1156,7 @@ public Crawlers()
@"websitepulse agent",
@"WebsiteQuester",
@"Websnapr",
+ @"WebSniffer",
@"Webster",
@"WebStripper",
@"WebSucker",
@@ -1154,6 +1182,7 @@ public Crawlers()
@"wkhtmlto",
@"wmtips",
@"Woko",
+ @"woorankreview",
@"Word\/",
@"WordPress\/",
@"WordupinfoSearch",
@@ -1203,7 +1232,7 @@ public Crawlers()
@"Zend_Http_Client",
@"Zend\\\\Http\\\\Client",
@"Zermelo",
- @"Zeus",
+ @"Zeus ",
@"zgrab",
@"ZnajdzFoto",
@"Zombie\.js",
diff --git a/NetCrawlerDetect/NetCrawlerDetect/Fixtures/Exclusions.cs b/NetCrawlerDetect/NetCrawlerDetect/Fixtures/Exclusions.cs
index 344ac4c..6ab8248 100644
--- a/NetCrawlerDetect/NetCrawlerDetect/Fixtures/Exclusions.cs
+++ b/NetCrawlerDetect/NetCrawlerDetect/Fixtures/Exclusions.cs
@@ -58,6 +58,7 @@ public Exclusions()
@" \.NET[\d\.]*",
@"cubot",
@"; M bot",
+ @"; CRONO",
@"; B bot",
@"; IDbot",
@"; ID bot",
diff --git a/NetCrawlerDetect/NetCrawlerDetect/NetCrawlerDetect.csproj b/NetCrawlerDetect/NetCrawlerDetect/NetCrawlerDetect.csproj
index bf0be32..3307dd6 100644
--- a/NetCrawlerDetect/NetCrawlerDetect/NetCrawlerDetect.csproj
+++ b/NetCrawlerDetect/NetCrawlerDetect/NetCrawlerDetect.csproj
@@ -2,8 +2,8 @@
netstandard2.0
- 0.1.9
- 0.1.9
+ 0.2.0
+ 0.2.0
Graham "Gee" Plumb
https://github.com/gplumb/NetCrawlerDetect/blob/master/LICENSE
Graham "Gee" Plumb
@@ -12,7 +12,7 @@
A .net standard port of JayBizzle's CrawlerDetect project (https://github.com/JayBizzle/Crawler-Detect).
true
NetCrawlerDetect
- Performance and general clean up (https://github.com/JayBizzle/Crawler-Detect/pull/312)
+ Bot additions and removal of false positives (up to https://github.com/JayBizzle/Crawler-Detect/pull/325)
A .net standard port of JayBizzle's CrawlerDetect project (https://github.com/JayBizzle/Crawler-Detect).