From 898de338f2f6d59b590dcfb27eca46626797792c Mon Sep 17 00:00:00 2001 From: Nurahmadie Date: Sun, 15 Sep 2013 12:56:54 +0700 Subject: [PATCH 01/24] Ignore Gemfile.lock --- .gitignore | 1 + Gemfile.lock | 32 -------------------------------- 2 files changed, 1 insertion(+), 32 deletions(-) delete mode 100644 Gemfile.lock diff --git a/.gitignore b/.gitignore index 1377554..b782f12 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ *.swp +Gemfile.lock diff --git a/Gemfile.lock b/Gemfile.lock deleted file mode 100644 index 5673205..0000000 --- a/Gemfile.lock +++ /dev/null @@ -1,32 +0,0 @@ -PATH - remote: . - specs: - feedbag (0.9.1) - nokogiri - -GEM - remote: https://rubygems.org/ - specs: - activesupport (3.2.13) - i18n (= 0.6.1) - multi_json (~> 1.0) - i18n (0.6.1) - metaclass (0.0.1) - mocha (0.12.10) - metaclass (~> 0.0.1) - multi_json (1.7.2) - nokogiri (1.5.9) - shoulda (3.4.0) - shoulda-context (~> 1.0, >= 1.0.1) - shoulda-matchers (~> 1.0, >= 1.4.1) - shoulda-context (1.1.0) - shoulda-matchers (1.4.1) - activesupport (>= 3.0.0) - -PLATFORMS - ruby - -DEPENDENCIES - feedbag! - mocha (~> 0.12.0) - shoulda From c737c293cc2ff91566484a3bb3f72c9be653965e Mon Sep 17 00:00:00 2001 From: Nurahmadie Date: Sun, 15 Sep 2013 12:58:59 +0700 Subject: [PATCH 02/24] Allow http to https redirection, and add user-agent. Some site actually enable HSTS, support this via `open_uri_redirections` gem. Also add default user agent and introduce ENV["FEEDBAG_UA"] to override this. --- feedbag.gemspec | 13 +- lib/feedbag.rb | 314 ++++++++++++++++++++++++------------------------ 2 files changed, 167 insertions(+), 160 deletions(-) diff --git a/feedbag.gemspec b/feedbag.gemspec index cef181a..8ac92a1 100644 --- a/feedbag.gemspec +++ b/feedbag.gemspec @@ -1,11 +1,11 @@ # -*- encoding: utf-8 -*- - + Gem::Specification.new do |s| s.name = %q{feedbag} s.version = "0.9.1" s.homepage = "http://axiombox.com/feedbag" s.rubyforge_project = "feedbag" - + s.authors = ["Axiombox", "David Moreno", "Derek Willis"] s.date = %q{2012-03-16} s.description = %q{Ruby's favorite feed auto-discoverty tool} @@ -15,9 +15,12 @@ Gem::Specification.new do |s| s.has_rdoc = true s.rdoc_options = ["--main", "README.markdown"] s.summary = %q{Ruby's favorite feed auto-discovery tool} - s.add_dependency("nokogiri") - s.add_development_dependency "shoulda" - s.add_development_dependency "mocha", "~> 0.12.0" + + s.add_dependency("nokogiri") + s.add_dependency("open_uri_redirections") + + s.add_development_dependency "shoulda" + s.add_development_dependency "mocha", ">= 0.12.0" s.bindir = 'bin' s.default_executable = %q{feedbag} s.executables = ["feedbag"] diff --git a/lib/feedbag.rb b/lib/feedbag.rb index e44def9..74be521 100644 --- a/lib/feedbag.rb +++ b/lib/feedbag.rb @@ -24,18 +24,21 @@ require "rubygems" require "nokogiri" require "open-uri" +require "open_uri_redirections" require "net/http" +USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.66 Safari/537.36" + class Feedbag - CONTENT_TYPES = [ - 'application/x.atom+xml', - 'application/atom+xml', - 'application/xml', - 'text/xml', - 'application/rss+xml', - 'application/rdf+xml', - ].freeze + CONTENT_TYPES = [ + 'application/x.atom+xml', + 'application/atom+xml', + 'application/xml', + 'text/xml', + 'application/rss+xml', + 'application/rdf+xml', + ].freeze def self.feed?(url) new.feed?(url) @@ -49,160 +52,161 @@ def initialize @feeds = [] end - def feed?(url) - # use LWR::Simple.normalize some time - url_uri = URI.parse(url) - url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}" - url << "?#{url_uri.query}" if url_uri.query - - # hack: - url.sub!(/^feed:\/\//, 'http://') - - res = Feedbag.find(url) - if res.size == 1 and res.first == url - return true - else - return false - end - end - - def find(url, args = {}) - url_uri = URI.parse(url) - url = nil - if url_uri.scheme.nil? - url = "http://#{url_uri.to_s}" - elsif url_uri.scheme == "feed" - return self.add_feed(url_uri.to_s.sub(/^feed:\/\//, 'http://'), nil) - else - url = url_uri.to_s - end - #url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}" - - # check if feed_valid is avail + def feed?(url) + # use LWR::Simple.normalize some time + url_uri = URI.parse(url) + url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}" + url << "?#{url_uri.query}" if url_uri.query + + # hack: + url.sub!(/^feed:\/\//, 'http://') + + res = Feedbag.find(url) + if res.size == 1 and res.first == url + return true + else + return false + end + end + + def find(url, args = {}) + url_uri = URI.parse(url) + url = nil + if url_uri.scheme.nil? + url = "http://#{url_uri.to_s}" + elsif url_uri.scheme == "feed" + return self.add_feed(url_uri.to_s.sub(/^feed:\/\//, 'http://'), nil) + else + url = url_uri.to_s + end + #url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}" + + # check if feed_valid is avail begin - require "feed_validator" - v = W3C::FeedValidator.new - v.validate_url(url) - return self.add_feed(url, nil) if v.valid? - rescue LoadError - # scoo - rescue REXML::ParseException - # usually indicates timeout - # TODO: actually find out timeout. use Terminator? - # $stderr.puts "Feed looked like feed but might not have passed validation or timed out" + require "feed_validator" + v = W3C::FeedValidator.new + v.validate_url(url) + return self.add_feed(url, nil) if v.valid? + rescue LoadError + # scoo + rescue REXML::ParseException + # usually indicates timeout + # TODO: actually find out timeout. use Terminator? + # $stderr.puts "Feed looked like feed but might not have passed validation or timed out" rescue => ex - $stderr.puts "#{ex.class} error ocurred with: `#{url}': #{ex.message}" - end - - begin - html = open(url) do |f| - content_type = f.content_type.downcase - if content_type == "application/octet-stream" # open failed - content_type = f.meta["content-type"].gsub(/;.*$/, '') - end - if CONTENT_TYPES.include?(content_type) - return self.add_feed(url, nil) - end - - doc = Nokogiri::HTML(f.read) - - if doc.at("base") and doc.at("base")["href"] - @base_uri = doc.at("base")["href"] - else - @base_uri = nil - end - - # first with links + $stderr.puts "#{ex.class} error ocurred with: `#{url}': #{ex.message}" + end + + begin + user_agent = ENV["FEEDBAG_UA"] || USER_AGENT + html = open(url, "User-Agent" => user_agent, :allow_redirections => :safe) do |f| + content_type = f.content_type.downcase + if content_type == "application/octet-stream" # open failed + content_type = f.meta["content-type"].gsub(/;.*$/, '') + end + if CONTENT_TYPES.include?(content_type) + return self.add_feed(url, nil) + end + + doc = Nokogiri::HTML(f.read) + + if doc.at("base") and doc.at("base")["href"] + @base_uri = doc.at("base")["href"] + else + @base_uri = nil + end + + # first with links (doc/"atom:link").each do |l| - next unless l["rel"] - if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and l["rel"].downcase == "self" - self.add_feed(l["href"], url, @base_uri) - end - end - - (doc/"link").each do |l| - next unless l["rel"] - if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and (l["rel"].downcase =~ /alternate/i or l["rel"] == "service.feed") - self.add_feed(l["href"], url, @base_uri) - end - end - - (doc/"a").each do |a| - next unless a["href"] - if self.looks_like_feed?(a["href"]) and (a["href"] =~ /\// or a["href"] =~ /#{url_uri.host}/) - self.add_feed(a["href"], url, @base_uri) - end - end - - (doc/"a").each do |a| - next unless a["href"] - if self.looks_like_feed?(a["href"]) - self.add_feed(a["href"], url, @base_uri) - end - end + next unless l["rel"] + if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and l["rel"].downcase == "self" + self.add_feed(l["href"], url, @base_uri) + end + end + + (doc/"link").each do |l| + next unless l["rel"] + if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and (l["rel"].downcase =~ /alternate/i or l["rel"] == "service.feed") + self.add_feed(l["href"], url, @base_uri) + end + end + + (doc/"a").each do |a| + next unless a["href"] + if self.looks_like_feed?(a["href"]) and (a["href"] =~ /\// or a["href"] =~ /#{url_uri.host}/) + self.add_feed(a["href"], url, @base_uri) + end + end + + (doc/"a").each do |a| + next unless a["href"] + if self.looks_like_feed?(a["href"]) + self.add_feed(a["href"], url, @base_uri) + end + end # Added support for feeds like http://tabtimes.com/tbfeed/mashable/full.xml if url.match(/.xml$/) and doc.root and doc.root["xml:base"] and doc.root["xml:base"].strip == url.strip - self.add_feed(url, nil) + self.add_feed(url, nil) end - end - rescue Timeout::Error => err - $stderr.puts "Timeout error ocurred with `#{url}: #{err}'" - rescue OpenURI::HTTPError => the_error - $stderr.puts "Error ocurred with `#{url}': #{the_error}" - rescue SocketError => err - $stderr.puts "Socket error ocurred with: `#{url}': #{err}" - rescue => ex - $stderr.puts "#{ex.class} error ocurred with: `#{url}': #{ex.message}" - ensure - return @feeds - end - - end - - def looks_like_feed?(url) - if url =~ /(\.(rdf|xml|rdf|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i - true - else - false - end - end - - def add_feed(feed_url, orig_url, base_uri = nil) - # puts "#{feed_url} - #{orig_url}" - url = feed_url.sub(/^feed:/, '').strip - - if base_uri - # url = base_uri + feed_url - url = URI.parse(base_uri).merge(feed_url).to_s - end - - begin - uri = URI.parse(url) - rescue - puts "Error with `#{url}'" - exit 1 - end - unless uri.absolute? - orig = URI.parse(orig_url) - url = orig.merge(url).to_s - end - - # verify url is really valid - @feeds.push(url) unless @feeds.include?(url)# if self._is_http_valid(URI.parse(url), orig_url) - end - - # not used. yet. - def _is_http_valid(uri, orig_url) - req = Net::HTTP.get_response(uri) - orig_uri = URI.parse(orig_url) - case req - when Net::HTTPSuccess then - return true - else - return false - end - end + end + rescue Timeout::Error => err + $stderr.puts "Timeout error ocurred with `#{url}: #{err}'" + rescue OpenURI::HTTPError => the_error + $stderr.puts "Error ocurred with `#{url}': #{the_error}" + rescue SocketError => err + $stderr.puts "Socket error ocurred with: `#{url}': #{err}" + rescue => ex + $stderr.puts "#{ex.class} error ocurred with: `#{url}': #{ex.message}" + ensure + return @feeds + end + + end + + def looks_like_feed?(url) + if url =~ /(\.(rdf|xml|rdf|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i + true + else + false + end + end + + def add_feed(feed_url, orig_url, base_uri = nil) + # $stderr.puts "#{feed_url} - #{orig_url}\n" + url = feed_url.sub(/^feed:/, '').strip + + if base_uri + # url = base_uri + feed_url + url = URI.parse(base_uri).merge(feed_url).to_s + end + + begin + uri = URI.parse(url) + rescue + puts "Error with `#{url}'" + exit 1 + end + unless uri.absolute? + orig = URI.parse(orig_url) + url = orig.merge(url).to_s + end + + # verify url is really valid + @feeds.push(url) unless @feeds.include?(url)# if self._is_http_valid(URI.parse(url), orig_url) + end + + # not used. yet. + def _is_http_valid(uri, orig_url) + req = Net::HTTP.get_response(uri) + orig_uri = URI.parse(orig_url) + case req + when Net::HTTPSuccess then + return true + else + return false + end + end end if __FILE__ == $0 From f4fac1fa9f56112e193ddd74833aff4589a2629c Mon Sep 17 00:00:00 2001 From: David Moreno Date: Sat, 7 Dec 2013 18:13:34 +0100 Subject: [PATCH 03/24] Updated README info. --- README.markdown | 88 +++++++++---------------------------------------- 1 file changed, 16 insertions(+), 72 deletions(-) diff --git a/README.markdown b/README.markdown index 0f08d59..66a23b1 100644 --- a/README.markdown +++ b/README.markdown @@ -1,92 +1,40 @@ Feedbag ======= -> Do you want me to drag my sack across your face? -> - Glenn Quagmire -Feedbag is a feed auto-discovery Ruby library. You don't need to know more about it. It is said to be: - -> Ruby's favorite auto-discovery tool/library! +Feedbag is Ruby's favorite auto-discovery tool/library! ### Quick synopsis - >> require "rubygems" - => true >> require "feedbag" => true - >> Feedbag.find "log.damog.net" - => ["http://feeds.feedburner.com/TeoremaDelCerdoInfinito", "http://log.damog.net/comments/feed/"] - >> Feedbag.feed?("google.com") - => false - >> Feedbag.feed?("http://planet.debian.org/rss20.xml") - => true - -### Installation - - $ sudo gem install damog-feedbag -s http://gems.github.com/ - -Or just grab feedbag.rb and use it on your own project: - - $ wget http://github.com/damog/feedbag/raw/master/lib/feedbag.rb - -## Tutorial - -So you want to know more about it. - -OK, if the URL passed to the find method is a feed itself, that only feed URL will be returned. - - >> Feedbag.find "github.com/damog.atom" - => ["http://github.com/damog.atom"] - >> - -Otherwise, it will always return LINK feeds first, A (anchor tags) feeds later. Between A feeds, the ones hosted on the same URL's host, will have larger priority: + >> Feedbag.find "damog.net/blog" + => ["http://damog.net/blog/index.rss", "http://damog.net/blog/tags/feed", "http://damog.net/blog/tags/rfeed"] + >> Feedbag.feed? "perl.org" + => false + >> Feedbag.feed?("http://jobs.perl.org/rss/standard.rss") + => true - >> Feedbag.find "http://ve.planetalinux.org" - => ["http://feedproxy.google.com/PlanetaLinuxVenezuela", "http://rendergraf.wordpress.com/feed/", "http://rootweiller.wordpress.com/feed/", "http://skatox.com/blog/feed/", "http://kodegeek.com/atom.xml", "http://blog.0x29.com.ve/?feed=rss2&cat=8"] - >> +You can also use an installed command line tool for quick queries, if you install the gem: -On your application you should only take the very first element of the array, most of the times: + $ feedbag http://rubygems.org/profiles/damog + == http://rubygems.org/profiles/damog: + - http://feeds.feedburner.com/gemcutter-latest - >> Feedbag.find("planet.debian.org").first(3) - => ["http://planet.debian.org/rss10.xml", "http://planet.debian.org/rss20.xml", "http://planet.debian.org/atom.xml"] - >> -(Try running that same example without the "first" method. That example's host is a blog aggregator, so it has hundreds of feed URLs:) +### Installation - >> Feedbag.find("planet.debian.org").size - => 104 - >> + $ sudo gem install feedbag -Feedbag will find them all, but it will return the most important ones on the first elements on the array returned. +Or just grab feedbag.rb and use it on your own project: - >> Feedbag.find("cnn.com") - => ["http://rss.cnn.com/rss/cnn_topstories.rss", "http://rss.cnn.com/rss/cnn_latest.rss", "http://rss.cnn.com/services/podcasting/robinmeade/rss.xml"] - >> + $ wget http://github.com/damog/feedbag/raw/master/lib/feedbag.rb ### Why should you use it? -- Because it's cool. - Because it only uses [Hpricot](https://code.whytheluckystiff.net/hpricot/) as dependency. - Because it follows modern feed filename conventions (like those ones used by WordPress blogs, or Blogger, etc). - Because it's a single file you can embed easily in your application. -- Because it passes most of the Mark Pilgrim's [Atom auto-discovery test suite](http://diveintomark.org/tests/client/autodiscovery/). It doesn't pass them all because some of those tests are broken (citation needed). - -### Why did I build it? - -- Because I liked Benjamin Trott's [Feed::Find](http://search.cpan.org/~btrott/Feed-Find-0.06/lib/Feed/Find.pm). -- Because I thought it would be good to have Feed::Find's functionality in Ruby. -- Because I thought it was going to be easy to maintain. -- Because I was going to use it on [rFeed](http://github.com/damog/rfeed). -- And finally, because I didn't know [rfeedfinder](http://rfeedfinder.rubyforge.org/) existed :-) - -### Bugs - -Please, report bugs to [rt@support.axiombox.com](rt@support.axiombox.com) or directly to the author. - -### Contribute - -> git clone git://github.com/damog/feedbag.git - -...patch, build, hack and make pull requests. I'll be glad. +- Because it's faster than rfeedfinder. ### Author @@ -96,7 +44,3 @@ Please, report bugs to [rt@support.axiombox.com](rt@support.axiombox.com) or dir This is free software. See [COPYING](http://github.com/damog/feedbag/master/COPYING) for more information. -### Thanks - -[Raquel](http://maggit.net), for making [Axiombox](http://axiombox.com) and most of my dreams possible. Also, [GitHub](http://github.com) for making a nice code sharing service that doesn't suck. - From da3a92233f6fca451b564f31cea7e4fa6570c85a Mon Sep 17 00:00:00 2001 From: David Moreno Date: Sat, 7 Dec 2013 18:13:44 +0100 Subject: [PATCH 04/24] up to date --- feedbag.gemspec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/feedbag.gemspec b/feedbag.gemspec index f6f05f3..bf72f25 100644 --- a/feedbag.gemspec +++ b/feedbag.gemspec @@ -3,10 +3,10 @@ Gem::Specification.new do |s| s.name = %q{feedbag} s.version = "0.9.1" - s.homepage = "http://axiombox.com/feedbag" + s.homepage = "http://github.com/damog/feedbag" s.rubyforge_project = "feedbag" - s.authors = ["Axiombox", "David Moreno"] + s.authors = ["David Moreno"] s.date = %q{2012-03-16} s.description = %q{Ruby's favorite feed auto-discoverty tool} s.email = %q{david@axiombox.com} From 5e036e523c6de39534db9e70f8533acdb14acdf2 Mon Sep 17 00:00:00 2001 From: David Moreno Date: Sat, 7 Dec 2013 18:14:36 +0100 Subject: [PATCH 05/24] useless file --- TODO | 1 - 1 file changed, 1 deletion(-) delete mode 100644 TODO diff --git a/TODO b/TODO deleted file mode 100644 index f3fe1e2..0000000 --- a/TODO +++ /dev/null @@ -1 +0,0 @@ -- Document Feedbag.feed? From 5170f7a3761fd85c225d5c1340400bfb285a9994 Mon Sep 17 00:00:00 2001 From: David Moreno Date: Sat, 7 Dec 2013 18:16:19 +0100 Subject: [PATCH 06/24] another useless file --- index.html | 115 ----------------------------------------------------- 1 file changed, 115 deletions(-) delete mode 100644 index.html diff --git a/index.html b/index.html deleted file mode 100644 index 0b02147..0000000 --- a/index.html +++ /dev/null @@ -1,115 +0,0 @@ -

Feedbag

- -
-

Do you want me to drag my sack across your face? - - Glenn Quagmire

-
- -

Feedbag is a feed auto-discovery Ruby library. You don't need to know more about it. It is said to be:

- -
-

Ruby's favorite auto-discovery tool/library!

-
- -

Quick synopsis

- -
>> require "rubygems"
-=> true
->> require "feedbag"
-=> true
->> Feedbag.find "log.damog.net"
-=> ["http://feeds.feedburner.com/TeoremaDelCerdoInfinito", "http://log.damog.net/comments/feed/"]
-
- -

Installation

- -
$ sudo gem install damog-feedbag -s http://gems.github.com/
-
- -

Or just grab feedbag.rb and use it on your own project:

- -
$ wget http://github.com/damog/feedbag/raw/master/lib/feedbag.rb
-
- -

Tutorial

- -

So you want to know more about it.

- -

OK, if the URL passed to the find method is a feed itself, that only feed URL will be returned.

- -
>> Feedbag.find "github.com/damog.atom"
-=> ["http://github.com/damog.atom"]
->>
-
- -

Otherwise, it will always return LINK feeds first, A (anchor tags) feeds later. Between A feeds, the ones hosted on the same URL's host, will have larger priority:

- -
>> Feedbag.find "http://ve.planetalinux.org"
-=> ["http://feedproxy.google.com/PlanetaLinuxVenezuela", "http://rendergraf.wordpress.com/feed/", "http://rootweiller.wordpress.com/feed/", "http://skatox.com/blog/feed/", "http://kodegeek.com/atom.xml", "http://blog.0x29.com.ve/?feed=rss2&cat=8"]
->>
-
- -

On your application you should only take the very first element of the array, most of the times:

- -
>> Feedbag.find("planet.debian.org").first(3)
-=> ["http://planet.debian.org/rss10.xml", "http://planet.debian.org/rss20.xml", "http://planet.debian.org/atom.xml"]
->>
-
- -

(Try running that same example without the "first" method. That example's host is a blog aggregator, so it has hundreds of feed URLs:)

- -
>> Feedbag.find("planet.debian.org").size
-=> 104
->>
-
- -

Feedbag will find them all, but it will return the most important ones on the first elements on the array returned.

- -
>> Feedbag.find("cnn.com")
-=> ["http://rss.cnn.com/rss/cnn_topstories.rss", "http://rss.cnn.com/rss/cnn_latest.rss", "http://rss.cnn.com/services/podcasting/robinmeade/rss.xml"]
->>
-
- -

Why should you use it?

- -
    -
  • Because it's cool.
  • -
  • Because it only uses Hpricot as dependency.
  • -
  • Because it follows modern feed filename conventions (like those ones used by WordPress blogs, or Blogger, etc).
  • -
  • Because it's a single file you can embed easily in your application.
  • -
  • Because it passes most of the Mark Pilgrim's Atom auto-discovery test suite. It doesn't pass them all because some of those tests are broken (citation needed).
  • -
- -

Why did I build it?

- -
    -
  • Because I liked Benjamin Trott's Feed::Find.
  • -
  • Because I thought it would be good to have Feed::Find's functionality in Ruby.
  • -
  • Because I thought it was going to be easy to maintain.
  • -
  • Because I was going to use it on rFeed.
  • -
  • And finally, because I didn't know rfeedfinder existed :-)
  • -
- -

Bugs

- -

Please, report bugs to rt@support.axiombox.com or directly to the author.

- -

Contribute

- -
-

git clone git://github.com/damog/feedbag.git

-
- -

...patch, build, hack and make pull requests. I'll be glad.

- -

Author

- -

David Moreno <david@axiombox.com>.

- -

Copyright

- -

This is free software. See COPYING for more information.

- -

Thanks

- -

Raquel, for making Axiombox and most of my dreams possible. Also, GitHub for making a nice code sharing service that doesn't suck.

From 317db5a0cc9c001f5ad916aff5f2b15338f276c9 Mon Sep 17 00:00:00 2001 From: David Moreno Date: Sat, 7 Dec 2013 18:17:19 +0100 Subject: [PATCH 07/24] release date notice from two years ago --- ChangeLog | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ChangeLog b/ChangeLog index ca257d3..01984ed 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,6 @@ +* 0.9.1 - Fri Mar 16 12:00:00 EDT 2012 + - Small error on the release date. + * 0.9 - Fri Mar 16 10:59:00 EDT 2012 - Changed license to MIT. From c8d3a4826de1421ad4ad197b3aac5023cccdec94 Mon Sep 17 00:00:00 2001 From: David Moreno Date: Sat, 7 Dec 2013 18:32:52 +0100 Subject: [PATCH 08/24] 0.9.2 --- ChangeLog | 3 +++ feedbag.gemspec | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 01984ed..5872273 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,6 @@ +* 0.9.2 - Sat Dec 7 18:32:17 CET 2013 + - Up to date bits and pieces. + * 0.9.1 - Fri Mar 16 12:00:00 EDT 2012 - Small error on the release date. diff --git a/feedbag.gemspec b/feedbag.gemspec index bf72f25..e2c232e 100644 --- a/feedbag.gemspec +++ b/feedbag.gemspec @@ -2,12 +2,12 @@ Gem::Specification.new do |s| s.name = %q{feedbag} - s.version = "0.9.1" + s.version = "0.9.2" s.homepage = "http://github.com/damog/feedbag" s.rubyforge_project = "feedbag" s.authors = ["David Moreno"] - s.date = %q{2012-03-16} + s.date = %q{2013-12-07} s.description = %q{Ruby's favorite feed auto-discoverty tool} s.email = %q{david@axiombox.com} s.extra_rdoc_files = ["README.markdown", "COPYING"] From 205261b75c5388793afe3ce5e04fbe40250df5b8 Mon Sep 17 00:00:00 2001 From: David Moreno Date: Mon, 9 Dec 2013 18:47:26 +0100 Subject: [PATCH 09/24] fixing the last bits of a bad merge :( --- README.markdown | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/README.markdown b/README.markdown index 2c44116..9def1f4 100644 --- a/README.markdown +++ b/README.markdown @@ -22,21 +22,12 @@ Or just grab feedbag.rb and use it on your own project: $ wget http://github.com/damog/feedbag/raw/master/lib/feedbag.rb -You can also use an installed command line tool for quick queries, if you install the gem: +You can also use the command line tool for quick queries, if you install the gem: $ feedbag http://rubygems.org/profiles/damog == http://rubygems.org/profiles/damog: - http://feeds.feedburner.com/gemcutter-latest - -### Installation - - $ sudo gem install feedbag - -Or just grab feedbag.rb and use it on your own project: - - $ wget http://github.com/damog/feedbag/raw/master/lib/feedbag.rb - ### Why should you use it? - Because it only uses [Nokogiri](http://nokogiri.org/) as dependency. From a1cbd28982fd4622ac0969c6f0b9b7f0c636f40f Mon Sep 17 00:00:00 2001 From: Eric Mill Date: Thu, 3 Apr 2014 21:16:39 -0400 Subject: [PATCH 10/24] fixed recurring 'ocurred' typo --- lib/feedbag.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/feedbag.rb b/lib/feedbag.rb index e44def9..030306c 100644 --- a/lib/feedbag.rb +++ b/lib/feedbag.rb @@ -91,7 +91,7 @@ def find(url, args = {}) # TODO: actually find out timeout. use Terminator? # $stderr.puts "Feed looked like feed but might not have passed validation or timed out" rescue => ex - $stderr.puts "#{ex.class} error ocurred with: `#{url}': #{ex.message}" + $stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}" end begin @@ -147,13 +147,13 @@ def find(url, args = {}) end end rescue Timeout::Error => err - $stderr.puts "Timeout error ocurred with `#{url}: #{err}'" + $stderr.puts "Timeout error occurred with `#{url}: #{err}'" rescue OpenURI::HTTPError => the_error - $stderr.puts "Error ocurred with `#{url}': #{the_error}" + $stderr.puts "Error occurred with `#{url}': #{the_error}" rescue SocketError => err - $stderr.puts "Socket error ocurred with: `#{url}': #{err}" + $stderr.puts "Socket error occurred with: `#{url}': #{err}" rescue => ex - $stderr.puts "#{ex.class} error ocurred with: `#{url}': #{ex.message}" + $stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}" ensure return @feeds end From 7f4241f5b4ff9295500836f353fa2efdcdc46856 Mon Sep 17 00:00:00 2001 From: David Moreno Date: Sun, 3 Aug 2014 12:58:56 +0200 Subject: [PATCH 11/24] finally fixed indentation --- feedbag.gemspec | 4 +- lib/feedbag.rb | 310 +++++++++++++++++++++---------------------- test/feedbag_test.rb | 4 +- 3 files changed, 159 insertions(+), 159 deletions(-) diff --git a/feedbag.gemspec b/feedbag.gemspec index 3a9fa2c..4193201 100644 --- a/feedbag.gemspec +++ b/feedbag.gemspec @@ -1,11 +1,11 @@ # -*- encoding: utf-8 -*- - + Gem::Specification.new do |s| s.name = %q{feedbag} s.version = "0.9.2" s.homepage = "http://github.com/damog/feedbag" s.rubyforge_project = "feedbag" - + s.authors = ["David Moreno", "Derek Willis"] s.date = %q{2013-12-07} s.description = %q{Ruby's favorite feed auto-discoverty tool} diff --git a/lib/feedbag.rb b/lib/feedbag.rb index 030306c..ce626e2 100644 --- a/lib/feedbag.rb +++ b/lib/feedbag.rb @@ -28,14 +28,14 @@ class Feedbag - CONTENT_TYPES = [ - 'application/x.atom+xml', - 'application/atom+xml', - 'application/xml', - 'text/xml', - 'application/rss+xml', - 'application/rdf+xml', - ].freeze + CONTENT_TYPES = [ + 'application/x.atom+xml', + 'application/atom+xml', + 'application/xml', + 'text/xml', + 'application/rss+xml', + 'application/rdf+xml', + ].freeze def self.feed?(url) new.feed?(url) @@ -49,160 +49,160 @@ def initialize @feeds = [] end - def feed?(url) - # use LWR::Simple.normalize some time - url_uri = URI.parse(url) - url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}" - url << "?#{url_uri.query}" if url_uri.query - - # hack: - url.sub!(/^feed:\/\//, 'http://') - - res = Feedbag.find(url) - if res.size == 1 and res.first == url - return true - else - return false - end - end - - def find(url, args = {}) - url_uri = URI.parse(url) - url = nil - if url_uri.scheme.nil? - url = "http://#{url_uri.to_s}" - elsif url_uri.scheme == "feed" - return self.add_feed(url_uri.to_s.sub(/^feed:\/\//, 'http://'), nil) - else - url = url_uri.to_s - end - #url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}" - - # check if feed_valid is avail + def feed?(url) + # use LWR::Simple.normalize some time + url_uri = URI.parse(url) + url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}" + url << "?#{url_uri.query}" if url_uri.query + + # hack: + url.sub!(/^feed:\/\//, 'http://') + + res = Feedbag.find(url) + if res.size == 1 and res.first == url + return true + else + return false + end + end + + def find(url, args = {}) + url_uri = URI.parse(url) + url = nil + if url_uri.scheme.nil? + url = "http://#{url_uri.to_s}" + elsif url_uri.scheme == "feed" + return self.add_feed(url_uri.to_s.sub(/^feed:\/\//, 'http://'), nil) + else + url = url_uri.to_s + end + #url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}" + + # check if feed_valid is avail begin - require "feed_validator" - v = W3C::FeedValidator.new - v.validate_url(url) - return self.add_feed(url, nil) if v.valid? - rescue LoadError - # scoo - rescue REXML::ParseException - # usually indicates timeout - # TODO: actually find out timeout. use Terminator? - # $stderr.puts "Feed looked like feed but might not have passed validation or timed out" + require "feed_validator" + v = W3C::FeedValidator.new + v.validate_url(url) + return self.add_feed(url, nil) if v.valid? + rescue LoadError + # scoo + rescue REXML::ParseException + # usually indicates timeout + # TODO: actually find out timeout. use Terminator? + # $stderr.puts "Feed looked like feed but might not have passed validation or timed out" rescue => ex - $stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}" - end - - begin - html = open(url) do |f| - content_type = f.content_type.downcase - if content_type == "application/octet-stream" # open failed - content_type = f.meta["content-type"].gsub(/;.*$/, '') - end - if CONTENT_TYPES.include?(content_type) - return self.add_feed(url, nil) - end - - doc = Nokogiri::HTML(f.read) - - if doc.at("base") and doc.at("base")["href"] - @base_uri = doc.at("base")["href"] - else - @base_uri = nil - end - - # first with links + $stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}" + end + + begin + html = open(url) do |f| + content_type = f.content_type.downcase + if content_type == "application/octet-stream" # open failed + content_type = f.meta["content-type"].gsub(/;.*$/, '') + end + if CONTENT_TYPES.include?(content_type) + return self.add_feed(url, nil) + end + + doc = Nokogiri::HTML(f.read) + + if doc.at("base") and doc.at("base")["href"] + @base_uri = doc.at("base")["href"] + else + @base_uri = nil + end + + # first with links (doc/"atom:link").each do |l| - next unless l["rel"] - if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and l["rel"].downcase == "self" - self.add_feed(l["href"], url, @base_uri) - end - end - - (doc/"link").each do |l| - next unless l["rel"] - if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and (l["rel"].downcase =~ /alternate/i or l["rel"] == "service.feed") - self.add_feed(l["href"], url, @base_uri) - end - end - - (doc/"a").each do |a| - next unless a["href"] - if self.looks_like_feed?(a["href"]) and (a["href"] =~ /\// or a["href"] =~ /#{url_uri.host}/) - self.add_feed(a["href"], url, @base_uri) - end - end - - (doc/"a").each do |a| - next unless a["href"] - if self.looks_like_feed?(a["href"]) - self.add_feed(a["href"], url, @base_uri) - end - end + next unless l["rel"] + if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and l["rel"].downcase == "self" + self.add_feed(l["href"], url, @base_uri) + end + end + + (doc/"link").each do |l| + next unless l["rel"] + if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and (l["rel"].downcase =~ /alternate/i or l["rel"] == "service.feed") + self.add_feed(l["href"], url, @base_uri) + end + end + + (doc/"a").each do |a| + next unless a["href"] + if self.looks_like_feed?(a["href"]) and (a["href"] =~ /\// or a["href"] =~ /#{url_uri.host}/) + self.add_feed(a["href"], url, @base_uri) + end + end + + (doc/"a").each do |a| + next unless a["href"] + if self.looks_like_feed?(a["href"]) + self.add_feed(a["href"], url, @base_uri) + end + end # Added support for feeds like http://tabtimes.com/tbfeed/mashable/full.xml if url.match(/.xml$/) and doc.root and doc.root["xml:base"] and doc.root["xml:base"].strip == url.strip - self.add_feed(url, nil) + self.add_feed(url, nil) end - end - rescue Timeout::Error => err - $stderr.puts "Timeout error occurred with `#{url}: #{err}'" - rescue OpenURI::HTTPError => the_error - $stderr.puts "Error occurred with `#{url}': #{the_error}" - rescue SocketError => err - $stderr.puts "Socket error occurred with: `#{url}': #{err}" - rescue => ex - $stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}" - ensure - return @feeds - end - - end - - def looks_like_feed?(url) - if url =~ /(\.(rdf|xml|rdf|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i - true - else - false - end - end - - def add_feed(feed_url, orig_url, base_uri = nil) - # puts "#{feed_url} - #{orig_url}" - url = feed_url.sub(/^feed:/, '').strip - - if base_uri - # url = base_uri + feed_url - url = URI.parse(base_uri).merge(feed_url).to_s - end - - begin - uri = URI.parse(url) - rescue - puts "Error with `#{url}'" - exit 1 - end - unless uri.absolute? - orig = URI.parse(orig_url) - url = orig.merge(url).to_s - end - - # verify url is really valid - @feeds.push(url) unless @feeds.include?(url)# if self._is_http_valid(URI.parse(url), orig_url) - end - - # not used. yet. - def _is_http_valid(uri, orig_url) - req = Net::HTTP.get_response(uri) - orig_uri = URI.parse(orig_url) - case req - when Net::HTTPSuccess then - return true - else - return false - end - end + end + rescue Timeout::Error => err + $stderr.puts "Timeout error occurred with `#{url}: #{err}'" + rescue OpenURI::HTTPError => the_error + $stderr.puts "Error occurred with `#{url}': #{the_error}" + rescue SocketError => err + $stderr.puts "Socket error occurred with: `#{url}': #{err}" + rescue => ex + $stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}" + ensure + return @feeds + end + + end + + def looks_like_feed?(url) + if url =~ /(\.(rdf|xml|rdf|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i + true + else + false + end + end + + def add_feed(feed_url, orig_url, base_uri = nil) + # puts "#{feed_url} - #{orig_url}" + url = feed_url.sub(/^feed:/, '').strip + + if base_uri + # url = base_uri + feed_url + url = URI.parse(base_uri).merge(feed_url).to_s + end + + begin + uri = URI.parse(url) + rescue + puts "Error with `#{url}'" + exit 1 + end + unless uri.absolute? + orig = URI.parse(orig_url) + url = orig.merge(url).to_s + end + + # verify url is really valid + @feeds.push(url) unless @feeds.include?(url)# if self._is_http_valid(URI.parse(url), orig_url) + end + + # not used. yet. + def _is_http_valid(uri, orig_url) + req = Net::HTTP.get_response(uri) + orig_uri = URI.parse(orig_url) + case req + when Net::HTTPSuccess then + return true + else + return false + end + end end if __FILE__ == $0 diff --git a/test/feedbag_test.rb b/test/feedbag_test.rb index acd409f..6a8486a 100644 --- a/test/feedbag_test.rb +++ b/test/feedbag_test.rb @@ -1,7 +1,7 @@ require 'test_helper' class FeedbagTest < Test::Unit::TestCase - + context "Feedbag.feed? should know that an RSS url is a feed" do setup do @rss_url = 'http://example.com/rss/' @@ -11,7 +11,7 @@ class FeedbagTest < Test::Unit::TestCase assert Feedbag.feed?(@rss_url) end end - + context "Feedbag.feed? should know that an RSS url with parameters is a feed" do setup do @rss_url = "http://example.com/data?format=rss" From 6bd7c7f2f3fe9dedcc7d856a937dc8d02bd95910 Mon Sep 17 00:00:00 2001 From: David Moreno Date: Sun, 3 Aug 2014 13:48:53 +0200 Subject: [PATCH 12/24] 0.9.3 --- ChangeLog | 3 +++ feedbag.gemspec | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 5872273..3b1babc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,6 @@ +* 0.9.3 - Sun Aug 3 13:48:01 CEST 2014 + - Rebuild of the gem to get rid of Hpricot. + * 0.9.2 - Sat Dec 7 18:32:17 CET 2013 - Up to date bits and pieces. diff --git a/feedbag.gemspec b/feedbag.gemspec index 4193201..31eabc9 100644 --- a/feedbag.gemspec +++ b/feedbag.gemspec @@ -2,7 +2,7 @@ Gem::Specification.new do |s| s.name = %q{feedbag} - s.version = "0.9.2" + s.version = "0.9.3" s.homepage = "http://github.com/damog/feedbag" s.rubyforge_project = "feedbag" From 1b5ea4147d45e261c1e3542a89e2dcb3ac0e5d02 Mon Sep 17 00:00:00 2001 From: David Moreno Date: Sun, 3 Aug 2014 13:56:25 +0200 Subject: [PATCH 13/24] adding gems --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1377554..f1b1301 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ *.swp +*.gem From 0c1e147e331257cb4acaf54fa5ef416a87ebccea Mon Sep 17 00:00:00 2001 From: David Moreno Date: Sun, 3 Aug 2014 13:56:47 +0200 Subject: [PATCH 14/24] This little project has been around for a while now :) --- COPYING | 2 +- lib/feedbag.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/COPYING b/COPYING index be5ea3f..acbbd20 100644 --- a/COPYING +++ b/COPYING @@ -1,4 +1,4 @@ -Copyright (C) 2012 David Moreno +Copyright (C) 2008-2014 David Moreno Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/lib/feedbag.rb b/lib/feedbag.rb index ce626e2..36b68c5 100644 --- a/lib/feedbag.rb +++ b/lib/feedbag.rb @@ -1,6 +1,6 @@ #!/usr/bin/ruby -# Copyright (c) 2012 David Moreno +# Copyright (c) 2008-2014 David Moreno # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the From eac192974dba2d1eef0bf706ca27ac8fc5628195 Mon Sep 17 00:00:00 2001 From: David Moreno Date: Sun, 3 Aug 2014 13:57:30 +0200 Subject: [PATCH 15/24] 0.9.4 --- ChangeLog | 5 +++++ feedbag.gemspec | 11 ++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3b1babc..b4cdb5e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ +* 0.9.4 - Sun Aug 3 13:51:09 CEST 2014 + - Enhancements to the spec. + - Updated years on license notes + * 0.9.3 - Sun Aug 3 13:48:01 CEST 2014 - Rebuild of the gem to get rid of Hpricot. + - Appropriate build on Nokogiri. * 0.9.2 - Sat Dec 7 18:32:17 CET 2013 - Up to date bits and pieces. diff --git a/feedbag.gemspec b/feedbag.gemspec index 31eabc9..c32f898 100644 --- a/feedbag.gemspec +++ b/feedbag.gemspec @@ -2,12 +2,13 @@ Gem::Specification.new do |s| s.name = %q{feedbag} - s.version = "0.9.3" + s.version = "0.9.4" s.homepage = "http://github.com/damog/feedbag" s.rubyforge_project = "feedbag" + s.licenses = ["MIT"] s.authors = ["David Moreno", "Derek Willis"] - s.date = %q{2013-12-07} + s.date = %q{2014-08-03} s.description = %q{Ruby's favorite feed auto-discoverty tool} s.email = %q{david@axiombox.com} s.extra_rdoc_files = ["README.markdown", "COPYING"] @@ -15,9 +16,9 @@ Gem::Specification.new do |s| s.has_rdoc = true s.rdoc_options = ["--main", "README.markdown"] s.summary = %q{Ruby's favorite feed auto-discovery tool} - s.add_dependency("nokogiri") - s.add_development_dependency "shoulda" - s.add_development_dependency "mocha", "~> 0.12.0" + s.add_dependency('nokogiri', '~> 0') + s.add_development_dependency 'shoulda', '~> 0' + s.add_development_dependency 'mocha', '~> 0.12', '>= 0.12.0' s.bindir = 'bin' s.default_executable = %q{feedbag} s.executables = ["feedbag"] From e5dd173aed498e3e80aa5adf0b051b29c97cb628 Mon Sep 17 00:00:00 2001 From: David Moreno Date: Tue, 12 Aug 2014 12:29:30 +0200 Subject: [PATCH 16/24] superfeedr proposal #1 --- README.markdown | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/README.markdown b/README.markdown index 9def1f4..937f5fe 100644 --- a/README.markdown +++ b/README.markdown @@ -5,22 +5,22 @@ Feedbag is Ruby's favorite auto-discovery tool/library! ### Quick synopsis - >> require "feedbag" - => true - >> Feedbag.find "damog.net/blog" + >> require "feedbag" + => true + >> Feedbag.find "damog.net/blog" => ["http://damog.net/blog/index.rss", "http://damog.net/blog/tags/feed", "http://damog.net/blog/tags/rfeed"] >> Feedbag.feed? "perl.org" => false - >> Feedbag.feed?("http://jobs.perl.org/rss/standard.rss") + >> Feedbag.feed?("http://jobs.perl.org/rss/standard.rss") => true ### Installation - $ gem install feedbag + $ gem install feedbag Or just grab feedbag.rb and use it on your own project: - $ wget http://github.com/damog/feedbag/raw/master/lib/feedbag.rb + $ wget http://github.com/damog/feedbag/raw/master/lib/feedbag.rb You can also use the command line tool for quick queries, if you install the gem: @@ -39,7 +39,12 @@ You can also use the command line tool for quick queries, if you install the gem [David Moreno](http://damog.net/) <[david@axiombox.com](mailto:david@axiombox.com)>. +### Donations + +![Superfeedr](http://damog.net/files/misc/superfeedr_150.png) + +[Superfeedr](http://superfeedr.com) has kindly financially [supported](https://github.com/damog/feedbag/issues/9) the development of Feedbag. + ### Copyright This is free software. See [COPYING](http://github.com/damog/feedbag/master/COPYING) for more information. - From bbabf00b8f5dedcfc9fb7a830541c5afb357ce36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matic=20Jurgli=C4=8D?= Date: Wed, 15 Oct 2014 21:37:53 +0200 Subject: [PATCH 17/24] fix nokogiri dependency version --- feedbag.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/feedbag.gemspec b/feedbag.gemspec index c32f898..9990a05 100644 --- a/feedbag.gemspec +++ b/feedbag.gemspec @@ -16,7 +16,7 @@ Gem::Specification.new do |s| s.has_rdoc = true s.rdoc_options = ["--main", "README.markdown"] s.summary = %q{Ruby's favorite feed auto-discovery tool} - s.add_dependency('nokogiri', '~> 0') + s.add_dependency('nokogiri', '~> 1.0') s.add_development_dependency 'shoulda', '~> 0' s.add_development_dependency 'mocha', '~> 0.12', '>= 0.12.0' s.bindir = 'bin' From a73e98bd522f5a4acd1278f65b2f4e8db1425960 Mon Sep 17 00:00:00 2001 From: David Moreno Date: Thu, 16 Oct 2014 14:14:16 +0200 Subject: [PATCH 18/24] 0.9.5 --- feedbag.gemspec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/feedbag.gemspec b/feedbag.gemspec index 9990a05..d52c05f 100644 --- a/feedbag.gemspec +++ b/feedbag.gemspec @@ -2,13 +2,13 @@ Gem::Specification.new do |s| s.name = %q{feedbag} - s.version = "0.9.4" + s.version = "0.9.5" s.homepage = "http://github.com/damog/feedbag" s.rubyforge_project = "feedbag" s.licenses = ["MIT"] s.authors = ["David Moreno", "Derek Willis"] - s.date = %q{2014-08-03} + s.date = %q{2014-10-16} s.description = %q{Ruby's favorite feed auto-discoverty tool} s.email = %q{david@axiombox.com} s.extra_rdoc_files = ["README.markdown", "COPYING"] From c6762187946128438ed3c47eb671685b3585ef22 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 6 Dec 2014 20:06:51 +0100 Subject: [PATCH 19/24] Remove redundant 'rdf' in looks_like_feed? --- lib/feedbag.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/feedbag.rb b/lib/feedbag.rb index 36b68c5..aee72da 100644 --- a/lib/feedbag.rb +++ b/lib/feedbag.rb @@ -161,7 +161,7 @@ def find(url, args = {}) end def looks_like_feed?(url) - if url =~ /(\.(rdf|xml|rdf|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i + if url =~ /(\.(rdf|xml|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i true else false From bc76dba9d79da3e2400b4a230e105798a03fd94c Mon Sep 17 00:00:00 2001 From: David Moreno Date: Mon, 16 Nov 2015 10:44:15 +0100 Subject: [PATCH 20/24] Fixed broken link --- README.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.markdown b/README.markdown index 937f5fe..eab7bed 100644 --- a/README.markdown +++ b/README.markdown @@ -47,4 +47,4 @@ You can also use the command line tool for quick queries, if you install the gem ### Copyright -This is free software. See [COPYING](http://github.com/damog/feedbag/master/COPYING) for more information. +This is free software. See [COPYING](https://raw.githubusercontent.com/damog/feedbag/master/COPYING) for more information. From a35e130254c549a977c8e45b49301ca5b62bf79b Mon Sep 17 00:00:00 2001 From: Nurahmadie Date: Sun, 7 Feb 2016 03:44:48 +0700 Subject: [PATCH 21/24] Fix tests --- test/feedbag_test.rb | 2 +- test/test_helper.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/feedbag_test.rb b/test/feedbag_test.rb index 6a8486a..b0f294f 100644 --- a/test/feedbag_test.rb +++ b/test/feedbag_test.rb @@ -1,6 +1,6 @@ require 'test_helper' -class FeedbagTest < Test::Unit::TestCase +class FeedbagTest < MiniTest::Unit::TestCase context "Feedbag.feed? should know that an RSS url is a feed" do setup do diff --git a/test/test_helper.rb b/test/test_helper.rb index 6f6a2d8..87befa3 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,8 +1,8 @@ require 'rubygems' -require 'test/unit' +require 'minitest/autorun' require 'shoulda' require 'mocha/setup' $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) $LOAD_PATH.unshift(File.dirname(__FILE__)) -require 'feedbag' \ No newline at end of file +require 'feedbag' From 24e0ef4d7bbe48b6abc344aede7348284b022f35 Mon Sep 17 00:00:00 2001 From: Nurahmadie Date: Sun, 7 Feb 2016 04:38:50 +0700 Subject: [PATCH 22/24] Read html as binary first, then convert to utf8 explicitly --- lib/feedbag.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/feedbag.rb b/lib/feedbag.rb index 4a2cf02..9ba8912 100644 --- a/lib/feedbag.rb +++ b/lib/feedbag.rb @@ -99,7 +99,7 @@ def find(url, args = {}) begin user_agent = ENV["FEEDBAG_UA"] || USER_AGENT - html = open(url, "User-Agent" => user_agent, :allow_redirections => :safe) do |f| + html = open(url, 'r:binary', "User-Agent" => user_agent, :allow_redirections => :safe) do |f| content_type = f.content_type.downcase if content_type == "application/octet-stream" # open failed content_type = f.meta["content-type"].gsub(/;.*$/, '') @@ -108,7 +108,7 @@ def find(url, args = {}) return self.add_feed(url, nil) end - doc = Nokogiri::HTML(f.read) + doc = Nokogiri::HTML.parse(f.read.toutf8, nil, 'UTF-8') if doc.at("base") and doc.at("base")["href"] @base_uri = doc.at("base")["href"] From 44cd9f8b2e83c7988ce50563de1c68ae24dc691d Mon Sep 17 00:00:00 2001 From: Nurahmadie Date: Sun, 7 Feb 2016 04:46:33 +0700 Subject: [PATCH 23/24] Require kconv --- lib/feedbag.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/feedbag.rb b/lib/feedbag.rb index 9ba8912..7e3deaf 100644 --- a/lib/feedbag.rb +++ b/lib/feedbag.rb @@ -26,6 +26,7 @@ require "open-uri" require "open_uri_redirections" require "net/http" +require "kconv" USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.66 Safari/537.36" From 1b15db941d58d29a6c274b50170eb22b6c4c218f Mon Sep 17 00:00:00 2001 From: Nurahmadie Date: Sun, 7 Feb 2016 05:02:04 +0700 Subject: [PATCH 24/24] Update readme --- README.markdown | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/README.markdown b/README.markdown index eab7bed..c4c9de2 100644 --- a/README.markdown +++ b/README.markdown @@ -1,7 +1,7 @@ Feedbag ======= -Feedbag is Ruby's favorite auto-discovery tool/library! +Yet another fork of Feedbag, Ruby's favorite auto-discovery tool/library! ### Quick synopsis @@ -35,15 +35,16 @@ You can also use the command line tool for quick queries, if you install the gem - Because it's a single file you can embed easily in your application. - Because it's faster than rfeedfinder. -### Author +### Why the fork? -[David Moreno](http://damog.net/) <[david@axiombox.com](mailto:david@axiombox.com)>. +This fork introduce one new dependency to support HTTP redirection flow. Another enhancement such as: -### Donations +- Support for custom user agent, just set your preferred user agent from environment variable: `FEEDBAG_UA` +- Better encoding conversion, specifically for Japanese sites with non utf8 encoding. -![Superfeedr](http://damog.net/files/misc/superfeedr_150.png) +### Original Author -[Superfeedr](http://superfeedr.com) has kindly financially [supported](https://github.com/damog/feedbag/issues/9) the development of Feedbag. +[David Moreno](http://damog.net/) <[david@axiombox.com](mailto:david@axiombox.com)>. ### Copyright