Class | FeedTools::Feed |
In: |
lib/feed_tools/feed.rb
|
Parent: | Object |
The FeedTools::Feed class represents a web feed's structure.
Passes missing class methods on to FeedTools.feed_cache.
# File lib/feed_tools/feed.rb, line 2884
# Forwards unknown class-level messages to the configured feed cache.
#
# msg    - name of the missing method.
# params - the arguments the caller supplied.
#
# Returns whatever the cache returns; a result that is an instance of the
# cache class is replaced by the Feed opened from its url.
# Raises NoMethodError when no feed cache is configured.
def Feed.method_missing(msg, *params)
  if FeedTools.feed_cache.nil?
    raise NoMethodError, "Invalid method Feed.#{msg.to_s}"
  end
  # Splat the arguments back out: sending `params` itself would give the
  # cache a single Array argument no matter what the caller passed.
  result = FeedTools.feed_cache.send(msg, *params)
  if result.kind_of? FeedTools.feed_cache
    result = Feed.open(result.url)
  end
  return result
end
Initialize the feed object
# File lib/feed_tools/feed.rb, line 36
# Builds an empty feed: every piece of parsed or cached state starts out
# unset, the data type defaults to :xml, the feed is not live, and the
# library version is recorded in @version.
def initialize
  super
  # Ordered name/default pairs; an Array keeps the assignment order stable.
  [
    [:@cache_object, nil],
    [:@http_headers, nil],
    [:@xml_document, nil],
    [:@feed_data, nil],
    [:@feed_data_type, :xml],
    [:@root_node, nil],
    [:@channel_node, nil],
    [:@href, nil],
    [:@id, nil],
    [:@title, nil],
    [:@subtitle, nil],
    [:@link, nil],
    [:@last_retrieved, nil],
    [:@time_to_live, nil],
    [:@entries, nil],
    [:@live, false],
    [:@encoding, nil],
    [:@options, nil]
  ].each do |ivar_name, default_value|
    instance_variable_set(ivar_name, default_value)
  end
  @version = FeedTools::FEED_TOOLS_VERSION::STRING
end
Loads the feed specified by the url, pulling the data from the cache if it hasn't expired. Options supplied will override the default options.
# File lib/feed_tools/feed.rb, line 97
# Loads the feed at +href+, preferring an unexpired serialized copy from the
# feed cache when serialization is enabled.
#
# href    - location of the feed; normalized before use.
# options - per-call overrides merged over FeedTools.configurations. The keys
#           are validated against the known configuration keys.
#
# Returns the deserialized cached feed when one is usable, otherwise a new
# Feed (updated from the remote source unless :disable_update_from_remote
# is set).
# Raises ArgumentError when a :feed_cache is configured but
# FeedTools.feed_cache is nil.
def Feed.open(href, options={})
  FeedTools::GenericHelper.validate_options(
    FeedTools.configurations.keys, options.keys)

  # clean up the url
  href = FeedTools::UriHelper.normalize_url(href)

  feed_configurations = FeedTools.configurations.merge(options)
  cache_object = nil
  deserialized_feed = nil

  if feed_configurations[:feed_cache] != nil && FeedTools.feed_cache.nil?
    raise(ArgumentError, "There is currently no caching mechanism set. " +
      "Cannot retrieve cached feeds.")
  elsif feed_configurations[:serialization_enabled] == true &&
      !FeedTools.feed_cache.nil?
    # We've got a caching mechanism available. The nil check above keeps
    # "serialization enabled, but no cache configured" from failing with a
    # NoMethodError on nil; that case now simply builds a fresh feed.
    cache_object = FeedTools.feed_cache.find_by_href(href)
    begin
      if cache_object != nil && cache_object.serialized != nil
        # If we've got a cache hit, deserialize
        if cache_object.time_to_live == nil
          cache_object.time_to_live =
            feed_configurations[:default_ttl].to_i
          cache_object.save
        end
        # Same rule as Feed#expired?: a time to live below 30 minutes is
        # treated as 30 minutes.
        if (cache_object.last_retrieved == nil)
          expired = true
        elsif (cache_object.time_to_live < 30.minutes)
          expired =
            (cache_object.last_retrieved + 30.minutes) < Time.now.gmtime
        else
          expired =
            (cache_object.last_retrieved + cache_object.time_to_live) <
              Time.now.gmtime
        end
        if !expired
          require 'yaml'
          # SECURITY NOTE(review): YAML.load can instantiate arbitrary
          # objects, so the cache contents must be trusted.
          deserialized_feed = YAML.load(cache_object.serialized)
          deserialized_feed.cache_object = cache_object
          Thread.pass
        end
      end
    rescue StandardError, ScriptError
      # Best effort: an unreadable cache entry falls back to a fresh feed.
      # Interrupt, SystemExit and similar are deliberately not caught.
      # Drop anything half-restored so a partial (or false) value is never
      # returned to the caller.
      deserialized_feed = nil
    end
  end

  if deserialized_feed == nil
    # create the new feed
    feed = FeedTools::Feed.new

    feed.configurations = feed_configurations

    # load the new feed
    feed.href = href
    if cache_object != nil
      feed.cache_object = cache_object
    end
    feed.update! unless feed.configurations[:disable_update_from_remote]
    Thread.pass

    return feed
  else
    return deserialized_feed
  end
end
Syntactic sugar for appending feed items to a feed.
# File lib/feed_tools/feed.rb, line 2471
# Appends a single item to the feed's entries.
#
# new_entry - the FeedTools::FeedItem to add.
#
# Returns the entries array.
# Raises ArgumentError when new_entry is not a FeedItem.
def <<(new_entry)
  # The array is created before validation, exactly as callers may rely on.
  @entries = [] unless @entries
  if new_entry.kind_of?(FeedTools::FeedItem)
    @entries << new_entry
  else
    raise ArgumentError,
      "You should only add FeedItem objects to the entries array."
  end
end
Returns the feed author
# File lib/feed_tools/feed.rb, line 1608
# Returns the feed author as a FeedTools::Author, parsing it from the
# channel node on first access and memoizing the result in @author.
#
# The author element's text is kept in Author#raw and then picked apart into
# name and email; child elements and attributes (name, email, url/uri/href)
# fill in whatever the text did not provide. Blank fields are normalised to
# nil, and the itunes author is used as a last resort for the name.
def author
  if @author.nil?
    @author = FeedTools::Author.new
    author_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
      "atom10:author",
      "atom03:author",
      "atom:author",
      "author",
      "managingEditor",
      "dc:author",
      "dc:creator"
    ])
    unless author_node.nil?
      @author.raw = FeedTools::XmlHelper.try_xpaths(
        author_node, ["text()"], :select_result_value => true)
      @author.raw = FeedTools::HtmlHelper.unescape_entities(@author.raw)
      unless @author.raw.nil?
        # First shape tried: "Name (email@host)".
        # NOTE(review): inside the character class, `%-\+` forms a range
        # from '%' to '+' rather than three literal characters -- confirm
        # this is intended.
        raw_scan = @author.raw.scan(
          /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
        if raw_scan.nil? || raw_scan.size == 0
          # Second shape: "email@host (Name)"; reversed so the pair is
          # always [name, email].
          raw_scan = @author.raw.scan(
            /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
          unless raw_scan.size == 0
            author_raw_pair = raw_scan.first.reverse
          end
        else
          author_raw_pair = raw_scan.first
        end
        if raw_scan.nil? || raw_scan.size == 0
          # Neither shape matched: look for a bare email address.
          email_scan = @author.raw.scan(
            /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
          if email_scan != nil && email_scan.size > 0
            @author.email = email_scan.first.strip
          end
        end
        unless author_raw_pair.nil? || author_raw_pair.size == 0
          @author.name = author_raw_pair.first.strip
          @author.email = author_raw_pair.last.strip
        else
          unless @author.raw.include?("@")
            # We can be reasonably sure we are looking at something
            # that the creator didn't intend to contain an email address
            # if it got through the preceding regexes and it doesn't
            # contain the tell-tale '@' symbol.
            @author.name = @author.raw
          end
        end
      end
      # Structured fallbacks: child elements / attributes of the author node.
      if @author.name.blank?
        @author.name = FeedTools::HtmlHelper.unescape_entities(
          FeedTools::XmlHelper.try_xpaths(author_node, [
            "atom10:name/text()",
            "atom03:name/text()",
            "atom:name/text()",
            "name/text()",
            "@name"
          ], :select_result_value => true)
        )
      end
      if @author.email.blank?
        @author.email = FeedTools::HtmlHelper.unescape_entities(
          FeedTools::XmlHelper.try_xpaths(author_node, [
            "atom10:email/text()",
            "atom03:email/text()",
            "atom:email/text()",
            "email/text()",
            "@email"
          ], :select_result_value => true)
        )
      end
      if @author.url.blank?
        # NOTE(review): "@href" appears twice in this list; the second
        # occurrence looks redundant.
        @author.url = FeedTools::HtmlHelper.unescape_entities(
          FeedTools::XmlHelper.try_xpaths(author_node, [
            "atom10:url/text()",
            "atom03:url/text()",
            "atom:url/text()",
            "url/text()",
            "atom10:uri/text()",
            "atom03:uri/text()",
            "atom:uri/text()",
            "uri/text()",
            "@href",
            "@uri",
            "@href"
          ], :select_result_value => true)
        )
      end
      # With an email but still no name, try to pull the name out of the
      # raw text surrounding that email.
      # NOTE(review): the email is interpolated into the regex unescaped, so
      # characters such as '.' or '+' in it act as regex metacharacters.
      if @author.name.blank? && !@author.raw.blank? &&
          !@author.email.blank?
        name_scan = @author.raw.scan(
          /"?([^"]*)"? ?[\(<].*#{@author.email}.*[\)>].*/)
        if name_scan.flatten.size == 1
          @author.name = name_scan.flatten[0].strip
        end
        if @author.name.blank?
          name_scan = @author.raw.scan(
            /.*#{@author.email} ?[\(<]"?([^"]*)"?[\)>].*/)
          if name_scan.flatten.size == 1
            @author.name = name_scan.flatten[0].strip
          end
        end
      end
      # Normalise blank values to nil.
      @author.name = nil if @author.name.blank?
      @author.raw = nil if @author.raw.blank?
      @author.email = nil if @author.email.blank?
      @author.url = nil if @author.url.blank?
      if @author.url != nil
        begin
          # Resolve relative author urls against the node's base uri, then
          # the feed's. Failures are ignored and the url is left as found.
          if !(@author.url =~ /^file:/) &&
              !FeedTools::UriHelper.is_uri?(@author.url)
            @author.url = FeedTools::UriHelper.resolve_relative_uri(
              @author.url, [author_node.base_uri, self.base_uri])
          end
        rescue
        end
      end
      # Drop the "(author unknown)" placeholder when the node carries
      # gr:unknown-author="true".
      if FeedTools::XmlHelper.try_xpaths(author_node,
          ["@gr:unknown-author"], :select_result_value => true) == "true"
        if @author.name == "(author unknown)"
          @author.name = nil
        end
      end
    end
    # Fallback on the itunes module if we didn't find an author name
    begin
      @author.name = self.itunes_author if @author.name.nil?
    rescue
      @author.name = nil
    end
  end
  return @author
end
Sets the feed author
# File lib/feed_tools/feed.rb, line 1742
# Sets the feed author.
#
# new_author - either an author-like object (anything responding to name,
#              email and url), which replaces the current author outright,
#              or any other value (typically a String), which becomes the
#              name of the current author.
def author=(new_author)
  author_like = [:name, :email, :url].all? do |message|
    new_author.respond_to?(message)
  end

  if author_like
    # It's a complete author object, just set it.
    @author = new_author
  else
    # Probably a string: create an author if needed and set its name.
    @author = FeedTools::Author.new if @author.nil?
    @author.name = new_author
  end
end
Returns the base uri for the feed, used for resolving relative paths
# File lib/feed_tools/feed.rb, line 1487 1487: def base_uri 1488: if @base_uri.nil? 1489: @base_uri = FeedTools::XmlHelper.try_xpaths(self.channel_node, [ 1490: "@base", 1491: "base/@href", 1492: "base/text()", 1493: "@xml:base" 1494: ], :select_result_value => true) 1495: if @base_uri.blank? 1496: begin 1497: @base_uri = 1498: FeedTools::GenericHelper.recursion_trap(:feed_base_uri) do 1499: self.href 1500: end 1501: rescue Exception 1502: end 1503: end 1504: if @base_uri.blank? 1505: @base_uri = FeedTools::XmlHelper.try_xpaths(self.root_node, [ 1506: "@xml:base" 1507: ], :select_result_value => true) 1508: end 1509: if !@base_uri.blank? 1510: @base_uri = FeedTools::UriHelper.normalize_url(@base_uri) 1511: end 1512: if !@base_uri.blank? 1513: parsed_uri = FeedTools::URI.parse(@base_uri) 1514: # Feedburner is almost never the base uri that was intended 1515: # Use the actual site instead 1516: if parsed_uri.host =~ /feedburner/ 1517: site_uri = 1518: FeedTools::GenericHelper.recursion_trap(:feed_base_uri) do 1519: FeedTools::UriHelper.normalize_url(self.link) 1520: end 1521: @base_uri = site_uri if !site_uri.blank? 1522: end 1523: end 1524: end 1525: return @base_uri 1526: end
Sets the base uri for the feed
# File lib/feed_tools/feed.rb, line 1529
# Overrides the base uri used for resolving relative paths.
#
# new_base_uri - the value to store; it is kept as given.
def base_uri=(new_base_uri)
  @base_uri = new_base_uri
end
Generates xml based on the content of the feed
# File lib/feed_tools/feed.rb, line 2550
# Generates xml based on the content of the feed.
#
# feed_type    - "rss" or "atom"; defaults to the feed's own type, or "atom"
#                when that is nil.
# feed_version - numeric version; nil means "use the feed's own version",
#                and a nil/non-positive version for rss or atom becomes 1.0.
# xml_builder  - the Builder::XmlMarkup (or compatible) object to write to.
#
# Supported outputs: RDF-based RSS (0.9, 1.0, 1.1), RSS 2.0 (any other rss
# version) and Atom 1.0.
#
# Returns the result of the outermost builder call.
# Raises StandardError when the feed denies redistribution, and
# RuntimeError for Atom 0.3, unsupported formats, a missing feed id, or an
# RDF item without a link.
def build_xml(feed_type=(self.feed_type or "atom"), feed_version=nil,
    xml_builder=Builder::XmlMarkup.new(
      :indent => 2, :escape_attrs => false))

  # Honour the feed's own redistribution restrictions before doing any work.
  if self.find_node("access:restriction/@relationship").to_s == "deny"
    raise StandardError,
      "Operation not permitted.  This feed denies redistribution."
  elsif self.find_node("@indexing:index").to_s == "no"
    raise StandardError,
      "Operation not permitted.  This feed denies redistribution."
  end

  self.full_parse()

  xml_builder.instruct! :xml, :version => "1.0",
    :encoding => (self.configurations[:output_encoding] or "utf-8")
  if feed_type.nil?
    feed_type = self.feed_type
  end
  if feed_version.nil?
    feed_version = self.feed_version
  end
  if feed_type == "rss" &&
      (feed_version == nil || feed_version <= 0.0)
    feed_version = 1.0
  elsif feed_type == "atom" &&
      (feed_version == nil || feed_version <= 0.0)
    feed_version = 1.0
  end
  if feed_type == "rss" &&
      (feed_version == 0.9 || feed_version == 1.0 || feed_version == 1.1)
    # RDF-based rss format
    return xml_builder.tag!("rdf:RDF",
        "xmlns" => FEED_TOOLS_NAMESPACES['rss10'],
        "xmlns:content" => FEED_TOOLS_NAMESPACES['content'],
        "xmlns:rdf" => FEED_TOOLS_NAMESPACES['rdf'],
        "xmlns:dc" => FEED_TOOLS_NAMESPACES['dc'],
        "xmlns:syn" => FEED_TOOLS_NAMESPACES['syn'],
        "xmlns:admin" => FEED_TOOLS_NAMESPACES['admin'],
        "xmlns:taxo" => FEED_TOOLS_NAMESPACES['taxo'],
        "xmlns:itunes" => FEED_TOOLS_NAMESPACES['itunes'],
        "xmlns:media" => FEED_TOOLS_NAMESPACES['media']) do
      channel_attributes = {}
      unless self.link.nil?
        channel_attributes["rdf:about"] =
          FeedTools::HtmlHelper.escape_entities(self.link)
      end
      xml_builder.channel(channel_attributes) do
        # title, link and description are always emitted, empty if blank.
        unless self.title.blank?
          xml_builder.title(
            FeedTools::HtmlHelper.strip_html_tags(self.title))
        else
          xml_builder.title
        end
        unless self.link.blank?
          xml_builder.link(self.link)
        else
          xml_builder.link
        end
        unless images.blank?
          xml_builder.image("rdf:resource" =>
            FeedTools::HtmlHelper.escape_entities(
              images.first.url))
        end
        unless description.nil? || description == ""
          xml_builder.description(description)
        else
          xml_builder.description
        end
        unless self.language.blank?
          xml_builder.tag!("dc:language", self.language)
        end
        unless self.rights.blank?
          xml_builder.tag!("dc:rights", self.rights)
        end
        # Syndication hints: the time to live expressed in whole hours.
        xml_builder.tag!("syn:updatePeriod", "hourly")
        xml_builder.tag!("syn:updateFrequency",
          (self.time_to_live / 1.hour).to_s)
        xml_builder.tag!("syn:updateBase", Time.mktime(1970).iso8601)
        xml_builder.items do
          xml_builder.tag!("rdf:Seq") do
            unless items.nil?
              for item in items
                if item.link.nil?
                  raise "Cannot generate an rdf-based feed with a nil " +
                    "item link field."
                end
                xml_builder.tag!("rdf:li", "rdf:resource" =>
                  FeedTools::HtmlHelper.escape_entities(item.link))
              end
            end
          end
        end
        xml_builder.tag!(
          "admin:generatorAgent",
          "rdf:resource" => self.configurations[:generator_href])
        build_xml_hook(feed_type, feed_version, xml_builder)
      end
      unless self.images.blank?
        # Prefer the first image that has a link; otherwise the first image.
        best_image = nil
        for image in self.images
          if image.link != nil
            best_image = image
            break
          end
        end
        best_image = self.images.first if best_image.nil?
        xml_builder.image("rdf:about" =>
            FeedTools::HtmlHelper.escape_entities(best_image.url)) do
          if !best_image.title.blank?
            xml_builder.title(best_image.title)
          elsif !self.title.blank?
            xml_builder.title(self.title)
          else
            xml_builder.title
          end
          unless best_image.url.blank?
            xml_builder.url(best_image.url)
          end
          if !best_image.link.blank?
            xml_builder.link(best_image.link)
          elsif !self.link.blank?
            xml_builder.link(self.link)
          else
            xml_builder.link
          end
        end
      end
      # In the RDF formats the items follow the channel as siblings.
      unless items.nil?
        for item in items
          item.build_xml(feed_type, feed_version, xml_builder)
        end
      end
    end
  elsif feed_type == "rss"
    # normal rss format
    return xml_builder.rss("version" => "2.0",
        "xmlns:content" => FEED_TOOLS_NAMESPACES['content'],
        "xmlns:rdf" => FEED_TOOLS_NAMESPACES['rdf'],
        "xmlns:dc" => FEED_TOOLS_NAMESPACES['dc'],
        "xmlns:taxo" => FEED_TOOLS_NAMESPACES['taxo'],
        "xmlns:trackback" => FEED_TOOLS_NAMESPACES['trackback'],
        "xmlns:itunes" => FEED_TOOLS_NAMESPACES['itunes'],
        "xmlns:media" => FEED_TOOLS_NAMESPACES['media']) do
      xml_builder.channel do
        unless self.title.blank?
          xml_builder.title(
            FeedTools::HtmlHelper.strip_html_tags(self.title))
        end
        unless self.link.blank?
          xml_builder.link(link)
        end
        # description is always emitted, empty if blank.
        unless self.description.blank?
          xml_builder.description(description)
        else
          xml_builder.description
        end
        unless self.author.email.blank?
          xml_builder.managingEditor(self.author.email)
        end
        unless self.publisher.email.blank?
          xml_builder.webMaster(self.publisher.email)
        end
        unless self.published.blank?
          xml_builder.pubDate(self.published.rfc822)
        end
        unless self.updated.blank?
          xml_builder.lastBuildDate(self.updated.rfc822)
        end
        unless self.copyright.blank?
          xml_builder.copyright(self.copyright)
        end
        unless self.language.blank?
          xml_builder.language(self.language)
        end
        # ttl is written in minutes.
        xml_builder.ttl((time_to_live / 1.minute).to_s)
        xml_builder.generator(
          self.configurations[:generator_href])
        build_xml_hook(feed_type, feed_version, xml_builder)
        unless items.nil?
          for item in items
            item.build_xml(feed_type, feed_version, xml_builder)
          end
        end
      end
    end
  elsif feed_type == "atom" && feed_version == 0.3
    raise "Atom 0.3 is obsolete."
  elsif feed_type == "atom" && feed_version == 1.0
    # normal atom format
    return xml_builder.feed("xmlns" => FEED_TOOLS_NAMESPACES['atom10'],
        "xml:lang" => language) do
      unless title.blank?
        xml_builder.title(title,
            "type" => "html")
      end
      # An author element is always written; "n/a" stands in for a missing
      # name.
      xml_builder.author do
        unless self.author.nil? || self.author.name.nil?
          xml_builder.name(self.author.name)
        else
          xml_builder.name("n/a")
        end
        unless self.author.nil? || self.author.email.nil?
          xml_builder.email(self.author.email)
        end
        unless self.author.nil? || self.author.url.nil?
          xml_builder.uri(self.author.url)
        end
      end
      unless self.href.blank?
        xml_builder.link("href" => self.href,
            "rel" => "self",
            "type" => "application/atom+xml")
      end
      unless self.link.blank?
        xml_builder.link(
          "href" =>
            FeedTools::HtmlHelper.escape_entities(self.link),
          "rel" => "alternate")
      end
      unless self.subtitle.blank?
        xml_builder.subtitle(self.subtitle,
            "type" => "html")
      end
      if self.updated != nil
        xml_builder.updated(self.updated.iso8601)
      elsif self.time != nil
        # Not technically correct, but a heck of a lot better
        # than the Time.now fall-back.
        xml_builder.updated(self.time.iso8601)
      else
        xml_builder.updated(Time.now.gmtime.iso8601)
      end
      unless self.rights.blank?
        xml_builder.rights(self.rights)
      end
      xml_builder.generator(self.configurations[:generator_name] +
        " - " + self.configurations[:generator_href])
      # The id must be a URI: use the feed id when it is one, otherwise
      # derive a urn from the link (or, failing that, the url).
      if self.id != nil
        unless FeedTools::UriHelper.is_uri? self.id
          if self.link != nil
            xml_builder.id(FeedTools::UriHelper.build_urn_uri(self.link))
          else
            raise "The unique id must be a valid URI."
          end
        else
          xml_builder.id(self.id)
        end
      elsif self.link != nil
        xml_builder.id(FeedTools::UriHelper.build_urn_uri(self.link))
      elsif self.url != nil
        xml_builder.id(FeedTools::UriHelper.build_urn_uri(self.url))
      else
        raise "Cannot build feed, missing feed unique id."
      end
      build_xml_hook(feed_type, feed_version, xml_builder)
      unless items.nil?
        for item in items
          item.build_xml(feed_type, feed_version, xml_builder)
        end
      end
    end
  else
    raise "Unsupported feed format/version."
  end
end
A hook method that is called during the feed generation process. Overriding this method will enable additional content to be inserted into the feed.
# File lib/feed_tools/feed.rb, line 2545
# Extension point invoked by build_xml while a feed is being generated.
# The default implementation adds nothing; override it to emit extra
# content through xml_builder.
#
# feed_type   - the feed type being generated.
# version     - the feed version being generated.
# xml_builder - the builder the feed is being written to.
#
# Returns nil.
def build_xml_hook(feed_type, version, xml_builder)
  nil
end
The cache object that handles the feed persistence.
# File lib/feed_tools/feed.rb, line 787
# Returns the cache object that handles persistence for this feed.
#
# Feeds loaded from file:// urls are never cached, so nil is returned for
# them. Otherwise, when a feed cache is configured and no cache object has
# been assigned yet, the cache is asked for the record matching @href and a
# new record is created if none is found. Lookup failures are reported with
# a warning and otherwise ignored.
def cache_object
  return nil if !@href.nil? && @href =~ /^file:\/\//

  cache = FeedTools.feed_cache
  return @cache_object if cache.nil? || !@cache_object.nil?

  begin
    unless @href.nil?
      begin
        @cache_object = cache.find_by_href(@href)
      rescue RuntimeError => error
        if error.message =~ /sorry, too many clients already/
          warn("There are too many connections to the database open.")
        end
        raise error
      rescue => error
        warn("The feed cache seems to be having trouble with the " +
          "find_by_href method.  This may cause unexpected results.")
        raise error
      end
    end
    @cache_object = cache.new if @cache_object.nil?
  rescue
    # Best effort: whatever went wrong, carry on without a cache object.
  end
  @cache_object
end
Sets the cache object for this feed.
This can be any object, but it must accept the following messages: href href= title title= link link= feed_data feed_data= feed_data_type feed_data_type= etag etag= last_modified last_modified= save
# File lib/feed_tools/feed.rb, line 838
# Assigns the cache object that handles persistence for this feed.
#
# new_cache_object - the object to use; it is stored as given.
def cache_object=(new_cache_object)
  @cache_object = new_cache_object
end
Returns a list of the feed's categories
# File lib/feed_tools/feed.rb, line 1965
# Returns the feed's categories as an array of FeedTools::Category,
# built from the channel's category and dc:subject nodes on first access
# and memoized in @categories.
def categories
  return @categories unless @categories.nil?

  @categories = []
  category_nodes =
    FeedTools::XmlHelper.try_xpaths_all(self.channel_node, [
      "category",
      "dc:subject"
    ])
  return @categories if category_nodes.nil?

  category_nodes.each do |category_node|
    category = FeedTools::Category.new

    # term: the @term attribute, falling back to the node text.
    category.term = FeedTools::XmlHelper.try_xpaths(category_node, [
      "@term",
      "text()"
    ], :select_result_value => true)
    category.term.strip! unless category.term.blank?

    category.label = FeedTools::XmlHelper.try_xpaths(
      category_node, ["@label"],
      :select_result_value => true)
    category.label.strip! unless category.label.blank?

    # scheme: the @scheme attribute, falling back to @domain.
    category.scheme = FeedTools::XmlHelper.try_xpaths(category_node, [
      "@scheme",
      "@domain"
    ], :select_result_value => true)
    category.scheme.strip! unless category.scheme.blank?

    @categories << category
  end
  @categories
end
Returns the channel node of the feed.
# File lib/feed_tools/feed.rb, line 771
# Returns the channel node of the feed: the first channel-like child of the
# root node, or the root node itself when there is none. The result is
# memoized in @channel_node; nil is returned while there is no root node.
def channel_node
  if @channel_node.nil? && !self.root_node.nil?
    located = FeedTools::XmlHelper.try_xpaths(
      self.root_node, %w[channel CHANNEL feedinfo news])
    @channel_node = located.nil? ? self.root_node : located
  end
  @channel_node
end
Returns the feed's cloud
# File lib/feed_tools/feed.rb, line 2223
# Returns the feed's cloud as a FeedTools::Cloud, read from the attributes
# of the channel's cloud element on first access and memoized in @cloud.
#
# The protocol is lower-cased and the port is converted to an Integer; a
# missing or zero port becomes nil.
def cloud
  if @cloud.nil?
    @cloud = FeedTools::Cloud.new
    @cloud.domain = FeedTools::XmlHelper.try_xpaths(
      self.channel_node, ["cloud/@domain"],
      :select_result_value => true)
    @cloud.port = FeedTools::XmlHelper.try_xpaths(
      self.channel_node, ["cloud/@port"],
      :select_result_value => true)
    @cloud.path = FeedTools::XmlHelper.try_xpaths(
      self.channel_node, ["cloud/@path"],
      :select_result_value => true)
    @cloud.register_procedure =
      FeedTools::XmlHelper.try_xpaths(
        self.channel_node, ["cloud/@registerProcedure"],
        :select_result_value => true)
    @cloud.protocol =
      FeedTools::XmlHelper.try_xpaths(
        self.channel_node, ["cloud/@protocol"],
        :select_result_value => true)
    # String#downcase returns a new string, so the result has to be
    # assigned back; calling it on its own left the protocol unchanged.
    unless @cloud.protocol.nil?
      @cloud.protocol = @cloud.protocol.downcase
    end
    @cloud.port = @cloud.port.to_s.to_i
    @cloud.port = nil if @cloud.port == 0
  end
  return @cloud
end
Returns the load options for this feed.
# File lib/feed_tools/feed.rb, line 165
# Returns the load options for this feed. When none have been set (or they
# are blank), a copy of the global FeedTools configurations is taken and
# kept.
def configurations
  @configurations = FeedTools.configurations.dup if @configurations.blank?
  @configurations
end
Sets the load options for this feed.
# File lib/feed_tools/feed.rb, line 173
# Replaces the load options for this feed.
#
# new_configurations - the options to use; stored as given.
def configurations=(new_configurations)
  @configurations = new_configurations
end
Breaks any references that the feed may be keeping around, thus making the job of the garbage collector much, much easier. Call this method prior to feeds going out of scope to prevent memory leaks.
# File lib/feed_tools/feed.rb, line 62
# Breaks the references held by the feed and its entries, then triggers a
# garbage collection run. Call it before a feed goes out of scope.
#
# Returns self.
def dispose()
  # Detach every entry from its xml node and its feed.
  self.entries.each do |entry|
    %w[@root_node @feed @parent_feed].each do |ivar_name|
      entry.instance_variable_set(ivar_name, nil)
    end
    entry.dispose if entry.respond_to?(:dispose)
  end
  self.entries = []

  # Reset the feed's own state.
  %w[
    @cache_object @http_headers @xml_document @feed_data @feed_data_type
    @root_node @channel_node @href @id @title @subtitle @link
    @last_retrieved @time_to_live @entries
  ].each do |ivar_name|
    instance_variable_set(ivar_name, nil)
  end
  @live = false
  @encoding = nil
  @options = nil

  GC.start()
  self
end
Returns the feed docs
# File lib/feed_tools/feed.rb, line 2308
# Returns the feed docs url, read from the channel's docs element on first
# access and memoized in @docs. Values that are neither file: urls nor
# uris are resolved against the channel's and the feed's base uri, and the
# result is normalized when :url_normalization_enabled is set.
def docs
  return @docs unless @docs.nil?

  @docs = FeedTools::XmlHelper.try_xpaths(
    self.channel_node, ["docs/text()"],
    :select_result_value => true)
  begin
    unless @docs =~ /^file:/ || FeedTools::UriHelper.is_uri?(@docs)
      channel_base_uri = nil
      channel_base_uri = self.channel_node.base_uri unless self.channel_node.nil?
      @docs = FeedTools::UriHelper.resolve_relative_uri(
        @docs, [channel_base_uri, self.base_uri])
    end
  rescue
    # Resolution is best effort; keep the value as found.
  end
  if self.configurations[:url_normalization_enabled]
    @docs = FeedTools::UriHelper.normalize_url(@docs)
  end
  @docs
end
Returns the encoding that the feed was parsed with
# File lib/feed_tools/feed.rb, line 547
# Returns the encoding that the feed was parsed with. The charset from the
# content-type HTTP header wins (lower-cased); without one, the encoding
# detected from the feed data is used. The result is kept in @encoding.
def encoding
  return @encoding unless @encoding.blank?

  headers = self.http_headers
  if !headers.blank? && headers['content-type'] =~ /charset=([\w\d-]+)/
    @encoding = $1.downcase
  else
    @encoding = self.encoding_from_feed_data
  end
  @encoding
end
Returns the encoding of feed calculated only from the xml data. I.e., the encoding we would come up with if we ignore RFC 3023.
# File lib/feed_tools/feed.rb, line 564
# Returns the encoding of the feed calculated only from the xml data, i.e.
# the encoding we would come up with if we ignore RFC 3023. The result is
# memoized in @encoding_from_feed_data.
#
# Detection order: the encoding attribute of the xml declaration, then the
# encoding REXML reports, then a sniff of the first four bytes, and finally
# "utf-8". Returns nil (without memoizing) when there is no feed data.
def encoding_from_feed_data
  if @encoding_from_feed_data.blank?
    raw_data = self.feed_data
    return nil if raw_data.nil?
    # NOTE(review): only double-quoted encoding attributes are matched here.
    encoding_from_xml_instruct =
      raw_data.scan(
        /^<\?xml [^>]*encoding="([^\"]*)"[^>]*\?>/
      ).flatten.first
    unless encoding_from_xml_instruct.blank?
      encoding_from_xml_instruct.downcase!
    end
    if encoding_from_xml_instruct.blank?
      # NOTE(review): REXML::Document.new is not wrapped in a rescue here,
      # so a parse error would propagate to the caller.
      doc = REXML::Document.new(raw_data)
      encoding_from_xml_instruct = doc.encoding.downcase
      if encoding_from_xml_instruct == "utf-8"
        # REXML has a tendency to report utf-8 overzealously, take with
        # grain of salt
        encoding_from_xml_instruct = nil
      end
    else
      @encoding_from_feed_data = encoding_from_xml_instruct
    end
    if encoding_from_xml_instruct.blank?
      # Nothing declared: guess from the first four bytes of the data.
      sniff_table = {
        "Lo\247\224" => "ebcdic-cp-us",
        "<?xm" => "utf-8"
      }
      sniff = self.feed_data[0..3]
      if sniff_table[sniff] != nil
        @encoding_from_feed_data = sniff_table[sniff].downcase
      end
    else
      @encoding_from_feed_data = encoding_from_xml_instruct
    end
    if @encoding_from_feed_data.blank?
      # Safest assumption
      @encoding_from_feed_data = "utf-8"
    end
  end
  return @encoding_from_feed_data
end
Returns the feed entries
# File lib/feed_tools/feed.rb, line 2394
# Returns the feed entries.
#
# On first access the entry/item nodes are located (atom entries on the
# channel, then items or entries on the root, then items on the channel;
# the first non-blank result wins) and wrapped in FeedItem objects, which
# are stored in @entries.
#
# The entries are then ordered on every call according to the
# :entry_sorting_property configuration: "time" sorts newest first and
# stores the result, any other non-nil value sorts by that property and
# stores the result, and nil returns a reversed copy.
def entries
  if @entries.nil?
    rss_item_paths = [
      "rss10:item",
      "rss11:items/rss11:item",
      "rss11:items/item",
      "items/rss11:item",
      "items/item",
      "item"
    ]
    atom_entry_paths = [
      "atom10:entry",
      "atom03:entry",
      "atom:entry",
      "entry"
    ]
    raw_entries = FeedTools::XmlHelper.select_not_blank([
      FeedTools::XmlHelper.try_xpaths_all(
        self.channel_node, atom_entry_paths),
      FeedTools::XmlHelper.try_xpaths_all(
        self.root_node, rss_item_paths + atom_entry_paths + ["story"]),
      FeedTools::XmlHelper.try_xpaths_all(
        self.channel_node, rss_item_paths + ["story"])
    ])

    # create the individual feed items
    @entries = []
    unless raw_entries.blank?
      raw_entries.reverse.each do |entry_node|
        new_entry = FeedItem.new
        new_entry.feed_data = entry_node.to_s
        new_entry.feed_data_type = self.feed_data_type
        new_entry.root_node = entry_node
        # Entries without a namespace inherit the root node's.
        if new_entry.root_node.namespace.blank?
          new_entry.root_node.add_namespace(self.root_node.namespace)
        end
        @entries << new_entry
      end
    end
  end

  # Sort the items
  if self.configurations[:entry_sorting_property] == "time"
    @entries = @entries.sort do |a, b|
      (b.time || Time.utc(1970)) <=> (a.time || Time.utc(1970))
    end
  elsif self.configurations[:entry_sorting_property] != nil
    sorting_property = self.configurations[:entry_sorting_property]
    # SECURITY NOTE(review): the configured property is evaluated as Ruby
    # code, so this configuration value must never come from untrusted
    # input.
    @entries = @entries.sort do |a, b|
      eval("a.#{sorting_property}") <=> eval("b.#{sorting_property}")
    end
  else
    return @entries.reverse
  end
  return @entries
end
Sets the entries array to a new array.
# File lib/feed_tools/feed.rb, line 2460
# Replaces the entries array.
#
# new_entries - a collection of FeedTools::FeedItem objects.
#
# Raises ArgumentError, leaving the current entries untouched, as soon as
# an element that is not a FeedItem is found.
def entries=(new_entries)
  new_entries.each do |candidate|
    next if candidate.kind_of?(FeedTools::FeedItem)
    raise ArgumentError,
      "You should only add FeedItem objects to the entries array."
  end
  @entries = new_entries
end
Forces this feed to expire.
# File lib/feed_tools/feed.rb, line 2537
# Forces this feed to expire by moving its retrieval time back to the
# start of 1970, then saves the feed.
#
# Returns the result of save.
def expire!
  long_ago = Time.mktime(1970).gmtime
  self.last_retrieved = long_ago
  self.save
end
True if the feed has expired and must be reacquired from the remote server.
# File lib/feed_tools/feed.rb, line 2526
# True if the feed has expired and must be reacquired from the remote
# server. A feed that was never retrieved is always expired; otherwise it
# expires once its time to live (never less than 30 minutes) has passed
# since the last retrieval.
def expired?
  retrieved_at = self.last_retrieved
  return true if retrieved_at == nil

  lifetime = self.time_to_live
  lifetime = 30.minutes if lifetime < 30.minutes
  return (retrieved_at + lifetime) < Time.now.gmtime
end
Sets whether or not the feed contains explicit material
# File lib/feed_tools/feed.rb, line 2389
# Sets whether or not the feed contains explicit material.
#
# new_explicit - any value; it is reduced to true or false by truthiness.
def explicit=(new_explicit)
  @explicit = !!new_explicit
end
Returns true if this feed contains explicit material.
# File lib/feed_tools/feed.rb, line 2373
# Returns true if this feed contains explicit material, i.e. when the
# channel's media:adult or itunes:explicit text is "true" or "yes".
# The answer is memoized in @explicit.
def explicit?
  if @explicit.nil?
    flag_text = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
      "media:adult/text()",
      "itunes:explicit/text()"
    ], :select_result_value => true)
    @explicit = (flag_text == "true" || flag_text == "yes")
  end
  @explicit
end
Returns the favicon url for this feed. This method first tries to use the url from the link field instead of the feed url, in order to avoid grabbing the favicon for services like feedburner.
# File lib/feed_tools/feed.rb, line 1575
# Returns the favicon url for this feed, or nil when none can be derived.
#
# The site link is tried first and the feed url second, so that services
# such as feedburner do not supply the icon. The feed url is now also
# consulted when the link is blank (previously that fallback was only
# reachable when a link was present), and https sites are supported
# alongside http ones. A successful result is memoized in @favicon.
def favicon
  if @favicon.nil?
    [:link, :href].each do |source|
      # Read lazily so href is only consulted if the link gave nothing.
      candidate = self.send(source)
      next if candidate.blank?
      begin
        candidate_uri = URI.parse(
          FeedTools::UriHelper.normalize_url(candidate))
        if candidate_uri.scheme == "http" || candidate_uri.scheme == "https"
          # Concatenation (not interpolation) on purpose: a nil host raises
          # here and is handled below instead of yielding a bogus url.
          @favicon =
            candidate_uri.scheme + "://" + candidate_uri.host + "/favicon.ico"
        end
      rescue
        @favicon = nil
      end
      break unless @favicon.nil?
    end
  end
  return @favicon
end
Returns the feed's raw data.
# File lib/feed_tools/feed.rb, line 607
# Returns the feed's raw data, falling back to the copy held by the cache
# object when none has been set on the feed itself.
def feed_data
  if @feed_data.nil?
    cache = self.cache_object
    @feed_data = cache.feed_data unless cache.nil?
  end
  @feed_data
end
Sets the feed's data.
# File lib/feed_tools/feed.rb, line 617 617: def feed_data=(new_feed_data) 618: for var in self.instance_variables 619: self.instance_variable_set(var, nil) 620: end 621: @http_headers = {} 622: @feed_data = new_feed_data 623: unless self.cache_object.nil? 624: self.cache_object.feed_data = new_feed_data 625: end 626: ugly_redirect = FeedTools::XmlHelper.try_xpaths(self.xml_document, [ 627: "redirect/newLocation/text()" 628: ], :select_result_value => true) 629: if !ugly_redirect.blank? 630: for var in self.instance_variables 631: self.instance_variable_set(var, nil) 632: end 633: @http_headers = {} 634: @feed_data = nil 635: self.href = ugly_redirect 636: if FeedTools.feed_cache.nil? 637: self.cache_object = nil 638: else 639: begin 640: self.cache_object = 641: FeedTools.feed_cache.find_by_href(ugly_redirect) 642: rescue RuntimeError => error 643: if error.message =~ /sorry, too many clients already/ 644: warn("There are too many connections to the database open.") 645: end 646: raise error 647: end 648: end 649: self.update! 650: end 651: 652: # Get these things parsed in the correct order to avoid the retardedly 653: # painful corecursion issues. 654: self.href 655: @links = nil 656: @link = nil 657: self.links 658: self.link 659: end
Returns the data type of the feed. Possible values: :xml (currently the only supported type).
# File lib/feed_tools/feed.rb, line 689
#
# Returns the data type of the feed, defaulting to :xml when unset.
def feed_data_type
  # Right now, nothing else is supported
  @feed_data_type = :xml if @feed_data_type.nil?
  @feed_data_type
end
Sets the feed's data type.
# File lib/feed_tools/feed.rb, line 698
#
# Stores the feed's data type, mirrors it to the cache object when there is
# one, and drops the parsed xml document if the type is no longer :xml.
def feed_data_type=(new_feed_data_type)
  @feed_data_type = new_feed_data_type
  if !self.cache_object.nil?
    self.cache_object.feed_data_type = new_feed_data_type
  end
  @xml_document = nil unless self.feed_data_type == :xml
end
Returns the feed's raw data as UTF-8.
# File lib/feed_tools/feed.rb, line 662
#
# Returns the raw feed data converted to utf-8.
#
# force_encoding:: source encoding to convert from; when nil, the feed's
#                  own encoding is used.
#
# The raw data is returned unconverted when the source encoding is nil or
# "utf-8", when there is no data, or when the conversion fails. Only a
# successful conversion is remembered.
def feed_data_utf_8(force_encoding=nil)
  return @feed_data_utf_8 unless @feed_data_utf_8.nil?

  raw_data = self.feed_data
  use_encoding = force_encoding.nil? ? self.encoding : force_encoding

  unless use_encoding != "utf-8" && use_encoding != nil && raw_data != nil
    return self.feed_data
  end

  begin
    @feed_data_utf_8 = Iconv.new('utf-8', use_encoding).iconv(raw_data)
  rescue Exception
    return raw_data
  end
  return @feed_data_utf_8
end
Returns the type of feed. Possible values: "rss", "atom", "cdf", "!okay/news"
# File lib/feed_tools/feed.rb, line 845
#
# Returns the feed type, derived from the lowercased name of the root node:
# "feed" is atom; "rdf:rdf", "rdf" and "rss" are rss; "channel" is rss when
# it lives in the rss11 namespace and cdf otherwise. Returns nil when there
# is no root node or the root name is not recognized.
def feed_type
  if @feed_type.nil?
    return nil if self.root_node.nil?
    case self.root_node.name.downcase
    when "feed"
      @feed_type = "atom"
    when "rdf:rdf", "rdf", "rss"
      @feed_type = "rss"
    when "channel"
      in_rss11 =
        (self.root_node.namespace == FEED_TOOLS_NAMESPACES['rss11'])
      @feed_type = in_rss11 ? "rss" : "cdf"
    end
  end
  return @feed_type
end
Sets the default feed type
# File lib/feed_tools/feed.rb, line 871
#
# Stores the given value as the feed type.
def feed_type=(new_feed_type)
  @feed_type = new_feed_type
end
Returns the version number of the feed type. Intentionally does not differentiate between the Netscape and Userland versions of RSS 0.91.
# File lib/feed_tools/feed.rb, line 878
#
# Returns the version number of the feed format as a float, or nil when it
# cannot be determined (including when there is no root node).
#
# The default namespace of the root node takes precedence for rss; the
# root node's version attribute is used otherwise. RSS versions 2.1 and
# 2.01 are reported as 2.0, and cdf is always reported as 0.4.
def feed_version
  return @feed_version unless @feed_version.nil?
  return nil if self.root_node.nil?

  version = nil
  begin
    version_string = FeedTools::XmlHelper.try_xpaths(
      self.root_node, ["@version"], :select_result_value => true)
    version = version_string.to_f unless version_string.nil?
  rescue
  end
  # to_f yields 0.0 for text that is not a number
  version = nil if version == 0.0

  default_namespace = FeedTools::XmlHelper.try_xpaths(
    self.root_node, ["@xmlns"], :select_result_value => true)

  case self.feed_type
  when "atom"
    if default_namespace == FEED_TOOLS_NAMESPACES['atom10']
      @feed_version = 1.0
    elsif version != nil
      @feed_version = version
    elsif default_namespace == FEED_TOOLS_NAMESPACES['atom03']
      @feed_version = 0.3
    end
  when "rss"
    if default_namespace == FEED_TOOLS_NAMESPACES['rss09']
      @feed_version = 0.9
    elsif default_namespace == FEED_TOOLS_NAMESPACES['rss10']
      @feed_version = 1.0
    elsif default_namespace == FEED_TOOLS_NAMESPACES['rss11']
      @feed_version = 1.1
    elsif version != nil
      case version
      when 2.1, 2.01
        @feed_version = 2.0
      else
        @feed_version = version
      end
    end
  when "cdf"
    @feed_version = 0.4
  when "!okay/news"
    @feed_version = 1.0
  end
  return @feed_version
end
Sets the default feed version
# File lib/feed_tools/feed.rb, line 933
#
# Stores the given value as the feed version.
def feed_version=(new_feed_version)
  @feed_version = new_feed_version
end
Returns all nodes within the channel_node that match the xpath query.
# File lib/feed_tools/feed.rb, line 742
#
# Returns every match of the xpath query, evaluated against the channel
# node.
#
# xpath::               the xpath expression to evaluate
# select_result_value:: forwarded to the xml helper as
#                       :select_result_value
#
# Raises a RuntimeError when the feed data type is not :xml.
def find_all_nodes(xpath, select_result_value=false)
  unless self.feed_data_type == :xml
    raise "The feed data type is not xml."
  end
  FeedTools::XmlHelper.try_xpaths_all(
    self.channel_node, [xpath],
    :select_result_value => select_result_value)
end
Returns the first node within the channel_node that matches the xpath query.
# File lib/feed_tools/feed.rb, line 733
#
# Returns the first match of the xpath query, evaluated against the channel
# node.
#
# xpath::               the xpath expression to evaluate
# select_result_value:: forwarded to the xml helper as
#                       :select_result_value
#
# Raises a RuntimeError when the feed data type is not :xml.
def find_node(xpath, select_result_value=false)
  unless self.feed_data_type == :xml
    raise "The feed data type is not xml."
  end
  FeedTools::XmlHelper.try_xpaths(
    self.channel_node, [xpath],
    :select_result_value => select_result_value)
end
Does a full parse of the feed.
# File lib/feed_tools/feed.rb, line 435
#
# Does a full parse of the feed by calling every accessor listed below in
# order, then asking each entry to do its own full parse. Always returns
# nil.
def full_parse
  accessors = [
    :href,
    :cache_object,
    :http_headers, :encoding, :feed_data_utf_8,
    :xml_document, :root_node, :channel_node,
    :base_uri, :feed_type, :feed_version,
    :entries,
    :id, :title, :subtitle, :links, :link, :icon, :favicon,
    :author, :publisher, :time, :updated, :published,
    :categories, :images, :rights, :time_to_live,
    :generator, :language,
    :docs, :text_input, :cloud,
    :itunes_summary, :itunes_subtitle, :itunes_author,
    :media_text,
    :explicit?
  ]
  accessors.each do |accessor|
    self.send(accessor)
  end

  self.entries.each do |entry|
    entry.full_parse()
  end

  nil
end
Returns the feed generator
# File lib/feed_tools/feed.rb, line 2284
#
# Returns the text of the channel's generator element converted from html
# to plain text, or nil when the element is missing.
def generator
  if @generator.nil?
    @generator = FeedTools::XmlHelper.try_xpaths(
      self.channel_node, ["generator/text()"],
      :select_result_value => true)
    if !@generator.nil?
      @generator =
        FeedTools::HtmlHelper.convert_html_to_plain_text(@generator)
    end
  end
  @generator
end
Sets the feed generator
Note: Setting this variable will NOT cause this to appear in any generated output. The generator string is created from the :generator_name and :generator_href configuration variables.
# File lib/feed_tools/feed.rb, line 2303
#
# Stores the given value as the feed generator.
def generator=(new_generator)
  @generator = new_generator
end
Returns the feed url.
# File lib/feed_tools/feed.rb, line 966
#
# Returns the feed url.
#
# While the url has not been marked as overridden (or is nil), and the
# current url is missing or not an http/https url, the method tries to
# discover a better one from the feed data: first from a rel="self" link,
# then from a list of xpaths on the channel node. A discovered url that is
# blank or equal to the feed link is discarded in favour of the original
# value.
def href
  if @href_overridden != true || @href.nil?
    original_href = @href

    # Decides whether the given url should be replaced by one taken from
    # the feed data.
    override_href = lambda do |current_href|
      begin
        if current_href.nil?
          # No url yet: override only if there is feed data to go on.
          self.feed_data != nil
        elsif !(["http", "https"].include?(
            URI.parse(current_href.to_s).scheme))
          # The url is set but is not http/https: override once feed data
          # is available, otherwise wait until it is.
          self.feed_data != nil
        else
          # An http/https url is already set; nothing compels a change.
          false
        end
      rescue
        # Something went wrong, so err on the side of attempting to
        # override the url.
        true
      end
    end

    if override_href.call(@href) && self.feed_data != nil
      begin
        links = FeedTools::GenericHelper.recursion_trap(:feed_href) do
          self.links
        end
        link = FeedTools::GenericHelper.recursion_trap(:feed_href) do
          self.link
        end
        if links != nil
          links.each do |link_object|
            next unless link_object.rel == 'self'
            if link_object.href != link ||
                (link_object.href =~ /xml/ ||
                link_object.href =~ /atom/ ||
                link_object.href =~ /feed/)
              @href = link_object.href
              @href_overridden = true
              @links = nil
              @link = nil
              return @href
            end
          end
        end
      rescue Exception
      end
      @links = nil
      @link = nil

      # rdf:about is ordered last because many feeds put the url of the
      # feed inside it instead of a link to the blog. If the result turns
      # out to be the same as the blog link, it is reset to the original
      # value further down.
      @href = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
        "admin:feed/@rdf:resource",
        "admin:feed/@resource",
        "feed/@rdf:resource",
        "feed/@resource",
        "@rdf:about",
        "@about",
        "newLocation/text()",
        "atom10:link[@rel='self']/@href"
      ], :select_result_value => true) do |result|
        override_href.call(FeedTools::UriHelper.normalize_url(result))
      end
      begin
        unless (@href =~ /^file:/) ||
            FeedTools::UriHelper.is_uri?(@href)
          @href = FeedTools::UriHelper.resolve_relative_uri(
            @href, [self.base_uri])
        end
      rescue
      end
      if self.configurations[:url_normalization_enabled]
        @href = FeedTools::UriHelper.normalize_url(@href)
      end
      @href.strip! unless @href.nil?
      @href = nil if @href.blank?

      @href_overridden = true
      if @href == nil
        @href = original_href
        @href_overridden = false
      end
      if @href == self.link
        @href = original_href
        @href_overridden = false
      end
      if @href_overridden == true
        # The url changed, so links derived from the old one are stale.
        @links = nil
        @link = nil
      end
    end
  end
  return @href
end
Sets the feed url and prepares the cache_object if necessary.
# File lib/feed_tools/feed.rb, line 1076
#
# Sets the feed url. The normalized url is kept on the feed; the cache
# object, when present, receives the url exactly as it was passed in.
def href=(new_href)
  @href = FeedTools::UriHelper.normalize_url(new_href)
  if !self.cache_object.nil?
    self.cache_object.href = new_href
  end
end
Returns a hash of the http headers from the response.
# File lib/feed_tools/feed.rb, line 534
#
# Returns a hash of the http headers from the response. When none are held
# locally they are loaded from the YAML stored on the cache object; anything
# that does not deserialize to a Hash is replaced by an empty hash.
def http_headers
  return @http_headers unless @http_headers.blank?

  if self.cache_object.nil? || self.cache_object.http_headers.nil?
    @http_headers = {}
  else
    @http_headers = YAML.load(self.cache_object.http_headers)
    @http_headers = {} unless @http_headers.kind_of? Hash
  end
  return @http_headers
end
Returns the relevant information from an http request.
# File lib/feed_tools/feed.rb, line 529
#
# Returns the stored http response object (nil if none has been stored).
def http_response
  @http_response
end
Returns the url to the icon file for this feed.
# File lib/feed_tools/feed.rb, line 1534
#
# Returns the url of the icon for this feed, or nil.
#
# The first matching icon-like node under the channel node supplies the
# url; relative urls are resolved against the channel and feed base uris,
# and anything that is still not a uri afterwards is discarded.
def icon
  return @icon unless @icon.nil?

  icon_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
    "link[@rel='icon']",
    "link[@rel='shortcut icon']",
    "link[@type='image/x-icon']",
    "icon",
    "logo[@style='icon']",
    "LOGO[@STYLE='ICON']"
  ])
  if !icon_node.nil?
    @icon = FeedTools::XmlHelper.try_xpaths(icon_node, [
      "@atom10:href",
      "@atom03:href",
      "@atom:href",
      "@href",
      "text()"
    ], :select_result_value => true)
    begin
      unless (@icon =~ /^file:/) || FeedTools::UriHelper.is_uri?(@icon)
        channel_base_uri = nil
        if !self.channel_node.nil?
          channel_base_uri = self.channel_node.base_uri
        end
        @icon = FeedTools::UriHelper.resolve_relative_uri(
          @icon, [channel_base_uri, self.base_uri])
      end
    rescue
    end
    @icon = nil unless FeedTools::UriHelper.is_uri?(@icon)
    @icon = nil if @icon.blank?
  end
  return @icon
end
Returns the feed's unique id
# File lib/feed_tools/feed.rb, line 938
#
# Returns the feed's unique id: the first non-blank id/guid text found
# under the channel node, then under the root node.
def id
  if @id.nil?
    # The same xpath list is tried against both nodes.
    id_lookup = lambda do |node|
      FeedTools::XmlHelper.try_xpaths(node, [
        "atom10:id/text()",
        "atom03:id/text()",
        "atom:id/text()",
        "id/text()",
        "guid/text()"
      ], :select_result_value => true)
    end
    @id = FeedTools::XmlHelper.select_not_blank([
      id_lookup.call(self.channel_node),
      id_lookup.call(self.root_node)
    ])
  end
  return @id
end
Returns a list of the feed's images
# File lib/feed_tools/feed.rb, line 1998
#
# Returns the list of FeedTools::Image objects for the feed.
#
# Images come from the image-like nodes under the channel node (nodes
# whose url ends up blank are skipped) followed by every feed link whose
# type starts with "image".
def images
  if @images.nil?
    @images = []
    value_of = lambda do |node, xpaths|
      FeedTools::XmlHelper.try_xpaths(
        node, xpaths, :select_result_value => true)
    end

    image_nodes = FeedTools::XmlHelper.combine_xpaths_all(
      self.channel_node, [
        "image",
        "logo",
        "apple-wallpapers:image",
        "imageUrl"
      ]
    )
    unless image_nodes.blank?
      image_nodes.each do |image_node|
        image = FeedTools::Image.new
        image.href = value_of.call(image_node, [
          "url/text()",
          "@rdf:resource",
          "@href",
          "text()"
        ])
        # An empty url can still be resolved against the node's base uri.
        if image.href.nil? && image_node.base_uri != nil
          image.href = ""
        end
        begin
          unless (image.href =~ /^file:/) ||
              FeedTools::UriHelper.is_uri?(image.href)
            image.href = FeedTools::UriHelper.resolve_relative_uri(
              image.href, [image_node.base_uri, self.base_uri])
          end
        rescue
        end
        if self.configurations[:url_normalization_enabled]
          image.href = FeedTools::UriHelper.normalize_url(image.href)
        end
        image.href.strip! unless image.href.nil?
        next if image.href.blank?

        image.title = value_of.call(image_node, ["title/text()"])
        image.title.strip! unless image.title.nil?
        image.description =
          value_of.call(image_node, ["description/text()"])
        image.description.strip! unless image.description.nil?
        image.link = value_of.call(image_node, ["link/text()"])
        image.link.strip! unless image.link.nil?

        # Non-positive dimensions (including unparsable text) become nil.
        image.height = value_of.call(image_node, ["height/text()"]).to_i
        image.height = nil if image.height <= 0
        image.width = value_of.call(image_node, ["width/text()"]).to_i
        image.width = nil if image.width <= 0

        image.style = value_of.call(image_node, [
          "style/text()",
          "@style"
        ])
        image.style.strip! unless image.style.nil?
        image.style.downcase! unless image.style.nil?
        @images << image unless image.href.nil?
      end
    end

    self.links.each do |link_object|
      if link_object.type != nil && link_object.type =~ /^image/
        image = FeedTools::Image.new
        image.href = link_object.href
        image.title = link_object.title
        @images << image unless image.href.nil?
      end
    end
  end
  return @images
end
Returns a simple representation of the feed object's state.
# File lib/feed_tools/feed.rb, line 2896
#
# Returns a short description made of the hexadecimal object id and the
# feed url.
def inspect
  hex_object_id = self.object_id.to_s(16)
  "#<FeedTools::Feed:0x#{hex_object_id} URL:#{self.href}>"
end
Returns the contents of the itunes:author element
Returns any incorrectly placed channel-level itunes:author elements. They're actually amazingly common. People don't read specs. There is no setter for this, since this is an incorrectly placed attribute.
# File lib/feed_tools/feed.rb, line 1846
#
# Returns the entity-unescaped text of the channel-level itunes:author
# element, or nil when it is blank.
def itunes_author
  if @itunes_author.nil?
    raw_author = FeedTools::XmlHelper.try_xpaths(
      self.channel_node, ["itunes:author/text()"],
      :select_result_value => true)
    @itunes_author = FeedTools::HtmlHelper.unescape_entities(raw_author)
    @itunes_author = nil if @itunes_author.blank?
  end
  @itunes_author
end
Returns the contents of the itunes:subtitle element
# File lib/feed_tools/feed.rb, line 1188
#
# Returns the itunes:subtitle text, looked up under the channel node and
# then the root node. The text is entity-unescaped, sanitized and stripped;
# nil is returned when nothing non-blank is found.
def itunes_subtitle
  return @itunes_subtitle unless @itunes_subtitle.nil?

  @itunes_subtitle = FeedTools::XmlHelper.select_not_blank([
    FeedTools::XmlHelper.try_xpaths(
      self.channel_node, ["itunes:subtitle/text()"],
      :select_result_value => true),
    FeedTools::XmlHelper.try_xpaths(
      self.root_node, ["itunes:subtitle/text()"],
      :select_result_value => true)
  ])
  if @itunes_subtitle.blank?
    @itunes_subtitle = nil
  else
    @itunes_subtitle =
      FeedTools::HtmlHelper.unescape_entities(@itunes_subtitle)
    @itunes_subtitle =
      FeedTools::HtmlHelper.sanitize_html(@itunes_subtitle)
    @itunes_subtitle.strip!
  end
  return @itunes_subtitle
end
Returns the contents of the itunes:summary element
# File lib/feed_tools/feed.rb, line 1159
#
# Returns the itunes:summary text, looked up under the channel node and
# then the root node. The text is entity-unescaped, sanitized and stripped;
# nil is returned when nothing non-blank is found.
def itunes_summary
  return @itunes_summary unless @itunes_summary.nil?

  @itunes_summary = FeedTools::XmlHelper.select_not_blank([
    FeedTools::XmlHelper.try_xpaths(
      self.channel_node, ["itunes:summary/text()"],
      :select_result_value => true),
    FeedTools::XmlHelper.try_xpaths(
      self.root_node, ["itunes:summary/text()"],
      :select_result_value => true)
  ])
  if @itunes_summary.blank?
    @itunes_summary = nil
  else
    @itunes_summary =
      FeedTools::HtmlHelper.unescape_entities(@itunes_summary)
    @itunes_summary =
      FeedTools::HtmlHelper.sanitize_html(@itunes_summary)
    @itunes_summary.strip!
  end
  return @itunes_summary
end
Sets the contents of the itunes:summary element
# File lib/feed_tools/feed.rb, line 1183
#
# Stores the given value as the itunes:summary contents.
def itunes_summary=(new_itunes_summary)
  @itunes_summary = new_itunes_summary
end
Returns the feed language
# File lib/feed_tools/feed.rb, line 2338
#
# Returns the feed language, defaulting to "en-us" when none is declared.
#
# Underscores are replaced by dashes and the value is lowercased; when it
# has more than one dash-separated part, the result is the first part, a
# dash, and the upcased last part (so the default is returned as "en-US").
def language
  return @language unless @language.nil?

  @language = FeedTools::XmlHelper.select_not_blank([
    FeedTools::XmlHelper.try_xpaths(self.channel_node, [
      "language/text()",
      "dc:language/text()",
      "@dc:language",
      "@xml:lang",
      "xml:lang/text()"
    ], :select_result_value => true),
    FeedTools::XmlHelper.try_xpaths(self.root_node, [
      "@xml:lang",
      "xml:lang/text()"
    ], :select_result_value => true)
  ])
  @language = "en-us" if @language.blank?
  @language.gsub!(/_/, "-")
  @language = @language.downcase
  language_parts = @language.split('-')
  if language_parts.size > 1
    @language = "#{language_parts.first}-#{language_parts.last.upcase}"
  end
  return @language
end
The time that the feed was last requested from the remote server. Nil if it has never been pulled, or if it was created from scratch.
# File lib/feed_tools/feed.rb, line 2482
#
# Returns the time the feed was last retrieved. When a cache object is
# present its value replaces the locally stored one on every call.
def last_retrieved
  if !self.cache_object.nil?
    @last_retrieved = self.cache_object.last_retrieved
  end
  @last_retrieved
end
Returns the feed link
# File lib/feed_tools/feed.rb, line 1244
#
# Returns the feed link.
#
# Every link in the links collection is scored and the best one whose score
# is at least zero wins (ties go to the link that appears earliest, since
# the collection is walked in reverse). If that yields nothing, attributes
# of the channel node and then an http id are tried. The result is
# entity-unescaped, resolved if relative, optionally normalized, and copied
# to the cache object when there is one.
def link
  if @link.nil?
    max_score = 0
    self.links.reverse.each do |link_object|
      score = 0
      next if link_object.href.nil?

      # A link identical to the feed url is unlikely to be the site link.
      if @href != nil && link_object.href == @href
        score -= 2
      end
      if link_object.type != nil
        if (link_object.type =~ /image/ || link_object.type =~ /video/)
          score -= 2
        end
        if FeedTools::HtmlHelper.xml_type?(link_object.type)
          score += 1
        end
        if FeedTools::HtmlHelper.html_type?(link_object.type)
          score += 2
        elsif link_object.type != nil
          score -= 1
        end
      end
      score -= 2 if link_object.rel == "enclosure"
      score += 1 if link_object.rel == "alternate"
      if link_object.rel == "self"
        score -= 1
        if (link_object.href =~ /xml/ ||
            link_object.href =~ /atom/ ||
            link_object.href =~ /feed/)
          score -= 1
        end
      end

      if score >= max_score
        max_score = score
        @link = link_object.href
      end
    end

    if @link.blank?
      @link = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
        "@href",
        "@rdf:about",
        "@about"
      ], :select_result_value => true)
    end
    if @link.blank?
      if FeedTools::UriHelper.is_uri?(self.id) &&
          (self.id =~ /^http/)
        @link = self.id
      end
    end
    unless @link.blank?
      @link = FeedTools::HtmlHelper.unescape_entities(@link)
    end
    @link = nil if @link.blank?

    begin
      unless (@link =~ /^file:/) || FeedTools::UriHelper.is_uri?(@link)
        channel_base_uri = nil
        if !self.channel_node.nil?
          channel_base_uri = self.channel_node.base_uri
        end
        @link = FeedTools::UriHelper.resolve_relative_uri(
          @link, [channel_base_uri, self.base_uri])
      end
    rescue
    end
    if self.configurations[:url_normalization_enabled]
      @link = FeedTools::UriHelper.normalize_url(@link)
    end
    if !self.cache_object.nil?
      self.cache_object.link = @link
    end
  end
  return @link
end
Returns the links collection
# File lib/feed_tools/feed.rb, line 1333
#
# Returns the links collection: one FeedTools::Link per link-like node
# under the channel node, skipping nodes whose url ends up blank. The
# collection is rebuilt whenever the stored one is blank.
def links
  if @links.blank?
    @links = []

    # Reads a link attribute, preferring the atom-prefixed forms. A value
    # that is just a namespace prefix ("atom10:", "atom03:", "atom:") is
    # replaced by the plain attribute.
    read_attribute = lambda do |node, name, extra_xpaths|
      value = FeedTools::XmlHelper.try_xpaths(node, [
        "@atom10:#{name}",
        "@atom03:#{name}",
        "@atom:#{name}",
        "@#{name}"
      ] + extra_xpaths, :select_result_value => true)
      if value == "atom10:" || value == "atom03:" || value == "atom:"
        value = FeedTools::XmlHelper.try_xpaths(node, [
          "@#{name}"
        ], :select_result_value => true)
      end
      value
    end

    link_nodes =
      FeedTools::XmlHelper.combine_xpaths_all(self.channel_node, [
        "atom10:link",
        "atom03:link",
        "atom:link",
        "link",
        "channelLink",
        "a",
        "url",
        "href"
      ])
    link_nodes.each do |link_node|
      link_object = FeedTools::Link.new

      link_object.href = read_attribute.call(link_node, "href", ["text()"])
      # An empty url can still be resolved against the node's base uri.
      if link_object.href.nil? && link_node.base_uri != nil
        link_object.href = ""
      end
      begin
        unless (link_object.href =~ /^file:/) ||
            FeedTools::UriHelper.is_uri?(link_object.href)
          link_object.href = FeedTools::UriHelper.resolve_relative_uri(
            link_object.href,
            [link_node.base_uri, self.base_uri])
        end
      rescue
      end
      if self.configurations[:url_normalization_enabled]
        link_object.href =
          FeedTools::UriHelper.normalize_url(link_object.href)
      end
      link_object.href.strip! unless link_object.href.nil?
      next if link_object.href.blank?

      link_object.hreflang = read_attribute.call(link_node, "hreflang", [])
      unless link_object.hreflang.nil?
        link_object.hreflang = link_object.hreflang.downcase
      end

      link_object.rel = read_attribute.call(link_node, "rel", [])
      unless link_object.rel.nil?
        link_object.rel = link_object.rel.downcase
      end
      if link_object.rel.nil? && self.feed_type == "atom"
        link_object.rel = "alternate"
      end

      link_object.type = read_attribute.call(link_node, "type", [])
      unless link_object.type.nil?
        link_object.type = link_object.type.downcase
      end

      link_object.title =
        read_attribute.call(link_node, "title", ["text()"])
      # This catches the ambiguities between atom, rss, and cdf
      if link_object.title == link_object.href
        link_object.title = nil
      end

      link_object.length = read_attribute.call(link_node, "length", [])
      if !link_object.length.nil?
        link_object.length = link_object.length.to_i
      else
        if !link_object.type.nil? && link_object.type[0..4] != "text" &&
            link_object.type[-3..-1] != "xml" &&
            link_object.href =~ /^http:\/\//
          # Retrieve the length with an http HEAD request
          # (not implemented; the length stays nil)
        else
          link_object.length = nil
        end
      end

      # Guard kept from the original: href resets @links to nil, which is
      # presumably reachable from the accessors called above.
      @links = [] if @links.nil?
      @links << link_object
    end
  end
  return @links
end
True if the feed was not last retrieved from the cache.
# File lib/feed_tools/feed.rb, line 2520
#
# Returns the stored "live" flag.
def live?
  @live
end
Attempts to load the feed from the remote location. Requires the url field to be set. If an etag or the last_modified date has been set, attempts to use them to prevent unnecessary reloading of identical content.
# File lib/feed_tools/feed.rb, line 289
#
# Loads the feed from the location in href.
#
# * "http" urls are fetched through FeedTools::RetrievalHelper.http_get.
#   When a redirect target is already present, unexpired, in the cache, the
#   cached state is copied and the redirect is not followed.
# * "file" urls are read from the local file system.
# * "ftp" urls are accepted but nothing is retrieved.
#
# Raises FeedAccessError for an invalid url, a url without a scheme, or an
# unrecognized scheme. Errors during http retrieval are re-raised only when
# no feed data is available; errors while reading a file are always
# re-raised. The feed is saved afterwards when a cache object is present,
# and failures while saving are ignored.
def load_remote_feed!
  @live = true
  if self.http_headers.nil? && !(self.cache_object.nil?) &&
      !(self.cache_object.http_headers.nil?)
    @http_headers = YAML.load(self.cache_object.http_headers)
  end

  if (self.href =~ /^feed:/) == 0
    # A feed: pseudo-protocol url should already have been corrected by
    # now; normalize it again.
    self.href = FeedTools::UriHelper.normalize_url(self.href)
  end

  # Find out what method we're going to be using to obtain this feed.
  begin
    uri = URI.parse(self.href)
  rescue URI::InvalidURIError
    raise FeedAccessError,
      "Cannot retrieve feed using invalid URL: " + self.href.to_s
  end
  retrieval_method = "http"
  case uri.scheme
  when "http"
    retrieval_method = "http"
  when "ftp"
    retrieval_method = "ftp"
  when "file"
    retrieval_method = "file"
  when nil
    raise FeedAccessError,
      "No protocol was specified in the url."
  else
    raise FeedAccessError,
      "Cannot retrieve feed using unrecognized protocol: " + uri.scheme
  end

  # No need for http headers unless we're actually doing http
  if retrieval_method == "http"
    begin
      @http_response = (FeedTools::RetrievalHelper.http_get(
          self.href, :feed_object => self) do |url, response|
        # Find out if we've already seen the url we've been
        # redirected to.
        follow_redirect = true

        begin
          cached_feed = FeedTools::Feed.open(url,
            :disable_update_from_remote => true)
          if cached_feed.cache_object != nil &&
              cached_feed.cache_object.new_record? != true
            if !cached_feed.expired? &&
                !cached_feed.http_headers.blank?
              # Copy the cached state
              self.href = cached_feed.href

              @feed_data = cached_feed.feed_data
              @feed_data_type = cached_feed.feed_data_type

              if @feed_data.blank?
                raise "Invalid cache data."
              end

              @title = nil; self.title
              self.href
              @link = nil; self.link

              self.last_retrieved = cached_feed.last_retrieved
              self.http_headers = cached_feed.http_headers
              self.cache_object = cached_feed.cache_object
              @live = false
              follow_redirect = false
            end
          end
        rescue
          # If anything goes wrong, ignore it.
        end
        follow_redirect
      end)
      case @http_response
      when Net::HTTPSuccess
        @feed_data = self.http_response.body
        @http_headers = {}
        self.http_response.each_header do |key, value|
          self.http_headers[key.downcase] = value
        end
        self.last_retrieved = Time.now.gmtime
        @live = true
      when Net::HTTPNotModified
        @http_headers = {}
        self.http_response.each_header do |key, value|
          self.http_headers[key.downcase] = value
        end
        self.last_retrieved = Time.now.gmtime
        @live = false
      else
        @live = false
      end
    rescue Exception => error
      @live = false
      if self.feed_data.nil?
        raise error
      end
    end
  elsif retrieval_method == "https"
    # Not supported... yet
    # (never reached: "https" falls into the unrecognized-protocol branch
    # of the case statement above)
  elsif retrieval_method == "ftp"
    # Not supported... yet
    # Technically, CDF feeds are supposed to be able to be accessed
    # directly from an ftp server.
  elsif retrieval_method == "file"
    # Strip the leading 'file://' to get the path on disk.
    file_name = self.href.gsub(/^file:\/\//, "")
    if RUBY_PLATFORM =~ /mswin/
      file_name = file_name[1..-1] if file_name[0..0] == "/"
    end
    begin
      # File.open rather than Kernel#open: Kernel#open would run a
      # subprocess for a name starting with "|", and the name comes from
      # the feed url.
      File.open(file_name) do |file|
        @http_response = nil
        @http_headers = {}
        @feed_data = file.read
        @feed_data_type = :xml
        self.last_retrieved = Time.now.gmtime
      end
    rescue
      @live = false
      # In this case, pulling from the cache is probably not going
      # to help at all, and the user should probably be immediately
      # apprised of the problem. Raise the exception.
      raise
    end
  end
  unless self.cache_object.nil?
    begin
      self.save
    rescue
    end
  end
end
Returns the contents of the media:text element
# File lib/feed_tools/feed.rb, line 1217
#
# Returns the media:text text, looked up under the channel node and then
# the root node. The text is entity-unescaped, sanitized and stripped; nil
# is returned when nothing non-blank is found.
def media_text
  return @media_text unless @media_text.nil?

  @media_text = FeedTools::XmlHelper.select_not_blank([
    FeedTools::XmlHelper.try_xpaths(
      self.channel_node, ["media:text/text()"],
      :select_result_value => true),
    FeedTools::XmlHelper.try_xpaths(
      self.root_node, ["media:text/text()"],
      :select_result_value => true)
  ])
  if @media_text.blank?
    @media_text = nil
  else
    @media_text = FeedTools::HtmlHelper.unescape_entities(@media_text)
    @media_text = FeedTools::HtmlHelper.sanitize_html(@media_text)
    @media_text.strip!
  end
  return @media_text
end
Sets the contents of the media:text element
# File lib/feed_tools/feed.rb, line 1239
#
# Stores the given value as the media:text contents.
def media_text=(new_media_text)
  @media_text = new_media_text
end
Passes missing methods to the cache_object.
# File lib/feed_tools/feed.rb, line 2876
#
# Forwards unknown methods to the cache object.
#
# msg::    name of the missing method
# params:: arguments of the original call
#
# The arguments are splatted and the block is passed along. Previously the
# whole argument array was forwarded as one single argument, so a call
# without arguments reached the cache object with an extra [] argument.
#
# Raises NoMethodError when there is no cache object to forward to.
def method_missing(msg, *params, &block)
  if self.cache_object.nil?
    raise NoMethodError, "Invalid method #{msg.to_s}"
  end
  return self.cache_object.send(msg, *params, &block)
end
True if this feed contains audio content enclosures
# File lib/feed_tools/feed.rb, line 2498
#
# Returns true if any item of the feed has an audio enclosure, false
# otherwise. The scan stops at the first audio enclosure instead of
# visiting every enclosure of every item.
def podcast?
  self.items.any? do |item|
    item.enclosures.any? { |enclosure| enclosure.audio? }
  end
end
Returns the feed published time
# File lib/feed_tools/feed.rb, line 1935
#
# Returns the feed published time as a GMT Time, or nil when no published
# or issued date is found under the channel node or it cannot be parsed.
def published
  if @published.nil?
    published_string =
      FeedTools::XmlHelper.try_xpaths(self.channel_node, [
        "atom10:published/text()",
        "atom03:published/text()",
        "atom:published/text()",
        "published/text()",
        "dc:date/text()",
        "pubDate/text()",
        "atom10:issued/text()",
        "atom03:issued/text()",
        "atom:issued/text()",
        "issued/text()"
      ], :select_result_value => true)
    if published_string.blank?
      @published = nil
    else
      @published = begin
        Time.parse(published_string).gmtime
      rescue
        nil
      end
    end
  end
  return @published
end
Returns the feed publisher
# File lib/feed_tools/feed.rb, line 1759
#
# Returns the feed publisher as a FeedTools::Author, memoized in @publisher.
#
# The raw value is read from "webMaster" or "dc:publisher" under the channel
# node and entity-unescaped. It is then split into name and email by trying,
# in order:
#
# 1. "Name (email@host)"  - name first, email in parentheses
# 2. "email@host (Name)"  - email first, name in parentheses
# 3. a bare email address anywhere in the text (email only)
#
# If none of those match and the raw text contains no "@", the whole raw
# text is used as the name. Blank name/raw/email/url values are normalized
# to nil, and a non-nil url that is neither a file: url nor already a URI is
# resolved against the channel node's base uri and the feed's base uri.
def publisher
  if @publisher.nil?
    @publisher = FeedTools::Author.new
    @publisher.raw = FeedTools::HtmlHelper.unescape_entities(
      FeedTools::XmlHelper.try_xpaths(self.channel_node, [
        "webMaster/text()",
        "dc:publisher/text()"
      ], :select_result_value => true))

    unless @publisher.raw.blank?
      # In the email local part the hyphen is escaped and placed last so it
      # is a literal "-". Written as "%-\+" it formed a range from "%" to
      # "+", which also accepted & ' ( ) * as address characters.
      raw_scan = @publisher.raw.scan(
        /(.*)\((\b[A-Z0-9._%+\-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
      if raw_scan.nil? || raw_scan.size == 0
        raw_scan = @publisher.raw.scan(
          /(\b[A-Z0-9._%+\-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
        unless raw_scan.size == 0
          # Email came first here; flip to [name, email].
          publisher_raw_pair = raw_scan.first.reverse
        end
      else
        publisher_raw_pair = raw_scan.first
      end
      if raw_scan.nil? || raw_scan.size == 0
        # Neither paired form matched; fall back to any bare email address.
        email_scan = @publisher.raw.scan(
          /\b[A-Z0-9._%+\-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
        if email_scan != nil && email_scan.size > 0
          @publisher.email = email_scan.first.strip
        end
      end
      if publisher_raw_pair.nil? || publisher_raw_pair.size == 0
        unless @publisher.raw.include?("@")
          # We can be reasonably sure we are looking at something
          # that the creator didn't intend to contain an email address if
          # it got through the preceding regexes and it doesn't
          # contain the tell-tale '@' symbol.
          @publisher.name = @publisher.raw
        end
      else
        @publisher.name = publisher_raw_pair.first.strip
        @publisher.email = publisher_raw_pair.last.strip
      end
    end

    @publisher.name = nil if @publisher.name.blank?
    @publisher.raw = nil if @publisher.raw.blank?
    @publisher.email = nil if @publisher.email.blank?
    @publisher.url = nil if @publisher.url.blank?
    if @publisher.url != nil
      begin
        if !(@publisher.url =~ /^file:/) &&
            !FeedTools::UriHelper.is_uri?(@publisher.url)
          channel_base_uri = nil
          unless self.channel_node.nil?
            channel_base_uri = self.channel_node.base_uri
          end
          @publisher.url = FeedTools::UriHelper.resolve_relative_uri(
            @publisher.url, [channel_base_uri, self.base_uri])
        end
      rescue
        # Best effort: keep the url unresolved if resolution fails.
      end
    end
  end
  return @publisher
end
Sets the feed publisher
# File lib/feed_tools/feed.rb, line 1824
#
# Sets the feed publisher.
#
# An object that responds to name, email and url is treated as a complete
# author and stored as is. Anything else (probably a String) becomes the
# name of the current publisher, creating a new FeedTools::Author first if
# no publisher has been set yet.
def publisher=(new_publisher)
  author_like = [:name, :email, :url].all? do |attribute|
    new_publisher.respond_to?(attribute)
  end

  if author_like
    @publisher = new_publisher
  else
    @publisher = FeedTools::Author.new if @publisher.nil?
    @publisher.name = new_publisher
  end
end
Returns the feed's copyright information
# File lib/feed_tools/feed.rb, line 2071
#
# Returns the feed's copyright information, memoized in @rights.
#
# The first copyright/rights node found under the channel node is run
# through FeedTools::HtmlHelper.process_text_construct. The wrapper element
# is stripped for atom feeds, or for any feed when the
# :always_strip_wrapper_elements configuration is set.
def rights
  return @rights unless @rights.nil?

  rights_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
    "atom10:copyright",
    "atom03:copyright",
    "atom:copyright",
    "copyright",
    "copyrights",
    "dc:rights",
    "rights"
  ])
  @rights = FeedTools::HtmlHelper.process_text_construct(
    rights_node, self.feed_type, self.feed_version, [self.base_uri])

  strip_wrapper = self.feed_type == "atom" ||
    self.configurations[:always_strip_wrapper_elements]
  if strip_wrapper
    @rights = FeedTools::HtmlHelper.strip_wrapper_element(@rights)
  end
  return @rights
end
Returns the root node of the feed.
# File lib/feed_tools/feed.rb, line 751
#
# Returns the root node of the feed, memoized in @root_node.
#
# Returns nil when there is no xml document, or when fetching the document
# or its root raises anything at all.
def root_node
  return @root_node unless @root_node.nil?

  # TODO: Fix this so that added content at the end of the file doesn't
  # break this stuff.
  # E.g.: http://smogzer.tripod.com/smog.rdf
  # ===================================================================
  begin
    return nil if self.xml_document.nil?
    @root_node = self.xml_document.root
  rescue Exception
    return nil
  end
  return @root_node
end
Persists the current feed state to the cache.
# File lib/feed_tools/feed.rb, line 2818
#
# Persists the current feed state to the cache.
#
# Does nothing when the :feed_cache configuration is nil, when there is
# neither feed data nor http headers, when the response was served as
# (x)html and nothing feed-like was parsed from it, or when the href is a
# file:// url.
#
# Raises RuntimeError when FeedTools.feed_cache is nil, when href is nil,
# or when there is no cache_object to write to.
def save
  # The cache is disabled for this feed, do nothing.
  return if self.configurations[:feed_cache].nil?

  # There's no data, nothing to save.
  return if self.feed_data.blank? && self.http_headers.blank?

  content_type = self.http_headers['content-type']
  served_as_html = content_type =~ /text\/html/ ||
    content_type =~ /application\/xhtml\+xml/
  if served_as_html
    # Don't save html pages to the cache, it messes with
    # autodiscovery.
    return if self.title.nil? && self.link.nil? && self.entries.blank?
  end

  return if self.href =~ /^file:\/\//

  if FeedTools.feed_cache.nil?
    raise "Caching is currently disabled. Cannot save to cache."
  elsif self.href.nil?
    raise "The url field must be set to save to the cache."
  elsif self.cache_object.nil?
    raise "The cache_object is currently nil. Cannot save to cache."
  else
    self.cache_object.href = self.href
    unless self.feed_data.nil?
      # Parsed fields are only copied when there is feed data behind them.
      self.cache_object.title = self.title
      self.cache_object.link = self.link
      self.cache_object.feed_data = self.feed_data
      self.cache_object.feed_data_type = self.feed_data_type.to_s
    end
    self.cache_object.http_headers = self.http_headers.to_yaml
    self.cache_object.last_retrieved = self.last_retrieved
    Thread.pass
    self.cache_object.save
  end
end
Returns a duplicate object suitable for serialization
# File lib/feed_tools/feed.rb, line 508
#
# Returns a duplicate object suitable for serialization.
#
# Runs a full parse, then dups the feed while its entries are temporarily
# detached. The copy has its @xml_document, @root_node and @channel_node
# cleared and receives the serializable form of every entry, each with its
# own @root_node cleared. The receiver's entries are put back before
# returning.
def serializable
  self.full_parse()
  original_entries = self.entries

  # This prevents errors due to temporarily having feed items with
  # multiple parent feeds.
  self.entries = []
  copy = self.dup
  ["@xml_document", "@root_node", "@channel_node"].each do |ivar|
    copy.instance_variable_set(ivar, nil)
  end
  copy.entries = original_entries.collect { |entry| entry.serializable }
  self.entries = original_entries

  copy.entries.each do |entry|
    entry.instance_variable_set("@root_node", nil)
  end
  return copy
end
Does a full parse, then serializes the feed object directly to the cache.
# File lib/feed_tools/feed.rb, line 493
#
# Does a full parse, then serializes the feed object directly to the cache.
#
# @cache_object is cleared before the YAML dump is taken. If the
# cache_object accessor then yields an object, the dump is stored in its
# serialized field and saved; any exception raised while doing so is
# swallowed. Always returns nil.
def serialize_to_cache
  @cache_object = nil
  require 'yaml'
  serialized_feed = YAML.dump(self.serializable)
  return nil if self.cache_object == nil

  begin
    self.cache_object.serialized = serialized_feed
    self.cache_object.save
  rescue Exception
    # Best effort: a failed cache write must not break the caller.
  end
  return nil
end
Returns the feed subtitle
# File lib/feed_tools/feed.rb, line 1113
#
# Returns the feed subtitle, memoized in @subtitle.
#
# The first subtitle/description-like node found under the channel node is
# run through FeedTools::HtmlHelper.process_text_construct. The wrapper
# element is stripped for atom feeds, or when the
# :always_strip_wrapper_elements configuration is set. A blank result falls
# back to itunes_summary and then to itunes_subtitle.
def subtitle
  return @subtitle unless @subtitle.nil?

  subtitle_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
    "atom10:subtitle",
    "subtitle",
    "atom03:tagline",
    "tagline",
    "description",
    "summary",
    "abstract",
    "ABSTRACT",
    "content:encoded",
    "encoded",
    "content",
    "xhtml:body",
    "body",
    "xhtml:div",
    "div",
    "p:payload",
    "payload",
    "channelDescription",
    "blurb",
    "info"
  ])
  @subtitle = FeedTools::HtmlHelper.process_text_construct(
    subtitle_node, self.feed_type, self.feed_version, [self.base_uri])

  strip_wrapper = self.feed_type == "atom" ||
    self.configurations[:always_strip_wrapper_elements]
  if strip_wrapper
    @subtitle = FeedTools::HtmlHelper.strip_wrapper_element(@subtitle)
  end

  @subtitle = self.itunes_summary if @subtitle.blank?
  @subtitle = self.itunes_subtitle if @subtitle.blank?
  return @subtitle
end
Returns the feed's text input field
# File lib/feed_tools/feed.rb, line 2256
#
# Returns the feed's text input field as a FeedTools::TextInput, memoized
# in @text_input.
#
# When the channel node has a "textInput" child, its title, description,
# link and name texts are copied onto the object in that order. Without
# such a child the returned object is left as constructed.
def text_input
  return @text_input unless @text_input.nil?

  @text_input = FeedTools::TextInput.new
  text_input_node =
    FeedTools::XmlHelper.try_xpaths(self.channel_node, ["textInput"])
  unless text_input_node.nil?
    ["title", "description", "link", "name"].each do |field|
      value = FeedTools::XmlHelper.try_xpaths(
        text_input_node, ["#{field}/text()"], :select_result_value => true)
      @text_input.send("#{field}=", value)
    end
  end
  return @text_input
end
Returns the feed time
# File lib/feed_tools/feed.rb, line 1859
#
# Returns the feed time, memoized in @time.
#
# The first matching updated/modified/issued/published/date xpath under the
# channel node supplies the raw string, which is parsed with Time.parse and
# converted with gmtime. When the string is blank or parsing fails, the
# current time (gmtime) is used if the :timestamp_estimation_enabled
# configuration is set; otherwise the result stays nil.
def time
  return @time unless @time.nil?

  time_string = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
    "atom10:updated/text()",
    "atom03:updated/text()",
    "atom:updated/text()",
    "updated/text()",
    "atom10:modified/text()",
    "atom03:modified/text()",
    "atom:modified/text()",
    "modified/text()",
    "time/text()",
    "lastBuildDate/text()",
    "atom10:issued/text()",
    "atom03:issued/text()",
    "atom:issued/text()",
    "issued/text()",
    "atom10:published/text()",
    "atom03:published/text()",
    "atom:published/text()",
    "published/text()",
    "dc:date/text()",
    "pubDate/text()",
    "date/text()"
  ], :select_result_value => true)

  begin
    if time_string.blank?
      if self.configurations[:timestamp_estimation_enabled]
        @time = Time.now.gmtime
      end
    else
      @time = Time.parse(time_string).gmtime
    end
  rescue
    # Unparseable timestamp: estimate only if the configuration allows it.
    if self.configurations[:timestamp_estimation_enabled]
      @time = Time.now.gmtime
    end
  end
  return @time
end
Returns the number of seconds before the feed should expire
# File lib/feed_tools/feed.rb, line 2119
#
# Returns the number of seconds before the feed should expire.
#
# When no value is memoized and a channel node exists, the value is taken
# from the first of these sources that yields one:
#
# 1. syn:updateFrequency scaled by syn:updatePeriod (hourly when the period
#    is not daily/weekly/monthly/yearly)
# 2. ttl scaled by its span attribute (minutes when the span is absent or
#    unrecognized)
# 3. the sum of the schedule/intervaltime day/hour/min/sec attributes,
#    falling back to the :default_ttl configuration when that sum is 0
#
# On every call, a nil or zero value is replaced by :default_ttl, a value
# at or above a non-nil, non-zero :max_ttl is capped to it, and the result
# is rounded.
def time_to_live
  if @time_to_live.nil? && !channel_node.nil?
    # get the feed time to live from the xml document
    update_frequency = FeedTools::XmlHelper.try_xpaths(
      self.channel_node,
      ["syn:updateFrequency/text()"], :select_result_value => true)
    unless update_frequency.blank?
      update_period = FeedTools::XmlHelper.try_xpaths(
        self.channel_node,
        ["syn:updatePeriod/text()"], :select_result_value => true)
      frequency = update_frequency.to_i
      @time_to_live =
        if update_period == "daily"
          frequency.day
        elsif update_period == "weekly"
          frequency.week
        elsif update_period == "monthly"
          frequency.month
        elsif update_period == "yearly"
          frequency.year
        else
          # hourly
          frequency.hour
        end
    end

    if @time_to_live.nil?
      # usually expressed in minutes
      update_frequency = FeedTools::XmlHelper.try_xpaths(
        self.channel_node, ["ttl/text()"],
        :select_result_value => true)
      unless update_frequency.blank?
        update_span = FeedTools::XmlHelper.try_xpaths(
          self.channel_node, ["ttl/@span"],
          :select_result_value => true)
        frequency = update_frequency.to_i
        @time_to_live =
          if update_span == "seconds"
            frequency
          elsif update_span == "minutes"
            frequency.minute
          elsif update_span == "hours"
            frequency.hour
          elsif update_span == "days"
            frequency.day
          elsif update_span == "weeks"
            frequency.week
          elsif update_span == "months"
            frequency.month
          elsif update_span == "years"
            frequency.year
          else
            frequency.minute
          end
      end
    end

    if @time_to_live.nil?
      @time_to_live = 0
      interval_part = lambda do |attribute|
        FeedTools::XmlHelper.try_xpaths(self.channel_node,
          ["schedule/intervaltime/@#{attribute}"],
          :select_result_value => true)
      end
      update_frequency_days = interval_part.call("day")
      update_frequency_hours = interval_part.call("hour")
      update_frequency_minutes = interval_part.call("min")
      update_frequency_seconds = interval_part.call("sec")

      unless update_frequency_days.blank?
        @time_to_live += update_frequency_days.to_i.day
      end
      unless update_frequency_hours.blank?
        @time_to_live += update_frequency_hours.to_i.hour
      end
      unless update_frequency_minutes.blank?
        @time_to_live += update_frequency_minutes.to_i.minute
      end
      unless update_frequency_seconds.blank?
        @time_to_live += update_frequency_seconds.to_i
      end
      if @time_to_live == 0
        @time_to_live = self.configurations[:default_ttl].to_i
      end
    end
  end

  max_ttl = self.configurations[:max_ttl]
  if @time_to_live.nil? || @time_to_live == 0
    # Nothing usable was found; use the configured default.
    @time_to_live = self.configurations[:default_ttl].to_i
  elsif max_ttl != nil && max_ttl != 0 && @time_to_live >= max_ttl.to_i
    @time_to_live = max_ttl.to_i
  end
  @time_to_live = @time_to_live.round
  return @time_to_live
end
Returns the feed title
# File lib/feed_tools/feed.rb, line 1082
#
# Returns the feed title, memoized in @title.
#
# The first title node found under the channel node is run through
# FeedTools::HtmlHelper.process_text_construct. The wrapper element is
# stripped for atom feeds, or when the :always_strip_wrapper_elements
# configuration is set. A blank title becomes nil. Whenever the title is
# computed here, it is also written to the cache_object if one is present.
def title
  return @title unless @title.nil?

  title_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
    "atom10:title",
    "atom03:title",
    "atom:title",
    "title",
    "dc:title",
    "channelTitle",
    "TITLE"
  ])
  @title = FeedTools::HtmlHelper.process_text_construct(
    title_node, self.feed_type, self.feed_version, [self.base_uri])

  strip_wrapper = self.feed_type == "atom" ||
    self.configurations[:always_strip_wrapper_elements]
  if strip_wrapper
    @title = FeedTools::HtmlHelper.strip_wrapper_element(@title)
  end

  @title = nil if @title.blank?
  self.cache_object.title = @title unless self.cache_object.nil?
  return @title
end
Loads the feed from the remote url if the feed has expired from the cache or cannot be retrieved from the cache for some reason.
# File lib/feed_tools/feed.rb, line 179
#
# Loads the feed from the remote url if the feed has expired from the cache
# or cannot be retrieved from the cache for some reason.
#
# Returns immediately when the :disable_update_from_remote configuration is
# set. Otherwise it makes sure the feed cache (if any) is set up, restores
# http headers from the cache object when none are loaded, and, unless
# expired? is exactly false, calls load_remote_feed!. After loading it
# follows an autodiscovered feed link on (x)html responses or a
# "redirect/newLocation" document by re-pointing href and cache_object and
# calling itself again, then clears the memoized parse state.
#
# Raises RuntimeError when the feed cache cannot be set up correctly.
# Raises FeedAccessError on an autodiscovery or redirect loop, or when an
# html response contains no feed link but does have an html body.
def update!
  # Don't do anything if this option is set
  return if self.configurations[:disable_update_from_remote]

  unless FeedTools.feed_cache.nil? ||
      FeedTools.feed_cache.set_up_correctly?
    FeedTools.feed_cache.initialize_cache()
  end
  unless FeedTools.feed_cache.nil? ||
      FeedTools.feed_cache.set_up_correctly?
    raise "Your feed cache system is incorrectly set up. " +
      "Please see the documentation for more information."
  end

  if self.http_headers.blank? && !(self.cache_object.nil?) &&
      !(self.cache_object.http_headers.nil?)
    # NOTE(review): YAML.load on cached data; confirm the cache is trusted.
    @http_headers = YAML.load(self.cache_object.http_headers)
    @http_headers = {} unless @http_headers.kind_of? Hash
  elsif self.http_headers.blank?
    @http_headers = {}
  end

  if self.expired? == false
    @live = false
  else
    load_remote_feed!

    # Handle autodiscovery
    content_is_html =
      self.http_headers['content-type'] =~ /text\/html/ ||
      self.http_headers['content-type'] =~ /application\/xhtml\+xml/
    if content_is_html
      autodiscovered_url = nil
      ['atom', 'rss', 'rdf'].each do |type|
        autodiscovered_url =
          FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
            "application/#{type}+xml")
        break unless autodiscovered_url.nil?
      end

      if autodiscovered_url != nil
        begin
          autodiscovered_url = FeedTools::UriHelper.resolve_relative_uri(
            autodiscovered_url, [self.href])
        rescue Exception
          # Keep the unresolved url if it cannot be resolved.
        end
        if self.href == autodiscovered_url
          raise FeedAccessError,
            "Autodiscovery loop detected: #{autodiscovered_url}"
        end
        self.feed_data = nil

        self.href = autodiscovered_url
        if FeedTools.feed_cache.nil?
          self.cache_object = nil
        else
          self.cache_object =
            FeedTools.feed_cache.find_by_href(autodiscovered_url)
        end
        self.update!
      else
        html_body = FeedTools::XmlHelper.try_xpaths(self.xml_document, [
          "html/body"
        ])
        if html_body != nil
          raise FeedAccessError,
            "#{self.href} does not appear to be a feed."
        end
      end
    else
      ugly_redirect = FeedTools::XmlHelper.try_xpaths(self.xml_document, [
        "redirect/newLocation/text()"
      ], :select_result_value => true)
      unless ugly_redirect.blank?
        if self.href == ugly_redirect
          raise FeedAccessError,
            "Ugly redirect loop detected: #{ugly_redirect}"
        end
        self.feed_data = nil
        self.href = ugly_redirect
        if FeedTools.feed_cache.nil?
          self.cache_object = nil
        else
          self.cache_object =
            FeedTools.feed_cache.find_by_href(ugly_redirect)
        end
        self.update!
      end
    end

    # Reset everything that needs to be reset.
    # NOTE(review): the rights method memoizes in @rights, which is not
    # cleared here, while @copyright is - confirm whether that is intended.
    @xml_document = nil
    @encoding_from_feed_data = nil
    @root_node = nil
    @channel_node = nil
    @id = nil
    @title = nil
    @subtitle = nil
    @copyright = nil
    @link = nil
    @time_to_live = nil
    @entries = nil

    if self.configurations[:lazy_parsing_enabled] == false
      self.full_parse()
    end
  end
end
# File lib/feed_tools/feed.rb, line 1907
#
# Returns the feed updated time, memoized in @updated.
#
# The first matching updated/modified/lastBuildDate xpath under the channel
# node supplies the raw string, which is parsed with Time.parse and
# converted with gmtime. Returns nil when the string is blank or when
# parsing raises a StandardError.
def updated
  return @updated unless @updated.nil?

  updated_string = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
    "atom10:updated/text()",
    "atom03:updated/text()",
    "atom:updated/text()",
    "updated/text()",
    "atom10:modified/text()",
    "atom03:modified/text()",
    "atom:modified/text()",
    "modified/text()",
    "lastBuildDate/text()"
  ], :select_result_value => true)

  if updated_string.blank?
    @updated = nil
  else
    @updated =
      begin
        Time.parse(updated_string).gmtime
      rescue StandardError
        # Unparseable timestamps are treated as "no updated time".
        nil
      end
  end
  return @updated
end
True if this feed contains video content enclosures
# File lib/feed_tools/feed.rb, line 2509
#
# True if this feed contains video content enclosures.
#
# Returns true as soon as one enclosure of one item answers video? with a
# truthy value; the remaining enclosures and items are not inspected.
# Returns false when there are no items or no enclosure is video.
def vidlog?
  self.items.any? do |item|
    item.enclosures.any? { |enclosure| enclosure.video? }
  end
end
Returns a REXML Document of the feed_data
# File lib/feed_tools/feed.rb, line 709
#
# Returns a REXML Document of the feed_data, memoized in @xml_document.
#
# Returns nil when the feed data is blank, when feed_data_type is not :xml,
# or when both parsers fail. REXML is tried first; if it raises, the data
# is handed to HTML5::XMLParser as a repair attempt.
def xml_document
  return @xml_document unless @xml_document.nil?
  return nil if self.feed_data.blank?
  # Only xml feed data can be parsed into a document.
  return nil unless self.feed_data_type == :xml

  begin
    @xml_document = REXML::Document.new(self.feed_data_utf_8)
  rescue Exception
    # Something failed, attempt to repair the xml with html5lib.
    begin
      @xml_document = HTML5::XMLParser.parse(self.feed_data_utf_8)
    rescue Exception
      # Failed again, give up.
      return nil
    end
  end
  return @xml_document
end