Class | FeedTools::FeedItem |
In: |
lib/feed_tools/feed_item.rb
|
Parent: | Object |
The FeedTools::FeedItem class represents the structure of a single item within a web feed.
Initializes the feed item object.
# File lib/feed_tools/feed_item.rb, line 31
#
# Sets up an empty feed item. No feed data is attached yet, the data type
# defaults to :xml, the parsed-document caches are cleared, the item's time
# is stamped with the current moment in UTC, and the item records the
# FeedTools version that created it.
def initialize
  super
  @feed_data, @feed_data_type = nil, :xml
  @xml_document, @root_node, @title, @id = nil, nil, nil, nil
  # Time#utc is the same conversion as Time#gmtime.
  @time = Time.now.utc
  @version = FeedTools::FEED_TOOLS_VERSION::STRING
end
# File lib/feed_tools/feed_item.rb, line 1420
#
# Returns the author of the feed item as a FeedTools::Author, parsing it
# from the item's XML on first access and caching it in @author.
#
# Resolution order:
# 1. The first author-like element found under the item's root node.
#    Its raw text is examined for "Name (email)" / "email (Name)" pairs,
#    then child elements / attributes supply name, email and url.
# 2. The item's itunes author, if no name was found.
# 3. A copy of the parent feed's author, if name, email and href are all
#    still blank and the item belongs to a feed.
def author
  if @author.nil?
    @author = FeedTools::Author.new
    author_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [
      "atom10:author",
      "atom03:author",
      "atom:author",
      "author",
      "managingEditor",
      "dc:author",
      "dc:creator",
      "creator"
    ])
    unless author_node.nil?
      @author.raw = FeedTools::XmlHelper.try_xpaths(
        author_node, ["text()"], :select_result_value => true)
      @author.raw = FeedTools::HtmlHelper.unescape_entities(@author.raw)
      unless @author.raw.nil?
        # The hyphen is placed last in the local-part class so it is a
        # literal "-" and not the start of a "%" .. "+" range (which would
        # accept "(" / ")" and reject "-").
        email_pattern = '[A-Z0-9._%+-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}'
        author_raw_pair = nil

        # First try "Name (user@example.com)".
        raw_scan = @author.raw.scan(/(.*)\((\b#{email_pattern}\b)\)/i)
        if raw_scan.empty?
          # Then try "user@example.com (Name)"; reverse so the pair is
          # always [name, email].
          raw_scan = @author.raw.scan(
            /(\b#{email_pattern}\b)\s*\((.*)\)/i)
          author_raw_pair = raw_scan.first.reverse unless raw_scan.empty?
        else
          author_raw_pair = raw_scan.first
        end
        if raw_scan.empty?
          # Neither paired form matched; look for a bare address.
          email_scan = @author.raw.scan(/\b#{email_pattern}\b/i)
          @author.email = email_scan.first.strip unless email_scan.empty?
        end
        if author_raw_pair.nil? || author_raw_pair.size == 0
          unless @author.raw.include?("@")
            # We can be reasonably sure we are looking at something
            # that the creator didn't intend to contain an email address
            # if it got through the preceding regexes and it doesn't
            # contain the tell-tale '@' symbol.
            @author.name = @author.raw
          end
        else
          @author.name = author_raw_pair.first.strip
          @author.email = author_raw_pair.last.strip
        end
      end
      if @author.name.blank?
        @author.name = FeedTools::HtmlHelper.unescape_entities(
          FeedTools::XmlHelper.try_xpaths(author_node, [
            "atom10:name/text()",
            "atom03:name/text()",
            "atom:name/text()",
            "name/text()",
            "@name"
          ], :select_result_value => true)
        )
      end
      if @author.email.blank?
        @author.email = FeedTools::HtmlHelper.unescape_entities(
          FeedTools::XmlHelper.try_xpaths(author_node, [
            "atom10:email/text()",
            "atom03:email/text()",
            "atom:email/text()",
            "email/text()",
            "@email"
          ], :select_result_value => true)
        )
      end
      if @author.url.blank?
        @author.url = FeedTools::HtmlHelper.unescape_entities(
          FeedTools::XmlHelper.try_xpaths(author_node, [
            "atom10:url/text()",
            "atom03:url/text()",
            "atom:url/text()",
            "url/text()",
            "atom10:uri/text()",
            "atom03:uri/text()",
            "atom:uri/text()",
            "uri/text()",
            "@url",
            "@uri",
            "@href"
          ], :select_result_value => true)
        )
      end
      if @author.name.blank? && !@author.raw.blank? &&
          !@author.email.blank?
        # The address is matched literally: without escaping, "." and "+"
        # in the email would be treated as regex metacharacters.
        escaped_email = Regexp.escape(@author.email)
        name_scan = @author.raw.scan(
          /"?([^"]*)"? ?[\(<].*#{escaped_email}.*[\)>].*/)
        if name_scan.flatten.size == 1
          @author.name = name_scan.flatten[0].strip
        end
        if @author.name.blank?
          name_scan = @author.raw.scan(
            /.*#{escaped_email} ?[\(<]"?([^"]*)"?[\)>].*/)
          if name_scan.flatten.size == 1
            @author.name = name_scan.flatten[0].strip
          end
        end
      end
      # Normalize empty values to nil.
      @author.name = nil if @author.name.blank?
      @author.raw = nil if @author.raw.blank?
      @author.email = nil if @author.email.blank?
      @author.url = nil if @author.url.blank?
      if @author.url != nil
        begin
          if !(@author.url =~ /^file:/) &&
              !FeedTools::UriHelper.is_uri?(@author.url)
            @author.url = FeedTools::UriHelper.resolve_relative_uri(
              @author.url, [author_node.base_uri, self.base_uri])
          end
        rescue
          # Best effort: if the url cannot be resolved it is left as-is.
        end
      end
      if FeedTools::XmlHelper.try_xpaths(author_node,
          ["@gr:unknown-author"], :select_result_value => true) == "true"
        # The feed flagged the author as unknown; drop the placeholder.
        if @author.name == "(author unknown)"
          @author.name = nil
        end
      end
    end
    # Fallback on the itunes module if we didn't find an author name
    begin
      @author.name = self.itunes_author if @author.name.nil?
    rescue
      @author.name = nil
    end
    if @author.name.blank? && @author.email.blank? &&
        @author.href.blank?
      # Nothing usable on the item itself; inherit the feed's author.
      parent_feed = self.feed
      if parent_feed != nil
        @author = parent_feed.author.dup
      end
    end
  end
  return @author
end
# File lib/feed_tools/feed_item.rb, line 1562
#
# Sets the author of the feed item.
#
# An object that responds to name, email and url is treated as a complete
# author object and stored directly. Anything else (most likely a String)
# is used as the author's name, creating a FeedTools::Author first if the
# item does not have one yet.
def author=(new_author)
  looks_like_author = [:name, :email, :url].all? do |reader|
    new_author.respond_to?(reader)
  end
  if looks_like_author
    @author = new_author
  else
    @author = FeedTools::Author.new if @author.nil?
    @author.name = new_author
  end
end
Generates xml based on the content of the feed item
# File lib/feed_tools/feed_item.rb, line 2034
#
# Generates xml for this feed item.
#
# feed_type::   "rss" or "atom". Defaults to the parent feed's type, or
#               "atom" when the item has no parent feed or the feed has no
#               type.
# version::     Format version. nil or 0.0 selects 1.0 for both "rss" and
#               "atom". RSS 0.9 / 1.0 / 1.1 produce the RDF-based form; any
#               other RSS version produces the plain <item> form.
# xml_builder:: Builder instance the markup is written to.
#
# Returns the result of the xml_builder call for the item's root element.
#
# Raises StandardError when the parent feed or the item itself denies
# redistribution, and RuntimeError when the RDF form is requested without
# a link, when Atom 0.3 is requested, when no usable Atom id can be
# produced, or when the format/version combination is unsupported.
#
# An item without a parent feed can still be serialized: the feed-level
# redistribution checks and the podcast-only output are skipped for it.
def build_xml(feed_type=((self.feed && self.feed.feed_type) || "atom"),
    version=nil,
    xml_builder=Builder::XmlMarkup.new(
      :indent => 2, :escape_attrs => false))

  parent_feed = self.feed
  unless parent_feed.nil?
    if parent_feed.find_node(
        "access:restriction/@relationship").to_s == "deny" ||
        parent_feed.find_node("@indexing:index").to_s == "no"
      raise StandardError,
        "Operation not permitted. This feed denies redistribution."
    end
  end
  if self.find_node(
      "access:restriction/@relationship").to_s == "deny"
    raise StandardError,
      "Operation not permitted. This feed item denies redistribution."
  end

  self.full_parse()

  if (feed_type == "rss" || feed_type == "atom") &&
      (version == nil || version == 0.0)
    version = 1.0
  end

  if feed_type == "rss" &&
      (version == 0.9 || version == 1.0 || version == 1.1)
    # RDF-based rss format
    if self.link.nil?
      raise "Cannot generate an rdf-based feed item with a " +
        "nil link field."
    end
    return xml_builder.item("rdf:about" =>
        FeedTools::HtmlHelper.escape_entities(self.link)) do
      # title, link and description are always emitted in this form, as
      # empty elements when there is no value.
      if self.title.blank?
        xml_builder.title
      else
        xml_builder.title(
          FeedTools::HtmlHelper.strip_html_tags(self.title))
      end
      if self.link.blank?
        xml_builder.link
      else
        xml_builder.link(self.link)
      end
      unless self.author.nil? || self.author.name.nil?
        xml_builder.tag!("dc:creator", self.author.name)
      end
      if self.summary.blank?
        xml_builder.description
      else
        xml_builder.description(self.summary)
      end
      unless self.content.blank?
        xml_builder.tag!("content:encoded") do
          xml_builder.cdata!(self.content)
        end
      end
      unless self.time.nil?
        xml_builder.tag!("dc:date", self.time.iso8601)
      end
      unless self.rights.blank?
        xml_builder.tag!("dc:rights", self.rights)
      end
      unless self.tags.nil? || self.tags.size == 0
        self.tags.each do |tag|
          xml_builder.tag!("dc:subject", tag)
        end
        if !parent_feed.nil? && parent_feed.podcast?
          xml_builder.tag!("itunes:keywords", self.tags.join(", "))
        end
      end
      build_xml_hook(feed_type, version, xml_builder)
    end
  elsif feed_type == "rss"
    # normal rss format
    return xml_builder.item do
      unless self.title.blank?
        xml_builder.title(
          FeedTools::HtmlHelper.strip_html_tags(self.title))
      end
      unless self.link.blank?
        xml_builder.link(self.link)
      end
      unless self.author.nil? || self.author.name.nil?
        xml_builder.tag!("dc:creator", self.author.name)
      end
      unless self.author.nil? || self.author.email.nil? ||
          self.author.name.nil?
        xml_builder.author("#{self.author.email} (#{self.author.name})")
      end
      unless self.summary.blank?
        xml_builder.description(self.summary)
      end
      unless self.content.blank?
        xml_builder.tag!("content:encoded") do
          xml_builder.cdata!(self.content)
        end
      end
      if !self.published.nil?
        xml_builder.pubDate(self.published.rfc822)
      elsif !self.time.nil?
        xml_builder.pubDate(self.time.rfc822)
      end
      unless self.rights.blank?
        xml_builder.tag!("dc:rights", self.rights)
      end
      if self.guid.blank?
        # No guid of its own: fall back to the link as a permalink.
        unless self.link.blank?
          xml_builder.guid(self.link, "isPermaLink" => "true")
        end
      elsif FeedTools::UriHelper.is_uri?(self.guid) &&
          (self.guid =~ /^http/)
        xml_builder.guid(self.guid, "isPermaLink" => "true")
      else
        xml_builder.guid(self.guid, "isPermaLink" => "false")
      end
      unless self.tags.nil? || self.tags.size == 0
        self.tags.each do |tag|
          xml_builder.tag!("category", tag)
        end
        if !parent_feed.nil? && parent_feed.podcast?
          xml_builder.tag!("itunes:keywords", self.tags.join(", "))
        end
      end
      unless self.enclosures.blank? || self.enclosures.size == 0
        self.enclosures.each do |enclosure|
          attribute_hash = {}
          next if enclosure.url.blank?
          begin
            if enclosure.file_size.blank? || enclosure.file_size.to_i == 0
              # We can't use this enclosure because it's missing the
              # required file size.  Check alternate versions for
              # file_size.
              if !enclosure.versions.blank? && enclosure.versions.size > 0
                enclosure.versions.each do |alternate|
                  if alternate.file_size != nil &&
                      alternate.file_size.to_i > 0
                    enclosure = alternate
                    break
                  end
                end
              end
            end
          rescue
            # Best effort: a failure while looking for an alternate just
            # leaves the original enclosure in place.
          end
          attribute_hash["url"] =
            FeedTools::UriHelper.normalize_url(enclosure.url)
          if enclosure.type != nil
            attribute_hash["type"] = enclosure.type
          end
          if enclosure.file_size != nil && enclosure.file_size.to_i > 0
            attribute_hash["length"] = enclosure.file_size.to_s
          else
            # We couldn't find an alternate and the problem is still
            # there.  Give up and go on.
            xml_builder.comment!(
              "*** Enclosure failed to include file size. Ignoring. ***")
            next
          end
          xml_builder.enclosure(attribute_hash)
        end
      end
      build_xml_hook(feed_type, version, xml_builder)
    end
  elsif feed_type == "atom" && version == 0.3
    raise "Atom 0.3 is obsolete."
  elsif feed_type == "atom" && version == 1.0
    # normal atom format
    return xml_builder.entry("xmlns" =>
        FEED_TOOLS_NAMESPACES['atom10']) do
      unless self.title.nil? || self.title == ""
        xml_builder.title(
          FeedTools::HtmlHelper.strip_html_tags(self.title),
          "type" => "html")
      end
      xml_builder.author do
        # An author name is always written; "n/a" stands in for a
        # missing one.
        if self.author.nil? || self.author.name.nil?
          xml_builder.name("n/a")
        else
          xml_builder.name(self.author.name)
        end
        unless self.author.nil? || self.author.email.nil?
          xml_builder.email(self.author.email)
        end
        unless self.author.nil? || self.author.url.nil?
          xml_builder.uri(self.author.url)
        end
      end
      unless self.link.nil? || self.link == ""
        xml_builder.link(
          "href" =>
            FeedTools::HtmlHelper.escape_entities(self.link),
          "rel" => "alternate")
      end
      if !self.content.blank?
        xml_builder.content(self.content,
          "type" => "html")
      end
      if !self.summary.blank?
        xml_builder.summary(self.summary,
          "type" => "html")
      end
      if self.updated != nil
        xml_builder.updated(self.updated.iso8601)
      elsif self.time != nil
        # Not technically correct, but a heck of a lot better
        # than the Time.now fall-back.
        xml_builder.updated(self.time.iso8601)
      else
        xml_builder.updated(Time.now.gmtime.iso8601)
      end
      unless self.published.nil?
        xml_builder.published(self.published.iso8601)
      end
      unless self.rights.blank?
        xml_builder.rights(self.rights)
      end
      if self.id != nil
        if FeedTools::UriHelper.is_uri?(self.id)
          xml_builder.id(self.id)
        elsif self.time != nil && self.link != nil
          xml_builder.id(FeedTools::UriHelper.build_tag_uri(
            self.link, self.time))
        elsif self.link != nil
          # NOTE(review): every other URI helper here lives on
          # FeedTools::UriHelper; confirm that build_urn_uuid_uri is
          # really defined directly on FeedTools.
          xml_builder.id(FeedTools.build_urn_uuid_uri(self.link))
        else
          raise "The unique id must be a URI. " +
            "(Attempted to generate id, but failed.)"
        end
      elsif self.time != nil && self.link != nil
        xml_builder.id(FeedTools::UriHelper.build_tag_uri(
          self.link, self.time))
      else
        raise "Cannot build feed, missing feed unique id."
      end
      unless self.tags.nil? || self.tags.size == 0
        self.tags.each do |tag|
          xml_builder.category("term" => tag)
        end
      end
      unless self.enclosures.blank? || self.enclosures.size == 0
        self.enclosures.each do |enclosure|
          attribute_hash = {}
          next if enclosure.url.blank?
          attribute_hash["rel"] = "enclosure"
          attribute_hash["href"] =
            FeedTools::UriHelper.normalize_url(enclosure.url)
          if enclosure.type != nil
            attribute_hash["type"] = enclosure.type
          end
          if enclosure.file_size != nil && enclosure.file_size.to_i > 0
            attribute_hash["length"] = enclosure.file_size.to_s
          end
          xml_builder.link(attribute_hash)
        end
      end
      build_xml_hook(feed_type, version, xml_builder)
    end
  else
    raise "Unsupported feed format/version."
  end
end
Returns a list of the feed item's categories
# File lib/feed_tools/feed_item.rb, line 743
#
# Returns the feed item's categories as an array of FeedTools::Category
# objects, built from the item's "category" / "dc:subject" nodes on first
# access and cached in @categories. Term, label and scheme are stripped of
# surrounding whitespace when present.
def categories
  return @categories unless @categories.nil?

  @categories = []
  value_of = lambda do |node, xpaths|
    FeedTools::XmlHelper.try_xpaths(
      node, xpaths, :select_result_value => true)
  end
  nodes = FeedTools::XmlHelper.try_xpaths_all(
    self.root_node, ["category", "dc:subject"])
  nodes.each do |node|
    entry = FeedTools::Category.new

    entry.term = value_of.call(node, ["@term", "text()"])
    entry.term.strip! unless entry.term.nil?

    entry.label = value_of.call(node, ["@label"])
    entry.label.strip! unless entry.label.nil?

    # RSS uses "domain" where Atom uses "scheme".
    entry.scheme = value_of.call(node, ["@scheme", "@domain"])
    entry.scheme.strip! unless entry.scheme.nil?

    @categories << entry
  end
  return @categories
end
Returns the url for posting comments
# File lib/feed_tools/feed_item.rb, line 643
#
# Returns the url for posting comments, read from the item's "comments"
# element on first access and cached in @comments. A value that is neither
# a file: url nor already a URI is resolved against the root node's base
# URI and the item's base URI. The result is normalized when the
# :url_normalization_enabled configuration is set.
def comments
  return @comments unless @comments.nil?

  @comments = FeedTools::XmlHelper.try_xpaths(
    self.root_node, ["comments/text()"],
    :select_result_value => true)
  begin
    unless (@comments =~ /^file:/) ||
        FeedTools::UriHelper.is_uri?(@comments)
      root_base_uri =
        self.root_node.nil? ? nil : self.root_node.base_uri
      @comments = FeedTools::UriHelper.resolve_relative_uri(
        @comments, [root_base_uri, self.base_uri])
    end
  rescue
    # Resolution is best effort; on failure the value is left untouched.
  end
  if self.configurations[:url_normalization_enabled]
    @comments = FeedTools::UriHelper.normalize_url(@comments)
  end
  return @comments
end
Returns the load options for this feed.
# File lib/feed_tools/feed_item.rb, line 139
#
# Returns the load options for this feed item. When none are set yet, a
# copy of the parent feed's configurations is taken; an item without a
# parent feed copies the global FeedTools configurations instead.
def configurations
  return @configurations unless @configurations.blank?

  parent_feed = self.feed
  @configurations =
    if parent_feed.nil?
      FeedTools.configurations.dup
    else
      parent_feed.configurations.dup
    end
  return @configurations
end
# File lib/feed_tools/feed_item.rb, line 328
#
# Returns the item's content, computed on first access and cached in
# @content. The first matching content-like element under the root node is
# processed as a text construct. Wrapper elements are stripped for atom
# feeds or when :always_strip_wrapper_elements is configured. If nothing
# was found, the media text, the itunes summary and finally the itunes
# subtitle are tried in that order.
def content
  return @content unless @content.nil?

  # Ordered by preference: the first xpath that matches wins.
  candidate_xpaths = [
    "atom10:content",
    "atom03:content",
    "atom:content",
    "body/datacontent",
    "xhtml:body",
    "body",
    "xhtml:div",
    "div",
    "p:payload",
    "payload",
    "content:encoded",
    "content",
    "fullitem",
    "encoded",
    "description",
    "tagline",
    "subtitle",
    "atom10:summary",
    "atom03:summary",
    "atom:summary",
    "summary",
    "abstract",
    "blurb",
    "info"
  ]
  content_node =
    FeedTools::XmlHelper.try_xpaths(self.root_node, candidate_xpaths)
  @content = FeedTools::HtmlHelper.process_text_construct(content_node,
    self.feed_type, self.feed_version, [self.base_uri])

  strip_wrapper = self.feed_type == "atom" ||
    self.configurations[:always_strip_wrapper_elements]
  if strip_wrapper
    @content = FeedTools::HtmlHelper.strip_wrapper_element(@content)
  end

  @content = self.media_text if @content.nil?
  @content = self.itunes_summary if @content.nil?
  @content = self.itunes_subtitle if @content.nil?
  return @content
end
Breaks any references that the feed entry may be keeping around, thus making the job of the garbage collector much, much easier. Call this method prior to feed entries going out of scope to prevent memory leaks.
# File lib/feed_tools/feed_item.rb, line 46
#
# Drops the references this feed entry holds to its raw data, parsed
# document, root node, title, id and time by setting them to nil, so the
# garbage collector can reclaim them. Returns nil.
def dispose
  @feed_data = @feed_data_type = @xml_document = @root_node =
    @title = @id = @time = nil
end
Returns all feed item enclosures
# File lib/feed_tools/feed_item.rb, line 937
#
# Returns all feed item enclosures as an array of FeedTools::Enclosure
# objects, parsed on first access and cached in @enclosures.
#
# Sources, in processing order: RSS <enclosure> elements, Atom
# link[@rel='enclosure'] elements, non-standard <video> elements, and
# media:content elements (stand-alone or inside a media:group). Entries
# that share a url are merged into one enclosure. Enclosures inside a
# media:group inherit thumbnail, categories, hash, player, credits and text
# from the group when they have none of their own. Each group is then
# collapsed to a single entry in the returned list, with the remaining
# members reachable through #versions.
#
# The group's representative is the enclosure flagged isDefault (the last
# one, if several are flagged). When none is flagged, the first enclosure
# of the group is used so that the list never contains nil.
def enclosures
  if @enclosures.nil?
    @enclosures = []

    # --- small local helpers -------------------------------------------
    unescape = lambda do |text|
      FeedTools::HtmlHelper.unescape_entities(text)
    end
    # Value of a single xpath below +node+.
    xpath_value = lambda do |node, xpath|
      FeedTools::XmlHelper.try_xpaths(
        node, [xpath], :select_result_value => true)
    end
    # Returns [enclosure, is_new]: the already-collected enclosure with
    # this url, or a fresh one that still has to be appended.
    find_or_create = lambda do |url|
      existing = @enclosures.find { |candidate| candidate.url == url }
      existing.nil? ? [FeedTools::Enclosure.new, true] : [existing, false]
    end
    # Builds a thumbnail/player object from "<element>/@height|@width"
    # below +node+ plus the already-fetched url.
    build_sized_media = lambda do |klass, node, element, url|
      klass.new(
        unescape.call(url),
        unescape.call(xpath_value.call(node, "#{element}/@height")),
        unescape.call(xpath_value.call(node, "#{element}/@width")))
    end
    # Builds the list of media:category entries directly below +node+.
    build_media_categories = lambda do |node|
      FeedTools::XmlHelper.try_xpaths_all(
          node, ["media:category"]).map do |category_node|
        media_category = FeedTools::Category.new
        media_category.term = unescape.call(category_node.inner_xml)
        media_category.scheme =
          unescape.call(category_node.attributes["scheme"].to_s)
        media_category.label =
          unescape.call(category_node.attributes["label"].to_s)
        media_category.scheme = nil if media_category.scheme.blank?
        media_category.label = nil if media_category.label.blank?
        media_category
      end
    end

    # First, load up all the different possible sources of enclosures
    rss_enclosures =
      FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["enclosure"])
    atom_enclosures =
      FeedTools::XmlHelper.try_xpaths_all(self.root_node, [
        "atom10:link[@rel='enclosure']",
        "atom03:link[@rel='enclosure']",
        "atom:link[@rel='enclosure']",
        "link[@rel='enclosure']"
      ])
    media_content_enclosures =
      FeedTools::XmlHelper.try_xpaths_all(self.root_node,
        ["media:content"])
    media_group_enclosures =
      FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["media:group"])
    bogus_enclosures =
      FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["video"])

    # TODO: Implement parsing of "bitTorrent:torrent" enclosures.

    # Parse RSS-type enclosures.  Thanks to a few buggy enclosures
    # implementations, sometimes these also manage to show up in atom
    # files.
    rss_enclosures.each do |enclosure_node|
      enclosure = FeedTools::Enclosure.new
      enclosure.url = unescape.call(enclosure_node.attributes["url"].to_s)
      enclosure.type = enclosure_node.attributes["type"].to_s
      enclosure.file_size = enclosure_node.attributes["length"].to_i
      enclosure.credits = []
      enclosure.explicit = false
      @enclosures << enclosure
    end

    # Parse atom-type enclosures.  If there are repeats of the same
    # enclosure object, we merge the two together.
    atom_enclosures.each do |enclosure_node|
      enclosure_url =
        unescape.call(enclosure_node.attributes["href"].to_s)
      enclosure, new_enclosure = find_or_create.call(enclosure_url)
      enclosure.url = enclosure_url
      enclosure.type = enclosure_node.attributes["type"].to_s
      enclosure.file_size = enclosure_node.attributes["length"].to_i
      enclosure.credits = []
      enclosure.explicit = false
      @enclosures << enclosure if new_enclosure
    end

    # Parse the non-standard <video> elements, merging repeats of the
    # same url as above.
    bogus_enclosures.each do |enclosure_node|
      enclosure_url =
        unescape.call(enclosure_node.attributes["url"].to_s)
      enclosure, new_enclosure = find_or_create.call(enclosure_url)
      enclosure.url = enclosure_url
      if File.extname(enclosure_url) == ".wmv"
        enclosure.type = "video/x-ms-wmv"
      end
      enclosure.explicit = false
      @enclosures << enclosure if new_enclosure
    end

    # Creates an anonymous method to parse content objects from the media
    # module.  We do this to avoid excessive duplication of code since we
    # have to do identical processing for content objects within group
    # objects.  Returns the enclosures it created or updated.
    parse_media_content = lambda do |media_content_nodes|
      affected_enclosures = []
      media_content_nodes.each do |enclosure_node|
        attributes = enclosure_node.attributes
        enclosure_url = unescape.call(attributes["url"].to_s)
        enclosure, new_enclosure = find_or_create.call(enclosure_url)
        enclosure.url = enclosure_url
        enclosure.type = attributes["type"].to_s
        enclosure.file_size = attributes["fileSize"].to_i
        enclosure.duration = attributes["duration"].to_s
        enclosure.height = attributes["height"].to_i
        enclosure.width = attributes["width"].to_i
        enclosure.bitrate = attributes["bitrate"].to_i
        enclosure.framerate = attributes["framerate"].to_i
        enclosure.expression = attributes["expression"].to_s
        enclosure.is_default =
          (attributes["isDefault"].to_s.downcase == "true")

        thumbnail_url =
          xpath_value.call(enclosure_node, "media:thumbnail/@url")
        unless thumbnail_url.blank?
          enclosure.thumbnail = build_sized_media.call(
            FeedTools::EnclosureThumbnail, enclosure_node,
            "media:thumbnail", thumbnail_url)
        end

        enclosure.categories = build_media_categories.call(enclosure_node)

        media_hash = xpath_value.call(enclosure_node, "media:hash/text()")
        unless media_hash.nil?
          enclosure.hash = FeedTools::EnclosureHash.new(
            FeedTools::HtmlHelper.sanitize_html(
              unescape.call(media_hash), :strip),
            "md5"
          )
        end

        player_url = xpath_value.call(enclosure_node, "media:player/@url")
        unless player_url.blank?
          enclosure.player = build_sized_media.call(
            FeedTools::EnclosurePlayer, enclosure_node,
            "media:player", player_url)
        end

        enclosure.credits = []
        FeedTools::XmlHelper.try_xpaths_all(
            enclosure_node, ["media:credit"]).each do |credit_node|
          credit = FeedTools::EnclosureCredit.new(
            unescape.call(credit_node.inner_xml.to_s.strip),
            unescape.call(credit_node.attributes["role"].to_s.downcase)
          )
          credit.name = nil if credit.name.blank?
          credit.role = nil if credit.role.blank?
          enclosure.credits << credit
        end

        enclosure.explicit =
          (FeedTools::XmlHelper.try_xpaths(enclosure_node,
            ["media:adult/text()"]).to_s.downcase == "true")

        # NOTE(review): unlike the group-level lookup below, this one
        # does not pass :select_result_value; confirm that is intended.
        media_text = FeedTools::XmlHelper.try_xpaths(enclosure_node,
          ["media:text/text()"])
        unless media_text.blank?
          enclosure.text = unescape.call(media_text)
        end

        affected_enclosures << enclosure
        @enclosures << enclosure if new_enclosure
      end
      affected_enclosures
    end

    # Parse the independent content objects.
    parse_media_content.call(media_content_enclosures)

    media_groups = []

    # Parse the group objects.
    media_group_enclosures.each do |media_group|
      group_media_content_enclosures =
        FeedTools::XmlHelper.try_xpaths_all(media_group,
          ["media:content"])

      # Parse the content objects within the group objects.
      affected_enclosures =
        parse_media_content.call(group_media_content_enclosures)

      # Group-level values are the same for every member, so look them
      # up once per group.  All of them are read from the group node.
      group_thumbnail_url =
        xpath_value.call(media_group, "media:thumbnail/@url")
      group_hash = xpath_value.call(media_group, "media:hash/text()")
      group_player_url =
        xpath_value.call(media_group, "media:player/@url")
      group_text = xpath_value.call(media_group, "media:text/text()")

      # Now make sure that content objects inherit certain properties from
      # the group objects.
      affected_enclosures.each do |enclosure|
        if enclosure.thumbnail.nil? && !group_thumbnail_url.blank?
          enclosure.thumbnail = build_sized_media.call(
            FeedTools::EnclosureThumbnail, media_group,
            "media:thumbnail", group_thumbnail_url)
        end
        if enclosure.categories.blank?
          enclosure.categories = build_media_categories.call(media_group)
        end
        if enclosure.hash.nil? && !group_hash.blank?
          enclosure.hash = FeedTools::EnclosureHash.new(
            FeedTools::HtmlHelper.sanitize_html(
              unescape.call(group_hash), :strip),
            "md5"
          )
        end
        if enclosure.player.nil? && !group_player_url.blank?
          enclosure.player = build_sized_media.call(
            FeedTools::EnclosurePlayer, media_group,
            "media:player", group_player_url)
        end
        if enclosure.credits.nil? || enclosure.credits.size == 0
          enclosure.credits = []
          FeedTools::XmlHelper.try_xpaths_all(
              media_group, ["media:credit"]).each do |credit_node|
            credit = FeedTools::EnclosureCredit.new(
              unescape.call(credit_node.inner_xml),
              unescape.call(credit_node.attributes["role"].to_s.downcase)
            )
            credit.role = nil if credit.role.blank?
            enclosure.credits << credit
          end
        end
        if enclosure.explicit?.nil?
          # to_s guards against a group without a media:adult element.
          enclosure.explicit =
            (xpath_value.call(
              media_group, "media:adult/text()").to_s.downcase == "true")
        end
        if enclosure.text.nil? && !group_text.blank?
          enclosure.text = FeedTools::HtmlHelper.sanitize_html(
            unescape.call(group_text), :strip)
        end
      end

      # Keep track of the media groups
      media_groups << affected_enclosures
    end

    # Now we need to inherit any relevant item level information.
    if self.explicit?
      @enclosures.each { |enclosure| enclosure.explicit = true }
    end

    # Add all the itunes categories
    itunes_categories =
      FeedTools::XmlHelper.try_xpaths_all(self.root_node,
        ["itunes:category"])
    itunes_categories.each do |itunes_category|
      category_name = itunes_category.attributes["text"].to_s
      subcategory_name =
        xpath_value.call(itunes_category, "itunes:category/@text")
      # "Podcasts[/category[/subcategory]]", built without mutating a
      # string literal.
      category_path = ["Podcasts", category_name, subcategory_name].reject do |part|
        part.blank?
      end.join("/")
      @enclosures.each do |enclosure|
        enclosure.categories = [] if enclosure.categories.nil?
        itunes_entry = FeedTools::Category.new
        itunes_entry.term = unescape.call(category_path)
        itunes_entry.scheme = "http://www.apple.com/itunes/store/"
        itunes_entry.label = "iTunes Music Store Categories"
        enclosure.categories << itunes_entry
      end
    end

    @enclosures.each do |enclosure|
      # Clean up any of those attributes that incorrectly have ""
      # or 0 as their values
      enclosure.type = nil if enclosure.type.blank?
      enclosure.file_size = nil if enclosure.file_size == 0
      # duration is stored as a string by the media:content parser, so
      # an absent value shows up as "" rather than 0.
      if enclosure.duration.blank? || enclosure.duration == 0
        enclosure.duration = nil
      end
      enclosure.height = nil if enclosure.height == 0
      enclosure.width = nil if enclosure.width == 0
      enclosure.bitrate = nil if enclosure.bitrate == 0
      enclosure.framerate = nil if enclosure.framerate == 0
      enclosure.expression = "full" if enclosure.expression.blank?

      # If an enclosure is missing the text field, fall back on the
      # itunes:summary field
      enclosure.text = self.itunes_summary if enclosure.text.blank?

      # Make sure we don't have duplicate categories
      enclosure.categories.uniq! unless enclosure.categories.nil?

      # Normalize enclosure URIs
      if enclosure.href.blank?
        enclosure.href = nil
      else
        enclosure.href =
          FeedTools::UriHelper.normalize_url(enclosure.href)
      end
    end

    # And finally, now things get complicated.  This is where we make
    # sure that the enclosures method only returns either default
    # enclosures or enclosures with only one version.  Any enclosures
    # that are wrapped in a media:group will be placed in the appropriate
    # versions field.
    affected_enclosure_urls = media_groups.inject([]) do |urls, group|
      urls | group.map { |enclosure| enclosure.url }
    end
    @enclosures.delete_if do |enclosure|
      affected_enclosure_urls.include?(enclosure.url)
    end
    media_groups.each do |group|
      # A group without any usable content has nothing to contribute.
      next if group.empty?
      # Last flagged default wins; fall back to the first member so that
      # nil is never appended to the result.
      default_enclosure =
        group.select { |enclosure| enclosure.is_default? }.last ||
        group.first
      group.each do |enclosure|
        enclosure.default_version = default_enclosure
        enclosure.versions = group.clone
        enclosure.versions.delete(enclosure)
      end
      @enclosures << default_enclosure
    end
  end

  # If we have a single enclosure, it's safe to inherit the
  # itunes:duration field if it's missing.
  if @enclosures.size == 1
    only_enclosure = @enclosures.first
    if only_enclosure.duration.blank? || only_enclosure.duration == 0
      only_enclosure.duration = self.itunes_duration
    end
  end

  return @enclosures
end
# File lib/feed_tools/feed_item.rb, line 1415
# Stores +new_enclosures+ in @enclosures, the value handed back by
# #enclosures. No validation or copying is performed.
def enclosures=(new_enclosures)
  @enclosures = new_enclosures
end
Returns true if this feed item contains explicit material. If the whole feed has been marked as explicit, this will return true even if the item isn't explicitly marked as explicit.
# File lib/feed_tools/feed_item.rb, line 2003
# Returns true if this feed item is marked as explicit, either directly
# (media:adult or itunes:explicit) or because its parent feed is explicit.
#
# The element text is trimmed and compared case-insensitively, so values
# such as "Yes" or " TRUE " are recognised. The parent feed is only
# looked up when the item itself is not flagged. The result is memoized
# in @explicit.
def explicit?
  return @explicit unless @explicit.nil?

  explicit_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [
    "media:adult/text()",
    "itunes:explicit/text()"
  ], :select_result_value => true)
  flag = explicit_string.to_s.strip.downcase
  if flag == "true" || flag == "yes"
    @explicit = true
  else
    # Only fall back to the parent feed when the item carries no flag.
    parent_feed = self.feed
    @explicit = (!parent_feed.nil? && parent_feed.explicit?) ? true : false
  end
  @explicit
end
Returns the parent feed of this feed item. Warning: this method may be slow if you have a large number of FeedTools::Feed objects. It can't use a direct reference to the parent because that plays havoc with the garbage collector. A WeakRef object could have been used instead, but if there are multiple parent feeds, something is going to go wrong, and the programmer needs to be notified. A WeakRef implementation can't detect this condition.
# File lib/feed_tools/feed_item.rb, line 65
# Scans every live FeedTools::Feed object for one whose @entries holds
# this exact object and returns that feed, or nil when none does.
# Raises "Multiple parent feeds found." if a second feed also holds it.
def feed
  owner = nil
  ObjectSpace.each_object(FeedTools::Feed) do |candidate|
    # Presumably #items populates @entries as a side effect; the ivar is
    # re-read afterwards rather than using the return value.
    candidate.items if candidate.instance_variable_get("@entries").nil?
    entries = candidate.instance_variable_get("@entries")
    next unless entries.any? { |entry| entry.object_id == self.object_id }
    raise "Multiple parent feeds found." unless owner.nil?
    owner = candidate
  end
  return owner
end
Returns all nodes within the root_node that match the xpath query.
# File lib/feed_tools/feed_item.rb, line 220
# Returns every match for +xpath+ under root_node, delegating to
# FeedTools::XmlHelper.try_xpaths_all. +select_result_value+ is passed
# through as the :select_result_value option.
# Raises a RuntimeError when feed_data_type is not :xml.
def find_all_nodes(xpath, select_result_value=false)
  raise "The feed data type is not xml." unless self.feed_data_type == :xml
  return FeedTools::XmlHelper.try_xpaths_all(
    self.root_node, [xpath], :select_result_value => select_result_value)
end
Returns the first node within the root_node that matches the xpath query.
# File lib/feed_tools/feed_item.rb, line 211
# Returns the first match for +xpath+ under root_node, delegating to
# FeedTools::XmlHelper.try_xpaths. +select_result_value+ is passed
# through as the :select_result_value option.
# Raises a RuntimeError when feed_data_type is not :xml.
def find_node(xpath, select_result_value=false)
  raise "The feed data type is not xml." unless self.feed_data_type == :xml
  return FeedTools::XmlHelper.try_xpaths(
    self.root_node, [xpath], :select_result_value => select_result_value)
end
Does a full parse of the feed item.
# File lib/feed_tools/feed_item.rb, line 87
# Calls each of the item's readers once, in a fixed order, discarding the
# results; #serializable relies on this before duplicating the item.
# Returns the value of #explicit?, the last reader invoked.
def full_parse
  [
    :configurations,
    # Document plumbing
    :encoding, :xml_document, :root_node,
    :feed_type, :feed_version,
    # Core item fields
    :id, :title, :content, :summary, :links, :link, :comments,
    :time, :updated, :published, :source, :categories, :tags,
    :images, :rights, :author, :publisher,
    # iTunes extensions
    :itunes_summary, :itunes_subtitle, :itunes_image_link,
    :itunes_author, :itunes_duration,
    # Media RSS extensions
    :media_text, :media_thumbnail_link
  ].each { |reader| self.__send__(reader) }

  self.explicit?
end
Returns the feed item's unique id
# File lib/feed_tools/feed_item.rb, line 267
# Returns the item's id, taken from the first xpath below that yields a
# value. The gr:original-id attributes are tried before the element text.
# A non-nil result is cached in @id.
def id
  return @id unless @id.nil?

  id_xpaths = [
    "atom10:id/@gr:original-id",
    "atom03:id/@gr:original-id",
    "atom:id/@gr:original-id",
    "id/@gr:original-id",
    "atom10:id/text()",
    "atom03:id/text()",
    "atom:id/text()",
    "id/text()",
    "guid/text()"
  ]
  @id = FeedTools::XmlHelper.try_xpaths(
    self.root_node, id_xpaths, :select_result_value => true)
  return @id
end
Returns a list of the feed item's images
# File lib/feed_tools/feed_item.rb, line 773
# Returns the item's images as FeedTools::Image objects, built from the
# image/logo/apple-wallpapers:image/imageUrl nodes and from any entry in
# #links whose type starts with "image". The list is cached in @images.
def images
  return @images unless @images.nil?

  @images = []
  image_nodes = FeedTools::XmlHelper.try_xpaths_all(self.root_node, [
    "image",
    "logo",
    "apple-wallpapers:image",
    "imageUrl"
  ])
  unless image_nodes.blank?
    image_nodes.each do |image_node|
      # Shorthand for "first value matching any of these xpaths".
      value_of = lambda do |xpaths|
        FeedTools::XmlHelper.try_xpaths(
          image_node, xpaths, :select_result_value => true)
      end

      image = FeedTools::Image.new
      image.href = value_of.call([
        "url/text()",
        "@rdf:resource",
        "@href",
        "@url",
        "text()"
      ])
      # An empty href is still resolvable against the node's base URI.
      image.href = "" if image.href.nil? && image_node.base_uri != nil
      begin
        if !(image.href =~ /^file:/) &&
            !FeedTools::UriHelper.is_uri?(image.href)
          stored_base_uri =
            FeedTools::GenericHelper.recursion_trap(:feed_link) do
              self.base_uri if self.feed != nil
            end
          image.href = FeedTools::UriHelper.resolve_relative_uri(
            image.href, [image_node.base_uri, stored_base_uri])
        end
      rescue
        # Best effort: keep the unresolved href if resolution fails.
      end
      if self.configurations[:url_normalization_enabled]
        image.href = FeedTools::UriHelper.normalize_url(image.href)
      end
      image.href.strip! unless image.href.nil?
      next if image.href.blank?

      image.title = value_of.call(["title/text()"])
      image.title.strip! unless image.title.nil?
      image.description = value_of.call(["description/text()"])
      image.description.strip! unless image.description.nil?
      image.link = value_of.call(["link/text()"])
      image.link.strip! unless image.link.nil?

      # Missing or non-positive dimensions are reported as nil.
      image.height = value_of.call(["height/text()"]).to_i
      image.height = nil if image.height <= 0
      image.width = value_of.call(["width/text()"]).to_i
      image.width = nil if image.width <= 0

      image.style = value_of.call([
        "style/text()",
        "@style"
      ])
      image.style.strip! unless image.style.nil?
      image.style.downcase! unless image.style.nil?
      @images << image unless image.url.nil?
    end
  end

  self.links.each do |link_object|
    next unless link_object.type != nil && link_object.type =~ /^image/
    image = FeedTools::Image.new
    image.href = link_object.href
    image.title = link_object.title
    @images << image unless image.href.nil?
  end
  return @images
end
Returns the contents of the itunes:author element
This inherits from any incorrectly placed channel-level itunes:author elements. They're actually amazingly common. People don't read specs.
# File lib/feed_tools/feed_item.rb, line 1672
# Returns the entity-unescaped text of itunes:author. When the item has
# no such text, the parent feed's itunes_author is used instead (if a
# parent feed exists). A non-nil result is cached in @itunes_author.
def itunes_author
  return @itunes_author unless @itunes_author.nil?

  author_text = FeedTools::XmlHelper.try_xpaths(
    self.root_node, ["itunes:author/text()"], :select_result_value => true)
  @itunes_author = FeedTools::HtmlHelper.unescape_entities(author_text)
  if @itunes_author.blank?
    parent_feed = self.feed
    @itunes_author = parent_feed.itunes_author unless parent_feed.nil?
  end
  return @itunes_author
end
Returns the number of seconds that the associated media runs for
# File lib/feed_tools/feed_item.rb, line 1693
# Returns the number of seconds that the associated media runs for, as
# read from itunes:duration, or nil when the element is missing or blank.
#
# Accepted forms are "HH:MM:SS", "MM:SS" and "SS". Values with more than
# three colon-separated fields are not interpreted and leave the duration
# unset. Plain integer arithmetic is used instead of the Numeric#hours /
# Numeric#minutes extensions, so the result is always an Integer and the
# method does not need those extensions to be loaded.
def itunes_duration
  return @itunes_duration unless @itunes_duration.nil?

  raw_duration = FeedTools::HtmlHelper.unescape_entities(
    FeedTools::XmlHelper.try_xpaths(self.root_node,
      ["itunes:duration/text()"], :select_result_value => true))
  return @itunes_duration if raw_duration.to_s.strip.empty?

  hms = raw_duration.split(":").map { |field| field.to_i }
  if (1..3).include?(hms.size)
    # Fold left to right: each further field is worth 1/60 of the previous.
    @itunes_duration = hms.inject(0) { |seconds, field| seconds * 60 + field }
  end
  @itunes_duration
end
Sets the number of seconds that the associated media runs for
# File lib/feed_tools/feed_item.rb, line 1713
# Stores +new_itunes_duration+ in @itunes_duration, overriding whatever
# #itunes_duration would otherwise read from the item's data.
def itunes_duration=(new_itunes_duration)
  @itunes_duration = new_itunes_duration
end
Returns the feed item itunes image link
# File lib/feed_tools/feed_item.rb, line 849
# Returns the item's iTunes image URL, read from itunes:image/@href or
# itunes:link[@rel='image']/@href, and passed through
# FeedTools::UriHelper.normalize_url when the :url_normalization_enabled
# configuration is set. A non-nil result is cached.
def itunes_image_link
  return @itunes_image_link unless @itunes_image_link.nil?

  @itunes_image_link = FeedTools::XmlHelper.try_xpaths(
    self.root_node,
    ["itunes:image/@href", "itunes:link[@rel='image']/@href"],
    :select_result_value => true)
  if self.configurations[:url_normalization_enabled]
    @itunes_image_link =
      FeedTools::UriHelper.normalize_url(@itunes_image_link)
  end
  return @itunes_image_link
end
Returns the contents of the itunes:subtitle element
# File lib/feed_tools/feed_item.rb, line 697
# Returns the text of itunes:subtitle after entity unescaping, HTML
# sanitizing and stripping, or nil when the element is missing or blank.
# A non-nil result is cached in @itunes_subtitle.
def itunes_subtitle
  return @itunes_subtitle unless @itunes_subtitle.nil?

  @itunes_subtitle = FeedTools::XmlHelper.try_xpaths(
    self.root_node, ["itunes:subtitle/text()"],
    :select_result_value => true)
  if @itunes_subtitle.blank?
    @itunes_subtitle = nil
  else
    @itunes_subtitle =
      FeedTools::HtmlHelper.unescape_entities(@itunes_subtitle)
    @itunes_subtitle =
      FeedTools::HtmlHelper.sanitize_html(@itunes_subtitle)
    @itunes_subtitle.strip!
  end
  return @itunes_subtitle
end
Sets the contents of the itunes:subtitle element
# File lib/feed_tools/feed_item.rb, line 716
# Stores +new_itunes_subtitle+ in @itunes_subtitle, overriding whatever
# #itunes_subtitle would otherwise read from the item's data.
def itunes_subtitle=(new_itunes_subtitle)
  @itunes_subtitle = new_itunes_subtitle
end
Returns the contents of the itunes:summary element
# File lib/feed_tools/feed_item.rb, line 673
# Returns the text of itunes:summary after entity unescaping, HTML
# sanitizing and stripping, or nil when the element is missing or blank.
# A non-nil result is cached in @itunes_summary.
def itunes_summary
  return @itunes_summary unless @itunes_summary.nil?

  @itunes_summary = FeedTools::XmlHelper.try_xpaths(
    self.root_node, ["itunes:summary/text()"],
    :select_result_value => true)
  if @itunes_summary.blank?
    @itunes_summary = nil
  else
    @itunes_summary =
      FeedTools::HtmlHelper.unescape_entities(@itunes_summary)
    @itunes_summary =
      FeedTools::HtmlHelper.sanitize_html(@itunes_summary)
    @itunes_summary.strip!
  end
  return @itunes_summary
end
# File lib/feed_tools/feed_item.rb, line 557
# Returns the item's primary link.
#
# Each entry in #links is scored: HTML types gain 2, other typed links
# lose 1, XML types gain 1, a lone video/audio link gains 1,
# rel="alternate" gains 1 and rel="self" loses 1. The links are walked in
# reverse with a ">=" comparison, so among equal scores the earliest link
# wins; links scoring below zero are never selected. Fallbacks, in order:
# the root node's @href/@rdf:about/@about, an http-style #id, and
# #comments. The result is resolved against the base URIs and optionally
# normalized. A non-nil result is cached in @link.
def link
  return @link unless @link.nil?

  max_score = 0
  self.links.reverse.each do |link_object|
    score = 0
    if FeedTools::HtmlHelper.html_type?(link_object.type)
      score += 2
    elsif link_object.type != nil
      score -= 1
    end
    score += 1 if FeedTools::HtmlHelper.xml_type?(link_object.type)
    if (link_object.type =~ /^video/ || link_object.type =~ /^audio/) &&
        self.links.size == 1
      score += 1
    end
    score += 1 if link_object.rel == "alternate"
    score -= 1 if link_object.rel == "self"
    if score >= max_score
      max_score = score
      @link = link_object.href
    end
  end

  if @link.blank?
    @link = FeedTools::XmlHelper.try_xpaths(self.root_node, [
      "@href",
      "@rdf:about",
      "@about"
    ], :select_result_value => true)
  end
  if @link.blank? && FeedTools::UriHelper.is_uri?(self.id) &&
      (self.id =~ /^http/)
    @link = self.id
  end
  @link = FeedTools::HtmlHelper.unescape_entities(@link) unless @link.blank?
  @link = self.comments if @link.blank?
  @link = nil if @link.blank?

  begin
    if !(@link =~ /^file:/) && !FeedTools::UriHelper.is_uri?(@link)
      stored_base_uri =
        FeedTools::GenericHelper.recursion_trap(:feed_link) do
          self.base_uri if self.feed != nil
        end
      root_base_uri = nil
      root_base_uri = self.root_node.base_uri unless self.root_node.nil?
      @link = FeedTools::UriHelper.resolve_relative_uri(
        @link, [root_base_uri,stored_base_uri])
    end
  rescue
    # Best effort: keep the unresolved link if resolution fails.
  end
  if self.configurations[:url_normalization_enabled]
    @link = FeedTools::UriHelper.normalize_url(@link)
  end
  return @link
end
Returns the links collection
# File lib/feed_tools/feed_item.rb, line 436
# Returns the item's links as FeedTools::Link objects, one per
# link/a/url/href node that ends up with a non-blank href. Relative
# hrefs are resolved against the node and feed base URIs; hreflang, rel
# and type are downcased. If no link is found but the item has
# enclosures, a link to the first enclosure is added. The list is cached
# in @links.
def links
  return @links unless @links.nil?

  @links = []
  # Shorthand for "first value matching any of these xpaths on this node".
  value_of = lambda do |node, xpaths|
    FeedTools::XmlHelper.try_xpaths(
      node, xpaths, :select_result_value => true)
  end

  link_nodes = FeedTools::XmlHelper.combine_xpaths_all(self.root_node, [
    "atom10:link",
    "atom03:link",
    "atom:link",
    "link",
    "a",
    "url",
    "href"
  ])
  link_nodes.each do |link_node|
    link_object = FeedTools::Link.new
    link_object.href = value_of.call(link_node, [
      "@atom10:href",
      "@atom03:href",
      "@atom:href",
      "@href",
      "@url",
      "text()"
    ])
    # An empty href is still resolvable against the node's base URI.
    if link_object.href.nil? && link_node.base_uri != nil
      link_object.href = ""
    end
    begin
      if !(link_object.href =~ /^file:/) &&
          !FeedTools::UriHelper.is_uri?(link_object.href)
        stored_base_uri =
          FeedTools::GenericHelper.recursion_trap(:feed_link) do
            self.base_uri if self.feed != nil
          end
        link_object.href = FeedTools::UriHelper.resolve_relative_uri(
          link_object.href,
          [link_node.base_uri, stored_base_uri])
      end
    rescue
      # Best effort: keep the unresolved href if resolution fails.
    end
    if self.configurations[:url_normalization_enabled]
      link_object.href =
        FeedTools::UriHelper.normalize_url(link_object.href)
    end
    link_object.href.strip! unless link_object.href.nil?
    next if link_object.href.blank?

    link_object.hreflang = value_of.call(link_node, [
      "@atom10:hreflang",
      "@atom03:hreflang",
      "@atom:hreflang",
      "@hreflang"
    ])
    unless link_object.hreflang.nil?
      link_object.hreflang = link_object.hreflang.downcase
    end

    link_object.rel = value_of.call(link_node, [
      "@atom10:rel",
      "@atom03:rel",
      "@atom:rel",
      "@rel"
    ])
    link_object.rel = link_object.rel.downcase unless link_object.rel.nil?

    link_object.type = value_of.call(link_node, [
      "@atom10:type",
      "@atom03:type",
      "@atom:type",
      "@type"
    ])
    link_object.type = link_object.type.downcase unless link_object.type.nil?

    link_object.title = value_of.call(link_node, [
      "@atom10:title",
      "@atom03:title",
      "@atom:title",
      "@title",
      "text()"
    ])
    # This catches the ambiguities between atom, rss, and cdf
    link_object.title = nil if link_object.title == link_object.href

    link_object.length = value_of.call(link_node, [
      "@atom10:length",
      "@atom03:length",
      "@atom:length",
      "@length"
    ])
    if !link_object.length.nil?
      link_object.length = link_object.length.to_i
    elsif !link_object.type.nil? && link_object.type[0..4] != "text" &&
        link_object.type[-3..-1] != "xml" &&
        link_object.href =~ /^http:\/\//
      # Retrieve the length with an http HEAD request
      # (placeholder: nothing is fetched here, the length stays nil).
    else
      link_object.length = nil
    end
    @links << link_object
  end

  if @links.empty? && self.enclosures.size > 0
    # If there's seriously nothing to link to, but there's enclosures
    # available, then add a link to the first one.
    enclosure_link = self.enclosures[0]
    link_object = FeedTools::Link.new
    link_object.href = enclosure_link.url
    link_object.type = enclosure_link.type
    @links << link_object
  end
  return @links
end
Returns the contents of the media:text element
# File lib/feed_tools/feed_item.rb, line 721
# Returns the text of media:text after entity unescaping, HTML
# sanitizing and stripping, or nil when the element is missing or blank.
# A non-nil result is cached in @media_text.
def media_text
  return @media_text unless @media_text.nil?

  @media_text = FeedTools::XmlHelper.try_xpaths(
    self.root_node, ["media:text/text()"], :select_result_value => true)
  if @media_text.blank?
    @media_text = nil
  else
    @media_text = FeedTools::HtmlHelper.unescape_entities(@media_text)
    @media_text = FeedTools::HtmlHelper.sanitize_html(@media_text)
    @media_text.strip!
  end
  return @media_text
end
Sets the contents of the media:text element
# File lib/feed_tools/feed_item.rb, line 738
# Stores +new_media_text+ in @media_text, overriding whatever
# #media_text would otherwise read from the item's data.
def media_text=(new_media_text)
  @media_text = new_media_text
end
Returns the feed item media thumbnail link
# File lib/feed_tools/feed_item.rb, line 869
# Returns the URL from media:thumbnail/@url, passed through
# FeedTools::UriHelper.normalize_url when the :url_normalization_enabled
# configuration is set. A non-nil result is cached.
def media_thumbnail_link
  return @media_thumbnail_link unless @media_thumbnail_link.nil?

  @media_thumbnail_link = FeedTools::XmlHelper.try_xpaths(
    self.root_node, ["media:thumbnail/@url"], :select_result_value => true)
  if self.configurations[:url_normalization_enabled]
    @media_thumbnail_link =
      FeedTools::UriHelper.normalize_url(@media_thumbnail_link)
  end
  return @media_thumbnail_link
end
Returns the feed item published time
# File lib/feed_tools/feed_item.rb, line 1864
# Returns the item's publication time in UTC, parsed from the first of
# the issued/published/date elements that has text. Returns nil when no
# such text exists or it cannot be parsed. A non-nil result is cached.
def published
  return @published unless @published.nil?

  published_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [
    "atom10:issued/text()",
    "atom03:issued/text()",
    "atom:issued/text()",
    "issued/text()",
    "atom10:published/text()",
    "atom03:published/text()",
    "atom:published/text()",
    "published/text()",
    "dc:date/text()",
    "pubDate/text()",
    "date/text()"
  ], :select_result_value => true)
  @published =
    if published_string.blank?
      nil
    else
      begin
        Time.parse(published_string).gmtime
      rescue
        # Unparseable timestamps are treated as absent.
        nil
      end
    end
  return @published
end
# File lib/feed_tools/feed_item.rb, line 1579
# Returns the item's publisher as a FeedTools::Author, built from
# dc:publisher or webMaster. The raw text is split into a name and an
# email address when it looks like "Name (email)" or "email (Name)".
# When name, email and href are all blank, a copy of the parent feed's
# publisher is returned instead (if a parent feed exists). The result is
# cached in @publisher.
def publisher
  return @publisher unless @publisher.nil?

  @publisher = FeedTools::Author.new

  # Set the author name
  @publisher.raw = FeedTools::HtmlHelper.unescape_entities(
    FeedTools::XmlHelper.try_xpaths(self.root_node, [
      "dc:publisher/text()",
      "webMaster/text()"
    ], :select_result_value => true))
  unless @publisher.raw.blank?
    publisher_raw_pair = nil
    # First form: "Name (email)".
    raw_scan = @publisher.raw.scan(
      /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
    if raw_scan.nil? || raw_scan.size == 0
      # Second form: "email (Name)"; reversed so the name comes first.
      raw_scan = @publisher.raw.scan(
        /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
      publisher_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
    else
      publisher_raw_pair = raw_scan.first
    end
    if raw_scan.nil? || raw_scan.size == 0
      # Neither form matched: look for a bare email address.
      email_scan = @publisher.raw.scan(
        /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
      if email_scan != nil && email_scan.size > 0
        @publisher.email = email_scan.first.strip
      end
    end
    if publisher_raw_pair.nil? || publisher_raw_pair.size == 0
      unless @publisher.raw.include?("@")
        # We can be reasonably sure we are looking at something
        # that the creator didn't intend to contain an email address if
        # it got through the preceding regexes and it doesn't
        # contain the tell-tale '@' symbol.
        @publisher.name = @publisher.raw
      end
    else
      @publisher.name = publisher_raw_pair.first.strip
      @publisher.email = publisher_raw_pair.last.strip
    end
  end

  @publisher.name = nil if @publisher.name.blank?
  @publisher.raw = nil if @publisher.raw.blank?
  @publisher.email = nil if @publisher.email.blank?
  @publisher.url = nil if @publisher.url.blank?
  if @publisher.url != nil
    begin
      if !(@publisher.url =~ /^file:/) &&
          !FeedTools::UriHelper.is_uri?(@publisher.url)
        root_base_uri = nil
        root_base_uri = self.root_node.base_uri unless self.root_node.nil?
        @publisher.url = FeedTools::UriHelper.resolve_relative_uri(
          @publisher.url, [root_base_uri, self.base_uri])
      end
    rescue
      # Best effort: keep the unresolved url if resolution fails.
    end
  end
  if @publisher.name.blank? && @publisher.email.blank? &&
      @publisher.href.blank?
    parent_feed = self.feed
    @publisher = parent_feed.publisher.dup if parent_feed != nil
  end
  return @publisher
end
# File lib/feed_tools/feed_item.rb, line 1652
# Sets the publisher. An object responding to #name, #email and #url is
# stored as-is; anything else is treated as the publisher's name and
# assigned to the existing (or a newly created) FeedTools::Author.
def publisher=(new_publisher)
  author_like = [:name, :email, :url].all? do |reader|
    new_publisher.respond_to?(reader)
  end
  if author_like
    # It's a complete Author object, just set it.
    @publisher = new_publisher
  else
    # We're not looking at an Author object, this is probably a string,
    # default to setting the publisher's name.
    @publisher = FeedTools::Author.new if @publisher.nil?
    @publisher.name = new_publisher
  end
end
Returns the feed item‘s rights information
# File lib/feed_tools/feed_item.rb, line 889
# Returns the item's rights text, taken from the first matching
# copyright/rights node and run through
# FeedTools::HtmlHelper.process_text_construct. The wrapper element is
# stripped for atom feeds or when :always_strip_wrapper_elements is
# configured. A non-nil result is cached in @rights.
def rights
  return @rights unless @rights.nil?

  rights_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [
    "atom10:copyright",
    "atom03:copyright",
    "atom:copyright",
    "copyright",
    "copyrights",
    "dc:rights",
    "rights"
  ])
  @rights = FeedTools::HtmlHelper.process_text_construct(
    rights_node, self.feed_type, self.feed_version, [self.base_uri])
  if self.feed_type == "atom" ||
      self.configurations[:always_strip_wrapper_elements]
    @rights = FeedTools::HtmlHelper.strip_wrapper_element(@rights)
  end
  return @rights
end
Sets the root node of the feed item.
This allows namespace information to be inherited by the feed item from the feed itself. When creating individual nodes from scratch, the feed_data= method should be used instead.
# File lib/feed_tools/feed_item.rb, line 244
# Stores +new_root_node+ in @root_node without touching any other state.
def root_node=(new_root_node)
  @root_node = new_root_node
end
Returns a duplicate object suitable for serialization
# File lib/feed_tools/feed_item.rb, line 128
# Returns a shallow copy of this item (via #dup) with @xml_document and
# @root_node cleared. #full_parse runs first so the parsed fields are
# populated before the copy is made.
def serializable
  self.full_parse()
  snapshot = self.dup
  snapshot.author
  snapshot.publisher
  ["@xml_document", "@root_node"].each do |ivar|
    snapshot.instance_variable_set(ivar, nil)
  end
  return snapshot
end
TODO: FIX ME! This code is completely wrong. Returns the source that this post was based on.
# File lib/feed_tools/feed_item.rb, line 1895
# Returns a FeedTools::Link whose href comes from source/@url and whose
# title comes from the text of the source element. The link is created
# even when neither value is present, and is cached in @source.
def source
  return @source unless @source.nil?

  value_of = lambda do |xpath|
    FeedTools::XmlHelper.try_xpaths(
      self.root_node, [xpath], :select_result_value => true)
  end
  @source = FeedTools::Link.new
  @source.href = value_of.call("source/@url")
  @source.title = value_of.call("source/text()")
  return @source
end
# File lib/feed_tools/feed_item.rb, line 382
# Returns the item's summary, taken from the first matching node in the
# list below and run through
# FeedTools::HtmlHelper.process_text_construct. The wrapper element is
# stripped for atom feeds or when :always_strip_wrapper_elements is
# configured. When the result is blank, #media_text, #itunes_summary and
# #itunes_subtitle are tried in that order. A non-nil result is cached.
def summary
  return @summary unless @summary.nil?

  summary_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [
    "atom10:summary",
    "atom03:summary",
    "atom:summary",
    "summary",
    "abstract",
    "blurb",
    "description",
    "tagline",
    "subtitle",
    "xhtml:body",
    "body",
    "xhtml:div",
    "div",
    "p:payload",
    "payload",
    "fullitem",
    "content:encoded",
    "encoded",
    "atom10:content",
    "atom03:content",
    "atom:content",
    "content",
    "info",
    "body/datacontent"
  ])
  @summary = FeedTools::HtmlHelper.process_text_construct(
    summary_node, self.feed_type, self.feed_version, [self.base_uri])
  if self.feed_type == "atom" ||
      self.configurations[:always_strip_wrapper_elements]
    @summary = FeedTools::HtmlHelper.strip_wrapper_element(@summary)
  end
  @summary = self.media_text if @summary.blank?
  @summary = self.itunes_summary if @summary.blank?
  @summary = self.itunes_subtitle if @summary.blank?
  return @summary
end
# File lib/feed_tools/feed_item.rb, line 1909
# Returns the item's tags as downcased, stripped, de-duplicated strings.
# Sources are tried in order, each only when the previous ones produced
# nothing: dc:subject RDF bags, taxo:topics resource URLs ending in
# /tag/<word> or /tags/<word>, category text, dc:subject text, and
# finally itunes:keywords. Returns an empty array when root_node is nil.
# The list is cached in @tags.
def tags
  # TODO: support the rel="tag" microformat
  # =======================================
  return @tags unless @tags.nil?

  @tags = []
  return @tags if root_node.nil?

  tag_list = FeedTools::XmlHelper.try_xpaths_all(self.root_node,
    ["dc:subject/rdf:Bag/rdf:li/text()"],
    :select_result_value => true)
  if tag_list != nil && tag_list.size > 0
    tag_list.each { |tag| @tags << tag.downcase.strip }
  end

  if @tags.size == 0
    # messy effort to find ourselves some tags, mainly for del.icio.us
    rdf_bag = FeedTools::XmlHelper.try_xpaths_all(self.root_node,
      ["taxo:topics/rdf:Bag/rdf:li"])
    if rdf_bag != nil && rdf_bag.size > 0
      rdf_bag.each do |tag_node|
        begin
          tag_url = FeedTools::XmlHelper.try_xpaths(tag_node,
            ["@resource"],
            :select_result_value => true)
          tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)$/)
          @tags << tag_match.first.last.downcase.strip if tag_match.size > 0
        rescue
          # Nodes without a usable resource URL are skipped.
        end
      end
    end
  end

  if @tags.size == 0
    tag_list = FeedTools::XmlHelper.try_xpaths_all(self.root_node,
      ["category/text()"],
      :select_result_value => true)
    tag_list.each { |tag| @tags << tag.to_s.downcase.strip }
  end

  if @tags.size == 0
    tag_list = FeedTools::XmlHelper.try_xpaths_all(self.root_node,
      ["dc:subject/text()"],
      :select_result_value => true)
    tag_list.each { |tag| @tags << tag.to_s.downcase.strip }
  end

  if @tags.blank?
    begin
      itunes_keywords_string =
        FeedTools::XmlHelper.try_xpaths(self.root_node, [
          "itunes:keywords/text()"
        ], :select_result_value => true)
      unless itunes_keywords_string.blank?
        # Prefer comma separation; if that yields a single entry, try
        # spaces; if that also yields one entry, go back to commas.
        @tags = itunes_keywords_string.downcase.split(",")
        if @tags.size == 1
          @tags = itunes_keywords_string.downcase.split(" ")
          @tags = @tags.map { |tag| tag.chomp(",") }
        end
        if @tags.size == 1
          @tags = itunes_keywords_string.downcase.split(",")
        end
        @tags = @tags.map { |tag| tag.strip }
      end
    rescue
      @tags = []
    end
  end

  @tags = [] if @tags.nil?
  @tags.uniq!
  return @tags
end
# File lib/feed_tools/feed_item.rb, line 1718
# Returns the item's timestamp in UTC.
#
# The time is parsed from the first updated/modified/issued/published/
# date element with text. When none exists and
# :timestamp_estimation_enabled is configured, the title is tried as a
# date, provided it parses to something more than 100 seconds away from
# now. If the time is still unknown and estimation is enabled both in
# the configuration and via options[:estimate_timestamp] (default true),
# succ_time, then prev_time, then the current time are used.
# Only the :estimate_timestamp option is accepted; the keys are checked
# by FeedTools::GenericHelper.validate_options.
def time(options = {})
  FeedTools::GenericHelper.validate_options([ :estimate_timestamp ],
    options.keys)
  options = { :estimate_timestamp => true }.merge(options)
  return @time unless @time.nil?

  time_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [
    "atom10:updated/text()",
    "atom03:updated/text()",
    "atom:updated/text()",
    "updated/text()",
    "atom10:modified/text()",
    "atom03:modified/text()",
    "atom:modified/text()",
    "modified/text()",
    "time/text()",
    "lastBuildDate/text()",
    "atom10:issued/text()",
    "atom03:issued/text()",
    "atom:issued/text()",
    "issued/text()",
    "atom10:published/text()",
    "atom03:published/text()",
    "atom:published/text()",
    "published/text()",
    "dc:date/text()",
    "pubDate/text()",
    "date/text()",
    "lastupdated/text()"
  ], :select_result_value => true)
  begin
    if !time_string.blank?
      @time = Time.parse(time_string).gmtime
    elsif self.configurations[:timestamp_estimation_enabled] &&
        !self.title.nil? &&
        (Time.parse(self.title) - Time.now).abs > 100
      @time = Time.parse(self.title).gmtime
    end
  rescue
    # Unparseable timestamps fall through to estimation below.
  end

  if self.configurations[:timestamp_estimation_enabled] &&
      options[:estimate_timestamp] && @time.nil?
    begin
      @time = succ_time
      @time = prev_time if @time.nil?
    rescue
      # Neighbouring items could not supply a time.
    end
    @time = Time.now.gmtime if @time.nil?
  end
  return @time
end
# File lib/feed_tools/feed_item.rb, line 290
# Returns the item's title, or nil when it is missing or blank.
#
# The title comes from the first matching title/headline node, run
# through FeedTools::HtmlHelper.process_text_construct. The wrapper
# element is stripped for atom feeds or when
# :always_strip_wrapper_elements is configured. With :strip_comment_count
# configured, a trailing "[N]" comment counter is removed. The counter is
# anchored to the end of the whole string (\z), so bracketed numbers at
# the end of an earlier line in a multi-line title are left alone.
# A non-nil result is cached in @title.
def title
  return @title unless @title.nil?

  title_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [
    "atom10:title",
    "atom03:title",
    "atom:title",
    "title",
    "dc:title",
    "headline"
  ])
  @title = FeedTools::HtmlHelper.process_text_construct(title_node,
    self.feed_type, self.feed_version, [self.base_uri])
  if self.feed_type == "atom" ||
      self.configurations[:always_strip_wrapper_elements]
    @title = FeedTools::HtmlHelper.strip_wrapper_element(@title)
  end
  if !@title.blank? && self.configurations[:strip_comment_count]
    # Some blogging tools include the number of comments in a post
    # in the title... this is supremely ugly, and breaks any
    # applications which expect the title to be static, so we're
    # gonna strip them out.
    #
    # If for some incredibly weird reason you need the actual
    # unstripped title, just use find_node("title/text()").to_s
    @title = @title.strip.sub(/\[\d*\]\z/, "").strip
  end
  @title = nil if @title.blank?
  @title
end
Returns the feed item updated time
# File lib/feed_tools/feed_item.rb, line 1835
# Returns the item's last-updated time in UTC, parsed from the first of
# the updated/modified/lastBuildDate/lastupdated elements that has text.
# Returns nil when no such text exists or it cannot be parsed. A non-nil
# result is cached in @updated.
def updated
  return @updated unless @updated.nil?

  updated_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [
    "atom10:updated/text()",
    "atom03:updated/text()",
    "atom:updated/text()",
    "updated/text()",
    "atom10:modified/text()",
    "atom03:modified/text()",
    "atom:modified/text()",
    "modified/text()",
    "lastBuildDate/text()",
    "lastupdated/text()"
  ], :select_result_value => true)
  @updated =
    if updated_string.blank?
      nil
    else
      begin
        Time.parse(updated_string).gmtime
      rescue
        # Unparseable timestamps are treated as absent.
        nil
      end
    end
  return @updated
end
Returns a REXML Document of the feed_data
# File lib/feed_tools/feed_item.rb, line 194
# Returns a REXML::Document built from feed_data, or nil when feed_data
# is blank or feed_data_type is not :xml. A built document is cached in
# @xml_document.
def xml_document
  return @xml_document unless @xml_document.nil?
  return nil if self.feed_data.blank?

  @xml_document =
    if self.feed_data_type == :xml
      # TODO: :ignore_whitespace_nodes => :all
      # Add that?
      # ======================================
      REXML::Document.new(self.feed_data)
    else
      nil
    end
  return @xml_document
end