Module | FeedTools::RetrievalHelper |
In: |
lib/feed_tools/helpers/retrieval_helper.rb
|
Methods for pulling remote data
# Value sent as the HTTP "Accept" request header by http_request when the
# caller has not supplied an "Accept" header of their own.
# Stolen from the Universal Feed Parser.
#
# Frozen so that the shared constant cannot be mutated in place by code
# that receives it as a header value.
ACCEPT_HEADER = (
  "application/atom+xml,application/rdf+xml," +
  "application/rss+xml,application/x-netcdf,application/xml;" +
  "q=0.9,text/xml;q=0.2,*/*;q=0.1"
).freeze
Makes an HTTP GET request and returns the HTTP response. Optionally takes a block that determines whether or not to follow a redirect. The block will be passed the resolved redirect location and the HTTP redirect response as arguments; the redirect is followed only if the block returns a true value.
# File lib/feed_tools/helpers/retrieval_helper.rb, line 250
#
# Convenience wrapper: issues an HTTP GET for +url+ by delegating to
# http_request with the :get operation. +options+ and the optional block
# are handed through untouched, and whatever http_request returns is
# returned from here.
def self.http_get(url, options={}, &block)
  http_request(:get, url, options, &block)
end
Makes an HTTP POST request and returns the HTTP response. Optionally takes a block that determines whether or not to follow a redirect. The block will be passed the resolved redirect location and the HTTP redirect response as arguments; the redirect is followed only if the block returns a true value.
# File lib/feed_tools/helpers/retrieval_helper.rb, line 258
#
# Convenience wrapper: issues an HTTP POST to +url+ by delegating to
# http_request with the :post operation. +options+ and the optional block
# are handed through untouched, and whatever http_request returns is
# returned from here.
def self.http_post(url, options={}, &block)
  http_request(:post, url, options, &block)
end
Makes an HTTP request and returns the HTTP response. Optionally takes a block that determines whether or not to follow a redirect. The block will be passed the resolved redirect location and the HTTP redirect response as arguments; the redirect is followed only if the block returns a true value.
# File lib/feed_tools/helpers/retrieval_helper.rb, line 43
#
# Makes an HTTP request and returns the HTTP response.
#
# Parameters:
# * +http_operation+ - name of the Net::HTTP instance method to invoke
#   (the sibling helpers pass :get, :post and :head).
# * +url+ - the URL to request. If it cannot be parsed as-is it is run
#   through FeedTools::UriHelper.normalize_url and parsed again.
# * +options+ - a Hash; recognised keys and their defaults:
#   * <tt>:feed_object</tt> (nil) - object whose +http_headers+ and
#     +configurations+ supply conditional-GET headers, user agent, proxy,
#     authentication and timeout settings.
#   * <tt>:form_data</tt> (nil) - Hash sent with :post requests; any
#     non-Hash value is discarded.
#   * <tt>:request_headers</tt> ({}) - Hash of request headers. "Accept"
#     and "User-Agent" are filled in when absent.
#   * <tt>:follow_redirects</tt> (true) - pass +false+ to return a
#     redirect response instead of following it.
#   * <tt>:redirect_limit</tt> (10) - remaining number of redirects that
#     may be followed.
#   * <tt>:response_chain</tt> ([]) - accumulated [url, response] pairs
#     for the redirects seen so far.
# * block - optional; called with the resolved redirect location and the
#   redirect response. The redirect is followed only if it returns a true
#   value.
#
# Returns the final response object (or nil if none was obtained). The
# returned object gains a +response_chain+ reader exposing the
# [url, response] pairs of every redirect encountered.
#
# Raises FeedAccessError when the redirect limit is exhausted, a redirect
# carries no Location header, a redirection loop is detected, or a
# socket error, timeout, unreachable network or connection reset occurs.
def self.http_request(http_operation, url, options={}, &block)
  response = nil

  options = {
    :feed_object => nil,
    :form_data => nil,
    :request_headers => {},
    :follow_redirects => true,
    :redirect_limit => 10,
    :response_chain => []
  }.merge(options)

  if options[:redirect_limit] == 0
    raise FeedAccessError, 'Redirect too deep'
  end

  if options[:response_chain].blank? ||
      !options[:response_chain].kind_of?(Array)
    options[:response_chain] = []
  end

  if !options[:request_headers].kind_of?(Hash)
    options[:request_headers] = {}
  end
  if !options[:form_data].kind_of?(Hash)
    options[:form_data] = nil
  end

  # When the caller gave no headers, derive conditional-GET headers and the
  # user agent from the feed object.
  if options[:request_headers].blank? && options[:feed_object] != nil
    options[:request_headers] = {}
    unless options[:feed_object].http_headers.nil?
      unless options[:feed_object].http_headers['etag'].nil?
        options[:request_headers]["If-None-Match"] =
          options[:feed_object].http_headers['etag']
      end
      unless options[:feed_object].http_headers['last-modified'].nil?
        options[:request_headers]["If-Modified-Since"] =
          options[:feed_object].http_headers['last-modified']
      end
    end
    unless options[:feed_object].configurations[:user_agent].nil?
      options[:request_headers]["User-Agent"] =
        options[:feed_object].configurations[:user_agent]
    end
  end
  if options[:request_headers]["Accept"].nil?
    options[:request_headers]["Accept"] =
      FeedTools::RetrievalHelper::ACCEPT_HEADER
  end
  if options[:request_headers]["User-Agent"].nil?
    options[:request_headers]["User-Agent"] =
      FeedTools.configurations[:user_agent]
  end

  uri = nil
  begin
    uri = URI.parse(url)
  rescue URI::InvalidURIError
    # Uh, maybe try to fix it?
    uri = URI.parse(FeedTools::UriHelper.normalize_url(url))
  end

  begin
    proxy_address = nil
    proxy_port = nil
    proxy_user = nil
    proxy_password = nil

    auth_user = nil
    auth_password = nil
    auth_scheme = nil

    if options[:feed_object] != nil
      proxy_address =
        options[:feed_object].configurations[:proxy_address] || nil
      # nil.to_i is 0 and 0 is truthy, so "to_i || nil" can never produce
      # nil. Map an unset (or zero) port back to nil explicitly so that
      # Net::HTTP falls back to its default proxy port.
      proxy_port =
        options[:feed_object].configurations[:proxy_port].to_i
      proxy_port = nil if proxy_port == 0
      proxy_user =
        options[:feed_object].configurations[:proxy_user] || nil
      proxy_password =
        options[:feed_object].configurations[:proxy_password] || nil

      auth_user =
        options[:feed_object].configurations[:auth_user] || nil
      auth_password =
        options[:feed_object].configurations[:auth_password] || nil
      auth_scheme =
        options[:feed_object].configurations[:auth_scheme] || nil
    end

    if (auth_user &&
        (auth_scheme == nil || auth_scheme.to_s.to_sym == :basic))
      options[:request_headers]["Authorization"] =
        "Basic " + [
          "#{auth_user}:#{auth_password}"
        ].pack('m').delete("\r\n")
    end

    # No need to check for nil
    http = Net::HTTP::Proxy(
      proxy_address, proxy_port, proxy_user, proxy_password).new(
        uri.host, (uri.port or 80))

    # Without this an https URL would be spoken to in plain HTTP on
    # port 443. The respond_to? guard keeps the method usable when SSL
    # support has not been loaded into Net::HTTP.
    if uri.scheme.to_s.downcase == 'https' && http.respond_to?(:use_ssl=)
      http.use_ssl = true
    end

    if options[:feed_object] != nil &&
        options[:feed_object].configurations[:http_timeout] != nil
      http.open_timeout =
        options[:feed_object].configurations[:http_timeout].to_f
    elsif FeedTools.configurations[:http_timeout] != nil
      http.open_timeout = FeedTools.configurations[:http_timeout].to_f
    end
    # A timeout of zero means "no timeout".
    if http.open_timeout != nil && http.open_timeout == 0
      http.open_timeout = nil
    end

    path = uri.path
    # A URL such as "http://example.com" has an empty path; the request
    # line still needs one, so ask for the root.
    path = '/' if path.nil? || path.empty?
    path += ('?' + uri.query) if uri.query

    request_params = [path, options[:request_headers]]
    if http_operation == :post
      options[:form_data] = {} if options[:form_data].blank?
      request_params << options[:form_data]
    end
    Thread.pass
    response = http.send(http_operation, *request_params)
    Thread.pass

    case response
    when Net::HTTPSuccess
      if options[:feed_object] != nil
        # We've reached the final destination, process all previous
        # redirections, and see if we need to update the url.
        options[:response_chain].each do |redirected_response|
          # Jump out as soon as we hit anything that isn't a
          # permanently moved redirection.
          break unless redirected_response.last.code.to_i == 301

          # Reset the cache object or we may get duplicate entries

          # TODO: verify this line is necessary!
          options[:feed_object].cache_object = nil

          options[:feed_object].href =
            redirected_response.last['location']
        end
      end
    when Net::HTTPNotModified
      # Do nothing, we just don't want it processed as a redirection
    when Net::HTTPRedirection
      if response['location'].nil?
        raise FeedAccessError,
          "No location to redirect to supplied for " + response.code
      end
      options[:response_chain] << [url, response]

      redirected_location = response['location']
      redirected_location = FeedTools::UriHelper.resolve_relative_uri(
        redirected_location, [uri.to_s])

      if options[:response_chain].assoc(redirected_location) != nil
        raise FeedAccessError,
          "Redirection loop detected: #{redirected_location}"
      end

      # Honour the :follow_redirects option (only an explicit false
      # disables following), then let the block have the final say.
      follow_redirect = (options[:follow_redirects] != false)
      if follow_redirect && block != nil
        follow_redirect = block.call(redirected_location, response)
      end

      if follow_redirect
        response = FeedTools::RetrievalHelper.http_request(
          http_operation,
          redirected_location,
          options.merge(
            {:redirect_limit => (options[:redirect_limit] - 1)}),
          &block)
      end
    end
  rescue SocketError
    raise FeedAccessError, 'Socket error prevented feed retrieval'
  rescue Timeout::Error, Errno::ETIMEDOUT
    raise FeedAccessError, 'Timeout while attempting to retrieve feed'
  rescue Errno::ENETUNREACH
    raise FeedAccessError, 'Network was unreachable'
  rescue Errno::ECONNRESET
    raise FeedAccessError, 'Connection was reset by peer'
  end

  if response != nil
    # Attach the redirect history to this particular response object.
    class << response
      def response_chain
        return @response_chain
      end
    end
    response.instance_variable_set("@response_chain",
      options[:response_chain])
  end

  return response
end
Makes an HTTP HEAD request and returns the HTTP response. Optionally takes a block that determines whether or not to follow a redirect. The block will be passed the resolved redirect location and the HTTP redirect response as arguments; the redirect is followed only if the block returns a true value.
# File lib/feed_tools/helpers/retrieval_helper.rb, line 266
#
# Makes an HTTP HEAD request and returns the HTTP response by delegating
# to FeedTools::RetrievalHelper.http_request with the :head operation.
# +options+ and the optional block are passed through unchanged.
#
# Defined at module level so that it can be called the same way as the
# sibling helpers http_get and http_post
# (FeedTools::RetrievalHelper.http_head(url)).
def self.http_head(url, options={}, &block)
  FeedTools::RetrievalHelper.http_request(:head, url, options, &block)
end

# Instance-level form of http_head, kept so that code which mixes this
# module in and calls +http_head+ on the including object keeps working.
# Behaves exactly like the module-level method.
def http_head(url, options={}, &block)
  FeedTools::RetrievalHelper.http_request(:head, url, options, &block)
end