#!/usr/local/bin/ruby

# =sqlite3_feed_cache.rb
#
# Implements a custom feed cache store for FeedTools.
# All data is stored in an SQLite3 database.
#
# This is not the most efficient implementation (the database
# connection isn't kept open in between method calls), and at
# times it's even a little rude (we only pretend to react on
# initialize_cache, set_up_correctly?, etc) but at least it's
# threadsafe (which was my reason to implement this in the
# first place).
#
# Requires FeedTools 0.2.23 and the SQLite3 gem.
#
# ==Database Schema
#
# You need to set up the cache database yourself -- it uses the
# same schema as FeedTools' DatabaseFeedCache.
#
# I suggest a simple change though: I'm using a +TIMESTAMP+ for
# the +last_retrieved+ field (FeedTools uses +DATETIME+, but
# this triggered weird timezone conversion issues on my system,
# even when using the default DatabaseFeedCache).
#
#   CREATE TABLE cached_feeds (
#     id              INTEGER PRIMARY KEY NOT NULL,
#     href            VARCHAR(255) DEFAULT NULL,
#     title           VARCHAR(255) DEFAULT NULL,
#     link            VARCHAR(255) DEFAULT NULL,
#     feed_data       TEXT DEFAULT NULL,
#     feed_data_type  VARCHAR(20) DEFAULT NULL,
#     http_headers    TEXT DEFAULT NULL,
#     last_retrieved  TIMESTAMP DEFAULT NULL
#   );
#
# ==Etc...
#
# TODO:
# * try using Mutexes in SQLite3FeedCache#save instead of simply retrying n times.
#
# By Martin Dittus (martin_at_dekstop_dot_de), 2006-03-07
# -- last change: 2006-03-08

# Standard library: provides Time.parse, used by SQLite3FeedCache#last_retrieved=.
require 'time'

require 'rubygems'
# NOTE(review): require_gem is the historic RubyGems activation call; it appears
# to have been removed from later RubyGems releases -- confirm against the
# RubyGems version in use before running on a current Ruby.
require_gem 'sqlite3-ruby'

# =SQLite3FeedCache
#
# This is a transparent replacement for FeedTool's DatabaseFeedCache.
# You set it up once at the start of your script/application, and then
# can safely forget about it.
#
# ==Example
#
#   # set up the cache
#   SQLite3FeedCache.set_db('./db/feeds.db')
#   FeedTools::configurations[:feed_cache] = SQLite3FeedCache
#
#   # start using FeedTools
#   feed = FeedTools::Feed.open('http://dekstop.de/weblog/index.xml')
#
# See sqlite3_feed_cache.rb[link:files/sqlite3_feed_cache_rb.html] for a database schema.
#
class SQLite3FeedCache

  #--
  # SQL
  #++

  # Selects the newest row with a given primary key.
  SQL_FIND_BY_ID = ('select id, href, title, link, feed_data, ' +
    'feed_data_type, http_headers, last_retrieved ' +
    'from cached_feeds where (id=?) order by id desc limit 1').freeze

  # Selects the newest row with a given feed URL.
  SQL_FIND_BY_HREF = ('select id, href, title, link, feed_data, ' +
    'feed_data_type, http_headers, last_retrieved ' +
    'from cached_feeds where (href=?) order by id desc limit 1').freeze

  # Inserts a new row; the id column is assigned by the database.
  SQL_INSERT_FEED = ('insert into cached_feeds (href, title, link, ' +
    'feed_data, feed_data_type, http_headers, last_retrieved) values ' +
    '(?, ?, ?, ?, ?, ?, ?)').freeze

  # Rewrites every column of the row with a given primary key.
  SQL_UPDATE_FEED = ('update cached_feeds set href=?, title=?, link=?, ' +
    'feed_data=?, feed_data_type=?, http_headers=?, last_retrieved=? ' +
    ' where id=?').freeze

  #--
  # Prefs
  #++

  # Number of times to retry writing to a busy database before giving up.
  # Yeah it's a large number.
  @@num_retries = 10

  # Delay (in seconds) before retrying to write to a busy database.
  @@retry_delay = 1.5

  # Path of the SQLite3 database file; +nil+ until ::set_db has been called.
  @@db_file = nil

  #--
  # class methods
  #++

  # Set the number of times to retry writing to a busy database before
  # giving up. This only has an effect on the #save method.
  def self.set_num_retries(num_retries)
    @@num_retries = num_retries
  end

  # Set the delay (in seconds) before retrying to write to a busy database.
  # This only has an effect on the #save method.
  def self.set_retry_delay(retry_delay)
    @@retry_delay = retry_delay
  end

  # Specify the SQLite3 database file used for caching. You need to call
  # this first to set up the cache.
  def self.set_db(filename)
    @@db_file = filename
  end

  # Required by FeedTools.
  #
  # Find a cached feed by its primary key. Returns an SQLite3FeedCache
  # instance, or +nil+ if no feed with this id exists in the cache.
  #
  # This is basically a static factory method using
  # SQLite3FeedCache#load_by_id
  def self.find_by_id(id)
    new.load_by_id(id)
  end

  # Required by FeedTools.
  #
  # Find a cached feed by its URL. Returns an SQLite3FeedCache instance,
  # or +nil+ if no feed with this URL exists in the cache.
  #
  # This is basically a static factory method using
  # SQLite3FeedCache#load_by_href
  def self.find_by_href(href)
    new.load_by_href(href)
  end

  # Required by FeedTools.
  #
  # Currently does nothing.
  #
  # TODO: implement this (and look up what we're supposed to do here: establish
  # a connection? Make sure that the table exists?)
  def self.initialize_cache
  end

  # Required by FeedTools.
  #
  # Currently always returns +true+.
  #
  # TODO: implement this properly. At least check if we _could_ establish a
  # db connection
  def self.connected?
    true
  end

  # Required by FeedTools.
  #
  # Currently always returns +true+.
  #
  # TODO: implement this properly.
  def self.set_up_correctly?
    true
  end

  #--
  # instance methods
  #++

  # Required by FeedTools.
  attr_accessor :id, :href, :title, :link
  # Required by FeedTools.
  attr_accessor :feed_data, :feed_data_type
  # Required by FeedTools.
  attr_accessor :http_headers
  # Required by FeedTools. The writer is defined by hand below because it
  # has to convert Strings to Time objects.
  attr_reader :last_retrieved

  # Creates an empty, unsaved cache entry: every field is +nil+ and
  # #new_record? returns +true+.
  def initialize
    @id = nil
    @href = nil
    @title = nil
    @link = nil
    @feed_data = nil
    @feed_data_type = nil
    @http_headers = nil
    @last_retrieved = nil
    @new_record = true
  end

  # Required by FeedTools.
  #
  # Takes a String or Time object. Strings will get converted to Time
  # internally (and are being treated as if they describe a UTC time).
  # Any other value (including +nil+) is stored unchanged.
  def last_retrieved=(last_retrieved)
    if last_retrieved.is_a?(String)
      # parse string...
      t = Time.parse(last_retrieved)
      # and assume this is UTC
      # (otherwise FeedTools' expire mechanisms will blow up)
      @last_retrieved = Time.utc(t.year, t.month, t.day, t.hour, t.min, t.sec)
    else
      @last_retrieved = last_retrieved
    end
  end

  # Required by FeedTools.
  #
  # Returns +true+ if this object has not been saved yet, i.e. if it
  # is not yet stored in the cache database.
  def new_record?
    @new_record
  end

  # Attempt to load a cached feed by its primary key. Returns +self+ on
  # success, or +nil+ if no feed with this id exists in the cache.
  def load_by_id(id)
    load_first_row(SQL_FIND_BY_ID, id)
  end

  # Attempt to load a cached feed by its URL. Returns +self+ on
  # success, or +nil+ if no feed with this URL exists in the cache.
  def load_by_href(href)
    load_first_row(SQL_FIND_BY_HREF, href)
  end

  # Required by FeedTools.
  #
  # Updates the feed cache, or stores a new feed in the feed cache.
  # If the database is locked: sleeps for +retry_delay+ seconds and
  # then tries again, for a total of +num_retries+ times. When the last
  # attempt fails too, the SQLite3::BusyException is re-raised.
  #
  # Returns +true+ on success.
  def save
    attempts_left = @@num_retries
    begin
      if @id.nil?
        insert_record
      else
        update_record
      end
    rescue SQLite3::BusyException
      attempts_left -= 1
      raise unless attempts_left > 0
      sleep(@@retry_delay)
      retry
    end
    true
  end

  private

  # Opens the configured database, yields the connection to the block and
  # closes it again afterwards, even when the block raises.
  def with_database
    unless @@db_file
      raise 'SQLite3FeedCache.set_db must be called before using the cache'
    end
    db = SQLite3::Database.new(@@db_file)
    begin
      yield db
    ensure
      db.close
    end
  end

  # Runs a single-parameter select statement and copies the returned row
  # (if any) into this object. Returns +self+ if a row was found, else +nil+.
  def load_first_row(sql, key)
    found = false
    with_database do |db|
      db.transaction do
        db.prepare(sql) do |stmt|
          stmt.execute!(key) do |row|
            # Go through the accessors so that last_retrieved gets
            # converted from its stored String form to a Time.
            self.id, self.href, self.title, self.link,
              self.feed_data, self.feed_data_type,
              self.http_headers, self.last_retrieved = row
            @new_record = false
            found = true
          end
        end
      end
    end
    found ? self : nil
  end

  # Stores this object as a new row and adopts the generated primary key.
  def insert_record
    new_id = nil
    with_database do |db|
      db.transaction do
        db.prepare(SQL_INSERT_FEED) do |stmt|
          stmt.execute!(@href, @title, @link,
            @feed_data, @feed_data_type, @http_headers,
            last_retrieved_for_db)
          new_id = db.last_insert_row_id
        end
      end
    end
    # Only mark the record as stored once the transaction has completed;
    # otherwise a failed attempt followed by a retry would run an UPDATE
    # against an id that was never committed.
    @id = new_id
    @new_record = false
  end

  # Overwrites the existing row identified by @id with the current values.
  def update_record
    with_database do |db|
      db.transaction do
        db.prepare(SQL_UPDATE_FEED) do |stmt|
          stmt.execute!(@href, @title, @link,
            @feed_data, @feed_data_type, @http_headers,
            last_retrieved_for_db, @id)
          # this object has been instantiated from cache, so
          # @new_record never was true.
        end
      end
    end
  end

  # Formats last_retrieved for storage, or returns +nil+ if it is unset.
  # The value is written in UTC because #last_retrieved= interprets stored
  # strings as UTC when they are read back.
  def last_retrieved_for_db
    stamp = @last_retrieved
    return nil if stamp.nil?
    stamp = stamp.getutc if stamp.respond_to?(:getutc)
    stamp.strftime('%Y-%m-%d %H:%M:%S')
  end

end