#!/usr/local/bin/ruby
# mailfeed.rb
# reads a pop3 account and generates an RSS 2.0 feed from its messages.
#
# any feedback appreciated.
#
# by Martin Dittus, martin@dekstop.de, 2005-11-24
# last change: 2005-12-09

# =========
# = ABOUT =
# =========
#
# - the script downloads and displays the most recent mails in your mail account
#   (up to @feedprefs[:max_entries]) every time you request the generated feed.
#   there is no caching of content. you need to manually delete old mails to speed
#   up the script (it gets especially slow when you receive large attachments,
#   images etc.)
#
# - multipart messages (e.g., HTML mails and mails with attachments) are handled as follows:
#   - all parts with content-type=text/plain get chained together sequentially as one message
#   - all other content types (text/html, image/jpeg, ...) are ignored
#
# - this doesn't yet support ssl access (ssl support will only be introduced to
#   net/pop with ruby 1.9), which means GMail is not supported as mail storage.
#
# ===========
# = INSTALL =
# ===========
#
# - make sure your webserver has the RMail gem
# - edit settings in the "prefs" section below. at minimum you need to edit the @mailprefs
#   hash to reflect your email account (server, username, password). the @feedprefs hash
#   contains strings that are used to create the actual RSS feed; adjust to your liking.
# - copy the file to your webserver, make it executable (some webservers require a .cgi
#   filename suffix)
# - take proper security precautions if you don't want the world to see this feed -- e.g.,
#   instruct your webserver to require HTTP authentication to access this feed.
# - done.
#
# ========
# = TODO =
# ========
#
# - include support for "smart" history inclusion (but make sure to show all new mail
#   since the last access -- i.e., include support for conditional HTTP GET/HEAD requests)
# - allow HTML mails as an option (but take proper precautions)
# - don't download binary attachments of multipart mails
# - properly decode mail header strings (this is missing in RMail) -- there currently
#   is a naive workaround for the "Subject:" header using rfc2047.rb, but that's not
#   very elegant (and hence commented out for now)
# - use a mailto: - link as item URL
# - handle empty subjects (they are currently emitted as an empty <title>)
# - catch exceptions and exit gracefully
#

# NOTE(review): 'iconv' and `require_gem` belong to the Ruby 1.8 era this script was
# written for; newer Ruby / RubyGems releases may no longer provide them -- confirm
# the target runtime before deploying.
require 'iconv'
require 'time'
require 'net/pop'
require "rexml/document"
#require 'rfc2047'

require 'rubygems'
require_gem 'rmail'

# =========
# = prefs =
# =========

@prefs = {
  :default_mail_charset => 'iso-8859-15',
  :output_charset => 'utf-8'
}

# edit these to match your mail account
@mailprefs = {
  :server => 'mail.yourdomain.com',
  :port => 110,
  :uname => 'your_username',
  :pwd => 'your_password'
}

# edit these to change the feed properties
# NOTE(review): :guid_prefix ends in a space, which ends up inside every <guid> --
# confirm whether that is intended.
@feedprefs = {
  :title => 'mailfeed.rb',
  :link => 'http://dekstop.de/mailfeed/',
  :description => 'RSS feed generated from an email account.',
  :language => 'en-us',
  :contact => 'http://dekstop.de/contact/',
  :guid_prefix => 'http://dekstop.de/mailfeed/ ',
  :max_entries => 20
}

# RSS 2.0 template
# The main section looks up 'rss/channel' in this document, so the skeleton must
# contain both elements.
# NOTE(review): the element names below were reconstructed from the RSS 2.0 channel
# vocabulary (the markup was missing from the copy under review) -- confirm them
# against the original template. Values are interpolated verbatim, so they must not
# contain '<' or '&'.
rssheader = <<-RSS
<rss version="2.0">
  <channel>
    <title>#{ @feedprefs[:title] }</title>
    <link>#{ @feedprefs[:link] }</link>
    <description>#{ @feedprefs[:description] }</description>
    <language>#{ @feedprefs[:language] }</language>
    <docs>http://blogs.law.harvard.edu/tech/rss</docs>
    <generator>MicroLink 5.6 (NewsletterFeed)</generator>
    <managingEditor>#{ @feedprefs[:contact] }</managingEditor>
    <webMaster>#{ @feedprefs[:contact] }</webMaster>
  </channel>
</rss>
RSS

# ========
# = subs =
# ========

# attempts to determine the charset for a given "Content-type"-header value.
#
# content_type_value -- the raw header value (may be nil)
# default_charset    -- returned when no usable charset parameter is present
#
# returns the charset name found in the header, or default_charset if the header
# is nil, has no charset parameter, or the parameter is empty.
def get_charset(content_type_value, default_charset)
  if content_type_value
    # parameter names are matched case-insensitively ("Charset=" is accepted too)
    match = content_type_value.match(/charset="?([^"\;]*)"?\;?/i)
    found = match && match[1].strip
    return found if found && !found.empty?
  end
  default_charset
end

# basic HTML markup -- escapes HTML-special characters, then substitutes newlines
# for HTML-linebreaks and blank lines for paragraph boundaries.
#
# text -- plain-text message body; it is not modified
#
# returns a new string consisting of one or more <p>...</p> blocks.
def html_markup(text)
  # '&' has to be escaped first, otherwise the entities produced below would be
  # escaped a second time
  escaped = text.gsub(/&/, '&amp;').gsub(/</, '&lt;').gsub(/>/, '&gt;')
  paragraphs = escaped.split(/\n\n/).map { |paragraph|
    paragraph.gsub(/\n/, "<br />\n")
  }
  '<p>' + paragraphs.join("</p>\n<p>") + '</p>'
end

# creates an xml node element
#
# name  -- element name
# value -- assigned as the element's text
#
# returns the new REXML::Element.
def create_node(name, value)
  node = REXML::Element.new(name)
  node.text = value
  node
end

# ========
# = main =
# ========

# CGI response header; the string already ends in a blank line, so puts adds nothing
puts "Content-type: text/xml; charset=#{ @prefs[:output_charset] }\n\n"

rssdoc = REXML::Document.new(rssheader)
rsschannel = rssdoc.elements['rss/channel']

# insert pubDate and lastBuildDate elements (sampled once so both carry the same value)
build_time = Time.now.rfc2822
rsschannel << create_node('pubDate', build_time)
rsschannel << create_node('lastBuildDate', build_time)

# login to pop server
pop = Net::POP3.new(@mailprefs[:server], @mailprefs[:port])
pop.start(@mailprefs[:uname], @mailprefs[:pwd]) { |session|
  # reverse the server's listing and keep at most :max_entries mails from the front
  recent_mails = session.mails.reverse.first(@feedprefs[:max_entries])

  recent_mails.each do |mailitem|
    uid = @feedprefs[:guid_prefix] + mailitem.unique_id

    # extract header, body
    # fetch the message exactly once; RMail chokes when confronted with \r
    # characters, so CRLF line endings are normalised before parsing
    raw = mailitem.pop.gsub(/\r?\n/, "\n")
    mail = RMail::Parser.read(raw)

    # prepare charset conversions
    charset = get_charset(mail.header['Content-Type'], @prefs[:default_mail_charset])
    conv = Iconv.new(@prefs[:output_charset], charset)

    # parse header
    # presumably `from.first` and `date` are nil when the corresponding header is
    # missing or unparsable -- the guards below keep one odd mail from aborting
    # the whole feed
    sender = mail.header.from.first
    from = sender ? conv.iconv(sender.format) : ''
    subject = conv.iconv(mail.header.subject.to_s)
    #subject = Rfc2047.decode_to(@prefs[:output_charset], mail.header.subject)
    sent_at = mail.header.date

    # decode body
    message_str = nil
    if mail.multipart?
      message_str = "(This is a multipart message -- some parts may not be displayed.)\n\n"
      mail.body.each { |part|
        next unless part.header.content_type == 'text/plain'
        # each part may declare its own charset; fall back to the message-level one
        part_charset = get_charset(part.header['Content-Type'], charset)
        message_str << Iconv.conv(@prefs[:output_charset], part_charset, part.decode) << "\n\n"
      }
    else
      message_str = conv.iconv(mail.decode)
    end
    message = REXML::CData.new(html_markup(message_str))

    # create rss item
    rssitem = REXML::Element.new('item')
    rssitem << create_node('title', subject)
    rssitem << create_node('link', @feedprefs[:link])
    rssitem << create_node('description', message)
    rssitem << create_node('author', from) unless from.empty?
    rssitem << create_node('pubDate', sent_at.rfc2822) if sent_at
    rssitem << create_node('guid', uid)

    # insert item to feed
    rsschannel << rssitem
  end
}

xml = ""
rssdoc.write(xml, 0) # auto-indent
puts xml