#!/usr/bin/env ruby

###
# parse_lumpley.rb
#
# A script to read Vincent Baker's weblog and make an RSS feed from it.
#
###

require 'net/http'
require 'cgi'
require 'time'
require 'rexml/document'
include REXML

# A single weblog entry: its anchor id, its raw HTML body and its date.
class Post
  attr_reader :id, :body, :date

  # @param id [String] identifier appended to the feed link to address the post
  # @param body [String] HTML text of the post
  # @param date [Time, nil] date of the post; nil when no date line was seen
  def initialize(id, body, date)
    @id = id
    @body = body
    @date = date
  end
end

# Collects posts and serializes them as an RSS 2.0 document.
class Feed
  # Namespace that has to be declared for the <content:encoded> element.
  CONTENT_NS = 'http://purl.org/rss/1.0/modules/content/'

  # @param title [String] channel title
  # @param link [String] URL of the weblog page; post ids are appended as "#id"
  # @param copyright [String] channel copyright notice
  # @param generator [String] name of the generating program
  # @param comment_link [String] URL prefix to which a post id is appended
  def initialize(title, link, copyright, generator, comment_link)
    @posts = []
    @title = title
    @link = link
    @copyright = copyright
    @generator = generator
    @pub_date = Time.now
    @comment_link = comment_link
  end

  # Appends a post to the feed.
  #
  # @return [Post] the post that was stored
  def add(id, body, date)
    post = Post.new(id, body, date)
    @posts << post
    post
  end

  # Serializes the feed.
  #
  # @return [String] the RSS document as XML text
  def to_s
    xml = Document.new
    # The document needs an explicit root element; <channel> cannot be
    # attached to a document that has none.
    rss = xml.add_element('rss', 'version' => '2.0', 'xmlns:content' => CONTENT_NS)
    channel = rss.add_element('channel')
    channel.add_element('title').add_text(@title)
    channel.add_element('link').add_text(@link)
    channel.add_element('copyright').add_text(@copyright)
    channel.add_element('generator').add_text(@generator)
    channel.add_element('pubDate').add_text(rss_date(@pub_date))

    @posts.each { |post| append_item(channel, post) }

    xml.to_s
  end

  private

  # Adds one <item> describing +post+ to +channel+.
  def append_item(channel, post)
    permalink = "#{@link}##{post.id}"
    item = channel.add_element('item')
    # An empty <pubDate> is not a valid date, so undated posts get none.
    item.add_element('pubDate').add_text(rss_date(post.date)) if post.date
    item.add_element('link').add_text(permalink)
    item.add_element('guid').add_text(permalink)
    # RSS 2.0 names this element "comments" (plural).
    item.add_element('comments').add_text("#{@comment_link}#{post.id}")
    # Plain-text teaser: markup removed, cut to the first 151 characters.
    teaser = post.body.gsub(%r{<[^>]+>}, '')[0..150]
    item.add_element('description').add_text("#{teaser}...")
    item.add_element('content:encoded').add_text(post.body)
  end

  # Formats a date the way RSS 2.0 expects (RFC 822 style); anything that is
  # not a Time-like object falls back to its plain string form.
  def rss_date(date)
    date.respond_to?(:rfc2822) ? date.rfc2822 : date.to_s
  end
end

# NOTE(review): the text of the patterns below looks incomplete in this copy
# of the file -- it appears that HTML markup was stripped out of the regular
# expression literals (and out of the string that used to seed the XML
# document). They are reproduced here exactly as found and MUST be restored
# from the page's real markup before the script can produce any posts:
# POST_ANCHOR has to capture the post id in group 1, and the two
# newline-only patterns presumably contained paragraph markup.
SECTION_START = //
SECTION_END = //
POST_ANCHOR = //
POST_END = //
PARAGRAPH_END = %r{</p>\s+$}
PARAGRAPH_START = /^\n\n/
DATE_LINE = /^\n\n(\d+)-(\d+)-(\d+)\n/

response = Net::HTTP.get_response('www.lumpley.com', '/opine.html')

feed = Feed.new('Anyway - Lumpley\'s Weblog',
                'http://www.lumpley.com/opine.html',
                'Copyright 2005 Vincent Baker',
                'lumpley_rss.rb',
                'http://www.lumpley.com/anycomment.php?entry=')

post_num = nil
post_text = +''
post_date = nil
in_section = false
in_paragraph = false

# String#each no longer exists; each_line is the line iterator.
response.body.each_line do |line|
  # Only lines from SECTION_START through SECTION_END (inclusive) are parsed.
  in_section ||= line.match?(SECTION_START)
  next unless in_section

  if (anchor = POST_ANCHOR.match(line))
    # A new post begins: remember its id and reset the accumulators.
    post_num = anchor[1]
    post_text = +''
    post_date = nil
  elsif line.match?(POST_END)
    # Ignore an end marker that does not close an open post.
    feed.add(post_num, post_text, post_date) if post_num
    post_num = nil
  elsif post_num
    if line.match?(PARAGRAPH_END)
      in_paragraph = false
    elsif line.match?(PARAGRAPH_START)
      in_paragraph = true
      if (date = DATE_LINE.match(line))
        # Captures are month, day and a two-digit year counted from 2000.
        post_date = Time.utc(2000 + date[3].to_i, date[1].to_i, date[2].to_i)
      end
    elsif in_paragraph
      post_text << line
    end
  end

  in_section = false if line.match?(SECTION_END)
end

# NOTE(review): a full status line is written before the headers; confirm the
# hosting web server expects that from this script.
puts "HTTP/1.0 200 OK\r\n"
puts "Content-type: text/xml\r\n\r\n"
puts feed.to_s