#!/usr/bin/env ruby
###
# parse_lumpley.rb
#
# A script to read Vincent Baker's weblog and make an RSS feed from it.
#
###
require 'net/http'
require 'cgi'
require 'rexml/document'
include REXML
# One weblog entry as collected by the scraper: an identifier, the entry
# body and the entry date. The three values are stored exactly as given;
# nothing is validated, copied or converted.
class Post
  # Identifier handed to the constructor.
  attr_reader :id
  # Body handed to the constructor.
  attr_reader :body
  # Date handed to the constructor.
  attr_reader :date

  # Builds a post from its three fields.
  def initialize(id, body, date)
    @id, @body, @date = id, body, date
  end
end
# Accumulates weblog posts and renders them as an RSS 2.0 document.
#
# Posts are stored in insertion order and emitted as one <item> each.
class Feed
  # Namespace URI of the RSS "content" module; it has to be declared on the
  # root element because every item carries a <content:encoded> child.
  CONTENT_NAMESPACE = 'http://purl.org/rss/1.0/modules/content/'.freeze
  # RSS 2.0 requires RFC 822 dates; this strftime layout produces them.
  RFC822_FORMAT = '%a, %d %b %Y %H:%M:%S %z'.freeze

  # title        - channel title
  # link         - URL of the weblog page; post ids are appended after a '#'
  # copyright    - channel copyright line
  # generator    - name written to the channel's <generator>
  # comment_link - URL prefix; a post id is appended to form the comments URL
  def initialize(title, link, copyright, generator, comment_link)
    @posts = []
    @title = title
    @link = link
    @copyright = copyright
    @generator = generator
    @pubDate = Time.now
    @comment_link = comment_link
  end

  # Appends a post built from +id+, +body+ and +date+ and returns it.
  def add(id, body, date)
    post = Post.new(id, body, date)
    @posts << post
    post
  end

  # Renders the feed and returns the XML as a String.
  def to_s
    xml = REXML::Document.new
    # A freshly created document has no root element, so the <rss> root is
    # created explicitly before anything is attached to it.
    rss = xml.add_element('rss', 'version' => '2.0')
    rss.add_namespace('content', CONTENT_NAMESPACE)
    channel = rss.add_element('channel')
    add_text_element(channel, 'title', @title)
    add_text_element(channel, 'link', @link)
    add_text_element(channel, 'copyright', @copyright)
    add_text_element(channel, 'generator', @generator)
    add_text_element(channel, 'pubDate', format_date(@pubDate))
    @posts.each { |post| channel.add_element(item_for(post)) }
    xml.to_s
  end

  private

  # Builds the <item> element describing one post.
  def item_for(post)
    permalink = [@link, post.id].join('#')
    item = REXML::Element.new('item')
    # pubDate is optional for items; leave it out rather than emit it empty.
    add_text_element(item, 'pubDate', format_date(post.date)) unless post.date.nil?
    add_text_element(item, 'link', permalink)
    add_text_element(item, 'guid', permalink)
    # RSS 2.0 names this element <comments>, not <comment>.
    add_text_element(item, 'comments', "#{@comment_link}#{post.id}")
    # Plain-text teaser: tags removed, cut to 151 characters, then "...".
    add_text_element(item, 'description', post.body.gsub(%r{<[^>]+>}, '')[0..150] + "...")
    add_text_element(item, 'content:encoded', post.body)
    item
  end

  # Adds a child element called +name+ holding +text+ to +parent+.
  def add_text_element(parent, name, text)
    parent.add_element(name).add_text(text)
  end

  # Formats +date+ per RFC 822; values without strftime fall back to to_s.
  def format_date(date)
    date.respond_to?(:strftime) ? date.strftime(RFC822_FORMAT) : date.to_s
  end
end
# Fetch the weblog page, cut it into posts line by line, and print the
# resulting feed preceded by HTTP header lines.
response = Net::HTTP.get_response('www.lumpley.com', '/opine.html')
feed = Feed.new('Anyway - Lumpley\'s Weblog', 'http://www.lumpley.com/opine.html', 'Copyright 2005 Vincent Baker', 'lumpley_rss.rb', 'http://www.lumpley.com/anycomment.php?entry=')
post_num = nil   # id of the post being collected; nil while outside a post
post_text = ''   # body lines gathered for the current post
post_date = nil  # date parsed for the current post, if any
start = false    # true while lines are being appended to post_text
# NOTE(review): the empty // patterns and the patterns that span several
# lines below look like they lost markup at some point. They are kept
# exactly as found - confirm them against the page's real post delimiters.
#
# String#each was removed in Ruby 1.9; each_line is the line iterator.
response.body.each_line do |line|
  if line =~ // .. line =~ //
    if line =~ //
      # A new post begins: remember its id and reset the per-post state.
      post_num = $1
      post_text = ''
      post_date = nil
    elsif line =~ //
      # Progress output goes to stderr so it cannot land ahead of the HTTP
      # header lines written to stdout at the end of the script.
      $stderr.print post_date
      feed.add(post_num, post_text, post_date)
      post_num = nil
    elsif !(post_num.nil?)
      if line =~ /<\/p>\s+$/
        start = false
      # The continuation lines of the two patterns below stay at column 0:
      # leading whitespace there would become part of the pattern.
      elsif line =~ /^
/
        start = true
        if line =~ %r{^
(\d+)-(\d+)-(\d+)
}
          # Captures are read as month, day and two-digit year.
          post_date = Time.utc(2000 + $3.to_i, $1.to_i, $2.to_i)
        end
      elsif start == true
        post_text += line
      end
    end
  end
end
puts "HTTP/1.0 200 OK\r\n"
puts "Content-type: text/xml\r\n\r\n"
puts feed.to_s