#!/usr/bin/env ruby
#
# wordpress_to_hobix.rb
# version 0.3
# friends, romans, wordpress, lend me your posts
# (c) 2005 bitserf of http://blog.xeraph.org
#
# modified and improved by clinton r. nixon
#
# CHANGELOG
# 0.3
# - now gets comments! May not work well! Who knows!
#
# 0.2
# - html gets textiled by angry monkeys only if $insane is true
# - posts are put into their old categories. WordPress allows multiple categories
#   and we don't, so I use the smallest numbered category. anyway.
# - we use the hobix entry maker now instead of string-cramming up some yaml action, so
#   that's good

require 'fileutils'
require 'mysql'
require 'optparse'
require 'hobix'
require 'hobix/entry'
require 'parsedate'
require 'yaml'

# global defaults (the mysql and author ones can be overridden on the command line)
$hobix_author = 'you'
$db_host = 'localhost'
$db_user = 'you'
$db_passwd = '-'
$db_db = 'yourdb'
$insane = false     # when true, post HTML is run through the HTML-to-textile converter
$categories = true  # when true, each post is written into a per-category subdirectory
$comments = true    # when true, a .comments file is written next to each post

class String
  # Returns the string with its first character upcased.
  # An empty string is returned unchanged.
  def ucfirst
    return self if empty?
    self[0..0].upcase + self[1..-1]
  end
end

# Wraps one row of the posts query (a hash keyed by column name) and turns it
# into a Hobix entry written out as a YAML file.
class PersuaderTron
  # row:: hash with 'post_title', 'post_date', 'post_excerpt' and
  #       'post_content' keys.
  def initialize(row)
    @row = row
  end

  # Author recorded on every exported entry (the global default/override).
  def hobix_author
    $hobix_author
  end

  # File-name-safe identifier built from the post title: the first word is
  # kept as is, every following word gets its first letter upcased, and
  # anything that is not an ASCII letter or digit is dropped.
  # Returns '' when the title is missing or blank instead of raising.
  def hobix_title_name
    words = @row['post_title'].to_s.split
    return '' if words.empty?
    camel = words[0] + words[1..-1].map { |w| w.ucfirst }.join('')
    camel.gsub(/[^a-zA-Z0-9]+/, '')
  end

  # The post title, verbatim.
  def hobix_title
    @row['post_title']
  end

  # Creation time of the post as a Time built from the 'post_date' column.
  def hobix_created
    date = @row['post_date']
    res = ParseDate.parsedate("#{date} +00:00")
    Time.local(*res)
  end

  # Converted post excerpt, or nil when the post has none.
  def hobix_summary
    hobixify(@row['post_excerpt'])
  end

  # Converted post body, or nil when it is empty.
  def hobix_content
    hobixify(@row['post_content'])
  end

  # Normalises a chunk of post text: strips carriage returns, optionally
  # converts HTML to textile (only when $insane is set) and prefixes every
  # line with a space. Returns nil for nil or empty input.
  # The caller's string is left untouched, so calling this twice on the same
  # row value gives the same answer.
  def hobixify(content)
    return nil unless content && !content.empty?
    text = content.gsub(/\r/, '') # wordpress likes this crap!
    text = butt_nasty_html_to_textile(text) if $insane
    text.split(/\n/).map { |line| " #{line}" }.join("\n")
  end

  # Cheap and nasty HTML-to-textile: blockquotes, lists and anything
  # complex is not handled.
  #
  # Returns a new string; +content+ itself is not modified. The converted
  # text is always returned, even when no tag matched.
  #
  # NOTE(review): the opening tags in these patterns were missing from the
  # copy of the script this was restored from and have been reconstructed
  # from the matching closing tags. The <pre><code> wrapper and the heading
  # pattern are the least certain -- confirm against an original copy.
  def butt_nasty_html_to_textile(content)
    text = content.dup
    text.gsub!(/<b>(.*?)<\/b>/m, '*\1*')
    text.gsub!(/<strong>(.*?)<\/strong>/m, '*\1*')
    text.gsub!(/<em>(.*?)<\/em>/m, '_\1_')
    text.gsub!(/<i>(.*?)<\/i>/m, '_\1_')
    text.gsub!(/<code>(.*?)<\/code>/m, '@\1@')
    # Block form: handles every <pre> block (not just the first) and keeps
    # backslashes inside the block from being read as backreferences.
    text.gsub!(/<pre>(.*?)<\/pre>/m) do |whole|
      block = $1
      if block =~ /^\s*<code>/
        whole # already wrapped in <code>, leave it alone
      else
        indented = block.split(/\n/).map { |line| "  #{line}" }.join("\n")
        "<pre><code>\n\n#{indented}\n\n</code></pre>"
      end
    end
    text.gsub!(/<tt>(.*?)<\/tt>/m, '@\1@')
    text.gsub!(/<del>(.*?)<\/del>/m, '-\1-')
    text.gsub!(/<ins>(.*?)<\/ins>/m, '+\1+')
    text.gsub!(/<sup>(.*?)<\/sup>/m, '^\1^')
    text.gsub!(/<sub>(.*?)<\/sub>/m, '~\1~')
    text.gsub!(/<p>(.*?)<\/p>/m, '\1')
    text.gsub!(/<h([1-6])>(.*?)<\/h\1>/m, 'h\1. \2')
    # gsub! returns nil when nothing matched, so return the text explicitly.
    text
  end

  # Builds a Hobix::Entry from the row and writes it to
  # OUTDIR/<hobix_title_name>.yaml, creating OUTDIR if needed.
  def coerce(outdir)
    entry = Hobix::Entry.new
    entry.title = hobix_title
    entry.author = hobix_author
    entry.created = hobix_created
    summary = hobix_summary
    entry.summary = summary if summary
    entry.content = hobix_content
    FileUtils.mkdir_p outdir
    yamlfile = File.join(outdir, "#{hobix_title_name}.yaml")
    puts "writing: #{yamlfile}"
    File.open(yamlfile, 'w+') do |f|
      f.write(entry.to_yaml + "\n")
    end
  end
end

# One comment on a post, dumped to YAML with the entry tag below.
class Comment
  # created:: date string from the 'comment_date' column; it is parsed the
  #           same way as post dates.
  def initialize(title, author, created, content)
    @title = title
    @author = author
    res = ParseDate.parsedate("#{created} +00:00")
    @created = Time.local(*res)
    @content = content
  end

  # YAML type tag for the dumped object.
  # NOTE(review): presumably picked up by the YAML emitter in use -- confirm.
  def to_yaml_type
    "!hobix.com,2004/entry"
  end

  # Instance variables to include in the YAML dump, in this order.
  def to_yaml_properties
    [ '@title', '@author', '@created', '@content' ]
  end
end

# lets blow this bitch
parser = OptionParser.new do |o|
  o.banner = "usage: #{$0} [options] OUTPUTDIR"
  o.separator "mysql options:"
  o.on("-h", "--host HOST", "mysql host") { |h| $db_host = h }
  o.on("-u", "--user USER", "mysql username") { |u| $db_user = u }
  o.on("-p", "--password PASSWORD", "mysql password") { |p| $db_passwd = p }
  o.on("-d", "--database DATABASE",
       "mysql database (default: #{$db_db})") { |d| $db_db = d }
  o.separator "hobix options:"
  o.on("-a", "--author AUTHOR",
       "force all posts to have this author (default: #{$hobix_author})") { |a| $hobix_author = a }
  o.on("--help", "show this screen") { puts o; exit 0 }
end

args = ARGV
begin
  parser.parse!(args)
rescue OptionParser::ParseError
  # Unknown option or missing argument: show usage instead of a backtrace.
  $stderr.puts "error: #{$!}"
  $stderr.puts parser
  exit 1
end

outdir = args.shift
if outdir.nil?
  $stderr.puts parser
  exit 1
end

mysql = Mysql.init
begin
  mysql.connect($db_host, $db_user, $db_passwd, $db_db)
rescue Mysql::Error
  $stderr.puts "error: failed to connect to '#{$db_db}' database: #{$!}"
  exit 1
end

# Published posts, oldest first, each joined to a single category.
# NOTE(review): the subquery picks MAX(category_id), i.e. the highest-numbered
# category, while the changelog above says "smallest" -- confirm which is
# intended.
posts_sql = <<-SQL
  SELECT wp_posts.id, wp_posts.post_date, wp_posts.post_title,
         wp_posts.post_excerpt, wp_posts.post_content,
         wp_categories.category_nicename AS category_name
  FROM wp_posts
  JOIN (SELECT wp_post2cat.post_id, MAX(wp_post2cat.category_id) AS cat_id
        FROM wp_post2cat, wp_posts
        WHERE wp_post2cat.post_id = wp_posts.id
        GROUP BY wp_post2cat.post_id) foo
    ON wp_posts.id = foo.post_id
  JOIN wp_categories ON foo.cat_id = wp_categories.cat_id
  WHERE wp_posts.post_status = 'publish'
  ORDER BY wp_posts.post_date
SQL

begin
  mysql.query(posts_sql).each_hash do |row|
    tron = PersuaderTron.new(row)
    dir = outdir
    dir += "/#{row['category_name']}" if $categories
    tron.coerce(dir)

    if $comments
      comments_sql = <<-SQL
        SELECT wp_comments.comment_post_id, wp_posts.post_title,
               wp_comments.comment_author, wp_comments.comment_author_email,
               wp_comments.comment_author_url, wp_comments.comment_date,
               wp_comments.comment_content
        FROM wp_comments
        JOIN wp_posts ON wp_comments.comment_post_id = wp_posts.id
        WHERE wp_comments.comment_post_id = '#{row['id']}'
          AND wp_posts.post_status = 'publish'
        ORDER BY wp_comments.comment_post_id, wp_comments.comment_date
      SQL

      # redo comments as arrays of comments in YAML.
      # use Comment class above
      comments = []
      mysql.query(comments_sql).each_hash do |row2|
        comments.push(Comment.new(row2['post_title'], row2['comment_author'],
                                  row2['comment_date'], row2['comment_content']))
      end

      # Only posts that actually have comments get a .comments file.
      unless comments.empty?
        yamlfile = File.join(dir, tron.hobix_title_name + ".comments")
        puts "writing: #{yamlfile}"
        File.open(yamlfile, 'w+') do |f|
          f.write(comments.to_yaml + "\n")
        end
      end
    end
  end
ensure
  mysql.close
end