#! ruby -Ku
# mkrexml.rb  version 0.8  (coding: utf-8)  2010/10/27
# Copyright (C) T. Yoshiizumi, 2009-2010 All rights reserved.
#
# Usage:
#   $ ruby mkrexml.rb input.xml        (this will create input_xml.rb)
#   $ ruby input_xml.rb > output.xml   (which is about the same as input.xml)
# The encoding of output.xml shall be utf-8.

require "rexml/document"

# Markers that k_dump places around every run of multi-byte characters so
# that k_undump can find those runs again in the finished script text.
Mkrexml_DS = '_LI842zWB_'
Mkrexml_DE = '_BFD4WGnH_'

# Dump a string as a Ruby string literal, marking multi-byte characters.
#
# Each run of non-ASCII characters is wrapped in Mkrexml_DS / Mkrexml_DE
# before String#dump escapes it; k_undump later turns the escaped run back
# into the raw characters.
#
# s       - the String to dump.
# Returns the dumped String (including the surrounding double quotes).
def k_dump(s)
  marked = s.gsub(/[^\x00-\x7f]+/) { |run| "#{Mkrexml_DS}#{run}#{Mkrexml_DE}" }
  marked.dump
end

# Recover the multi-byte characters in a string produced with k_dump.
#
# Every "<DS>escaped<DE>" section is replaced by the unescaped characters;
# everything outside the markers is left untouched.
#
# s       - script text containing k_dump output.
# Returns a new String with the marked sections decoded.
def k_undump(s)
  s.gsub(/#{Mkrexml_DS}(.+?)#{Mkrexml_DE}/) do
    # NOTE(review): eval of text that originates from the input XML.  The
    # fragment has been through String#dump (quotes, backslashes and "#"
    # interpolation starters are escaped), but this still deserves a
    # non-eval decoder if the tool is ever run on untrusted documents.
    eval('"' + Regexp.last_match(1) + '"')
  end
end

# Convert an element name into a Ruby instance-variable name.
#
# Within each run of ASCII letters, CamelCase boundaries become
# underscores and everything is lower-cased ("fooBar" -> "foo_bar",
# "XMLParser" -> "xml_parser").  Any character that is not an ASCII letter,
# digit or underscore becomes "_", and "@" is prepended.
#
# str     - the element name (e.g. the REXML expanded name).
# Returns the variable name as a String, e.g. "@ns_item" for "ns:item".
def var_name(str)
  snake = str.gsub(/[A-Za-z]+/) do |word|
    # "Xy" -> "_xy": an upper-case letter that starts a capitalised word.
    word = word.gsub(/[A-Z][a-z]/) { |pair| "_" + pair.downcase }
    # Remaining upper-case runs (acronyms) -> "_run".
    word = word.gsub(/[A-Z]+/) { |caps| "_" + caps.downcase }
    word.gsub(/__+/, "_").sub(/\A_/, "")
  end
  "@" + snake.gsub(/[^0-9A-Za-z_]/, "_")
end

# Variable name for +element+ that no ancestor of it is using.
#
# var_name alone maps nested elements of the same name (<a><a/><b/></a>)
# to the same variable, so the inner assignment would overwrite the outer
# element while it is still needed as a parent.  Names are therefore
# assigned along the chain from the outermost element down, and a numeric
# suffix ("_2", "_3", ...) is appended whenever the plain name is already
# held by an ancestor.  Elements without such a clash keep var_name's result.
#
# element - a REXML::Element.
# Returns the variable name as a String.
def element_var_name(element)
  chain = []
  node = element
  # Walk up to, but not including, the document (or a nil parent).
  while node.is_a?(REXML::Element) && !node.is_a?(REXML::Document)
    chain.unshift(node)
    node = node.parent
  end

  taken = []
  chain.each do |el|
    base = var_name(el.expanded_name)
    name = base
    serial = 1
    while taken.include?(name)
      serial += 1
      name = "#{base}_#{serial}"
    end
    taken << name
  end
  taken.last
end

# Make the script for one element and, recursively, for its child elements.
#
# element - the REXML::Element to emit.
# parent  - the parent REXML::Element, or a String holding the variable
#           name to attach to when +element+ is the root.
# Returns the generated Ruby source as a String.
#
# Only the value returned by Element#text is carried over as text; in REXML
# that is the first text child of the element.
def seek_element(element, parent)
  parent_name = parent.is_a?(String) ? parent : element_var_name(parent)
  element_name = element_var_name(element)

  pairs = []
  element.attributes.each do |key, val|
    pairs << "#{k_dump(key)}=>#{k_dump(val)}"
  end

  # Build the whole add_element call as ONE expression.  In SOURCE the
  # attribute form was split before a leading "+", which Ruby parses as a
  # separate statement, so the call was silently truncated.
  stmt = "#{element_name} = #{parent_name}.add_element(#{k_dump(element.expanded_name)}"
  stmt += ", {#{pairs.join(",\n\t")}}" unless pairs.empty?
  stmt += ")"

  text_str = element.has_text? ? element.text.to_s : ""
  unless text_str.empty?
    if text_str =~ /\A\s+\z/
      # Whitespace-only text is chained onto the add_element call.
      stmt += ".add_text(#{k_dump(text_str)})"
    else
      stmt += "\n#{element_name}.add_text(#{k_dump(text_str)})"
    end
  end

  parts = [stmt + "\n\n"]
  element.elements.each { |c_element| parts << seek_element(c_element, element) }
  parts.join
end

# Path of the script generated for +infile+: dots in the file name become
# underscores and ".rb" is appended ("input.xml" -> "input_xml.rb").
# Directory components are left untouched so "./" or "../" prefixes and
# dotted directory names still point at the same directory.
def output_script_path(infile)
  dir, base = File.split(infile)
  script = base.tr(".", "_") + ".rb"
  dir == "." ? script : File.join(dir, script)
end

# Build the text of the Ruby script that recreates the XML in +src+.
#
# src     - the XML source text.
# infile  - name of the input file (used only in the skipped-node report).
# outrb   - path of the script being generated (its basename goes into the
#           header comment).
# Returns [script, skipped]: the script text, and a report (possibly empty)
# of document-level nodes after the root element that are not reproduced.
def build_script(src, infile, outrb)
  doc_name = "doc" # name of REXML::Document in the outrb
  doc = REXML::Document.new(src)
  doc.xml_decl.encoding = "UTF-8"

  # NOTE(review): the heredoc opener and its first line(s) were lost in
  # SOURCE ("rb_str = < #{...}"); the "#! ruby -Ku" line and the comment
  # marker below are reconstructed -- confirm against the upstream file.
  rb_str = <<EOS
#! ruby -Ku
# #{File.basename(outrb)} at #{Time.now.strftime("%Y/%m/%d %H:%M")}
require "rexml/document"

EOS

  before_str = ""
  after_str = ""
  element_str = ""
  other_str = ""
  doc.each do |e|
    if e.instance_of?(REXML::Element)
      element_str += seek_element(e, doc_name)
    elsif element_str == ""
      # Everything ahead of the root element (declaration, doctype, ...)
      # is passed verbatim to REXML::Document.new in the generated script.
      before_str += e.to_s
    elsif e.instance_of?(REXML::Text)
      after_str += e.to_s
    else
      other_str += sprintf("- %s: <%s>, %s\n", infile, e.class.to_s, e.to_s)
    end
  end

  rb_str += "#{doc_name} = REXML::Document.new(#{k_dump(before_str)})\n\n"
  rb_str += element_str
  rb_str += "#{doc_name}.add_text(#{k_dump(after_str)})\n"
  rb_str += "print #{doc_name}\n"

  [k_undump(rb_str), other_str]
end

## main
ARGV.each do |infile| # resource xml file
  outrb = output_script_path(infile)
  src = File.open(infile) { |fp| fp.read }
  script, skipped = build_script(src, infile, outrb)
  File.open(outrb, "w") { |ff| ff.write(script) }
  unless skipped.empty?
    STDERR.puts "** missing the infomation below\n#{skipped}"
  end
end