#
# Take a file in our .tip LaTeX format and convert it to XML

# Hands out the sequence of unique cross-reference ids
# "xref-000001", "xref-000002", ... used in the generated XML.
class Ref
  def initialize
    @count = "xref-000000"
  end

  # Advance the counter and return the new id.
  #
  # A fresh String is returned on every call, so an id that a caller
  # keeps (or modifies) can neither change when the next id is requested
  # nor corrupt the counter itself.
  def next
    @count = @count.succ
    @count.dup
  end
end
 
# The one shared id generator; handlers obtain ids through $ref.next.
$ref = Ref.new   

# Locations of the book's LaTeX sources: the source directory, the
# directory of code samples, the LaTeX aux file and the abbreviations
# file for the book. The paths are relative.

LATEX       = "../latex"
# Cmd.mapCodeRef builds the file names of code samples under this directory
SAMPLES_DIR = File.join(LATEX, "samples")

# The driver passes AUX_FILE to CrossRef#readFrom (when the file exists)
# and ABBREV_FILE to Cmd.readAbbreviations.
AUX_FILE    = File.join(LATEX, "book.aux")
ABBREV_FILE = File.join(LATEX, "Abbreviations.sty")


# LaTeX doesn't work with 8-bit input, so we're safe using these as placeholders.
# Every constant below is a single character with a code above 127.
# MAGIC and MAGIC1 are short-lived markers (brace matching, the position
# of the command currently being expanded, escaped '$' signs).

MAGIC  = 200.chr                # character that doesn't appear in a document
MAGIC1 = 201.chr                # and another
MAGIC2 = 202.chr
# Stand-ins for literal characters (backslash, percent, braces, ...) that
# would otherwise confuse the parse. Converter#handlePostSpecialCharacters
# turns each one back into the real character or into an XML entity.

BSLASH    = 212.chr
PERCENT   = 213.chr
OBR       = 214.chr
CBR       = 215.chr
SDASH     = 216.chr
AMPERSAND = 217.chr
LESSTHAN  = 218.chr
GREATERTHAN  = 219.chr
TILDA        = 220.chr
UNDERSCORE   = 221.chr
DEGREE       = 222.chr
CLOSESQUARE  = 223.chr

# Inserted after \self when it is not followed by '{', and deleted again
# at the very end of the conversion.
STOPPER      = 224.chr

# Hard-wired for now
#
# Text listing the methods of each mixin module, keyed by module name.
# Cmd.mapMixin copies the text verbatim into the <mixin> element, which
# is why the placeholder constants are used for '<', '>' and '_'.

MIXIN_CONTENTS = {
  "Enumerable" => "
   collect, detect, each_with_index, entries, find, find_all, grep,
   include?, map, max, member?, min, reject, select, sort, to#{UNDERSCORE}a",

  "Comparable" => "
   #{LESSTHAN}, #{LESSTHAN}=, ==, #{GREATERTHAN}=, #{GREATERTHAN}, between?",
   
  "FileTest" => "
   blockdev?, chardev?, directory?, executable?, executable_real?,
   exist?, exists?, file?, grpowned?, owned?, pipe?, readable?,
   readable_real?, setgid?, setuid?, size, size?, socket?, sticky?,
   symlink?, writable?, writable_real?, zero?"
}

# Names of the library classes and modules, stored as hash keys so that
# membership can be tested with a plain lookup (every value is 1).

LIBRARY_MODULES = {}
[
  %w{ Array Bignum Binding Class Continuation Dir Exception FalseClass },
  %w{ File File::Stat Fixnum Float Hash Integer IO MatchData Method },
  %w{ Module NilClass Numeric Object Proc Range Regexp String Struct },
  %w{ Struct::Tms Symbol Thread ThreadGroup Time TrueClass },
  %w{ Comparable Enumerable Errno FileTest GC Kernel Marshal Math },
  %w{ ObjectSpace Process },
].each do |group|
  group.each do |module_name|
    LIBRARY_MODULES.store(module_name, 1)
  end
end

###########################################################################
#
# we contain stuff that maps a function name to text that can be
# used as a cross reference string
#

# Turns a method name, which may contain operator and punctuation
# characters, into a string made only of word characters so that it can
# be used as a cross-reference id.
module TidyName

  # Replacement for every character that may not appear in an id. The
  # second group covers the placeholder characters that stand in for
  # '<', '>', '&', '%' and '_' while a document is being converted.
  MAP = {
    '[' => '_ob',
    ']' => '_cb',
    '?' => '_qm',
    '!' => '_oh',
    ',' => '_cm',
    '%' => '_pc',
    '~' => '_sd',
    '+' => '_pl',
    '-' => '_mi',
    '*' => '_st',
    '/' => '_sl',
    '<' => '_lt',
    '>' => '_gt',
    '=' => '_eq',
    '`' => '_bq',
    '@' => '_at',
    '^' => '_up',
    '&' => '_et',
    '|' => '_ba',

    LESSTHAN     => "_lt",
    # must agree with '>' above; this used to be "_lt", which gave the
    # '<' and '>' methods the same id
    GREATERTHAN  => "_gt",
    AMPERSAND    => "_et",
    PERCENT      => "_pc",
    UNDERSCORE   => "_",
  }

  # Character class matching any single key of MAP
  PATTERN = Regexp.new("[" + Regexp.quote(MAP.keys.join('')) + "]")

  # Return the id for +name+. The argument itself is not modified.
  #
  # Raises a RuntimeError if anything other than word characters is left
  # once all the known substitutions have been made.
  def TidyName.tidy(name)
    oname = name.dup
    oname = '_bq' if name =~ /^\\bq/          # a name starting \bq is the backquote method
    oname.sub!(/\(backquote\)/, '')
    oname.gsub!(/\s+/, '')
    oname.gsub!(/!!/,  '')
    oname.gsub!(/\\sd/, '~')
    oname.gsub!(/\\up/, '^')
    oname.gsub!(/\\_/, '_')
    # unwrap \largesym{...} and drop empty {} pairs; by now braces carry
    # the :nnnn: markers added by Converter#matchBraces
    oname.gsub!(/\\largesym\{:(\d\d\d\d):(.*)\}:\1:/, '\2')
    oname.gsub!(/\{:(\d\d\d\d):\}:\1:/, '')
    oname.gsub!(PATTERN) { MAP[$&] }
    raise "'#{name}': Unhandled character in #{oname.dump}" unless oname =~ /^[\w\d]+$/
    oname
  end

end

###########################################################################
# 
#  This module contains a method for each environment defined in
#  the input document
#

# One handler per LaTeX environment. For \begin{xxx} ... \end{xxx} the
# converter calls Env._xxx(body, *args), where body is the text between
# \begin and \end and args are the arguments that followed \begin{xxx}.
# (\begin{xxx*} is dispatched to Env._xxx_STAR.) Every handler returns
# the text that replaces the whole environment.
module Env

  def Env._alltt(body)
    "<alltt>#{body}</alltt>"
  end

  def Env._begingroup(body)
    body
  end

  # Anything that comes before the first \cfunc or \begin{CFUNC} is kept
  # in front of the table rather than inside it.
  def Env._cdoc(body, title)
    leader = ""
    if body =~ %r{^(.*?)(\\cfunc|\\begin\{:\d\d\d\d:CFUNC)}m
      leader = $1
      body = $2 + $'            # '
    end

    "#{leader}<table>
      <cdoctitle>#{title}</cdoctitle>
      #{body}
     </table>" 
  end

  def Env._center(body)
    "<center>#{body}</center>"
  end

  # Description of one C function. (A stray '"' that used to follow the
  # parameter list inside <cparams> has been removed.)
  def Env._CFUNC(body, returns, name, parms)
    %{<cfunc name="#{name}">
    <creturns>#{returns}</creturns>
        <cparams>#{parms}</cparams>
        <cbody>#{body}</cbody>
      </cfunc>}
  end

  def Env._classmethods(body)
    "<methods type=\"class\">#{body}</methods>\n"
  end

  # The two name arguments are accepted but not used.
  def Env._constants(body, name1, name2)
    doTable(body, "constants")
  end

  # Only the class name and superclass are used; every class gets a
  # fresh cross-reference id.
  def Env._class(body, cname, csuper, cincludes, *rest)
    "<class name=\"#{cname}\" super=\"#{csuper}\" " +
      "type=\"class\" ref=\"#{$ref.next}\">#{body}</class>\n"
  end

  def Env._codefragment(body)
    "<codefragment>#{body}</codefragment>"
  end

  def Env._eqnarray_STAR(body, *args)
    doTable(body)
  end

  def Env._flushleft(body)
    body
  end

  def Env._flushright(body)
    body
  end

  # Prefix every line of the (stripped) body with '#-'.
  def Env._hidden(body)
    body.strip.gsub(/^/, '#-')
  end

  def Env._library(body, name)
    "<library name=\"#{name}\">\n#{body}</library>\n"
  end

  # The ref attribute is the tidied method name (see TidyName.tidy);
  # the third argument is not used.
  def Env._method(body, name, callseq, extra)
    ref = TidyName.tidy(name)
  
    <<-EOM
    <method name=\"#{name}\" ref=\"#{ref}\">
      <callseq>
        #{callseq}
      </callseq>
      <desc>
        #{body}
      </desc>
    </method>
    EOM
  end


  def Env._methods(body)
    "<methods type=\"instance\">#{body}</methods>\n"
  end

  def Env._minipage(body, *args)
    body
  end

  def Env._mixins(body)
    "<mixins>#{body}</mixins>\n"
  end

  # Modules are written as <class type="module">; the list of included
  # modules is not used.
  def Env._module(body, mname, mincludes)
    "<class name=\"#{mname}\" type=\"module\" ref=\"#{$ref.next}\">\n#{body}</class>\n"
  end

  # this cheats. we know that the example in the book has a \penalty
  # between where the columns are
  def Env._multicols(body, colcount)
    if body =~ /(.*?)\\penalty[-\d]+\s*(.*)/m
      %{<table cellpadding="5">
        <tr>
          <td>#$1</td>
          <multicolsep/>
          <td>#$2</td>
        </tr>
        </table>}
    else
      body
    end
  end
  
  def Env._nothtml(body)
    ''
  end

  def Env._privatemethods(body)
    "<methods type=\"private\">#{body}</methods>\n"
  end

  def Env._sidebar(body, title='')
    "<sidebar name=\"#{title}\">\n#{body}\n</sidebar>"
  end

  def Env._raggedleft(body)
    body
  end

  def Env._sloppypar(body)
    body
  end

  # A block of Ruby code. +args+ is a comma-separated option list; each
  # option becomes an attribute of the <code> element. A single letter
  # or digit is collected into the only_print attribute. "rawop" drops
  # the whole block. Any other option raises a RuntimeError.
  def Env._ruby(body, args='')
    opt=""
    onlyPrint = []
    args.split(/,\s*/).each do |arg|
      case arg
      when /^[\d\w]$/
        # diagnostics go to stderr: stdout carries the generated XML
        $stderr.puts "Only: #{arg}"
        onlyPrint << arg
      when "norun"
        opt << ' run="no"'
      when "synerr"
        opt << ' run="synerr"'
      when "comment"
        opt << ' op="comment"'
      when "comment*"
        opt << ' op="comment*"'
      when "nooutput"
        opt << ' op="none"'
      when "showspace"
        opt << ' showspace="yes"'

      when "rawop"              # we can't deal with this, because
        return ''               # the code's generating LaTeX

      else
        raise "Unhandled code option: #{arg.dump}"
      end
    end

    opt << " only_print=\"#{onlyPrint.join(',')}\"" if onlyPrint.size > 0

    "<code#{opt}>#{body}</code>"
  end

  # No options are supported; passing any raises a RuntimeError.
  def Env._syntax(body, *args)
    raise "Unhandled code option: #{args}" if args.size > 0
    "<syntax>#{body}</syntax>"
  end

  def Env._noTableConstants(body, name1, name2)
    "<noTableConstants>#{body}</noTableConstants>"
  end

  def Env._usage(body)
    body
  end

  # No options are supported; passing any raises a RuntimeError.
  def Env._verbatim(body, *args)
    raise "Unhandled code option: #{args}" if args.size > 0
    "<verbatim>#{body}</verbatim>"
  end

  ###
  # Table stuff
  #

  def Env._table(body, *args)
    "<figure type=\"table\">#{body}</figure>\n"
  end

  # The body of a figure is discarded.
  def Env._figure(body, *args)
    "<figure type=\"figure\">Figure not available...</figure>\n"
  end

  # Convert the body of a tabular-like environment into <name> ...
  # </name>. Rows are separated by <br/> and fields by '&'. +body+ is
  # modified in place.
  #
  # If the table contains a \midrule, the rows before it are written as
  # <th> elements and the rows after it as <tr>; otherwise every row is
  # a <tr>. Fields containing \multicol are copied through without a
  # <td> wrapper.
  def Env.doTable(body, name='table')
    row = (body =~ /\\midrule/) ? "th" : "tr"
    
    # turn each rule into a row of its own
    body.gsub!(/\\(top|mid|bottom)rule/) { "<#$1rule/><br/>" }
    
    lines = body.split(%r{<br/>})
    
    res = "<#{name}>\n"
    
    lines.each do |line|
      
      next if line =~ /\A\s*\Z/

      if line =~ %r{<(top|mid|bottom)rule/>}

        # the rule above a heading row is dropped
        next if ($1 == "top") && (row == "th")
        if ($1 == "mid") 
          # a midrule is not output; it switches heading rows <-> body rows
          row = (row == "tr") ? "th" : "tr"
        else 
          res << line
        end
      else 
        res << "<#{row}>\n"
        line.split(/&/).each do |field|
          field.strip!
          if (field =~ /\\multicol/)
            res << field
          else 
            res << "  <td>#{field}</td>\n"
          end
        end
        res << "</#{row}>\n"
      end
    end
    res << "</#{name}>\n"
  end

  def Env._tabularx(body, *args)
    doTable(body)
  end

  def Env._tabularruby(body, *args)
    doTable(body, "rubycode")
  end

  def Env._tabular(body, *args)
    doTable(body)
  end

  # One table row per <br/>-separated line; the tab commands \= and \>
  # separate the cells.
  def Env._tabbing(body, *args)
    lines = body.split %r{<br/>}
    res = "<table>\n"
    lines.each do |line|
      res << "<tr><td>" << line.split(%r{\\[=>#{GREATERTHAN}]}).join("</td><td>") << "</td></tr>"
    end
    res << "</table>"
  end
  
  ####
  # Lists

  def Env._itemize(body)
    doList("ul", "", body)
  end

  def Env._nsitemize(body)
    doList("ul", "", body)
  end

  def Env._enumerate(body)
    doList("ol", "", body)
  end

  # \begin{list} is not supported: always raises a RuntimeError.
  def Env._list(body, *args)
    raise "Unhandled begin{list}"
  end

  def Env._OptList(body, *args)
    body.gsub!(/\\(item|ITEM)\[(.*?)\]/, '<dt>\2</dt><dd/>' )
    "<dl>#{body}</dl>"
  end

  # Each \item[x] closes the previous <dd> and opens a new entry; the
  # close generated for the first item has no partner, so it is removed.
  def Env._RegexpList(body, *args)
    body.gsub!(/\\(item|ITEM)\[(.*?)\]/, '</dd><dt><b>\2</b></dt><dd>' )
    body.sub!(%r{</dd>}, '')
    "<dl>#{body}</dd></dl>"
  end

  # Same scheme as _RegexpList, without the bold term.
  def Env._description(body, *args)
    body.gsub!(/\\(item|ITEM)\[(.*?)\]/, '</dd><dt>\2</dt><dd>' )
    body.sub!(%r{</dd>}, '')
    "<dl>#{body}</dd></dl>"
  end

  # Wrap +text+ (modified in place) in <tag>, turning each \item into a
  # list element. Every \item closes the previous <li> and opens a new
  # one; the first close has nothing to close and is removed again.
  def Env.doList(tag, between, text)
    text.gsub!(/\\(item|ITEM)/) { "</li>#{between}<li>" }
    text.sub!(%r{</li>}, '')
    "<#{tag}>#{text}</li></#{tag}>"
  end

end

###########################################################################
#
#  And this one has all the non-environment commands
#

# Handlers for the non-environment LaTeX commands. CMDTAB maps a command
# name to a triple [prefix, suffix, handler]; Cmd.convert looks the
# command up and builds its replacement text from that triple.
module Cmd

  # Table entry that wraps the result in <tag> ... </tag>. With no
  # handler the wrapped text is the command's arguments.
  def Cmd.xml(tag, func=nil)
    [ "<#{tag.to_s}>",  "</#{tag.to_s}>", func ]
  end

  # Table entry that produces fixed text; by default the command's
  # arguments are discarded.
  def Cmd.txt(text, func=:swallowArgs)
    [ text,  "", func ]
  end

  # Table entry whose output is entirely produced by the handler.
  def Cmd.func(func)
    [ "",  "", func ]
  end

  CMDTAB = {
    # direct XML stuff
    "BOOKTITLE"      => xml("em"),
    "C"              => xml("classname"),
    "caption"        => xml("caption"),
    "CC"             => xml("classconst", "mapName"),
    "CCI"            => xml("classconst", "mapName"),
    "CCM"            => xml("ccm", "mapName"),
    "CCMI"           => xml("ccm", "mapName"),
    "CF"             => xml("tt"),
    "CI"             => xml("classname"),
    "CIM"            => xml("cim", "mapName"),
    "CIMI"           => xml("cim", "mapName"),
    "CMDOPT"         => xml("cmdopt"),
    "CMDOPTI"        => xml("cmdopt"),
    "CONST"          => xml("const"),
    "CONSTi"         => xml("const"),
    "CONSTI"         => xml("const"),
    "E"              => xml("exception"),
    "EI"             => xml("exception"),
    "emph"           => xml("em"),
    "footnote"       => xml("footnote"),
    "hrule"          => xml("hr"),
    "KW"             => xml("kw"),
    "M"              => xml("modulename"),
    "MC"             => xml("moduleconst", "mapName"),
    "MCI"            => xml("moduleconst", "mapName"),
    "METHOD"         => xml("meth"),
    "meth"           => xml("meth"),
    "METHODi"        => xml("meth"),
    "MI"             => xml("modulename"),
    "MIM"            => xml("mim", "mapName"),
    "MIMI"           => xml("mim", "mapName"),
    "MMM"            => xml("mmm", "mapName"),
    "MMMI"           => xml("mmm", "mapName"),
    "nt"             => xml("nt"),
    "obj"            => xml("obj"),
    "opt"            => xml("opt"),
    "optn"           => xml("optn"),
    "optz"           => xml("optz"),
    "relieson"       => xml("relieson"),
    "returns"        => xml("returns"),
    "section"        => xml("section"),
    "section*"       => xml("section"),
    "subclasses"     => xml("subclasses"),
    "subsection"     => xml("subsection"),
    "subsection*"    => xml("subsection"),
    "subsubsection"  => xml("subsubsection"),
    "textbf"         => xml("b"),
    "textsf"         => xml("sansfont"),
    "textsl"         => xml("em"),
    "texttt"         => xml("tt"),
    "VAR"            => xml("var"),
    "VARI"           => xml("var"),
    "underline"      => xml("u"),

    # Other text substitutions
    "bq"             => txt("`"),
    "bs"             => txt(BSLASH),
    "ding"           => txt("Y"),
    "dots"           => txt("..."),
    "enspace"        => txt("&nbsp;"),
    "HAT"            => txt("^"),
    "hookleftarrow"  => txt("-&gt;"),
    "hspace"         => txt("&nbsp;"),
    "infty"          => txt("infinity"),
    "nil"            => txt("<tt>nil</tt>"),
    "nohyph"         => txt(""),
    "par"            => txt("<p/>"),
    "pi"             => txt("PI"),
    "quad"           => txt("&nbsp;"),
    "rightarrow"     => txt("&#187;"),
    "sd"             => txt("~"),
    "synis"          => txt("&lt;-"),
    "textbullet"     => txt("*"),
    "times"          => txt("*"),
    "up"             => txt("^"),
    "y"              => txt("Y"),

    # special stuff in language.tip

    "RO"             => txt("[r/o]"),
    "ROTL"           => txt("[r/o, thread]"),
    "TL"             => txt("[thread]"),
    "PDV"            => func(:pdv),
    "PDC"            => func(:pdv),
    "EG"             => xml("tt"),

    # Special stuff in trouble.tip

    "IP"             => func(:userinput),

    # ext_tk

    "EPS"            => func(:eps),

    # ext_ruby

    "cfunc"          => func(:cfunc),
    "gap"            => txt(" "),

    # lib_socket

    "SCONST"         => xml("tt"),

    # lib_cgi

    "HT"             => func(:HT),
    "HK"             => func(:HK),
    "BLK"            => func(:BLK),
    "NOTE"           => xml("blockquote"),

    # and the special ones
    # ("cfunc" is already listed under ext_ruby above; the identical
    # second entry that used to be here was a duplicate hash key)
    "appendix"       => func(:chapter),
    "block"          => func(:block),
    "blockp"         => func(:block),
    "chapter"        => func(:chapter),
    "chapter*"       => func(:chapter),
    "constant"       => func(:mapConstant),
    "frac"           => func(:mapFrac),
    "largesym"       => func(:mapPassthrough),
    "LogCodeRef"     => func(:mapCodeRef),
    "nohyphen"       => func(:mapPassthrough),
    "MATH"           => func(:mapMath),
    "mbox"           => func(:mapPassthrough),
    "mixin"          => func(:mapMixin),
    "multicolumn"    => func(:mapMultiCols),
    "multiprot"      => func(:mapMultiline),
    "pageref"        => func(:mapPageRef),
    "prompt"         => func(:prompt),
    "protect"        => func(:mapPassthrough),
    "ref"            => func(:mapRef),
    "symbol"         => func(:mapSymbol),
    "textsc"         => func(:mapTextSc),
    "url"            => func(:mapUrl),
    "verb"           => func(:verb),
    "verb*"          => func(:verb),
    "vpageref"       => [ "on page ", "", :mapPageRef],
    "vref"           => func(:mapVRef),
  }

  # These things are all ignored

  %w{
    addcontentsline
    AtIndent
    begingroup
    cite
    codewidth
    eject
    epsfbox
    endgroup
    fontsize
    footnotesize
    ForceIntoContents
    gotcha
    hfill
    ifdim
    index
    indexBackslashChar
    indexClass
    indexClassMethod
    indexClassMethodBold
    indexCmdOpt
    indexConstant
    indexEnvVar
    indexExtApi
    indexMethod
    indexModule
    indexModuleExt
    indexModuleMethod
    indexPatternChar
    indexSubstChar
    indexTTComment
    indexUnderClass
    indexUnderMethod
    indexUnderModule
    indexUnderVariable
    indexVariable
    Ikey
    Ivalue
    label
    LANG
    large
    LIB
    libentry
    marginlabel
    markboth
    ncarc
    newcolumntype
    newenvironment
    newlength
    newpage
    newsavebox
    normalfont
    numberline
    penalty
    raggedcolumns
    raggedright
    raisebox
    relax
    rnode
    rput
    rule
    safe
    sbox
    setcounter
    setlength
    Skip
    small
    SP
    VerbatimFont
    ttfamily
    vfil
    vspace
  }.each { |name| CMDTAB[name] = ['', '', :ignore] }


  # Define (or redefine) +name+ as an abbreviation: the command expands
  # to +content+ followed by its own arguments, if it has any.
  def Cmd.addAbbrev(name, content)
    CMDTAB[name] = [ content, '', nil ]
  end

  # \block and \blockp: a block with or without block parameters.
  def Cmd.block(*arg)
    if (arg && arg.size > 0)
      # join explicitly: interpolating the Array itself gives its
      # inspect form (["x"]) on Ruby 1.9 and later
      "<block>{| #{arg.join} | <blockbody>block</blockbody> }</block>\n"
    else 
      "<block>{ <blockbody>block</blockbody> }</block>\n"
    end
  end


  # Return the replacement text for command +name+ called with the
  # argument strings +args+.
  #
  # Raises a RuntimeError if the command is not in CMDTAB.
  def Cmd.convert(name, args)
    cmd = CMDTAB[name]
    raise "Unknown command '#{name}'" unless cmd

    pre, post, func = cmd

    if func
      txt = Cmd.send(func, *args)
    else
      # No handler: the arguments themselves are the text. They are
      # concatenated explicitly because Array#to_s returns the inspect
      # form (["x"]) on Ruby 1.9 and later.
      txt = args.join
    end
    return pre + txt + post
  end

  ###
  # Handler methods
  #

  def Cmd.cfunc(returns, name, params, body)
    Env._CFUNC(body, returns, name, params)
  end

  # The title is the second argument when there are two, otherwise the
  # first. It is cleaned up in place and echoed to stderr.
  def Cmd.chapter(*args)
    name = args[1] || args[0]
    name.gsub!(%r{<br/>}, ' ')
    name.gsub!(%r{\\~}, ' ')
    $stderr.puts name
    "<chapter ref=\"#{$ref.next}\">#{name}</chapter>"
  end
  
  def Cmd.eps(name, size)
    "<p/><em>Missing #{name}</em><p/>"
  end

  def Cmd.ignore(*args)
    ''
  end

  # Emits a CDATA section holding just the name of the sample file;
  # Converter#handlePostSpecialCharacters later replaces the name with
  # the contents of that file.
  def Cmd.mapCodeRef(file, number)
    fname = File.join(SAMPLES_DIR, "#{file}:#{number}")
    "<fullcode><![CDATA[#{fname}]]></fullcode>"
  end

  def Cmd.mapConstant(name, value, desc)
    "<constant>
       <constname>#{name}</constname>
       <constval>#{value}</constval>
       <constdesc>#{desc}</constdesc>
     </constant>\n"
  end

  def Cmd.mapFrac(top, bottom)
    "#{top}/#{bottom}"
  end

  # Simple math: \le and \ge, plus subscripts and superscripts written
  # either with a braced group or with a single character. +txt+ is
  # modified in place.
  def Cmd.mapMath(txt)
    txt.gsub!(/\\le/, '&lt;=')    
    txt.gsub!(/\\ge/, '&gt;=')
    txt.gsub!(/([_^])\{:(\d\d\d\d):(.*?)\}:\2:/) {
      kw = $1 == "_" ? "sub" : "sup"
      "<#{kw}>#$3</#{kw}>"
    }
  
    txt.gsub!(/([_^])(.)/) {
      kw = $1 == "_" ? "sub" : "sup"
      "<#{kw}>#$2</#{kw}>"
    }
    txt
  end

  def Cmd.mapMixin(name)
    "<mixin name=\"#{name}\">#{MIXIN_CONTENTS[name]}</mixin>"
  end

  def Cmd.mapMultiline(txt)
    txt.gsub(/\\\\(\[.*?\])?\s*/, '<br/>')
  end

  # handle the argument to \CIM and friends
  #
  # The name is split on '.' and '::'. The last part is the method or
  # constant, and whatever precedes it is the class or module. Names in
  # LIBRARY_MODULES additionally get a file name and a method reference.
  def Cmd.mapName(name)
    parts = name.split(/(?:\.|::)/)

    back = parts.pop
    front = parts.join("_")

    if (LIBRARY_MODULES[front]) 
      file = front.downcase
      mref = TidyName.tidy(back)
      "<file>#{file}</file><front>#{front}</front><back>#{back}</back><mref>#{mref}</mref>"
      
    else
      "<front>#{front}</front><back>#{back}</back>"
    end
  end

  def Cmd.mapPassthrough(content='')
    content
  end

  # Section number of label +name+; raises a RuntimeError if the label
  # is not known to $crossRef.
  def Cmd.mapRef(name)
    number = $crossRef.numberOf[name]
    raise "Unknown cross reference: #{name}" unless number
    number
  end

  # Page number of label +name+; raises a RuntimeError if the label is
  # not known to $crossRef.
  def Cmd.mapPageRef(name)
    page = $crossRef.pageOf[name]
    raise "Unknown cross reference: #{name}" unless page
    page
  end

  # Empty stub: returns nil and has no entry in CMDTAB.
  def Cmd.mapSlmeth(name, returns)
  end

  # Values accepted by \symbol and the text each one stands for
  SYMBOLS = {
    "'134" => BSLASH,
    "'136" => "^",
    "'176" => SDASH,
    "'0"   => '`',
  }

  # Raises a RuntimeError for a value that is not in SYMBOLS.
  def Cmd.mapSymbol(val)
    res = SYMBOLS[val]
    raise "Unknown symbol: #{val}" unless res
    res
  end

  def Cmd.mapUrl(url)
    # a lone "com" is the tail of a url that was split over two \url's
    return "" if url == "com"

    # Sometimes use \url for filenames
    if url[0] == ?/
      return "<tt>#{url}</tt>"
    end

    url = "mailto:rubybook@pragmaticprogrammer.com" if url =~ /^rubybook@/
    url = "http://"+url unless url =~ /:/
    "<url>#{url}</url>"
  end

  def Cmd.mapVRef(name)
    Cmd.mapRef(name) + " on page " + Cmd.mapPageRef(name)
  end

  # Each run of lower-case letters is upper-cased and wrapped in
  # <smallfont>.
  def Cmd.mapTextSc(txt)
    txt.gsub(/([a-z]+)/) { %{<smallfont>#{$1.upcase}</smallfont>} }
  end

  def Cmd.prompt(cmd)
    "<tt>%~#{cmd}</tt>"
  end

  ###
  # Tables
  #

  # Only the column count and the content are used; the format argument
  # is ignored.
  def Cmd.mapMultiCols(cols, fmt, content)
    "<td colspan=\"#{cols}\">#{content.strip}</td>\n"
  end

  # Double every backslash in the verbatim text.
  def Cmd.verb(txt)
    txt.gsub(/\\/, '\&\&')
  end

  ###
  # Read in the abbreviations file and define commands for each
  # of the entries
  #
  # The file is run through a Converter purely for its side effect of
  # registering abbreviations; the converted text is thrown away.
  
  def Cmd.readAbbreviations(fileName)
    File.open(fileName) do |f|
      conv = Converter.new(f)
      conv.convert
    end
  end

  ##
  # Stuff in language.tip
  #
  # +name+ is modified in place: double quotes become &#34; so that it
  # can be used as an attribute value.
  def Cmd.pdv(name, type, body)
    name.gsub!(/"/, '&#34;')
    %{<variable name="#{name}" type="#{type}">#{body}</variable>}
  end

  def Cmd.pdc(name, type, body)
    Cmd.pdv(name, type, body)
  end

  def Cmd.swallowArgs(*args)
    ''
  end

  # Debugging aid: echo the arguments to stderr and produce no text.
  def Cmd.trace(*args)
    $stderr.puts args
    ""
  end

  ##
  # Stuff in trouble.tip
  #
  def Cmd.userinput(str)
    "<code><b>#{str}</b></code>"
  end

  #
  # Stuff in lib_cgi
  #
  # Shows the two calling forms of a method, one taking +param+ and one
  # taking "+key+ =>", each followed by the block when there is one.
  def Cmd.HT(name, param, block, key)
    res  = "<br/><tt>#{name}</tt>( <em>#{param}</em> )"
    res += " #{block}" if block.length > 0
    res += "<br/><tt>#{name}</tt>( <em>#{key}</em> =#{GREATERTHAN} )"
    res += " #{block}" if block.length > 0
    res + "<p/>"
  end

  def Cmd.HK(key)
    "<tt>#{key}</tt>"
  end
  
  def Cmd.BLK(txt)
    "{ <em>#{txt}</em> }"
  end
end

###########################################################################
#
# Handle cross references. We read in the .aux file at start up
# and use it to return the page numbers, section numbers,
# and section names

# Cross-reference tables, each keyed by label name and filled from the
# \newlabel lines of a LaTeX aux file.
class CrossRef
  attr_reader :pageOf, :numberOf, :nameOf

  def initialize
    @pageOf, @numberOf, @nameOf = {}, {}, {}
  end

  # Record the number and page of every \newlabel{name}{{number}{page}}
  # line in the file. Other lines, and labels whose name contains '@',
  # are skipped.
  def readFrom(auxFileName)
    File.foreach(auxFileName) do |aux_line|
      label = /^\\newlabel\{(.*?)\}\{\{(.*?)\}\{(.*?)\}\}/.match(aux_line)
      next if label.nil?

      key = label[1]
      next if key =~ /\@/

      @numberOf[key] = label[2]
      @pageOf[key]   = label[3]
    end
  end

end

###########################################################################
#

# Converts one document. The whole input is read into @content, which
# the steps of #convert rewrite in place; #writeTo then emits the result
# wrapped in a <ppdoc> element.
class Converter

  # Map \charnnn to something useful

  CHAR_MAP = {
    32 => "<visible_space/>",
  }

  # +stream+ only has to respond to read.
  def initialize(stream)
    @content = stream.read
  end

  # Append the document to +stream+ (anything that supports <<).
  def writeTo(stream)
    stream << <<EOM
<ppdoc>
<copyright>
    Copyright (c) 2001 by Addison Wesley Longman.  This
    material may be distributed only subject to the terms and
    conditions set forth in the Open Publication License, v1.0 or
    later (the latest version is presently available at
    http://www.opencontent.org/openpub/).
</copyright>
EOM
    stream << @content
    stream << "</ppdoc>\n"
  end

  # Run all the conversion steps. The order matters: later steps rely
  # on the placeholders and brace markers put in by earlier ones.
  def convert
    handlePreSpecialCharacters
    matchBraces
    handleSpecialCases
    handleEnvironments
    handleCommands
    handlePostSpecialCases
    handlePostSpecialCharacters
  end

  # Swap out characters that will confuse the parse later. We'll
  # swap them back in in handlePostSpecialCharacters

  def handlePreSpecialCharacters
    @content.gsub!(%r{\\end\{alltt\}\n\\vspace\{-4pt\}\n\\begin\{alltt\}}, "\n")

    # Don't mess with '%' and '&' between \begin{ruby} and \end{ruby}, 
    # syntax, or codefragment
    @content.gsub!(/\\begin\{(ruby|syntax|alltt)\}(.*?)\\end\{\1\}/m) do
      name = $1
      m = $2.gsub(/\\?%/, PERCENT).gsub(/\\?&/, AMPERSAND)
      "\\begin{#{name}}#{m}\\end{#{name}}"
    end

    # lines starting '%' are always comments
    @content.gsub!(/^\s*%.*?\n/, '')

    # This is a particular \def in irb.tip
    @content.gsub!(/\\def\\cmd\[.*/, '')

    # in language.tip, we have a list containing
    # \item[\CF{[}\emph{characters}\CF{]}]
    # that ']' in the \CF confuses the parse, so we zap it here
    
    @content.gsub!(%r{\\CF\{]\}\]}, "\\CF{#{CLOSESQUARE}}]")


    @content.gsub!(%r{\\slmeth\{(.*?)\}\{(.*?)\}}) {
      func = $1
      func.tr! ']', CBR

      "\\item[#{func} &#187; #$2]"
    }

    @content.gsub!(/\\"\{a\}/, "&#228;") # "
    @content.gsub!(/\\`\{a\}/, "&#224;") # `
    @content.gsub!(/\\&/, AMPERSAND)
    @content.gsub!(/</,   LESSTHAN)
    @content.gsub!(/>/, GREATERTHAN)
    @content.gsub!(/\\,/, '')
    @content.gsub!(/\\\\(\[.*?\])?\s*/, '<br/>')
    @content.gsub!(/\\\n/, '')
    @content.gsub!(/\\ /, '~')
    @content.gsub!(/\\_/, UNDERSCORE)
    @content.gsub!(/\\-/, '')
    @content.gsub!(/\$(\d+)\^\\circ\$/, "\\1#{DEGREE}") 
    # Convert math, but not escaped $'s
    @content.gsub!(/\\\$/, MAGIC)
    @content.gsub!(/\$([-+<>\\\s\na-zA-Z0-9_^.{}=]*?)\$/m) { "\\MATH{#$1}" }
    @content.gsub!(/#{MAGIC}/, '$')

    @content.gsub!(/\\char\s*32/, '<visible_space/>')

    # There are times where \self is followed by a '[', but it isn't
    # an optional argument, so

    @content.gsub!(/\\self([^{])/, "\\self#{STOPPER}\\1")

    # finally, \verb can use arbitrary delimiters
    @content.gsub!(/\\verb\*?(.)(.*?)\1/) do
      verb = $2
      verb.tr! '%', PERCENT
      verb.tr! '\\', BSLASH
#      STDERR.puts "\\verb{#{verb}}"
      "\\verb{#{verb}}"
    end


    # remove conditionally excluded text
    @content.gsub!(%r{\\if\s*0.*?\\fi\s*}m, '')

    # and the rest of the comments
    @content.gsub!(/\\%/, PERCENT)
    @content.gsub!(/%.*?\n/, '')

    # sometimes we have the .com part of a url in a separate \url
    @content.gsub!(/\.\}\\url\{com\}/, '.com}')
    @content.gsub!(/\}\.\\url\{com\}/, '.com}')

    # \rput has its own special syntax...
    @content.gsub!(%r{\\rput\[.*?\]\(.*?\)}, '\\rput')

    # as does \hrule. These only occur in lib_cgi, hence the cheat.
    # The pattern has to be a Regexp (%r{...}): written as %{...} it was
    # a plain String, which gsub! matches literally, so it never matched.
    @content.gsub!(%r{\\hrule.*}, '\\hrule{}')

    # irb.tip has \cmd in a list
    @content.gsub!(%r{\\cmd\[(.*?)\]}, '\\item[\\texttt{\1}]')


  end

  # Convert matched braces to {:nnnn:  }:nnnn:
  #
  # Both braces of a pair get the same four-digit number, so later
  # patterns can find the partner of an opening brace with a
  # backreference. Pairs are numbered from the innermost outwards.
  def matchBraces
    count = "0000"

    # Don't mess with escaped braces

    @content.gsub!(/\\\{/, OBR)
    @content.gsub!(/\\\}/, CBR)

    # Don't mess with stuff in ruby code blocks (except \begin and \end)

#    @content.gsub!(/\\begin\{ruby\}(.*?)\\end\{ruby\}/m) {
#      m = $1
#      m.gsub!(/\\(begin|end)\{(.*?)\}/) {  "#{MAGIC2}#$1#{OBR}#$2#{CBR}" }
#      m.gsub!(/\{/, MAGIC)
#      m.gsub!(/\}/, MAGIC1)
#      m.gsub!(/\\/, BSLASH)
#      "\\begin{ruby}#{m}\\end{ruby}"
#    }

    # Each pass marks the pairs that have no unmarked brace inside them.
    # Marked braces are held as MAGIC/MAGIC1 so that the next pass sees
    # through them; repeat until nothing is left to mark.
    1 while @content.gsub!(/\{([^\{\}]*)\}/m) {
      count = count.succ
      "#{MAGIC}:#{count}:#$1#{MAGIC1}:#{count}:"
    }

    @content.gsub!(/#{MAGIC}/,  '{')
    @content.gsub!(/#{MAGIC1}/, '}')
  end

  ###
  #
  # Special cases are commands that we can't treat using the
  # simple substitutions
  #

  # \def's and \newcommand's for these names are not turned into
  # abbreviations
  IGNORE_DEFS = {}

  %w{ 
    BLK 
    cmd
    EG
    EPS
    gap 
    HK
    HT
    indexExtApi
    IP
    NOTE
    PDC 
    PDV 
    ROPT 
    SCONST
    Skip
    cfunc 
    meth 
    nohyph
   }.each {|key| IGNORE_DEFS[key] = 1}

  def handleSpecialCases
    # Remove everything within \LAYOUT's
    @content.gsub!(/\\LAYOUT\{:(\d\d\d\d):(.*)\}:\1:/, '')

    # \ROPTS become \items
    # NOTE(review): the generated text ends with a literal apostrophe
    # after the ']' -- confirm whether that is intended.
    @content.gsub!(/\\ROPT\{:(\d\d\d\d):(.*)\}:\1:/) do
      %{\\item[#{$2.gsub(/\]/, CBR)}]'}
    end

    # simple \def's become abbreviations
    @content.gsub!(/\\def\\([a-zA-Z]+)(.*?)\{:(\d\d\d\d):(.*?)\}:\3:/) do
      Cmd.addAbbrev($1, $4) unless IGNORE_DEFS[$1]
      ''
    end

    @content.gsub!(/\\newcommand\{:(\d\d\d\d):\\([a-zA-Z]+)\}:\1:(\[.*?\])?\{:(\d\d\d\d):(.*?)\}:\4:/m) do
      Cmd.addAbbrev($2, $5) unless IGNORE_DEFS[$2]
      ''
    end

    @content.gsub!(/\\newenvironment\{:(\d\d\d\d):[a-zA-Z]+\}:\1:(\[.*?\])?\{:(\d\d\d\d):.*?\}:\3:\s*\{:(\d\d\d\d).*?\}:\4:/m) {
      ''
    }

    # \char's are special too
    @content.gsub!(/\\char(\d+)\s*/) do
      n = $1.to_i
      txt = CHAR_MAP[n]
      raise "Invalid \\char#{n}" unless txt
      txt
    end

    # lose chardef and let altogether

    @content.gsub!(/\\chardef.*?\n/, '')
    @content.gsub!(/\\let.*?\n/, '')

    @content.gsub!(/\\\w+=\S+/, '')

    # Remove .PS/.PE pictures (we could render them by piping them out
    # and saving the result in a gif, but...)

    @content.gsub!(/^\.PS.*?^\.PE\s*\n/m, '')
  end

  # Look for \begin{...} ... \end{...} environments and convert them
  #
  # Each environment is replaced by the result of Env._<name>(body,
  # *args). The substitution is repeated until no environment is left,
  # because a pass can expose environments it did not convert itself.
  # If a handler raises, the surrounding text is shown on stderr and
  # the exception is re-raised.

  def handleEnvironments
    pat = /\\begin\{:(\d\d\d\d):(.*)\}:\1:(.*?)\\end\{:(\d\d\d\d):\2\}:\4:/m

    1 while @content.gsub!(pat) do
      posn = $`.length          # `

      name = $2
      body = $3
      args = []

      # convert \begin{xxx*} to \begin{xx_STAR}
      # (String#[]= raises IndexError when the string to replace is
      # absent, so only do this for starred names)
      name['*'] = '_STAR' if name.include?('*')

      # collect the parameters
      1 while body.gsub!(/\A\s*\{:(\d\d\d\d):(.*)\}:\1:/m) do
        args <<  $2
        '' 
      end

      1 while body.gsub!(/\A\[(.*?)]/m) do
        args <<  $1
        '' 
      end

      begin
        Env.send("_" + name, body, *args)
      rescue Exception
        reportPosition(posn)
        raise
      end
    end
  end

  # Then handle 'normal' latex commands
  #
  # Commands are expanded one at a time, leftmost first. The command
  # name is replaced by MAGIC, its arguments are peeled off from just
  # behind that marker, and finally the marker is replaced by the text
  # Cmd.convert produces. If that fails, the command name is left in the
  # text between >> and <<, the surrounding text is shown on stderr and
  # the exception is re-raised.

  def handleCommands
    while @content.sub!(/\\([a-zA-Z]+\*?)\s*/, MAGIC) 
      cmdName = $1
      posn = $`.length          # `
      args = []

      # keep going round until a pass finds nothing more to take off
      loop do
        done = true

        # ignore leading optional parameters
        @content.sub!(/#{MAGIC}\[.*?\]/m) do
          done = false
          MAGIC
        end

        @content.sub!(/#{MAGIC}\s*\{:(\d\d\d\d):(.*?)\}:\1:/m) do
          args << $2
          done = false
          MAGIC
        end
        @content.sub!(/#{MAGIC}\[(.*?)\]/m) do
          args << $1
          done = false
          MAGIC
        end

        break if done
      end

      begin
        @content[MAGIC] = Cmd.convert(cmdName, args)
      rescue Exception
        @content[MAGIC] = "\n\n>>\\#{cmdName}<<\n"
        reportPosition(posn)
        raise
      end
    end
  end

  # Tidy up stuff at the end
  def handlePostSpecialCases
    # If this is a chapter, make that the document type
    if @content.gsub!(%r{<chapter(.*?)>(.*?)</chapter>}m, '<chapter \1 name="\2">')
      @content << "</chapter>\n"
    end
    
    # paragraphs
    @content.gsub!(/\n(\s*\n)+/m, "\n<p/>\n")

    # no paragraphs at the start
    @content.sub!(%r{\A\s*<p/>\s*}, '')

    # no paragraphs at the end
    @content.sub!(%r{\s*<p/>\s*\Z}, '')

    # remove braces used to separate things like -{}-
    @content.gsub!(/\{:(\d\d\d\d):\}:\1:/, '')

    # and other residual brace markers
    @content.gsub!(/:\d\d\d\d:/, '')
  end

  # Convert remaining characters that cause problems
  def handlePostSpecialCharacters
    @content.gsub!(/\$~/, "$" + TILDA)
    @content.gsub!(/~/, '<nbsp/>')
    @content.gsub!(/&nbsp;/, '<nbsp/>')
    @content.gsub!(/\\([_$#])/) { $1 }
    @content.gsub!(/#{LESSTHAN}/, '&lt;')
    @content.gsub!(/#{GREATERTHAN}/, '&gt;')
    @content.gsub!(/#{AMPERSAND}/, '&amp;')
    @content.gsub!(/#{DEGREE}/,    '&#176;')
    @content.gsub!(/#{STOPPER}/,   '')
    @content.tr! BSLASH,    '\\'
    @content.tr! PERCENT,   '%'
    @content.tr! SDASH,     '~'
    @content.tr! OBR,       '{'
    @content.tr! CBR,       '}'
    @content.tr! TILDA,     '~'
    @content.tr! UNDERSCORE,     '_'
    @content.tr! CLOSESQUARE,    ']'
    
    # very last thing: substitute in CDATAs
    # (what sits between "<![CDATA[" and the next "]" is a file name;
    # it is replaced by the contents of that file)

    @content.gsub!(/<!\[CDATA\[(.*?)\]/) { 
      txt = File.open($1) {|f| f.read} 
      %{<!\[CDATA[#{txt}]}
    }
  end

  # Show the text around offset +posn+ on stderr, to help locate the
  # input that made a handler fail. The start of the excerpt is clamped
  # at 0: a negative start index would count from the end of the string.
  def reportPosition(posn)
    first = posn > 100 ? posn - 100 : 0
    $stderr.puts "Roughly at:", @content[first..posn+300]
    $stderr.puts
  end
  private :reportPosition
end

###########################################################################
#     
# Driver
#

# Print a usage message on stderr and exit with status 1.
#
# The USAGE constant is used when one is defined. This file does not
# define it, so without the fallback the method would fail with a
# NameError instead of printing anything.
def usage
  message = if defined?(USAGE)
              USAGE
            else
              "usage: ruby #{File.basename($0)} < input.tip > output.xml"
            end
  $stderr.puts message
  exit(1)
end


# Load the cross references (when the aux file exists) and the shared
# abbreviations, then convert standard input to XML on standard output.
$crossRef = CrossRef.new
if File.exist?(AUX_FILE)
  $crossRef.readFrom(AUX_FILE)
end

Cmd.readAbbreviations(ABBREV_FILE)

document = Converter.new(STDIN)
document.convert
document.writeTo(STDOUT)
