#! /usr/bin/env python

""" GENERAL ------------------------------------------------------------------------
    Extracts components of LyX documents exported in LayTex format.

                        written by Nick Thomas

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License
  as published by the Free Software Foundation; either version 2
  of the License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  ---------------------------------------------------------------------------

  The result is written in /tmp with the name of the original document and
      extension .ftd
    A report of the findings is written similarly with extension .ftr
    The document is flayed into its component text, equation, format etc. parts
      and all LayTex formatting is separated out into separate lists.
    This enables conversion to another format by applying the formatting
      commands for that document type to the stripped objects.
      Decorated text strings and other format directives are preceded in the
      document structure object by the entries in the following dictionaries:
                escapeDict
                fsDict
                fstDict
                fmtDict
            there are four corresponding 'vanilla' dictionary files for these
            with the entries which may be copied and edited for a given
            document type, for use in constructing a new converted document, called
                FontSizeVanilla
                FontStyleVanilla
                EscVanilla
            the entries have defaults which can be changed to whatever
            the new document requires in order to implement them;
            the format directives are entered in the corresponding list but
              the text to be decorated is put in its native object list, so that on
              reconstruction a format or font directive index will be found in the
              document structure list, immediately followed by an index to the
              object list where the text to be decorated is located; counters are
              maintained during reconstruction to locate the text in that object
              list; see function extractObj() which extracts objects in sequence
              based on the document structure list
            the reason for this linear approach rather than grouping the document
              structure list is that it is simple and access to object instances
              is direct rather than via some form of further indirection to
              sub-lists;
            the separation of the format and font directives into their own lists
              allows them to be converted easily from the dictionaries without
              having to pick through the main text to find them
    Exceptions are:
        escape characters like '\&' are stored as they are found

    Unrecognised objects are added to the main object dictionary if possible during
    parsing, or else entered unaltered in the unrecognised object list

    Tables are stored as follows:
        the initial {c|c..} for the number of columns is in the format list

    Page refs are set to the corresponding section (or whatever) ref, as paging is
      not known; that can be used by the Conv... program to find the corresponding
      page number

    For guidance in implementing a new object goto NEWOBJ

    The main program starts at MAIN PROGRAM
------------------------------------------------------------------------------------
"""

""" EXCEPTIONS FROM LyX/Tex --------------------------------------------------------

    Equation labels
        all equation labels, including equation array labels, are simple numbers
        in brackets i.e. without reference to the section etc.

    Equation array labels
        only those equations in an equation array which possess a cross-reference
        are labelled, as other labels would obviously be redundant;
        this avoids unnecessary "fussiness"

------------------------------------------------------------------------------------
"""

import  getopt  #for command-line param parsing
import  sys     #for above and general fns like exit()
import  os      #for operating system functions like getcwd()
import  pickle
import  popen2
from popen2 import *
from Tkinter import *   #GUI support module
import Tix
from Tix import *       #must do this as well as above import (!!)

# ---------------------------------------------------------------------------------

"Configuration presets are read in from file TexConvPresets, located in same"
"directory as the program"
"At present this contains the home directory, dictionary and document paths"

"Path presets are fixed for the platform, and are read in to enable the same"
"program to be used for different platforms"

# ---------------------------------------------------------------------------------
"Development presets"

# NOTE(review): both values are absolute paths inside one developer's home
# directory, so they only resolve on that machine.
Presets='/home/nct/Projects/python/TexConv/Office/ConvTexConfig'
Paths='/home/nct/Projects/python/TexConv/Office/ConvTexPaths'

# ---------------------------------------------------------------------------------

# Directory separator; empty here (presumably filled in from the path
# presets at start-up -- confirm in the main program).
dirySep=''
# Message text for a missing file name.
fnr='File name required'
# A fixed run of blanks (presumably used for padding/indenting output).
spaces='                                    '
# All ASCII letters, lower case first and then upper case.
alpha="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Successor table for lower-case roman numerals: each key maps to the next
# numeral ('i' -> 'ii' ... 'xi' -> 'xii').
roman={'i': 'ii','ii': 'iii','iii': 'iv','iv': 'v','v': 'vi','vi': 'vii',
        'vii': 'viii','viii': 'ix','ix': 'x','x': 'xi','xi': 'xii'}
# Upper-case roman numerals I..XX in order.
Roman=['I','II','III','IV','V','VI','VII','VIII','IX','X','XI','XII','XIII',
        'XIV','XV','XVI','XVII','XVIII','XIX','XX']
# Two-character input sequences and the text that stands for each of them;
# '{}' maps to the empty string.
pairs={ '{}':   '',
        '--':   '\\endash{}',
        '``':   '\\quoteds{}',
        "\'\'": '\\quotede{}',
        '_{':   '\\subscript{',
        '^{':   '\\superscr{',
        '<<':   '\\latin\xab',
        '>>':   '\\latin\xbb'
}
# Characters singled out for special treatment (presumably by the text
# scanner -- confirm against its use further down the file).
exceptions='<>{}%&~$`_^\'\"\n-'
escChars="\\&@#$/_{}-," # must NOT include [ (as '\\[' starts an equation)
charAccents="^~\'="     # Circumflex etc.
# Accent command -> [replacement name, number]; in every entry the number
# equals the character length of the replacement name (backslash included).
accentDict={
        '\\^': ['\\circumflex',11],
        '\\~': ['\\tilde',6],
        '\\\'': ['\\acute',6],
        '\\=': ['\\macron',7]           # this is an overbar in text mode
        }
# Backslash commands listed as maths symbols: Greek letters, operators,
# named functions and, last, the four spacing commands  \,  \:  \;  \!
# Every entry is written with an explicit double backslash; the spacing
# commands previously relied on unrecognised escape sequences ('\,' etc.),
# which yield the same two-character strings but are deprecated in newer
# Python versions.
mathsSymbol=[
        '\\alpha','\\beta','\\gamma','\\chi','\\delta','\\epsilon','\\eta','\\iota',
        '\\kappa','\\lambda','\\mu','\\nu','\\omega','\\omicron','\\pi','\\phi',
        '\\psi','\\rho','\\sigma','\\tau','\\upsilon','\\theta','\\xi','\\zeta',
        '\\Alpha','\\Beta','\\Gamma','\\Chi','\\Delta','\\Epsilon','\\Eta','\\Iota',
        '\\Kappa','\\Lambda','\\Mu','\\Nu','\\Omega','\\Omicron','\\Pi','\\Phi',
        '\\Psi','\\Rho','\\Sigma','\\Tau','\\Upsilon','\\Theta','\\Xi','\\Zeta',
        '\\exp','\\int','\\sum','\\times','\\div','\\infty','\\lim','\\pm','\\mp',
        '\\ln','\\sin','\\cos','\\tan','\\cot','\\csc','\\sec','\\arcsin',
        '\\arccos','\\arctan','\\sinh','\\cosh','\\tanh','\\coth',
        '\\angle','\\arg','\\bmod','\\bot','\\cong','\\deg','\\det','\\dim',
        '\\emptyset','\\ell','\\exists','\\forall','\\gcd','\\hbar','\\hom','\\Im',
        '\\imath','\\inf','\\jmath','\\ker','\\lg','\\liminf','\\limsup','\\max',
        '\\min','\\neg','\\partial','\\Pr','\\prime','\\quad','\\qquad','\\Re',
        '\\sqrt','\\top','\\Vert','\\wp','\\,','\\:','\\;','\\!'
        ]
# Backslash command names for symbols, arrows, relations and punctuation
# (not to be confused with the string escChars defined earlier).
# NOTE(review): '\\pi' appears here as well as in mathsSymbol.
escChar=['\\aleph','\\amalg','\\approx','\\asymp','\\bigcap','\\bigcirc','\\bigcup',
        '\\bigodot','\\bigoplus',
        '\\bigotimes','\\bigtriangledown','\\bigtriangleup','\\biguplus','\\bigvee',
        '\\bigwedge','\\bowtie','\\bullet','\\cap','\\cdot','\\cdots','\\circ',
        '\\clubsuit','\\coprod','\\cup','\\dagger','\\dashv','\\ddagger','\\ddots',
        '\\diamond','\\diamondsuit','\\doteq','\\downarrow','\\Downarrow',
        '\\emdash','\\endash','\\equiv',
        '\\flat','\\frown','\\geq','\\gg','\\gt','\\hardspace','\\heartsuit',
        '\\hookleftarrow','\\hookrightarrow','\\in','\\ldots','\\leftarrow',
        '\\Leftarrow','\\leftharpoondown','\\leftharpoonup','\\leftrightarrow',
        '\\Leftrightarrow','\\leq','\\ll','\\longleftarrow','\\Longleftarrow',
        '\\longleftrightarrow','\\Longleftrightarrow','\\longrightarrow',
        '\\Longrightarrow','\\longmapsto','\\lt','\\mapsto','\\mathcircumflex',
        '\\mid','\\models','\\nabla','\\natural','\\nearrow','\\neq','\\ni',
        '\\notin','\\nwarrow','\\odot','\\oint','\\oplus','\\ominus','\\oslash',
        '\\otimes','\\parallel','\\perp','\\pi','\\prec','\\preceq','\\prod',
        '\\propto','\\quotes','\\guilsinglleft','\\guilsinglright','\\quoteds',
        '\\quotede','\\rightharpoondown',
        '\\rightharpoonup','\\rightleftharpoons','\\rightarrow','\\Rightarrow',
        '\\searrow','\\setminus','\\sharp','\\sim','\\simeq','\\smile','\\spadesuit',
        '\\sqcap','\\sqsubseteq','\\sqsupseteq','\\sqcup','\\star',
        '\\subset','\\subseteq','\\supseteq','\\succ','\\succeq','\\surd',
        '\\swarrow','\\supset','\\triangle','\\triangleleft','\\triangleright',
        '\\uparrow','\\Uparrow','\\Updownarrow','\\updownarrow','\\uplus','\\vdash',
        '\\vdots','\\vee','\\wedge','\\wr','\\lda','\\rda'
        ]

# The two bracket-sizing commands.
mathsBrackets=['\\left','\\right']
# Plain single-character bracket delimiters.
brackets=['(',')','[',']','|']
# Named delimiters, stored WITHOUT a leading backslash, plus '/' and a
# lone backslash.
mathsBr=['lceil','rceil','lfloor','rfloor','langle','rangle','Vert',
         'uparrow','Uparrow','downarrow','Downarrow','/','\\'
        ]

# Label style for enumerated lists, keyed by the even numbers 2..12
# (presumably the list indentation, two per nesting level -- confirm).
enumType={
        2: '1.',
        4: '(a)',
        6: 'i.',
        8: 'A.',
        10: '*',
        12: '*'
        }
# Bullet character for itemised lists, keyed the same way as enumType.
itemBullet={
        2: '#',
        4: '-',
        6: '*',
        8: '.',
        10: '*',
        12: '*'
        }
"""
def aStable(setIt):
  "for debugging: controls one subsequent action if prior set has occured"
  "state goes on if setIt != 0"
  "if setIt=0 then resets state to false & returns true if was on"
  if setIt:
    pv.aState=1
  elif pv.aState:
    pv.aState=0
    return 1
  return 0
  # aStable
"""

def get_res(n):
  """Callback for the file-selection box created by diryGUI().

  Stores the box's current "value" option in pv.GUIresult and then destroys
  the root window.  The argument n supplied by the widget is not used.
  """
  chosen=pv.fsd.cget("value")
  pv.GUIresult=chosen
  pv.root.destroy()

def diryGUI(d):
  """Ask for a '*.tex' file with a file dialog opened on directory d.

  The root window and the selection box are kept in pv.root and pv.fsd so
  that the get_res() callback can reach them.  Returns pv.GUIresult, which
  is None unless get_res() has stored a selection.
  """
  top=Tix.Tk()
  pv.root=top
  pv.GUIresult=None
  box=ExFileSelectBox(top,command=get_res,pattern='*.tex',directory=d)
  pv.fsd=box
  top.geometry(newGeometry='+300+20')
  box.pack()
  box.mainloop()
  return pv.GUIresult
  #diryGUI

def textGUI(s):
  """Show the string s in a scrolled text window.

  The window is 400x200 at screen offset +300+20; the text is word-wrapped,
  blue, in Courier 12.  The widgets are kept in pv.root and pv.fsd.
  """
  top=Tix.Tk()
  pv.root=top
  pane=ScrolledText(top)
  pv.fsd=pane
  top.geometry(newGeometry='400x200+300+20')
  # item assignment on a Tk widget is shorthand for configure()
  pane.text.configure(wrap='word')
  pane.text.configure(font=('Courier','12'))
  pane.text.configure(fg='blue')
  pane.pack(side='right')
  pane.text.insert("end",s)
  pane.mainloop()
  #textGUI

def removeErrFile():
  """Delete the error file named by the global ErrPath, if possible.

  Deletion is best-effort: any ordinary failure (file absent, no permission,
  ...) is ignored.  Only Exception subclasses are swallowed, so that
  KeyboardInterrupt and SystemExit are no longer silently discarded.
  """
  try:
    os.remove(ErrPath)                  # delete error file
  except Exception:
    pass                                # nothing to remove, or cannot remove
  # removeErrFile

def makeErrorFile(msg):
  """Make the error file for the client.

  Any existing error file is removed first, then msg plus a newline is
  written to the file named by the global ErrPath.  The file is closed even
  if the write fails.  Finally pv.syntaxErr is reset to 0.
  """
  removeErrFile()
  ff=open(ErrPath,'wt')
  try:
    ff.write(msg+'\n')
  finally:
    ff.close()                          # do not leak the handle on error
  pv.syntaxErr=0
  # makeErrorFile

class parsedVars:
  """Structure to hold global variables.

  DO NOT CHANGE NAMES without checking procedure setIndices() and reflecting
  those changes in ALL the Conv....py programs, and calling makeDics() for the
  Start.. and End.. dictionaries.
  The indices must correspond to those in mainTable.
  The pointers are initialised to point to the first appended entry.
  The table may grow adaptively during a run if objects are unrecognised.

  NEWLIST for a new object
        enter name in this table
  """
  debug=0
  ff=''
  obf=''
  # Each value is a list [name, pointer, entry, entry, ...]: element 0 is the
  # object's name, element 1 the running pointer used by extractObj() (it
  # starts at 2, the position of the first appended entry) and the entries
  # themselves are appended from position 2 onwards.
  objDict={#    name                    pointer         appended entries
        0:      ['parent',              2],
        1:      ['Abstract',            2],
        2:      ['Address',             2],
        3:      ['Array',               2],
        4:      ['Author',              2],
        5:      ['Bibliography',        2],
        6:      ['Bibliography label',  2],
        7:      ['Lyxcode',             2],
        8:      ['Caption',             2],
        9:      ['Chapter heading',     2],
        10:     ['spare01',             2],
        11:     ['Comment',             2],
        12:     ['Cross reference',     2],
        13:     ['Date',                2],
        14:     ['Description',         2],
        15:     ['Document structure',  2],
        16:     ['EndObject',           2],
        17:     ['ensuremath',          2],
        18:     ['Enumerated list',     2],
        19:     ['Equation',            2],
        20:     ['Eqn array',           2],
        21:     ['Eqn array numbered',  2],
        22:     ['Eqn inline',          2],
        23:     ['Equation label',      2],
        24:     ['Escape char',         2],
        25:     ['Figure',              2],
        26:     ['Font size',           2],
        27:     ['Font style',          2],
        28:     ['Footnote',            2],
        29:     ['Format',              2],
        30:     ['Fracdenom',           2],
        31:     ['Fraction',            2],
        32:     ['Graphics',            2],
        33:     ['Itemised list',       2],
        34:     ['Label',               2],
        35:     ['List item',           2],
        36:     ['Lyx-style list',      2],
        37:     ['Margin note',         2],
        38:     ['Multicolumn',         2],
        39:     ['Numbered paragraph',  2],
        40:     ['Numbered part',       2],
        41:     ['Numbered section',    2],
        42:     ['Numbered subparagraph',2],
        43:     ['Numbered subsection', 2],
        44:     ['Numbered subsubsection',2],
        45:     ['Object parameters',   2],
        46:     ['Page ref',            2],
        47:     ['Spare2',              2],
        48:     ['Parbox',              2],
        49:     ['Quotation',           2],
        50:     ['Quote',               2],
        51:     ['Right address',       2],
        52:     ['Sideways',            2],
        53:     ['StartObject',         2],
        54:     ['Table',               2],
        55:     ['Table of contents',   2],
        56:     ['Tabular',             2],
        57:     ['Text',                2],
        58:     ['Title',               2],
        59:     ['Unnumbered paragraph',2],
        60:     ['Unnumbered part',     2],
        61:     ['Unnumbered section',  2],
        62:     ['Unnumbered subparagraph',2],
        63:     ['Unnumbered subsection',2],
        64:     ['Unnumbered subsubsection',2],
        65:     ['Unrecognised',        2],
        66:     ['URL',                 2],
        67:     ['Verse',               2],
        68:     ['vspace',              2]      # must be last object
      }
# NEW ITEMS

  standardTypes={'sec': ['Section ',    'self.secRef'],
                 'fig': ['Figure ',     'self.figRef'],
                 'eq':  ['',            'self.getEqn']  # dummy entry
              }
  #aState=0

  test1=0
  test2=0
  syntaxErr=0           # 1 if syntax or exec error, else 0
  isTex=0               # flag for Tex style object such as {\large ... }
  # the variables below are set by setIndices() on initialisation so we are free to
  # amend the table without upsetting things elsewhere
  StructP=0             # index of 'Document structure' in objDict
  DescrP=0              # index of 'Description' in objDict
  UnrecP=0              # index of 'Unrecognised' in objDict
  FormatP=0             # index of 'Format' in objDict
  TextP=0               # index of 'Text' in objDict
  TTOCP=0               # index of 'Table of contents' in objDict
  EqnP=0                # index of 'Equation' in objDict
  EqnLab=0              # index of 'Equation label' in objDict
  EqnLP=0               # index of 'Eqn inline' in objDict
  EqnArrP=0             # index of 'Eqn array numbered' in objDict
  ArrP=0                # index of 'Array' in objDict
  EscP=0                # index of 'Escape char' in objDict
  ObjP=0                # index of 'Object parameters' in objDict
  SizeP=0               # index of 'Font size' in objDict
  styleP=0              # index of 'Font style' in objDict
  bibEP=0               # index of 'Bibliography label' in objDict
  refP=0                # index of 'Cross reference' in objDict
  StartObP=0            # index of 'StartObject' in objDict
  EndObP=0              # index of 'EndObject' in objDict
  ItemP=0               # index of 'List item' in objDict
  QuoteP=0              # index of 'Quote' in objDict
  QuotnP=0              # index of 'Quotation' in objDict
  eqnLab=''             # pending equation label number yet to be inserted
  itemType=[]           # number or bullet stack for itemised lists

  quotep=[]             # for test of Quote & Quotation indices; set up in setIndices
  nextEntry=0           # index for next new entry in objDict
  geometry=''           # may contain margins etc.
  citations=[]          # locations of citations for fixup
  references=[]         # locations of references for fixup
  Object=""
  TSp=2                 # running pointer into structure list
  Rwidth=0
  fontChanged=0         # true if end of font change just done, to retain next space
  enumIndent=0          # for enumerated lists
  itemIndent=0          # for itemised lists
  indentation=0         # overall list indentation
  tableRows=0           # table row count
  tableParams=0         # index of table parameters
  param=0               # used by doCite
  Error="Unknown reason" # error message if aborted
  ErrorLine=" not recorded"
  stackDepth=0
  resultsFile=''
  reportFile=''
  objListFile=''
  GUIresult=''          # for result of GUI call
  fsd=None              # for diry GUI
  root=None

  # --------------------------------------------------------------------------------
  #     the following dictionaries are constructed on the fly ready for
  #     internal use when doing post-processing fixups
  # --------------------------------------------------------------------------------
  labelDict={}
  commandDict={}

  # objDict name -> name of the attribute that setIndices() loads with the
  # index of that object
  _indexAttr={
        'Unrecognised':         'UnrecP',
        'Document structure':   'StructP',
        'Description':          'DescrP',
        'Format':               'FormatP',
        'Table of contents':    'TTOCP',
        'Text':                 'TextP',
        'Equation':             'EqnP',
        'Equation label':       'EqnLab',
        'Escape char':          'EscP',
        'Object parameters':    'ObjP',
        'Font size':            'SizeP',
        'Font style':           'styleP',
        'Eqn inline':           'EqnLP',
        'Bibliography label':   'bibEP',
        'Cross reference':      'refP',
        'Array':                'ArrP',
        'Eqn array numbered':   'EqnArrP',
        'StartObject':          'StartObP',
        'EndObject':            'EndObP',
        'List item':            'ItemP',
        'Quote':                'QuoteP',
        'Quotation':            'QuotnP'
      }

  def findObj(self,s):
    "find index of object named s in objDict; None if there is no such name"
    for i,L in self.objDict.items():
      if s == L[0]:
        return i
    return None
    # findObj

  def setIndices(self):
    """ ----------------------------------------------------------------------------
        This sets up pointers to various objects.
        This allows the dictionary to be changed freely without incurring other
          changes.
        Also resets stackDepth, sets Rwidth to the longest object name plus 17
          and nextEntry to the number of entries in objDict.
        quotep is rebuilt from scratch on every call, so calling this more
          than once does not accumulate duplicate indices.
    --------------------------------------------------------------------------------
    """
    self.Rwidth=0
    self.stackDepth=0
    self.quotep=[]              # fresh list: never append to the shared default
    for i,L in self.objDict.items():
      s=L[0]
      if len(s) > self.Rwidth:
        self.Rwidth=len(s)
      attr=self._indexAttr.get(s)
      if attr is None:
        continue                # no dedicated index variable for this object
      setattr(self,attr,i)
      if s == 'Quote' or s == 'Quotation':
        self.quotep.append(i)
    self.Rwidth=self.Rwidth+17
    self.nextEntry=len(self.objDict)    # next new dictionary index
    # setIndices

  def resetIndices(self):
    "reset start indices in objDict and the document structure pointer to 2"
    self.TSp=2
    for L in self.objDict.values():
      L[1]=2
    # resetIndices

  def extractObj(self):
    """extract next document object

    The entry at position TSp of the document structure list names the
    object list to read; the entry at that list's own pointer is returned and
    both pointers are advanced.  Returns None when the structure list has no
    entry at TSp.
    """
    try:
      obj=self.objDict[self.StructP][self.TSp]  # get next object type index
    except (KeyError,IndexError):
      return None                       # structure list exhausted
    i=self.objDict[obj][1]              # get its index pointer
    entry=self.objDict[obj][i]          # get corresponding entry
    self.objDict[obj][1]=i+1            # increment list pointer
    self.TSp=self.TSp+1                 # increment document structure pointer
    return entry
    # extractObj
  # parsedVars

# Single shared instance; the functions in this file read and write their
# global state through it.
pv=parsedVars() # access global variables with pv.
                # must put () to access methods

pv.setIndices()                                 # set some object indices

def findMatch(s,i,chl,chr):
  """find chr matching chl e.g. for chl='{' and chr='}'

  Scans s from index i (expected to be non-negative) and returns the index
  of the first chr[0] that is not paired with a chl[0] met during the scan.
  If there is none the result is past the end of s, so that s[result:] is
  '': len(s) plus the number of chl[0] still open (or i itself when i is
  already past the end).
  Only the first character of chl and of chr is used; the opener is tested
  first, so if both are the same character it always counts as an opener.

  Implemented as a single pass with a nesting counter, so it neither copies
  the string tail at every step nor recurses once per nesting level.
  """
  n=len(s)
  depth=0                       # openers seen and not yet closed
  while i < n:
    c=s[i]
    if c == chl[0]:
      depth=depth+1
    elif c == chr[0]:
      if not depth:
        return i                # the closer that matches our starting level
      depth=depth-1
    i=i+1
  return i+depth        # if not found return i past end so that s[i:] = ''
  # findMatch

def findEnd(s,tagEnd,ff):
  """find matching tagEnd for begin

  s is the text already in hand; further lines are read from ff with
  readline() until one contains tagEnd.  Returns a pair
  (text up to and including tagEnd, remainder of that line).
  If a line without tagEnd contains '\\begin{', the search continues
  recursively from just after the word '\\begin' and that result is returned.
  If no more input can be had (readline fails, or returns '' at end of file)
  the pair is (text collected so far, None).  Previously end of file was not
  detected and the loop never terminated.
  """
  ss=""
  while not tagEnd in s:
    if '\\begin{' in s:
      i=s.find('\\begin{')
      ss=ss+s[:i+6]             # keep '\begin'; the '{' starts the next search
      s2=findEnd(s[i+6:],tagEnd,ff)
      ss=ss+s2[0]
      s=s2[1]
      return ss,s
    ss=ss+s
    try:
      s=ff.readline()
    except Exception:           # unreadable source: give back what we have
      return ss,None
    if not s:                   # readline() returns '' at end of file
      return ss,None
  i=s.find(tagEnd)
  L=len(tagEnd)
  return ss+s[:i+L],s[i+L:]
  # findEnd

def findChar(s,ch):
  "index of first ch[0] in s, or len(s) if absent so that s[result:] = ''"
  pos=s.find(ch[0])
  if pos >= 0:
    return pos
  return len(s)         # not found: point past the end
  # findChar

def findCharRev(s,ch):
  """index of the last ch[0] in s, or 0 if it does not occur

  NOTE: 0 is returned both for 'not found' and for a match at the very
  start of s; the two cases cannot be told apart from the result.
  """
  return max(s.rfind(ch[0]),0)  # rfind gives -1 when absent; clamp to 0
  # findCharRev

def parse2char(s1,ch):
  """extract next ch[0]-delimited symbol from s1

  Returns the pair (text before the first ch[0], text after it); the
  delimiter itself is dropped.  Returns None if ch[0] does not occur in s1,
  which includes the case of an empty s1 (formerly an IndexError).
  """
  i=s1.find(ch[0])
  if i < 0:
    return None
  return s1[:i],s1[i+1:]        #skip char
  # parse2char

def prlst(L,tags):
  """Needed when we cannot write the mutable list L directly

  Prints 'None' if L is None; otherwise prints each item of L on its own
  line.  If tags is non-empty each item is printed as tags+item+tags+tags
  (NOTE(review): the closing tag is doubled -- kept as found; confirm that
  this is intended).
  Every print is written as a call with one argument, which behaves the same
  as the print statement and is also accepted by later Python versions.
  """
  if L is None:
    print('None')
    return
  for i in L:
    if tags:
      print(tags+i+tags+tags)
    else:
      print(i)
  #prlst

class parseTex:
  "Parse Latex document"
  #ff1=open('/tmp/nesting','wt')
  # characters which, at the start of a line, open a bracketed parameter list
  objProps='{['
  # closing bracket for each of the opening brackets in objProps
  objPrRt={'{': '}','[':']'}
  # The counters and flags below are only initialised here.
  # NOTE(review): presumably they are maintained by the special handling
  # functions (doChap, doSect, ...) defined later in the class -- confirm there.
  PartNum=0
  ChapNum=0
  SectNum=0
  sectApp=0
  chapApp=0
  SubSectNum=0
  SubSubSectNum=0
  ParaNum=0
  SubParaNum=0
  figNum=0
  eqnNum=0
  isTOC=0
  inEqn=0                       # true if in equation
  inEqArr=0                     # true if in equation array
  inArr=0                       # true if in array
  ChapN=''                      # current chapter number
  SectN=''                      # current section number
  SubSectN=''                   # current subsection number
  SubSubSectN=''                # current subsubsection number
  ParaN=''                      # current paragraph number
  secRef=''                     # current sec:label number
  figRef=''                     # current fig:label number


  """ ------------------------------------------------------------------------------
     NEWOBJ enter new object in this table
     goto NEWLIST to enter object in object dictionary
      the table entries are as follows:
        first item      = LaTex keyword for it followed by ':'
        second item     = a list
        head of list      = list of alternative matching end tags for the object;
                            they must all be of the same length and the first
                            one must be that of the object and others of its
                            parent(s) e.g. an \item object is terminated
                            either by another \item object or by \end{<parent>}
                'continue' here means continue with current line and object
                           without recursion i.e. keyword has no list of its own
                'skip'     here means skip (ignore) the keyword entirely and
                           continue with current line and object
                'PoStFiX'  here means re-execute the special handling function
                           after completion as well as before (it must exist!)
                'RetAll'   here means do not skip end tag on return
                           e.g. for \item which is ended by the next instance of it
                           this must be the last entry in the list
        second list entry = an index into objDict to extract the list
                              where the object is to be stored
                            this is the index number in objDict
                0           means there is none e.g. for a format object,
        third list entry != 0 if 'StartObject' & 'EndObject' strings required when
                                 object current instance starts & finishes
                1 if blank line to be appended after last object item
                2 otherwise
                        0 otherwise
        fourth list entry  = special handling routine if required
                            this must be written and must have the parameters
                                (self,s,keyEnd,objType)
                            it returns either:
                                1. None if it is a distinct object needing recursion, or
                                2. the rest of the line if no recursion required, or
                                3. '\\&' + the rest of the line to skip cells,
                                   but is a distinct object requiring recursion
                            this entry need not exist as it is tested for with
                                        e.g. if x[2:] == []:
                            It is called again after the recursion if 'PoStFiX' is
                              in mainTable[][0]; it MUST then return None unless
                              a string is to be prefixed to the returned line
                              (e.g. for \frac)
                the functions below are re-usable and act as follows:
                  doFormat()    adds format object to dictionary & format list;
                                returns None as is an independent object
                  doFontStyle() adds font style to dictionary & style list;
                                returns None as is an independent object
                  doFontSize()  adds font size to dictionary & style list;
                                returns None as is an independent object
                  doEsc()       adds an escape char e.g. '\\pi' to escape char list
                                in exactly that form i.e. '\\pi' or whatever;
                                returns rest of line after e.g. '\\pi' to continue
                                in the parent object
                  doInsert()    inserts text in parent object e.g. special quotes;
                                returns None as line being parsed is unaltered,
                                but is combined with 'continue' end tag to stay in
                                parent object as is not a distinct object
                  doEqn()       sets state to 'equation' by setting inEqn true on
                                prefix and false on postfix
                                returns None

  ----------------------------------------------------------------------------------
  """
  # Keyword table driving the parser.  Each entry maps a keyword to a list:
  #   [list of end tags, index into objDict, end flag, handler name (optional)]
  # The handler is stored as the name of a method, not as the method itself.
  mainTable={
        # Python hashes this, so it may be large without degrading performance
        # keyword          matching end tag             object  end     special handling
        #                                               index   flag    function
        # the following keywords are for \begin{keyword} objects
        'abstract':     [['\\end{abstract}'],           1,      1],
        'array':        [['\\end{array}','PoStFiX'],    3,      2,      'doArray'],
        'comment':      [['\\end{comment}'],            11,     1],
        'center':       [['\\end{center}','PoStFiX'],   0,      0,      'doFormat'],
        'description':  [['\\end{description}'],        14,     1],
        'enumerate':    [['\\end{enumerate}','PoStFiX'],18,     1,      'doEnum'],
        'equation':     [['\\end{equation}','PoStFiX'], 19,     0,      'doEqnLabel'],
        'eqnarray':     [['\\end{eqnarray}','PoStFiX'], 21,     1,      'doEqnArr'],
        # NOTE(review): unlike '\\inlineEqn', 'eqnarray*' uses doEqn without
        # 'PoStFiX'; if doEqn only clears inEqn on its postfix call, inEqn
        # would stay set after the array ends -- confirm against doEqn
        'eqnarray*':    [['\\end{eqnarray*}'],          20,     1,      'doEqn'],
        'figure':       [['\\end{figure}'],             25,     1,      'doFigure'],
        'flushleft':    [['\\end{flushleft}','PoStFiX'],0,      0,      'doFormat'],
        'flushright':   [['\\end{flushright}','PoStFiX'],0,     0,      'doFormat'],
        'itemize':      [['\\end{itemize}','PoStFiX'],  33,     1,      'doItemize'],
        'lyxcode':      [['\\end{lyxcode}'],            7,      1],
        'lyxlist':      [['\\end{lyxlist}','PoStFiX'],  36,     1,      'doLyxlist'],
        'quotation':    [['\\end{quotation}'],          49,     1],
        'quote':        [['\\end{quote}'],              50,     1],
        'sideways':     [['\\end{sideways}'],           52,     2],
        'table':        [['\\end{table}'],              54,     1,      'doCentre'],
        'tabular':      [['\\end{tabular}','PoStFiX'],  56,     1,      'doTabular'],
        'thebibliography': [['\\end{thebibliography}'], 5,      1],
        'verse':        [['\\end{verse}'],              67,     1],
        '\\begin':      [['begin'],                     0,      0], # type found above
        # the following are new keywords we introduce
        '\\fracDenom':  [['}'],                         30,     2], # 2nd part of fraction
        '\\macron':     [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\subscript':  [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\superscr':   [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\tex':        [['}','PoStFiX'],               0,      0,      'doTex'],
        # the following are other keywords in LyX
        '\\[':          [['\\]','PoStFiX'],             19,     1,      'doEqnNoLabel'],
        '\\inlineEqn':  [['$','PoStFiX'],               22,     2,      'doEqn'],
        '\\appendix':   [['\\end{document}','RetAll'],  0,      0,      'doAppendix'],
        '\\author':     [['}'],                         4,      1],
        '\\bibitem':    [['}'],                         6,      1],
        '\\caption':    [['}'],                         8,      1],
        '\\chapter':    [['}'],                         9,      1,      'doChap'],
        '\\char':       [['continue'],                  0,      0,      'doChar'],
        '\\cite':       [['}','PoStFiX'],               0,      0,      'doCite'],
        '\\cline':      [['continue'],                  0,      0,      'doFormat'],
        '\\date':       [['}'],                         13,     1],
        '\\ensuremath': [['}'],                         17,     2],
                #ensuremath applies to keyboard characters like 0xF7 (plus-or-minus)
        '\\footnote':   [['}','PoStFiX'],               28,     2,      'doFootnote'],
        '\\frac':       [['}','PoStFiX'],               31,     2,      'doFrac'],
        '\\hfill':      [['continue',],                 0,      0,      'doFormat'],
        '\\hline':      [['continue'],                  0,      0,      'doFormat'],
        '\\includegraphics': [['}'],                    32,     2],
        '\\item':       [['\\item','\\end{','RetAll'],  35,     1,      'doItem'],
        '\\label':      [['}'],                         34,     0,      'doLabel'],
        '\\lyxaddress': [['}'],                         2,      1],
        '\\lyxarrow':   [['continue'],                  0,      0,      'doLyxarrow'],
        '\\lyxrightaddress': [['}'],                    51,     1],
        '\\maketitle':  [['continue'],                  0,      0],
        '\\marginpar':  [['}','PoStFiX'],               28,     2,      'doFootnote'],
        '\\multicolumn':[['}'],                         38,     2,      'doMulti'],
        '\\newcommand': [['continue'],                  0,      0,      'doNewComm'],
        '\\noindent':   [['continue'],                  0,      0,      'doFormat'],
        '\\nonumber':   [['continue'],                  0,      0,      'doFormat'],
        '\\pageref':    [['}'],                         46,     0],
        '\\pagestyle':  [['}'],                         0,      0,      'doFormat'],
        '\\parbox':     [['}'],                         48,     2,      'doPbox'],
        '\\paragraph':  [['}'],                         39,     1,      'doPara'],
        '\\paragraph*': [['}'],                         59,     1],
        '\\quotesinglbase': [['continue'],              0,      0,      'doInsert'],
        '\\subparagraph': [['}'],                       42,     1,      'doSubPara'],
        '\\subparagraph*': [['}'],                      62,     1],
        '\\part':       [['}'],                         40,     2,      'doPart'],
        '\\part*':      [['}'],                         60,     1],
        '\\ref':        [['}'],                         12,     0],     # not 1
        '\\section':    [['}'],                         41,     1,      'doSect'],
        '\\section*':   [['}'],                         61,     1],
        '\\subsection': [['}'],                         43,     1,      'doSubsect'],
        '\\subsection*': [['}'],                        63,     1],
        '\\subsubsection': [['}'],                      44,     1,      'doSubSubsect'],
        '\\subsubsection*': [['}'],                     64,     1],
        '\\tableofcontents':[['continue'],              55,     0,      'doTOC'],
        '\\tabularnewline': [['continue'],              0,      0,      'doFormat'],
        '\\textasciicircum':[['continue'],              0,      0,      'doInsert'],
        '\\textasciitilde':[['continue'],               0,      0,      'doInsert'],
        '\\textbackslash':  [['continue'],              0,      0,      'doInsert'],
        '\\textcompwordmark':  [['continue'],           0,      0,      'doInsert'],
        '\\title':      [['}','PoStFiX'],               58,     1,      'doTitle'],
        '\\underbar':   [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\url':        [['}'],                         66,     2],
        '\\vspace':     [['}'],                         68,     0],
        # the following are font styles
        '\\emph':       [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\mathbb':     [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\mathbf':     [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\mathcal':    [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\mathfrak':   [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\mathit':     [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\mathrm':     [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\mathtt':     [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\noun':       [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\textbf':     [['}','PoStFiX'],               0,      0,      'doFontStyle'], # bold
        '\\textsf':     [['}','PoStFiX'],               0,      0,      'doFontStyle'], # sans serif
        '\\textrm':     [['}','PoStFiX'],               0,      0,      'doFontStyle'], # Roman
        '\\texttt':     [['}','PoStFiX'],               0,      0,      'doFontStyle'], # teletype
        '\\textit':     [['}','PoStFiX'],               0,      0,      'doFontStyle'], # italic
        '\\textsl':     [['}','PoStFiX'],               0,      0,      'doFontStyle'], # slanted
        '\\textsc':     [['}','PoStFiX'],               0,      0,      'doFontStyle'], # small caps
        # the following are character accents
        '\\acute':      [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\bar':        [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\breve':      [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\c':          [['}','PoStFiX'],               0,      0,      'doFontStyle'], # cedilla
        '\\check':      [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\circumflex': [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\ddot':       [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\dot':        [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\grave':      [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\hat':        [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\tilde':      [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\vec':        [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\H':          [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\widehat':    [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\widetilde':  [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\overbrace':  [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\overrightarrow': [['}','PoStFiX'],           0,      0,      'doFontStyle'],
        '\\overline':   [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\underbrace': [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        '\\underline':  [['}','PoStFiX'],               0,      0,      'doFontStyle'],
        # below are not in StarMath
        '\\overleftarrow': [['}','PoStFiX'],            0,      0,      'doFontStyle'],
        '\\underleftarrow':     [['}','PoStFiX'],       0,      0,      'doFontStyle'],
        '\\underleftrightarrow': [['}','PoStFiX'],      0,      0,      'doFontStyle'],
        '\\overleftrightarrow': [['}','PoStFiX'],       0,      0,      'doFontStyle'],
        '\\underrightarrow':    [['}','PoStFiX'],       0,      0,      'doFontStyle'],
        # the following are font sizes
        '\\tiny':       [['}','PoStFiX'],               0,      0,      'doFontSize'],
        '\\scriptsize': [['}','PoStFiX'],               0,      0,      'doFontSize'],
        '\\footnotesize':[['}','PoStFiX'],              0,      0,      'doFontSize'],
        '\\small':      [['}','PoStFiX'],               0,      0,      'doFontSize'],
        '\\large':      [['}','PoStFiX'],               0,      0,      'doFontSize'],
        '\\Large':      [['}','PoStFiX'],               0,      0,      'doFontSize'],
        '\\LARGE':      [['}','PoStFiX'],               0,      0,      'doFontSize'],
        '\\huge':       [['}','PoStFiX'],               0,      0,      'doFontSize'],
        '\\Huge':       [['}','PoStFiX'],               0,      0,      'doFontSize'],
        # the following keywords are for Tex style syntax where '{' has been removed
        '\\displaystyle': [['}','PoStFiX'],             0,      0,      'doFontSize'], # NOT doFontStyle
        '\\textstyle': [['}','PoStFiX'],                0,      0,      'doFontSize'],
        '\\scriptstyle': [['}','PoStFiX'],              0,      0,      'doFontSize'],
        '\\scriptscriptstyle': [['}','PoStFiX'],        0,      0,      'doFontSize'],
        # the following keywords are ignored altogether
        '\\lines':      [['skip'],                      0,      0],
        '\\par':        [['skip'],                      0,      0],
        '\\switch':     [['skip'],                      0,      0]
        }
# NEW ITEMS

  def parseDoc(self,document):
    """Open the LaTeX file 'document', read its preamble and parse its body.

    Returns 0 if the file could not be opened (pv.Error then holds the
    reason), otherwise 1.  After a return of 1, pv.Error is None if the
    body was parsed to its end, or holds the message of the abort that
    stopped the parse.

    Side effects: sets self.fd and self.context_stack, and stores the
    document properties found in the preamble in pv.geometry.  The file
    is closed before returning, also when parsing raises an exception.
    """
    try:
      self.fd=open(document,'r')
    except (IOError, OSError):
      pv.Error="Could not open file: "+document
      return 0
    try:
      currentObj=self.parseObj('\\documentclass',0)     # may contain font size
      pv.geometry=self.parseObj('\\geometry',0)         # get document properties
      pv.geometry=pv.geometry+currentObj
      currentObj=self.parseObj('\\documentclass',1)
      self.checkFont(currentObj)
      currentObj=self.parseObj('\\usepackage{setspace',1)
      if currentObj:
        self.setSpacing(currentObj)
      currentObj=self.parseObj('\\begin{document}',0)   # find beginning
      line = self.fd.readline()                         # read next line of text
      self.context_stack=[pv.TextP]                     # initialise stack
      if not line:
        # nothing follows (or '\\begin{document}' is missing); parseBody
        # indexes line[0] and would fail, so report it as an abort instead
        pv.Error="Premature end of document"
        return 1
      s=self.parseBody(line,['\\end{document}'])        # parse body of document
      if s[:7] == "\\\\Abort":
        pv.Error=s[8:]                                  # text after '\\\\Abort '
      else:
        pv.Error=None
    finally:
      self.fd.close()                                   # close LayTex file
    return 1
    # parseDoc

  def parseObj(self,thing,returnNext):
    "parse thing from file; reposition file if not found"
    L=len(thing)
    line=""
    self.fd.seek(0)
    while True:
      try:
        line = self.fd.readline()
      except:
        return ''                       # let later call raise an error to avoid
        #                                 many messy tests etc.; also a non-None
        #                                 return value can give wrong results
        #                                 if there is an error
      if len(line) == 0:                #if not found
        return ''
      if line[0] == '%':                # skip comment lines
        continue
      if thing == line[:L]:             # thing assumed to be at start of line
        if returnNext:
          line = self.fd.readline()
          return line
        return line[L:]
    return ''
    # parseObj

  def checkFont(self,s):
    """append ' font=<name>,' to pv.geometry for the first font package named in s

    The packages are tried in the order listed; nothing is done if s
    names none of them.
    """
    fontPackages=(
        ('palatino','Palatino'),
        ('helvet','Helvetica'),
        ('avant','Avant Garde Gothic'),
        ('newcent','New Century Schoolbook'),
        ('bookman','Bookman'),
        )
    for package,fn in fontPackages:
      if package in s:
        pv.geometry=pv.geometry+' font='+fn+','
        return
    # checkFont

  def setSpacing(self,s):
    """set line spacing

    s is the preamble line that follows '\\usepackage{setspace'.  The
    spacing is appended to pv.geometry as ' line-spacing=<percent>,' where
    100 is single spacing; nothing is recorded if s names no spacing.

    'onehalf' used to be recorded as 50, out of step with 200 for 'double'
    and N00 for '\\setstretch{N}'; it is now 150.  The \\setstretch factor
    is read in full, so a fractional factor such as 1.5 is no longer cut
    down to its first digit.
    """
    if 'double' in s:
      ls='200,'
    elif 'onehalf' in s:
      ls='150,'
    elif 'setstretch' in s:
      ls=s[12:13]+'00,'                 # fallback: first char after '\setstretch{'
      i=s.find('{',s.find('setstretch'))
      j=s.find('}',i)
      if i != -1 and j != -1:
        try:
          ls=str(int(round(float(s[i+1:j])*100)))+','
        except ValueError:              # factor is not a plain number
          pass
    else:
      return
    pv.geometry=pv.geometry+' line-spacing='+ls
    # setSpacing

  def expandCommand(self,line,keyEnd):
    """expand the user-defined command at the start of line

    line[:keyEnd] is the command name, which is looked up in
    pv.commandDict; the rest of line holds its actual arguments.
    While the stored macro begins with '[n]', the next {...} argument is
    taken from the line and substituted for every '#n' in the macro, after
    which the '[n]' is dropped.  Finally the first and last characters of
    the macro (its '{' and '}') and all backslashes are removed.

    Returns (expanded macro text, rest of line after the arguments used).
    """
    body=pv.commandDict[line[:keyEnd]]          # stored macro definition
    rest=line[keyEnd:]                          # text following the command name
    while body[0] == '[':
      close=findMatch(rest,1,'{','}')
      actual=rest[1:close]                      # next actual argument
      rest=rest[close+1:]                       # & go past it
      formal='#'+body[1]                        # next formal argument
      # NOTE(review): this loop does not end if actual itself contains formal
      while formal in body:
        at=body.find(formal)
        body=body[:at]+actual+body[at+2:]       # splice actual into the macro
      body=body[3:]                             # skip [n]
    body=body[1:-1].replace('\\','')            # remove { and }, then backslashes
    return body,rest
    # expandCommand

  def testMaths(self,line,keyEnd):
    """test whether the keyword line[:keyEnd] is a maths symbol or bracket

    A keyword found in mathsSymbol or escChar is passed to doEsc and
    keyEnd is returned.  For a keyword found in mathsBrackets the extent
    of the delimiter that follows it is worked out, doEsc is called with
    line and the index just past that delimiter, and that index is
    returned.  Returns 0 for any other keyword.
    """
    keyword=line[:keyEnd]
    if keyword in mathsSymbol or keyword in escChar:
      self.doEsc(keyword,keyEnd,0)
      return keyEnd
    if not keyword in mathsBrackets:
      return 0
    width=1                             # length of the delimiter after the keyword
    following=line[keyEnd]
    if not following in brackets and following == '\\':
      if line[keyEnd+1] in ('{','}'):   # { } handled as esc char
        width=2
      else:
        while line[keyEnd+width].isalpha():     # LayTex follows lceil etc with space
          width=width+1
        if not line[keyEnd+1:keyEnd+width] in mathsBr:  # check for \lceil etc.
          width=1                               # not a known name: back to one char
    stop=keyEnd+width
    self.doEsc(line,stop,0)
    return stop
    # testMaths

  def augmentTables(self,s,type):
    """add unrecognised object s to objDict and mainTable

    type is 'begin' for a \\begin{s} object, whose end tag becomes
    '\\end{s}' with end flag 1; otherwise type is itself the end tag
    (e.g. '}') and the end flag is 0.  The object is given the next free
    objDict index, pv.nextEntry, which is then incremented.
    Returns the new mainTable entry.
    """
    idx=pv.nextEntry
    pv.objDict[idx] = ['New object = '+s, 2]
    if type == 'begin':
      entry=[['\\end{'+s+'}'],idx,1]
    else:
      entry=[[type],idx,0]
    self.mainTable[s]=entry
    pv.nextEntry=idx+1
    return entry
    # augmentTables

  def getline(self):
      "get next significant line"
      line=''
      while not line[:1] or line[:1] == '\n' or line[0] == '%':
                                # this works ok for null string
        try:
          line = self.fd.readline()
          if not line:                  # this only occurs on EOF
            pv.stackDepth=len(self.context_stack)
            return "\\\\Abort Premature end of document"
        except:
          return "\\\\Abort Read file failed"
        if line[:1] == '\n':
          return '\\\\'         # this gets fixed-up during post-processing
      return line
      # getline

  def parseParam(self,line,endTag):
    """parse out [] or {} bracketted parameters

    line must start with '{' or '['.  If the matching bracket is not on
    this line, further lines are fetched with getline and joined on until
    it is; an '\\\\Abort ...' line from getline is returned at once.
    The bracketed parameter text is appended to pv.objDict[pv.ObjP] and
    pv.ObjP to the structure list pv.objDict[pv.StructP].

    Returns the text left to parse: line[2:] for an empty parameter;
    otherwise the text after the first '{' if endTag[0] is '}', else the
    text starting two characters after the closing bracket.
    """
    opener=line[0]
    closer=self.objPrRt[opener]
    end=findMatch(line,1,opener,closer)
    if end == 1:                        # nothing between the brackets
      return line[2:]
    while not line[end:]:               # closing bracket not yet read
      pending=line
      if pending[-1] == '\n':
        pending=pending[:-1]
      more=self.getline()
      if more[:7] == '\\\\Abort':
        return more
      line=pending+more
      end=findMatch(line,1,opener,closer)
    pv.objDict[pv.ObjP].append(line[:end+1])    # append prefix to list
    pv.objDict[pv.StructP].append(pv.ObjP)      # & append prefix type to struc list
    if endTag[0] == '}':
      return line[line.find('{')+1:]
    return line[end+2:]
    # parseParam

  def storeObjText(self,s):
    """store text s in current object, and enter type in document structure list

    The current object type is the top of self.context_stack.  An empty s
    is ignored.  If s consists only of spaces it is discarded unless
    pv.fontChanged is set, i.e. unless it immediately follows a font change
    or escape char; other instances are redundant and ruin layout.
    pv.fontChanged is cleared once text has been stored.
    """
    if not s or (s.isspace() and not pv.fontChanged):
      return
    kind=self.context_stack[-1]                 # object type from top of stack
    pv.objDict[kind].append(s)                  # text goes in that object's list
    pv.objDict[pv.StructP].append(kind)         # type index goes in structure list
    pv.fontChanged=0
    # storeObjText

  def parseBody(self,line,endTag):
    """ ----------------------------------------------------------------------------
       procedure to read the body of the document;
       'line' is the current line to parse
       'endTag' is a list of end-of-object tags e.g. [\end{equation}]
       it extracts the plain text portions at its depth and parses nested objects
         encountered by means of recursion;
       It is table-driven by the dictionary mainTable{} as follows for each item:
       1        the first item is the list of alternative end tags
       2        the second item is an index into objDict to extract the index of
                the list where the object is to be stored
       3        the third item != 0 if 'StartObject' & 'EndObject' required
       4        the fourth item if present is a pre-processing function to be
                    executed prior to recursion, and possibly after
    --------------------------------------------------------------------------------
        On entry we check for special start e.g. object params, null object etc.
    """
    if line[0] == '}':                             # if null-content new object e.g.
      s=pv.objDict[self.context_stack[-1]][0][14:] # LyX{}, then get its name
      objType=self.context_stack[-2]               # & parent object index
      pv.objDict[objType].append(s)                # and append it to that list
      pv.objDict[pv.StructP].append(objType)       # append index to structure list
      pv.fontChanged=1                  # retain space if followed by font change
      return line[1:]                              # & return rest of line
    tagLen=len(endTag[0])
    if line[0] in self.objProps and line[1] != '\\':
      if self.inEqn and not self.inArr:
        pass
                # this prevents part of an equation being treated wrongly
                # as parameters, which loses a '{' from the equation
      else:
        """
        if special prefix exists e.g. \vspace{0.3cm}
                or \begin{tabular}{|c||c|c|c|c|}
                in this case | means provide vertical table line
                and each c represents one column
                thus {c|c} provides only one central vertical dividing line
                \hline defines a horizontal line in the table
        """
        if line[2] == '}' and not line[1] in 'clrp':    # e.g. line[:3] == {*}
          line=line[1]+line[3:]
        else:
          line=self.parseParam(line,endTag)
    pv.ErrorLine=line

    """ ---------------------------------------------------------------------------
        main loop, ended when the end tag is encountered
    """

    while not line[:tagLen] in endTag:
      para=''
      if not line[:1]:
        line=self.getline()
        if line[:7] == '\\\\Abort':
          return line
        if pv.fontChanged:
          para=' '
          pv.fontChanged=0
      i=0

      """ --------------------------------------------------------------------------
        inner loop which extracts current object chars until the keyword for a
        nested object is encountered
      """

      while line[i] != '\\':            # look for next keyword
        if line[i:i+tagLen] in endTag:          # if possible end of object
          break                                         # then do more checks
        if ord(line[i]) > 0x9f:                 # if is latin1
          line=line[:i]+'\\latin'+line[i:]              # then make into escape char
          # e.g. latin char 0xb1 becomes '\\latin\xb1'
          break
        if line[i] in exceptions:               # if in '{&$^_`\'\n'
          if line[i:i+2] in pairs:                      # check for {} `` '' etc.
            ss=pairs[line[i:i+2]]
            if ss == '\\endash{}' and line[i+2:] and line[i+2] == '-':
              ss='\\emdash{}'
              line=line[:i]+ss+line[i+3:]
            else:
              if ss == '\\quoteds{}':           # if '``'
                if not i:                               # then check if leading
                  pass                                  # or trailing
                elif not line[i+2:] or line[i+2].isspace() or not line[i-1].isspace():
                  i=i+2                                 # retain if trailing
                  continue
              line=line[:i]+ss+line[i+2:]               # else replace
            if not ss or ss[1] != 's':                  # if null or escape symbol
              continue                                  # then continue
            break                                       # else break
          if line[i] == '~':                    # if hard space
            line=line[:i]+'\\hardspace{}'+line[i+1:]    # trailing [] delimits it
                                                        # in case line[i+1] is alpha
                                                        # char; [] is removed later
            break
          if line[i] == '%':                    # if footnote/caption bracket
            line=line[:i]+line[i+1:]                    # then remove it
            continue
          if line[i] == '{':
                                                        # must come after '{}' check
            if line[i+2] == '}':                # if e.g. {*}
              line=line[:i]+line[i+1]+line[i+3:]        # then strip {}
            elif not self.inEqn:                # else if Tex style object is next
              line=line[:i]+'\\tex{}'+line[i+1:]        # then handle it
            else:
              i=i+1                                     # else continue
            continue
          if line[i] == '&':                    # if end of table cell
            para=para+line[0:i]                         # then insert in para
            self.storeObjText(para)
            para=''
            line=line[i+1:]                             # and skip
            i=0
            self.doFormat('&',1,0)                      # insert format item
            continue
          if line[i] == '\'':
            line=line[:i]+'\\quotes{}'+line[i+1:]       # single quote
            continue
          if line[i] == '<':
            line=line[:i]+'\\lt{}'+line[i+1:]   # for Office: < has XML significance
            continue
          if line[i] == '>':
            line=line[:i]+'\\gt{}'+line[i+1:]   # for Office: > has XML significance
            continue
          if line[i] == '$':                    # if inline equation
            line=line[:i]+'\\inlineEqn  '+line[i+1:]
            continue
          if line[i] == '\n':                   # if end of line
            if self.context_stack[-1] in pv.quotep:
              para=para+line                            # keep '\n' for quotes
            else:
              para=para+line[:i]+' '                    # else discard
              if pv.fontChanged:
                pv.fontChanged=0
              else:
                if para.isspace():
                  para=''
            i = -1
            line=self.getline()                         # get next
            if line[:7] == '\\\\Abort':
              return line
            if line == '\\\\' and self.context_stack[-1] == pv.ItemP: #if end item
              line=endTag[0]                    # then set line to major end tag
              endTag=endTag[:-1]                # and remove 'RetAll'
                                # this causes the item to be ended properly
                                # as '\n\n' as well as endTag can end a list item
              i=0
              break
        i=i+1

      """ -------------------------------------------------------------------------
        end of inner while loop: a nested object has been found, or the end of
        the current obect, or an escape character;
        next append outstanding text to para
        and check for end of object or escape char
      """

      if i and line[i:i+4] == '\\url':
        line=line[i:i+4]+'['+line[:i]+']'+line[i+4:]
        # re-splice tag as parameter, as it precedes '\\url'
        i=0
      if i:                                     # if text in line
        para=para+line[:i]                              # include it
        line=line[i:]                                   # & set line past it
      self.storeObjText(para)
      para=''
      if line[:tagLen] in endTag:               # if possible end of object
        break                                        # else finish this object
      if line[1] in escChars:                   # if escape char
        self.doEsc(line,2,0)                            # put in that list
        line=line[2:]                                   # & skip it
        continue
      if line[:6] == '\\latin':                 # if latin1 char
        self.doEsc(line,7,0)                            # then enter in escape list
        line=line[7:]
      else:                             # else handle nested object

        """ -----------------------------------------------------------------------
          fall through here if embedded object to be parsed;
          first parse object keyword e.g. \item or \begin{title}
        """

        keyEnd=1
        while line[keyEnd] in alpha:            # find end of keyword
          keyEnd=keyEnd+1
        if keyEnd == 1:
          if line[1] in charAccents:            # if char accent
            keyEnd=accentDict[line[:2]][1]      # then set keyend from dictionary
            line=accentDict[line[:2]][0]+line[2:]# and replace keyword from dictionary
          elif line[1] == '[':
                # if char accent or un-numbered equation ('[' not in alpha)
            keyEnd=keyEnd+1                     # then increment keyEnd
        if line[keyEnd] == '*':                 # e.f. \section*
          keyEnd=keyEnd+1
        if line[keyEnd] == '{':                 # if next char is '{'
          rightB=findMatch(line,keyEnd+1,'{','}') # then find matching '}'
          if not line[rightB:]:                 # if not found
            rightB=keyEnd+1                     # then set rightB = keyEnd+1
          embedStart=keyEnd+1                   # embedded text starts after '{'
        else:
          rightB=keyEnd
          embedStart=keyEnd                     # embedded text starts at keyEnd
        # keyEnd is now index of char after end of keyword
        # embedStart is index of first char in embedded text
        # rightB is now either index of '}' or = keyEnd+1

        """------------------------------------------------------------------------
                now obtain matching end tag from mainTable
                  directly if not \begin, else x.. in \begin{x..};
                  if not in mainTable:
                           check if special maths-style escape char,
                        or starts with '\\' (non-keyword)
                        or is not recognised
        """

        try:
            mtEntry=self.mainTable[line[:keyEnd]]       # get matching end tag
        except:                                 # if not found
            i=self.testMaths(line,keyEnd)
                        # do this here so tests not done for every keyword
            if i:
              line=line[i:]
              continue
            elif line[:keyEnd] in pv.commandDict: # if in command dictionary
              m=self.expandCommand(line,keyEnd)    # then expand it
              self.storeObjText(m[0])              # put result in current object
              line = m[1]                                  # and set line past it
              continue
            elif line[keyEnd] == '{':           # if starts with '{'
              if line[:keyEnd] == '\\':         # if '\\' is not start of an object
                line=line[1:]                   # then skip it
                continue                                # and do rest of line as text
              mtEntry=self.augmentTables(line[:keyEnd],'}')# add to tables
            else:
              if line[:keyEnd] == '\\':         # if keyword = '\\'
                line=line[1:]                           # then skip it
                continue                                        # and do rest of line
              line=self.storeUnrecog(line,keyEnd) # else not recognised
              continue
        if 'begin' in mtEntry[0]:
          s=line[keyEnd+1:rightB]               # assume '}' found in this case
          try:
            mtEntry=self.mainTable[s]           # get table entry
          except:                               # else not recognised
            mtEntry=self.augmentTables(s,'begin')       # so add to tables
          embedStart=rightB+1

        """-------------------------------------------------------------------------
                now get object index and check for special handling requirements
                  prior to recursion;
                the pre-processing function names are in the main table dictionary
                  and are executed with the exec() function
        """

        objType=mtEntry[1]                      # look up object index number
        if mtEntry[2]:                          # if object start to be stored
          currObj=objType                               # then keep type for EndObject
          self.doObject(0,objType)                      # and store it
        if mtEntry[3:]:                 # if special handling required
          sphKey=line #[:keyEnd]                                # (special handling key needed later)
          cmnd='noMore=self.'+mtEntry[3]+'(line,keyEnd,'
          if objType<1:                         # if not independent object
            cmnd=cmnd+str(self.context_stack[-1])+')'   #then put results in parent
          else:
            cmnd=cmnd+'objType)'                # else is new object
          exec(cmnd)                            # do special handling
          if noMore != None:                    # if value returned
            if noMore[:2] != '\\&':                     # then if not separate object
              line=noMore                               # then set it in line
              continue                                  # & skip the rest
            else:
              line=noMore[2:]                   # else continue, skipping '\\&'
              embedStart=0
        if not line[embedStart:]:               # if at end of line
          embedStart = 0                                # then set embedStart for all the new line
          line=self.getline()
          if line[:7] == '\\\\Abort':
            return line
          if pv.fontChanged:
            line=' '+line
            pv.fontChanged=0
        # at this point embedStart points to where the new object text starts
        if 'skip' in mtEntry[0]:                # if object not wanted
          line=line[keyEnd:]                            # then skip entirely, unlike
          continue                                      # 'continue' which goes past any '}'
        if 'continue' in mtEntry[0]:            # if not embedded object
                                                        #then continue with current one
          if line[embedStart:] and line[embedStart] == '}':
            embedStart=embedStart+1             # do not write '}' in text
          line=line[embedStart:]                # for null content object
          continue

        """ -----------------------------------------------------------------------
                now push object type on stack
                and execute recursive call to parse embedded object
                and then honour any post-processing requirements
        """

        if objType<1:                           # e.g. for formatting objects
          self.context_stack.append(self.context_stack[-1]) # push current type
        else:                                   # else set new
          self.context_stack.append(objType)    # type of object (to store text in)
        line=self.parseBody(line[embedStart:],mtEntry[0])       # parse object
        if mtEntry[2]:                  # if end-of-object notification needed
          self.doObject(-mtEntry[2],currObj)    # then provide it!
        self.context_stack.pop()                # pop stack
        if line[:7] == "\\\\Abort":
          return line
        if 'PoStFiX' in mtEntry[0]:     # if post processing required
          exec('noMore=self.'+mtEntry[3]+'(sphKey,keyEnd,-100)') # then repeat special handling
          if noMore != None:
            line=noMore+line                    # e.g. for '\\fracDenom'
      # now continues in main while loop

    """ ---------------------------------------------------------------------------
        end of main loop
        return whole line if end tag is that of parent or is own delimiter
        else return rest of line after end tag
    """

    if line[:tagLen] != endTag[0] or 'RetAll' in endTag:
                # if ended on parent end tag or is own delimiter e.g. '\\item'
      return line                               # then return that tag for parent
    return line[tagLen:]                        # else return rest of line
  # parseBody

  def storeUnrecog(self,s,keyEnd):
    """Record the keyword s[:keyEnd] as an unrecognised object.

    The keyword is appended to the pv.UnrecP list and a pv.UnrecP entry is
    added to the document structure list.  Returns s minus its first
    character (the leading '\\') so the caller can go on with the rest.
    """
    keyword=s[:keyEnd]
    remainder=s[1:]                             # only the first char is dropped
    pv.objDict[pv.UnrecP].append(keyword)
    pv.objDict[pv.StructP].append(pv.UnrecP)
    return remainder
    # storeUnrecog

  def doObject(self,i,objType):
    """Add a start-of-object or end-of-object marker to the document.

    i < 0 records an end marker (pv.EndObP); any other value records a start
    marker (pv.StartObP).  The marker type goes into the structure list and
    element 0 of the objType list is appended to the marker's own list.
    """
    if i < 0:
      marker=pv.EndObP
    else:
      marker=pv.StartObP
    pv.objDict[pv.StructP].append(marker)
    pv.objDict[marker].append(pv.objDict[objType][0])
    # doObject

  def doTitle(self,s,keyEnd,stage):
    """Emit the font directives for a title: '\\huge' size, '\\textsf' style.

    s and keyEnd are ignored; stage is handed on to both font helpers.
    Always returns None.
    """
    sizeKey='\\huge'
    styleKey='\\textsf'
    self.doFontSize(sizeKey,6,stage)
    self.doFontStyle(styleKey,7,stage)
    return None
    # doTitle

  def doFrac(self,s,keyEnd,stage):
    """Handle a fraction.

    The first pass (any stage other than -100) does nothing and returns
    None.  The post-processing pass (stage == -100) returns '\\fracDenom',
    which the caller splices into the line to handle the denominator.
    s and keyEnd are unused.
    """
    if stage == -100:
      return '\\fracDenom'
    return None
    # doFrac

  def doNewComm(self,s,keyEnd,objType):
    """Parse out a macro definition and enter it in pv.commandDict.

    s is expected to look like '\\newcommand{\\name}[..][..]{body}rest' with
    s[keyEnd] being the '{' that opens the command name.  Everything from
    just after the name's '}' up to and including index j (the value
    findMatch() reports for the body, presumably its closing '}') is stored
    under the command name.  Returns the remainder of s after index j.
    objType is unused.

    Each '[..]' argument cell is skipped by searching for ']' onward from
    the current position, so definitions with two cells such as '[2][dflt]'
    terminate (restarting the search from index 0 would find the same ']'
    forever).
    """
    i=s.find('}')                       # end of command name
    ss=s[keyEnd+1:i]                    # ss = command name e.g. \macro
    j=i
    while s[j+1:j+2] == '[':            # slice, so no IndexError at end of s
      k=s.find(']',j+1)                 # search onward, not from the start
      if k == -1:                       # unterminated cell: stop skipping
        break
      j=k
    j=findMatch(s,j+2,'{','}')          # skip body
    pv.commandDict[ss]=s[i+1:j+1]       # enter in dictionary
    return s[j+1:]                      # and return rest of line
    # doNewComm

  # Literal text standing in for LaTeX commands that denote a character or a
  # short string.  doInsert() looks the command keyword up here and appends
  # the value to the current object's text.
  insertDict={
        '\\ldots':              ". . .",
        '\\textasciicircum':    "^",
        '\\quotesinglbase':     ",",
        '\\textbackslash':      "\\",
        '\\textasciitilde':     "~",
        '\\textcompwordmark':   " "     # ligature break
        }

  def doItem(self,s,keyEnd,objType):
    """Insert the number or bullet in front of a list item (called for \\item).

    Does nothing when pv.itemType is empty.  Otherwise the current label
    (pv.itemType[-1]), indented by pv.indentation spaces and followed by a
    space, is appended to the objType list and objType is added to the
    document structure.  The label on top of pv.itemType is then advanced
    for the next item:
      digits ('3.')      -> incremented ('4.')
      '(x)'              -> next entry of alpha after x
      key of roman ('i.')-> the table value for that key
      'x.'               -> next entry of alpha after x
      anything else      -> left unchanged
    The s and keyEnd arguments are not read.  Always returns None.
    """
    if not pv.itemType:
      return None
    pv.objDict[objType].append(spaces[:pv.indentation]+pv.itemType[-1]+' ')
                                                # append list itemizer to object
    pv.objDict[pv.StructP].append(objType)      # and object to structure list
    s=pv.itemType[-1]
    if s[:1].isdigit():                 # if not bulleted list
      s=str(int(s[:-1])+1)+'.'          # int(), not eval(): it is only a counter
    elif s[0] == '(':
      i=alpha.find(s[1])
      s='('+alpha[i+1]+')'
    elif s[:-1] in roman:
      s=roman[s[:-1]]+'.'
    elif s[0] in alpha:
      i=alpha.find(s[0])
      s=alpha[i+1]+'.'
    pv.itemType[-1]=s
    return None
    # doItem

  def doLyxlist(self,s,keyEnd,stage):
    """Adjust pv.indentation for a Lyx-style list (used for nesting).

    Adds 2 on entry and takes 2 off again when called with stage == -100.
    s and keyEnd are unused.
    """
    step=2
    if stage == -100:
      step=-2
    pv.indentation=pv.indentation+step
    # doLyxlist

  def doItemize(self,s,keyEnd,stage):
    """Open or close an itemised (bulleted) list.

    Opening (stage != -100) raises pv.indentation and pv.itemIndent by 2 and
    pushes itemBullet[pv.itemIndent] on pv.itemType.  Closing
    (stage == -100) pops pv.itemType and lowers both counters by 2.
    """
    if stage != -100:
      pv.indentation=pv.indentation+2
      pv.itemIndent=pv.itemIndent+2
      pv.itemType.append(itemBullet[pv.itemIndent])
      return
    pv.itemType.pop()
    pv.indentation=pv.indentation-2
    pv.itemIndent=pv.itemIndent-2
    # doItemize

  def doEnum(self,s,keyEnd,stage):
    """Open or close an enumerated list.

    Opening (stage != -100) raises pv.enumIndent and pv.indentation by 2 and
    pushes enumType[pv.enumIndent] on pv.itemType.  Closing
    (stage == -100) pops pv.itemType and lowers both counters by 2.
    """
    if stage != -100:
      pv.enumIndent=pv.enumIndent+2
      pv.indentation=pv.indentation+2
      pv.itemType.append(enumType[pv.enumIndent])
      return
    pv.itemType.pop()
    pv.enumIndent=pv.enumIndent-2
    pv.indentation=pv.indentation-2
    # doEnum

  def doInsert(self,s,keyEnd,objType):
    """Insert the text implied by the command s[:keyEnd] into an object.

    The replacement is taken from self.insertDict, appended to the objType
    list, and objType is added to the document structure.  Returns None.
    """
    replacement=self.insertDict[s[:keyEnd]]
    objects=pv.objDict
    objects[objType].append(replacement)
    objects[pv.StructP].append(objType)
    return None
    # doInsert

  def doTOC(self,s,keyEnd,objType):
    """Note a table of contents: add pv.TTOCP to the document structure and
    set self.isTOC so that later headings are recorded for it.

    None of the arguments is used.  Returns None.
    """
    structure=pv.objDict[pv.StructP]
    structure.append(pv.TTOCP)
    self.isTOC=1
    return None
    # doTOC

  def doPbox(self,s,keyEnd,objType):
    """Parse out the two parameter cells of a parbox, e.g. [t]{3cm}.

    The text of s up to the end of the second cell is stored in the object
    parameter list (pv.ObjP) and pv.ObjP is added to the document structure.
    Returns '\\&' plus the rest of s; the '\\&' prefix tells the caller to go
    on and recurse into the object.  keyEnd and objType are unused.
    """
    firstEnd=9+findChar(s[8:],']')              # find end of first cell
    cellsEnd=firstEnd+2+findChar(s[firstEnd:],'}') # find end of second cell
    params,rest=s[:cellsEnd],s[cellsEnd:]
    pv.objDict[pv.ObjP].append(params)
    pv.objDict[pv.StructP].append(pv.ObjP)
    return '\\&'+rest
    # doPbox

  def doMulti(self,s,keyEnd,objType):
    """Parse out the two parameter cells of a multicolumn, e.g. {1}{|c|}.

    s[12:i], where i is the computed end of the second cell, is stored in
    the object parameter list (pv.ObjP) and pv.ObjP is added to the document
    structure.  Returns '\\&' plus s[i+1:]; the '\\&' prefix tells the caller
    to continue with recursion.  keyEnd and objType are unused.
    """
    firstEnd=14+findChar(s[13:],'}')            # find end of first cell
    cellsEnd=firstEnd+1+findChar(s[firstEnd:],'}') # find end of second cell
    pv.objDict[pv.ObjP].append(s[12:cellsEnd])
    pv.objDict[pv.StructP].append(pv.ObjP)
    rest=s[cellsEnd+1:]
    return '\\&'+rest
    # doMulti

  def doChar(self,s,keyEnd,stage):
    """Handle e.g. \\char`\\x{}: splice out the character itself.

    When s[keyEnd:keyEnd+2] is a backquote followed by a backslash, returns
    the character at keyEnd+2 joined to the text from keyEnd+5 on (i.e. with
    the two characters after it dropped), so parsing stays in the current
    object.  Otherwise returns None.  stage is unused.
    """
    prefix=s[keyEnd:keyEnd+2]
    if prefix == '`\\':
      char=s[keyEnd+2]
      return char+s[keyEnd+5:]
    return None
    # doChar

  def doEsc(self,s,keyEnd,objType):
    """Store an escape sequence such as '\\pi' (s[:keyEnd]) in the escape list.

    The sequence is appended to the pv.EscP list, pv.EscP is added to the
    document structure and pv.fontChanged is set to 1.  Returns s[keyEnd:],
    the text after the sequence.  objType is unused.
    """
    escape,rest=s[:keyEnd],s[keyEnd:]
    pv.objDict[pv.EscP].append(escape)
    pv.objDict[pv.StructP].append(pv.EscP)
    pv.fontChanged=1
    return rest                         # no separate object so continue after it
    # doEsc

  def doLyxarrow(self,s,keyEnd,objType):
    """Store '\\lyxarrow' as an escape item via doEsc().

    lyxarrow has a null content body, hence the special handling.  The
    arguments are not used.  Returns None.
    """
    arrow='\\lyxarrow'
    self.doEsc(arrow,9,0)
    return None
    # doLyxarrow

  # Font-size keyword -> name stored in the size list (pv.SizeP).
  # doFontSize() looks up s[:keyEnd] here; the closing entry is the same
  # name prefixed with 'end '.
  fsDict={
        '\\tiny':               'tiny',
        '\\scriptsize':         'scriptsize',
        '\\footnotesize':       'footnotesize',
        '\\small':              'small',
        '\\large':              'large',
        '\\Large':              'larger',
        '\\LARGE':              'largest',
        '\\huge':               'huge',
        '\\Huge':               'huger',
        '\\displaystyle':       'mathsDisp',    # this IS actually a size (relative)
        '\\textstyle':          'mathsText',
        '\\scriptstyle':        'mathsScript',
        '\\scriptscriptstyle':  'mathsSScript',
        '\\default':            'default'
        }

  def doFontSize(self,s,keyEnd,stage):
    """Record a font-size change in the size list (pv.SizeP).

    The size name is self.fsDict[s[:keyEnd]].  With stage == -100 the entry
    is 'end ' plus the name and pv.fontChanged is set to 1; otherwise the
    bare name is stored.  pv.SizeP is added to the document structure in
    both cases.  Returns None.
    """
    name=self.fsDict[s[:keyEnd]]
    closing=(stage == -100)
    pv.objDict[pv.StructP].append(pv.SizeP)
    if closing:
      name='end '+name
    pv.objDict[pv.SizeP].append(name)
    if closing:
      pv.fontChanged=1
    return None
    # doFontSize

  def doFontStyle(self,s,keyEnd,stage):
    """Record a font-style change in the style list (pv.styleP).

    If the first '{' in s is immediately followed by '}' (null content, a
    LaTeX error) nothing is stored and the text after the '}' is returned.
    Otherwise pv.styleP is added to the document structure and the tag
    s[:keyEnd]+'{' is stored: prefixed with 'end ' when stage == -100 (also
    setting pv.fontChanged and returning None), or as it is, returning
    '\\&' plus the text after the '{' so that the caller treats the content
    as a separate object.
    """
    brace=s.find('{')
    if s[brace+1] == '}':               # null content: omit altogether
      return s[brace+2:]
    tag=s[:keyEnd]+'{'
    pv.objDict[pv.StructP].append(pv.styleP)
    if stage != -100:
      pv.objDict[pv.styleP].append(tag)
      return '\\&'+s[brace+1:]          # skip { ; is separate object
    pv.objDict[pv.styleP].append('end '+tag)
    pv.fontChanged=1
    return None
    # doFontStyle

  # changes in this dictionary must be reflected in ALL Conv...py programs
  # where the DO functions handle them
  # Format keyword (or \begin{...} environment name) -> name stored in the
  # format list (pv.FormatP) by doFormat().
  # NOTE(review): doFormat() also tests for the name 'vspace', which no entry
  # here produces - confirm whether an entry is missing or added elsewhere.
  fmtDict={
        '\\hline':              'hline',
        '\\cline':              'cline',
        '\\hfill':              'hfill',
        '\\noindent':           'noindent',
        '\\nonumber':           'nonumber',
        '\\tabularnewline':     'tabularnewline',
        'center':               'centre',
        '\\pagestyle':          'pagestyle',
        'flushleft':            'leftAlign',
        'flushright':           'rightAlign',
        '&':                    'endCell'
        }

  def doFormat(self,s,keyEnd,stage):
    """Record a format directive in the format list (pv.FormatP).

    The lookup key is s[:keyEnd], or for '\\begin' the text between index 7
    and the first '}' ('center', 'flushleft' or 'flushright').  The name
    from self.fmtDict, prefixed with 'end' when stage == -100, is stored and
    pv.FormatP is added to the document structure.  A 'tabularnewline' entry
    also bumps pv.tableRows.  A 'vspace' entry stores its parameter cell in
    pv.ObjP and returns '\\&' plus the rest of s; every other case returns
    None.
    """
    if s[:keyEnd] == '\\begin':
      key=s[7:s.find('}')]      # 'center', 'flushleft' or 'flushright'
    else:
      key=s[:keyEnd]
    name=self.fmtDict[key]
    if stage == -100:
      name='end'+name
    pv.objDict[pv.FormatP].append(name)
    pv.objDict[pv.StructP].append(pv.FormatP)
    if name == 'tabularnewline':
      pv.tableRows=pv.tableRows+1
      return None
    if name != 'vspace':
      return None
    cellEnd=9+findChar(s[8:],'}')               # find end of cell
    pv.objDict[pv.ObjP].append(s[7:cellEnd])    # store result in object param list
    pv.objDict[pv.StructP].append(pv.ObjP)      # & add to document structure
    return '\\&'+s[cellEnd+1:]                  # prefix to continue with recursion
    # doFormat

  def TOCItem(self,obj):
    """Store reference indices to where the next TOC item is stored.

    Two numbers are appended to the TOC list (pv.TTOCP): the index of the
    last entry in the document structure list and the index of the last
    entry in the obj list.  They are converted during post-processing,
    because the heading still has to be parsed (e.g. for formats); such
    decorations are not reflected in the TOC.
    """
    toc=pv.objDict[pv.TTOCP]
    lastStruct=len(pv.objDict[pv.StructP])-1
    toc.append(lastStruct)
    lastItem=len(pv.objDict[obj])-1
    toc.append(lastItem)
    # TOCItem

  def doFootnote(self,s,keyEnd,stage):
    """Insert the font-size change for a footnote.

    Adds pv.SizeP to the document structure and stores 'footnotesize' in the
    size list, or 'end footnotesize' when stage == -100.  The closing call
    returns None; the opening call returns '\\&' plus the text after the
    first '{' in s.  keyEnd is unused.
    """
    pv.objDict[pv.StructP].append(pv.SizeP)
    if stage == -100:
      pv.objDict[pv.SizeP].append('end footnotesize')
      return None
    pv.objDict[pv.SizeP].append('footnotesize')
    return '\\&'+s[s.find('{')+1:]
    # doFootnote

  def doTabular(self,s,keyEnd,stage):
    """Track a tabular and add its row count to its parameters.

    On entry (stage != -100) pv.tableRows is zeroed, pv.tableParams is set
    to the current length of the parameter list (pv.ObjP) and self.inArr is
    incremented.  On exit (stage == -100) ' ' plus the row count is appended
    to the parameter entry at index pv.tableParams and self.inArr is
    decremented.  s and keyEnd are not read.
    """
    if stage != -100:
      pv.tableRows=0
      pv.tableParams=len(pv.objDict[pv.ObjP])
      self.inArr=self.inArr+1
      return
    withRows=pv.objDict[pv.ObjP][pv.tableParams]+' '+str(pv.tableRows)
    pv.objDict[pv.ObjP][pv.tableParams]=withRows        # append row count
    self.inArr=self.inArr-1
    #  doTabular

  def doTex(self,s,keyEnd,stage):
    """Track nesting of an expression enclosed in {}.

    pv.isTex is incremented on entry and decremented when called with
    stage == -100.  s and keyEnd are unused.
    """
    step=1
    if stage == -100:
      step=-1
    pv.isTex=pv.isTex+step
    # doTex

  def doCentre(self,s,keyEnd,stage):
    """Insert a centre format item by calling doFormat() for \\begin{center}.

    The arguments are ignored; the format is always recorded with stage 0.
    """
    centreTag='\\begin{center}'
    self.doFormat(centreTag,6,0)
    # doCentre

  def getEqn(self):
    """Return the current equation number in parentheses, e.g. '(3)'."""
    number=str(self.eqnNum)
    return '('+number+')'
    # getEqn

  def doEqnArr(self,s,keyEnd,stage):
    """Set the labelling state for an equation array.

    Calls doEqn('',0,stage), then increments self.inEqArr on entry or
    decrements it when stage == -100.  s and keyEnd are unused.
    """
    self.doEqn('',0,stage)
    step=1
    if stage == -100:
      step=-1
    self.inEqArr=self.inEqArr+step
    # doEqnArr

  def doEqn(self,s,keyEnd,stage):
    """Set the 'in equation' state.

    self.inEqn becomes 0 when stage == -100 and 1 for any other stage.
    s and keyEnd are unused.  Returns None.
    """
    inside=1
    if stage == -100:
      inside=0
    self.inEqn=inside
    return None
    # doEqn

  def doEqnLabel(self,s,keyEnd,stage):
    """Fix the label cross reference for an equation.

    N.B. every \\begin{equation} formula is labelled regardless of whether or
    not the Lyx document has defined a cross reference label for it; the
    latter is handled by doLabel().

    Calls doEqn('',0,stage) first.  On entry an equation start marker is
    recorded.  On exit (stage == -100) self.eqnNum is incremented, the
    label '(n)' is put in the pv.EqnLab list and the structure list, and the
    equation end marker is recorded.  Start and end of object are handled
    here so that the label is included before the concluding blank line.
    Returns None.
    """
    self.doEqn('',0,stage)
    if stage != -100:
      self.doObject(0,pv.EqnP)
      return None
    self.eqnNum=self.eqnNum+1
    label='('+str(self.eqnNum)+')'
    pv.objDict[pv.EqnLab].append(label)         # insert label number in list
    pv.objDict[pv.StructP].append(pv.EqnLab)
    self.doObject(-1,pv.EqnP)
    return None
    # doEqnLabel

  def doEqnNoLabel(self,s,keyEnd,stage):
    """Handle an unlabelled equation: just update the 'in equation' state.

    Delegates to doEqn('',0,stage); s and keyEnd are unused.  Returns None.
    """
    emptyKey=''
    self.doEqn(emptyKey,0,stage)
    return None
    # doEqnNoLabel

  def doLabel(self,s,keyEnd,objType):
    "set up label reference dictionary"
    "the main loop will set up a document entry for this, and here we augment the"
    "label dictionary for easy reference later from the label list"
    "the dictionary entry has the format:"
    "{label name: current section/equation/figure reference}"
    "e.g. {'sec:fred': 'Section 4.1'"
    # s is expected to hold e.g. '\label{sec:fred}...' with s[keyEnd] the '{';
    # returns '\\&' + the text after the first '{' so the caller recurses into
    # the label text.  objType is unused.
    i=s.find(':')
    ss=s[keyEnd+1:i]                            # label type (sec, fig or eq)
    j=s.find('}')
    if not ss in pv.standardTypes:              # if not in standard types dictionary
      if self.inEqn:
        st='eq'
      else:
        st='sec'
      pv.standardTypes[ss]=pv.standardTypes[st] # then set new types-dictionary entry
                                                # & assume is a section ref
                                                # (or an equation ref if inside one)
    if self.inEqn:
      Ln='('+str(self.eqnNum+1)+')'             # number the equation will receive
    else:
      # NOTE(review): eval() runs the expression string held in the types table
      # in this method's scope (so it can see self) - the table must stay
      # trusted, and locals here must not be renamed without checking it.
      Ln=eval(pv.standardTypes[ss][1])
        # this gets current value of variable (e.g. of secRef) as a string
    #dicEnt=pv.standardTypes[ss][0]+Ln
        # pv.standardTypes[ss][0] is e.g. 'Section: '
    dicEnt=Ln
    pv.labelDict[s[keyEnd+1:j]]=dicEnt          # put new entry in label dictionary;
                                                # access with label name to get
                                                # the reference stored above
    if self.inEqArr:
      # inside an equation array each label also advances the equation number
      # and emits its own '(n)' label item
      self.eqnNum=self.eqnNum+1
      Ln='('+str(self.eqnNum)+')'
      pv.objDict[pv.StructP].append(pv.EqnLab)
      pv.objDict[pv.EqnLab].append(Ln)          # insert label number in object list
    if pv.standardTypes[ss][0] == 'Figure ':
      pass                                      # placeholder: no figure-specific action
    i=s.find('{')
    return '\\&'+s[i+1:]
    # doLabel

  def doFigure(self,s,keyEnd,objType):
    """Fix the label cross reference for a figure.

    Increments self.figNum and sets self.figRef to the chapter prefix
    (self.ChapN) followed by the figure number, or to the section prefix
    (self.SectN) followed by it when there is no chapter prefix.  The
    arguments are unused.  Returns None.
    """
    self.figNum=self.figNum+1
    prefix=self.ChapN or self.SectN     # chapter prefix wins when non-empty
    self.figRef=prefix+str(self.figNum)
    return None
    # doFigure

  def doArray(self,s,keyEnd,stage):
    """Track entry to and exit from an array.

    Both self.inArr and self.inEqn are incremented on entry and decremented
    when stage == -100.  s and keyEnd are unused.  Returns None.
    """
    step=1
    if stage == -100:
      step=-1
    self.inArr=self.inArr+step
    self.inEqn=self.inEqn+step
    return None
    # doArray

  def getRef(self):
    """Get the location of the most recently stored item.

    Returns a pair (obj, k): obj is the last entry of the document structure
    list, i.e. the index of the object it points to (usually text), and k is
    the index of the last item in that object's list.
    """
    structure=pv.objDict[pv.StructP]
    obj=structure[len(structure)-1]
    last=len(pv.objDict[obj])-1
    return obj,last
    # getRef

  def doCite(self,s,keyEnd,stage):
    """Store the location of a citation for subsequent fixup.

    On entry the current length of the parameter list (pv.ObjP) is saved in
    pv.param.  On exit (stage == -100) three values are appended to
    pv.citations: the object index and item index from getRef(), then the
    last parameter entry if the parameter list grew since entry (the
    citation has a parameter), else ''.  Returns None.
    """
    if stage != -100:
      pv.param=len(pv.objDict[pv.ObjP]) # store to check if citation has parameter
      return None
    obj,index=self.getRef()
    pv.citations.append(obj)
    pv.citations.append(index)
    params=pv.objDict[pv.ObjP]
    if len(params)>pv.param:
      pv.citations.append(params[-1])
    else:
      pv.citations.append('')
    return None
    # doCite

  def doAppendix(self,s,keyEnd,objType):
    """Change numbering to appendix style.

    Sets an offset such that the next chapter number (if chapters exist,
    i.e. self.ChapNum is non-zero) or else the next section number maps onto
    ord('A'): self.chapApp or self.sectApp respectively.  The arguments are
    unused.
    """
    base=ord('A')-1
    if self.ChapNum:
      self.chapApp=base-self.ChapNum
    else:
      self.sectApp=base-self.SectNum
    # doAppendix

  def doPart(self,s,keyEnd,objType):
    """Insert the Part number and reset all lower-level counters.

    Roman[self.PartNum] plus a space is appended to the objType list,
    self.PartNum is then incremented, objType is added to the document
    structure and, if a TOC is being built, a TOC item is recorded.
    Returns '\\&' plus the text after the first '{' in s.
    """
    self.ChapNum=self.SectNum=self.SubSectNum=0
    self.SubSubSectNum=self.ParaNum=self.SubParaNum=0
    heading=Roman[self.PartNum]+' '
    pv.objDict[objType].append(heading)
    self.PartNum=self.PartNum+1
    pv.objDict[pv.StructP].append(objType)
    if self.isTOC:
      self.TOCItem(objType)
    return '\\&'+s[s.find('{')+1:]
    # doPart

  def doChap(self,s,keyEnd,objType):
    """Append the first part of a chapter object: 'Chapter x '.

    The second part, the name, is inserted under this and is handled by
    parseBody; both are stored in the objType list.  Lower-level counters
    are reset, self.ChapNum is incremented and rendered as a letter when
    self.chapApp is set (appendix style) or as a number otherwise.
    self.secRef receives the bare chapter reference, self.ChapN the
    reference with a trailing '.', and the figure and equation counters
    restart.  Returns '\\&' plus the text after the first '{' in s.
    """
    self.SectNum=self.SubSectNum=self.SubSubSectNum=0
    self.ParaNum=self.SubParaNum=0
    self.ChapNum=self.ChapNum+1
    if self.chapApp:
      number=chr(self.ChapNum+self.chapApp)
    else:
      number=str(self.ChapNum)
    self.ChapN=number
    pv.objDict[objType].append('Chapter '+number+' ')
    pv.objDict[pv.StructP].append(objType)
    if self.isTOC:
      self.TOCItem(objType)
    self.secRef=number
    self.ChapN=number+'.'
    self.figNum=0               # figures are numbered relative to the chapter
    self.eqnNum=0               # and so are equations
    return '\\&'+s[s.find('{')+1:]
    # doChap

  def doSect(self,s,keyEnd,objType):
    """Write the next section number.

    The section number is appended to the chapter prefix (self.ChapN) if
    that exists; it is rendered as a letter when self.sectApp is set
    (appendix style).  The number plus a space goes into the objType list,
    self.secRef receives the bare reference and self.SectN the reference
    with a trailing '.'.  Without a chapter prefix the figure and equation
    counters restart here.  Returns '\\&' plus the text after the first '{'.
    """
    self.SubSectNum=self.SubSubSectNum=0
    self.ParaNum=self.SubParaNum=0
    self.SectNum=self.SectNum+1
    if self.sectApp:
      number=self.ChapN+chr(self.SectNum+self.sectApp)
    else:
      number=self.ChapN+str(self.SectNum)
    self.SectN=number
    pv.objDict[objType].append(number+' ')
    pv.objDict[pv.StructP].append(objType)
    if self.isTOC:
      self.TOCItem(objType)
    self.secRef=number
    self.SectN=number+'.'
    if not self.ChapN:
      self.figNum=0             # figures numbered relative to section here
      self.eqnNum=0             # and equations too
    return '\\&'+s[s.find('{')+1:]
    # doSect

  def doSubsect(self,s,keyEnd,objType):
    """Write the next subsection number (section prefix + counter).

    Lower-level counters are reset.  The number plus a space goes into the
    objType list, a TOC item is recorded if a TOC is being built,
    self.secRef receives the bare reference and self.SubSectN the reference
    with a trailing '.'.  Returns '\\&' plus the text after the first '{'.
    """
    self.SubSubSectNum=self.ParaNum=self.SubParaNum=0
    self.SubSectNum=self.SubSectNum+1
    number=self.SectN+str(self.SubSectNum)
    self.SubSectN=number
    pv.objDict[objType].append(number+' ')
    pv.objDict[pv.StructP].append(objType)
    if self.isTOC:
      self.TOCItem(objType)
    self.secRef=number
    self.SubSectN=number+'.'
    return '\\&'+s[s.find('{')+1:]
    # doSubsect

  def doSubSubsect(self,s,keyEnd,objType):
    """Write the next subsubsection number (subsection prefix + counter).

    Paragraph counters are reset.  The number plus a space goes into the
    objType list, a TOC item is recorded if a TOC is being built,
    self.secRef receives the bare reference and self.SubSubSectN the
    reference with a trailing '.'.  Returns '\\&' plus the text after the
    first '{'.
    """
    self.ParaNum=self.SubParaNum=0
    self.SubSubSectNum=self.SubSubSectNum+1
    number=self.SubSectN+str(self.SubSubSectNum)
    self.SubSubSectN=number
    pv.objDict[objType].append(number+' ')
    pv.objDict[pv.StructP].append(objType)
    if self.isTOC:
      self.TOCItem(objType)
    self.secRef=number
    self.SubSubSectN=number+'.'
    return '\\&'+s[s.find('{')+1:]
    # doSubSubsect

  def doPara(self,s,keyEnd,objType):
    """Write the next paragraph number (subsubsection prefix + counter).

    The sub-paragraph counter is reset.  The number plus a space goes into
    the objType list, self.secRef receives the bare reference and
    self.ParaN the reference with a trailing '.'.  No TOC item is recorded.
    Returns '\\&' plus the text after the first '{'.
    """
    self.SubParaNum=0
    self.ParaNum=self.ParaNum+1
    number=self.SubSubSectN+str(self.ParaNum)
    self.ParaN=number
    pv.objDict[objType].append(number+' ')
    pv.objDict[pv.StructP].append(objType)
    self.secRef=number
    self.ParaN=number+'.'
    return '\\&'+s[s.find('{')+1:]
    # doPara

  def doSubPara(self,s,keyEnd,objType):
    """Write the next sub-paragraph number (paragraph prefix + counter).

    The number plus a space goes into the objType list, self.secRef
    receives the bare reference and self.SubParaN the reference with a
    trailing '.'.  Returns '\\&' plus the text after the first '{'.
    """
    self.SubParaNum=self.SubParaNum+1
    number=self.ParaN+str(self.SubParaNum)
    self.SubParaN=number
    pv.objDict[objType].append(number+' ')
    pv.objDict[pv.StructP].append(objType)
    self.secRef=number
    self.SubParaN=number+'.'
    return '\\&'+s[s.find('{')+1:]
    # doSubPara
  # parseTex

def fixCite():
  """Replace citations with their corresponding bibliography index numbers.

  pv.citations entries are in triples: the object index (usually text),
  the index of the citation within that object, and an extension if there
  is one (e.g. a chapter number), else ''.  Each citation label is looked
  up in the bibliography entry list (pv.bibEP, from index 2 on); when
  found at index j it is replaced by ' [j-1]' or ' [j-1, ext]'.  Labels
  that are not found are left as they are.
  """
  cites=pv.citations
  for base in range(0,len(cites),3):
    obj,index=cites[base],cites[base+1]
    label=pv.objDict[obj][index]
    entries=pv.objDict[pv.bibEP]
    for j in range(2,len(entries)):
      if entries[j] != label:
        continue
      ext=cites[base+2].strip('[]')             # get e.g. possible chapter ref
      if ext:                                   # if it exists
        ext=', '+ext                            # then format it
      pv.objDict[obj][index]=' ['+str(j-1)+ext+']'
      break
  # fixCite

def fixRefs():
  """Replace cross references with the values recorded in pv.labelDict.

  Page references (the 'Page ref' object list, items from index 2 on) are
  replaced by 'section ' plus the label's reference; an unknown label there
  raises KeyError, as before.  Ordinary references (the pv.refP list, items
  from index 2 on) are replaced by the label's reference; a reference whose
  label cannot be looked up is deliberately left unchanged.  Only lookup
  failures are tolerated - other errors are no longer silently swallowed.
  """
  pageRefs=pv.objDict[pv.findObj('Page ref')]   # find list of Page refs
  for i in range(2,len(pageRefs)):
    s=pv.labelDict[pageRefs[i]]                 # look up target in dictionary
    pageRefs[i]='section '+s                    # and replace ref item with section
  refs=pv.objDict[pv.refP]
  for i in range(2,len(refs)):
    try:
      s=pv.labelDict[refs[i]]                   # look up target in dictionary
    except (KeyError,TypeError):                # unknown (or unhashable) label:
      continue                                  # best effort, keep the raw ref
    refs[i]=s                                   # and replace ref item
  # fixRefs

def fixFigLabels():
  """Replace Lyx figure labels with their actual values.

  Every item (from index 2 on) of the 'Label' object list that starts with
  'fig' is replaced by its entry in pv.labelDict.
  """
  labels=pv.objDict[pv.findObj('Label')]        # find the label list
  for n in range(2,len(labels)):
    name=labels[n]
    if name[:3] == 'fig':
      labels[n]=pv.labelDict[name]              # replace Lyx label with true one
  # fixFigLabels

def fixSlash():
  """Convert every '\\\\' item in the escape char list to '\\newline'.

  Items from index 2 on of the pv.EscP list are examined.
  """
  escapes=pv.objDict[pv.EscP]
  for n in range(2,len(escapes)):
    if escapes[n] == '\\\\':
      escapes[n]='\\newline'
  # fixSlash

def fixLyxList():
  """Add a space after the terminating ']' of an initial list item.

  These occur in the List Item list (pv.ItemP); items from index 2 on are
  examined and a space is inserted after the last ']' of each item that
  has one.
  """
  items=pv.objDict[pv.ItemP]
  for n in range(2,len(items)):
    text=items[n]
    cut=text.rfind(']')+1               # 0 when there is no ']'
    if cut:
      items[n]=text[:cut]+' '+text[cut:]
  # fixLyxList

def fixTOC():
  """Copy parsed headings into the TOC list (pv.TTOCP).

  This is done here so that the entries have been parsed.  From index 2 on
  the TOC list holds pairs stored by TOCItem(): a document structure index
  and an index into the heading's object list.  For each pair the structure
  list is walked up to the next EndObject marker, concatenating the items
  that belong to the heading's own object (others, e.g. font objects, are
  ignored).  The text is indented by two spaces per '.' in its first item.
  The results replace the pairs and the surplus entries are removed.
  """
  toc=pv.objDict[pv.TTOCP]
  structure=pv.objDict[pv.StructP]
  out=2                                 # replacement index
  for pos in range(2,len(toc),2):
    k=toc[pos]                          # next document structure index
    obj=structure[k]                    # next object index from structure list
    j=toc[pos+1]                        # index into object list
    text=''
    while structure[k] != pv.EndObP:
      if structure[k] == obj:           # ignore e.g. font objects
        piece=pv.objDict[obj][j]
        if not text:
          text=spaces[:2*piece.count('.')]
        text=text+piece                 # concatenate items in next object
        j=j+1                           # goto next object item
      k=k+1                             # goto next document structure item
    toc[out]=text                       # write result back into TOC list
    out=out+1
  del toc[out:]                         # remove redundant entries from TOC list
  # fixTOC

def postProcessing():
  """Apply the fix-ups to the parsed document, then write the output files.

  The fix-ups run first because they edit the object dictionary that
  makeResultsFile saves.  makeReport runs before the other two writers.
  """
  steps=(
    fixSlash,                   # convert '\\\\' to '\\newline'
    fixCite,                    # fix up citation references
    fixRefs,                    # fix up label cross-references
    fixFigLabels,               # fix up figure labels
    fixLyxList,                 # insert spaces after ]
    fixTOC,                     # construct Table of Contents
    makeReport,                 # make main report file
    makeResultsFile,            # make summary of results file
    makeObjListFile,            # make object menu file for access by client program
  )
  for step in steps:
    step()
  # postProcessing

def makeObjListFile():
  """Write the object menu file: one line per object list, for the client.

  The file name is the report file name with its last character replaced
  by 'm'; it is stored in pv.objListFile.  Each line holds the name of a
  list (its entry 0) and the number of items in it, not counting the two
  leading entries of the list.
  """
  pv.objListFile=pv.reportFile[0:-1]+'m'
  ff=open(pv.objListFile,'wt')
  try:                                  # make sure the file is closed if a write fails
    for L in pv.objDict.itervalues():   # the dictionary keys are not needed here
      ff.write(L[0])
      ff.write(' ('+str(len(L)-2)+' items)')
      ff.write('\n')
  finally:
    ff.close()
  # makeObjListFile

def makeResultsFile():
  """Save the parse results in the pickled results file.

  The file name is the report file name with its last character replaced
  by 'd'; it is stored in pv.resultsFile.  The file contains:
    a 'Geometry' line followed by the pickled pv.geometry (only when
      pv.geometry is set)
    a 'Dictionary' line followed by the pickled pv.objDict
  The functions in this file that read the results back rely on this layout.
  """
  pv.resultsFile=pv.reportFile[0:-1]+'d'
  ff=open(pv.resultsFile,'wt')
  try:                                  # make sure the file is closed if pickling fails
    if pv.geometry:
      ff.write('Geometry\n')
      pickle.dump(pv.geometry,ff)       # save geometry
    ff.write('Dictionary\n')
    pv.resetIndices()                   # reset individual object indices to 2
    pickle.dump(pv.objDict,ff)          # save dictionary of object files
  finally:
    ff.close()
  # makeResultsFile

def makeReport():
  """Write the report file ('.ftr') describing what the parser found.

  The file is placed in the Tmp directory and named after the part of the
  document path that lies between the last directory separator and the
  extension; the name is stored in pv.reportFile.  The report holds:
    a 'Number of <name> items = n' line for every non-empty object list
    the unrecognised items, if there are any, each followed by a rule
    the error message and the line it occurred near, if pv.Error is set
      (the error file is then written as well)
  """
  i=findCharRev(document,'.')
  j=findCharRev(document,dirySep)
  # This test used to read 'document[:i+1] == 0', a string compared with an
  # integer, which is never true - so a missing extension was never detected.
  # NOTE(review): assumes findCharRev returns 0 when the character is not
  # found - confirm against its definition.
  if i == 0 and document[:1] != '.':    # if no extension exists
    i=len(document)                     # then use the whole of the name
  pv.reportFile=Tmp+document[j:i]+'.ftr'
  ff=open(pv.reportFile,'wt')           # flayed text document
  try:                                  # make sure the file is closed if a write fails
    for L in pv.objDict.itervalues():
      n=len(L)-2                        # number of instances of object
      if n>0:                           # if > 0
        s='Number of '+L[0]+' items '   # 17 literal chars
        s=s.ljust(pv.Rwidth)            # Rwidth is set on startup allowing for 17
        ff.write(s+'= '+str(n)+'\n')    # write report string & number of instances
    unrecognised=pv.objDict[pv.UnrecP]
    if len(unrecognised)>2:
      ff.write('Unrecognised items:-\n')
      ff.write('------------------------------------------\n')
      for item in unrecognised[2:]:
        ff.write(item+'\n')
        ff.write('------------------------------------------\n')
    if pv.Error is not None:
      ff.write("Aborted      : "+pv.Error+'\n')
      ff.write("Near or after: "+pv.ErrorLine+'\n')
      makeErrorFile(pv.Error+'\n')
  finally:
    ff.close()
  # makeReport

def showReport(showUnrecog):
  print ''
  try:
    ff=open(pv.reportFile,'rt')
  except:
    return
  L=ff.readline()
  while len(L)>0:
    if L[:12] == 'Unrecognised' and showUnrecog == 0:
      break
    else:
      print L,
    L=ff.readline()
  ff.close()
  if pv.Error != None:
    print 'Error        :',pv.Error
    print 'Near or after: ',pv.ErrorLine
  if pv.stackDepth:
    print 'Stack depth  :',pv.stackDepth
  print ''
  # showReport

def showList(n,tag):
  "display list on terminal for object n"
  print ''
  try:
    ff=open(pv.resultsFile,'rt')
  except:
    return
  s=ff.readline()
  if s[:8] == 'Geometry':
    L=pickle.load(ff)           # skip margins
    s=ff.readline()             # object name
  L=pickle.load(ff)             # object dictionary
  prlst(L[n][2:],tag)
  ff.close()
  print ''
  # showList

def openFiles():
  """Open the results file for reading and the client output file for writing.

  The results file becomes pv.ff and the output file (pythonTmp) pv.obf.
  Returns 1 when both are open.  Returns 0 otherwise, after closing the
  results file again if only the output file failed to open.
  """
  ok=0
  try:
    pv.ff=open(pv.resultsFile,'rt')
  except:
    pass                        # nothing was opened, nothing to undo
  else:
    try:
      pv.obf=open(pythonTmp,"wt")
    except:
      pv.ff.close()             # do not leave the results file open
    else:
      ok=1
  return ok
  # openFiles

def makeObjFile(n):
  """Write the items of object list n to the client file, one per line.

  n -- key of the wanted list in the object dictionary

  The heading uses the list name held in pv.objDict; the items themselves
  are read back from the pickled results file.  Does nothing when
  openFiles fails.
  """
  if not openFiles():
    return
  pv.obf.write('Listing of: '+pv.objDict[n][0]+'\n\n')
  if pv.ff.readline()[:8] == 'Geometry':
    pickle.load(pv.ff)          # skip margins
    pv.ff.readline()            # skip the line that precedes the dictionary
  stored=pickle.load(pv.ff)     # load objDict
  for item in stored[n][2:]:    # the items start at entry 2 of the list
    pv.obf.write(str(item))
    pv.obf.write('\n')
  pv.ff.close()
  pv.obf.close()
  # makeObjFile

def makeObjRep(n):
  """Write one item of the document structure to the client file.

  n -- index of the wanted item, counted from the first item of the
       'Document structure' list (2 is added to step over the two leading
       entries of that list)

  The object dictionary is reloaded from the results file into pv.objDict
  and the indices are reset.  pv.extractObj is then called once for each
  list position from 2 up to and including the wanted one, and the value
  returned by the last call is written after a 'Listing of:' heading.
  Does nothing when either file cannot be opened.
  """
  if not openFiles():                   # opens pv.ff (results) and pv.obf (output)
    return
  try:
    s=pv.ff.readline()
    if s[:8] == 'Geometry':
      pickle.load(pv.ff)                # skip margins
      pv.ff.readline()                  # skip the line that precedes the dictionary
    pv.objDict=pickle.load(pv.ff)       # load objDict
    pv.resetIndices()
    i=pv.findObj('Document structure')
    n=n+2
    pv.obf.write('Listing of: '+pv.objDict[pv.objDict[i][n]][0]+'\n\n')
    i=2
    while i <= n:
      obj=pv.extractObj()               # only the last result is kept
      i=i+1
    pv.obf.write(obj)
  finally:
    pv.ff.close()                       # the results file used to be left open
    pv.obf.close()
  # makeObjRep

def makeOPxRef(D):
  """Make the cross-reference from object items to document structure indices.

  D -- object dictionary; every value is a list whose entry 0 is the
       object name and entry 1 its index pointer.  The list named
       'Document structure' holds, from entry 2 onwards, the key of the
       object list that each structure item belongs to.

  Returns a dictionary RD with an entry for every key 0..len(D)-1, where
  RD[obj][p] = i-1 for the structure position i at which item p of object
  list obj occurs.  p starts at the object's index pointer D[obj][1] and
  goes up by one for each occurrence.  Entries of the structure list that
  refer to the structure list itself are skipped.

  Side effect: the index pointer D[obj][1] of every list that is referred
  to is advanced by the number of its occurrences.

  When D holds no 'Document structure' list the cross-reference is
  returned empty (previously an arbitrary list was used as the structure).
  """
  RD={}
  for key in range(len(D)):     # initialise dictionary
    RD[key]={}
  sp=None
  for key in D:
    if D[key][0] == 'Document structure':
      sp=key                    # sp = index of document structure list
      break
  if sp is None:                # also covers an empty dictionary
    return RD
  structure=D[sp]
  for i in range(2,len(structure)):
    obj=structure[i]            # get next object type index
    if obj == sp:
      continue                  # skip document structure list
    j=D[obj][1]                 # get its index pointer
    D[obj][1]=j+1               # increment list pointer
    RD.setdefault(obj,{})[j]=i-1        # and enter object structure list index
  return RD
  # makeOPxRef

def findText(ss):
  """Search the object lists in the results file for ss and report the hits.

  ss -- the text to look for

  Every item (from entry 2 onwards) of every object list with a key of 1
  or more is converted to a string and searched.  For each hit the client
  file receives the name of the list, the document structure index taken
  from makeOPxRef, and the item text from the start of the match onwards.
  The number of hits is written at the end when there was at least one.
  Does nothing when openFiles fails.
  """
  nf=0
  if not openFiles():
    return
  try:                                  # make sure both files are closed on error
    pv.obf.write('Results for find: "'+ss+'"\n')
    s=pv.ff.readline()
    if s[:8] == 'Geometry':
      pickle.load(pv.ff)                # skip margins
      pv.ff.readline()                  # skip the line that precedes the dictionary
    D=pickle.load(pv.ff)                # load objDict
    opx=makeOPxRef(D)
    for n,Obj in D.iteritems():
      if n<1:
        continue
      for i in range(2,len(Obj)):
        text=str(Obj[i])
        f=text.find(ss)
        if f == -1:
          continue
        sn=opx[n][i]
        pv.obf.write('\n------------------------------------------------------\n')
        pv.obf.write('NEXT RESULT is in '+Obj[0]+'\n')
        pv.obf.write('Structure index = '+str(sn)+'\n\n')
        # slice the string that was searched; the item itself need not be
        # a string, and f is an offset into its string form
        pv.obf.write(text[f:])
        nf=nf+1
    if nf:
      pv.obf.write('\n------------------------------------------------------\n')
      pv.obf.write('Number found = ')
      pv.obf.write(str(nf)+'\n')
  finally:
    pv.ff.close()
    pv.obf.close()
  # findText

def makeSummary():
  """Write a summary of the document structure to the client file.

  After a heading that names the results file (without its extension),
  one line is written per document structure item giving the name of the
  object it belongs to.  For 'StartObject' and 'EndObject' items the next
  unused entry of that object's own list is appended after ' : '.
  Does nothing when openFiles fails.
  """
  if not openFiles():
    return
  try:                                  # make sure both files are closed on error
    s=pv.ff.readline()
    if s[:8] == 'Geometry':
      pickle.load(pv.ff)                # skip margins
      pv.ff.readline()                  # skip the line that precedes the dictionary
    D=pickle.load(pv.ff)                # load objDict
    # this used to be pv.resultsFile[-4], which is a single character
    # (the '.' before the extension) rather than the file name
    pv.obf.write('Structure of file '+pv.resultsFile[:-4]+'\n\n')
    obj=D[pv.StructP]                   # document structure
    ks=2                                # next entry of the StartObject list
    ke=2                                # next entry of the EndObject list
    # NOTE(review): the last entry of the structure list is not listed
    # (the loop has always stopped at len-1) - confirm that is intended
    for i in range(2,len(obj)-1):
      name=D[obj[i]][0]
      pv.obf.write(name)
      if name == 'StartObject':
        pv.obf.write(' : '+D[obj[i]][ks])
        ks=ks+1
      elif name == 'EndObject':
        pv.obf.write(' : '+D[obj[i]][ke])
        ke=ke+1
      pv.obf.write('\n')
  finally:
    pv.obf.close()
    pv.ff.close()
  # makeSummary

def makeDics():
  "make Start and End Vanilla and Template dictionaries"
  "allows objDict to be edited freely"
  "these may not be needed, but are useful for debugging with ConvVanilla.py"
  s=dicPath
  names=[s+'StartVanilla',s+'StartTemplate',s+'StartLatex',s+'EndVanilla',
         s+'EndTemplate',s+'EndLatex']
  files=[]
  for s in names:
    try:
      pv.ff=open(s,'wt')
      if 'Latex' in s:
        i=1
      else:
        i=0
      files.append([pv.ff,i])
    except:
      print 'Could not create:',s
      for i in files:
        i[0].close()
      return
  names=[]
  for x in pt.mainTable:
    L=pt.mainTable[x]
    if L[2] != 0:               # if object related to start and end objects
      if x[0] != '\\':
        s='\\begin{'+x+'}'      # this is for the Latex dictionaries
      else:
        s=x
      names.append([pv.objDict[L[1]][0],s])
  names.sort()
  for s in names:
    for i in files:
      i[0].write(s[0])
      if i[1]:                          # if is Latex dictionary
        i[0].write('\n'+s[1]+'\n')      # then write conversion
      else:
        i[0].write('\n\n')              # else convert to nil
  for i in files:
    i[0].close()
  print 'ok'
  # makeDics

# MAIN PROGRAM --------------------------------------------------------------------

args = getopt.getopt(sys.argv[1:],'dummy')      # get run-time arguments

homeDir=''

#  read in presets (file paths)
#
#  i records how the program was started:
#    2  argv[2] could be evaluated as an action code (client call)
#    1  only argv[1], the document name, was given (terminal call)
#    0  no arguments at all (direct call, the document is chosen by GUI)

i=0
try:
  # NOTE(review): eval() executes whatever text is passed in argv[2]; the
  # action codes listed in the usage notes further down are plain integers,
  # so int() would be a safer way to read them.
  action=eval(sys.argv[2])
  # The usage notes state that argv[1] is the document for a client call,
  # but it was never read on this path; bind it here so that the later
  # code does not depend on 'document' having been set somewhere else.
  document=sys.argv[1]
  i=2                                   #if client console call
except:
  try:
    document=sys.argv[1]
    i=1
  except:
    pass                                #or direct call (for GUI support)

if i != 1:
  try:
    pv.ff=open('ConvTexConfig')                 #then open presets
  except:
    s='\'ConvTexConfig\' configuration file missing\n'
    s=s+'Please run Config.py to create it'
    textGUI(s)
    sys.exit(0)
else:
  pv.ff=open(Presets)                           # else open development presets file

homeDir=pv.ff.readline().strip()                # home or base directory
docPath=pv.ff.readline().strip()                # documents path
pv.ff.close()                                   # skip the rest

#  read in paths to suit platform

# The paths file is 'Paths' when an action code was given (i == 2),
# otherwise 'ConvTexPaths' in the current directory.
if i != 2:
  try:
    pv.ff=open('ConvTexPaths')                  # presets file in current dir
  except:
    s=('\'ConvTexPaths\' configuration file missing\n'
       'Please run Config.py to create it')
    textGUI(s)
    sys.exit(0)
else:
  action=eval(sys.argv[2])                      # if client (development) program call
  pv.ff=open(Paths)                             # then open those presets

# the file is read strictly line by line, one setting per line
Tmp=pv.ff.readline().strip()                    # temp path
ErrPath=pv.ff.readline().strip()                # error file path
pv.ff.readline()                                # discard officeXML path
pythonTmp=pv.ff.readline().strip()              # path for development
dicPath=homeDir+pv.ff.readline().strip()        # path for dictionaries
dirySep=pv.ff.readline().strip()                # directory path separator
pv.ff.close()

if i == 0:                                      # if direct call, no args
  document=diryGUI(docPath)                     # get file path and name
  if document is None:
    sys.exit()                                  # abort if not given

#----------------------------------------------------------------------------------

# Choose the action code; for a terminal call also complete the document path.
if i == 0:
  action = 0                                    # parse document using GUI-given path
elif i == 2:
  action=eval(sys.argv[2])                      # for client (development) program call
elif document[:8] == 'makeDics':                # else call from console
  action = -2                                   # make dictionaries
else:
  action = -1                                   # or assume is terminal call
  # NOTE(review): for a name starting with '~' the '~' itself stays in the
  # path that is built (homeDir+'~...') - confirm that this is intended.
  if document[0] == '~':
    document=homeDir+document
  elif document[0] != dirySep:
    document=docPath+document                   # add path to document name
  document=document+'.tex'                      # every form gets the extension

""" -----------------------------------------------------------------------------
Now follows the execution of the program with the options:-
  If called by a client program then we expect
        argv[1] = document file path and name
        argv[2] = action code, which may be
                0 client call to parse a document
                  the results are stored in the following files in /tmp/
                  (d is document name)
                    d.ftd       pickled file of results
                                contains format presets, main dictionary (objDict),
                                the font size and style dictionaries, the general
                                format dictionary and the label dictionary
                    d.ftr       text file containing report of results
                    d.ftm       text list of all object names, for client menu
                    various dictionaries
                1 client call to extract contents of one object list from d.ftd
                2 client call to construct summary of structure
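                3 client request to find text
                4 client request to display one item of the document structure
        argv[3] = depends on argv[2]:-
                if 0 or 2 then absent
                if 1 then is an index number for selecting an object list
                if 3 then is the number of words in the find text cue;
                     the words themselves follow in argv[4] onwards
                if 4 then is the index of the item in the document structure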
  If called from the terminal for testing
        argv[1] = file name (no path)        or may be absent for default file
        argv[2] is absent which indicates that it is a terminal call
                the action code is set to -1 or -2
----------------------------------------------------------------------------------
"""

# The error file is written in advance and pv.syntaxErr set to 1, so that
# the error file is still there if the program stops before the end.
makeErrorFile('Syntax or execution error')      # contingency error file
pv.syntaxErr=1                                  # for syntax or exec error

# results file: Tmp + document name from the last separator on, with the
# last three characters replaced by 'ftd'
i=document.rfind(dirySep)
pv.resultsFile=Tmp+document[i:-3]+'ftd'

if action in (-1,0):            # parse document
  pt = parseTex()
  pt.parseDoc(document)
  postProcessing()
elif action == -2:              # direct call to make start and end dictionaries
  pt = parseTex()
  makeDics()
elif action == 1:               # external request for object info
  n=eval(sys.argv[3])
  makeObjFile(n)
elif action == 2:               # external request for document structure summary
  makeSummary()
elif action == 3:               # external request to find text
  n=eval(sys.argv[3])           # number of words in cue
  # the cue words are argv[4] onwards; joining them allows a multi-word cue
  ss=' '.join([sys.argv[i] for i in range(4,4+n)])
  findText(ss)
elif action == 4:               # info to display one object item
  n=eval(sys.argv[3])
  makeObjRep(n)                 # typically called to expand structure summary item
#---------------------------------------------------------------------------------

if action == -1:
  showReport(0)                 # terminal call: show report without unrecognised items
  #showList(41,'#')             # arg is object index from objDict 9 41

if pv.syntaxErr != 1:
  # the flag was changed from its preset value of 1: show the error file
  pv.ff=open(ErrPath)
  s=pv.ff.read()
  pv.ff.close()
  textGUI(s)
else:                                           # if no errors occurred
  removeErrFile()                               # then remove error file
  i=document.rfind(dirySep)
  # ConvOffice.py is started with the document name (no path, last four
  # characters removed) as its argument
  # NOTE(review): the command is a single string built from the document
  # name - confirm that names needing shell quoting cannot occur
  if dirySep != '/':
    popen2('ConvOffice.py '+document[i+1:-4])
  else:
    popen2('./ConvOffice.py '+document[i+1:-4])