#! /usr/bin/env python
"""
Pyxie
An Open Source XML processing library for Python

                         The Pyxie Project
                        http://www.pyxie.org

Disclaimer

THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL PROPYLON OR ITS CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	
http://www.pyxie.org

XML Processing with Python
Prentice Hall

Sean Mc Grath

Change Log:
Version 1.08 - 27 Aug 2001  (Derek Higgins)
  Changed the functions WalkElements and WalkData to call the appropriate function 
  upon the end of each element once and only once. 
  (Thanks to Rob van Wees for the bug report)

Version 1.07 - 2 Aug 2001  (Derek Higgins)
  Change to escape backslashes in the content properly 
  (Thanks to John Cowan for pointing out the problem)

Version 1.06 - 8 Feb 2001
 Support for Python 2.0 added. In Python 2.0, the pyexpat module has been renamed
 to expat. Pyxie now tries to import pyexpat, and if that fails, imports
 expat. Should now work with Python 1.5.x and 2.0
 (Thanks Tim (and some others whose name I forget, sorry.))
 
Version 1.05 - 24 October 2000
 Changed StartElementHandler to cope with the more recent versions
 of pyexpat that use a dictionary rather than a list to represent
 attributes (thanks John)

Version 1.01 - 16 March 2000
 xDispatch.Dispatch bug fixed (processing instructions) (Thanks Chris)
 xTree.PasteDown bug fixed (Thanks Noel)

Version 1.02 -  7 April 2000
Added Envelope function based on a feature request from Stuart
Hungerford. (Thanks Stuart).

Version 1.03 - 1 August 2000
Fixed bug with PIs (Thanks John)

Version 1.04 - 3 August 2000
Fixed bug in and improved structure of NWS (Thanks John)

Introduction
------------
The Pyxie library provides facilities for processing XML. The library
uses a simple notation to capture the information generated by XML
parsers known as PYX.

PYX is a line oriented notation in which the first character serves
to specify what type of parsing event the line represents:

--------------------------
First         Parsing
Character     Event
--------------------------
(              Start-tag
A              Attribute
)              End-tag
-              Data
?              Processing
               Instruction
--------------------------

Line ends and tabs occurring in data or attribute lines are escaped
to "\" followed by "n" and "\" followed by "t" respectively. 

Any process that generates information in PYX can be
used as a data source for this library - relational databases,
HTML parsers, SGML parsers, XML parsers, latex parsers... whatever.

Facilities provided include:
	Tree-driven XML Processing (see xTree and related classes)
	Event-driven XML Processing (see the xDispatch class)
	Event-driven XML Processing with full Tree access
	(see the Dispatch method of xTree)
	Sparse Trees - (see demo in test harness)
	A SAX to PYX driver to generate PYX from any SAX parser
	An SGML-like white space normalization function
	A Pyxie Exception class
	PYX encoder and decoder functions for handling escaped
	line ends/tabs
	
Tree driven XML Processing
---------------------------
The xTree class provides:
	Navigational methods for moving current position around
	a tree structure
	Cut and Paste facilities
	Serialization to XML via repr
	Node list assembly methods such as Ancestors, Descendants etc.
	Tree walking with call-backs to methods named after element
	type names
	A "Pythonic" tree walking facility using a simple Python for loop
	An event dispatch facility (Dispatch) which can call handler
	methods in arbitrary Python classes
	

Event-driven XML Processing
---------------------------
The xDispatch class provides:
	Ancestor information available in a simple list structure
	Callbacks to methods named after element type names
	e.g. start_foo, end_foo
	Default method handlers default_start and default_end
	Callback for data content (the characters method)
	Supports sparse tree building by allowing dispatched events
	to be pushed back onto the PYX stream (see demo in test harness)

The xDispatchMultiplexor class provides the ability to have
multiple event-driven "clients" processing a PYX event stream
in parallel.

See also the Pyxie project home page at http://www.pyxie.org.

There is a Pyxie mailing list. For more information, send an e-mail
with just the word 'help' as subject or body to:
    pyxie-request@starship.python.net

"""
__version__ = "1.08"

import string,types

# Import c implementation of StringIO if available
try:
	import cStringIO
	StringIO = cStringIO
except ImportError:
	import StringIO

from exceptions import Exception

# An exception base class for Pyxie
# ---------------------------------
class PyxieException (Exception):
	"""
	Exception class used by the Pyxie library.

	The message passed to the constructor is stored in the
	"problem" attribute and is what str() returns.
	"""
	def __init__(self,s=""):
		# Initialise the base class too, so that the standard
		# exception machinery (the args tuple) also carries the message
		Exception.__init__(self,s)
		self.problem = s

	def __str__(self):
		return self.problem
	
# An abstract base class for Pyxie nodes 
# --------------------------------------
class xNode:
	"""
	xNode: the abstract base class from which every node in a
	Pyxie tree is derived.
	A node carries four links to neighbouring nodes: Up, Down,
	Left and Right.
	"""
	def __init__(self):
		# A freshly created node is not linked to anything
		self.Up = None
		self.Down = None
		self.Left = None
		self.Right = None

	
# A node representation of an XML element
# ---------------------------------------
class xElement (xNode):
	"""
	xElement: A node representation of an XML element
	consisting of element type name and attribute
	information.

	Processing instructions are stored as pseudo-elements whose
	element type name is "?pi" and which carry two attributes,
	"target" and "data".
	"""
	def __init__(self,ElementTypeName):
		xNode.__init__(self)
		# An xElement must have an Element type name (tag name)
		self.ElementTypeName = ElementTypeName
		# An xElement can have any number of attributes
		# storage is of the form key/value in a Dictionary
		self.AttributeValues = {}

	def __str__(self):
		"""
		Return a meaningful representation of an xElement
		"""
		return ("xElement: "
			"Element Type Name='%s'. Attributes='%s'" % (
				self.ElementTypeName,
				repr(list(self.AttributeValues.items()))))

	def __repr__(self):
		"""
		Return an XML serialization of an xElement, including
		(recursively) the serialization of all of its children.
		Attribute values are expected to be strings.
		"""
		if self.ElementTypeName == "?pi":
			# Processing instructions are stored
			# internally as a pseudo-element
			return "<?%s %s?>" % (
				self.AttributeValues["target"],
				self.AttributeValues["data"])
		# Collect the pieces in a list and join once at the end
		pieces = ['<%s' % self.ElementTypeName]
		for (aName,aValue) in self.AttributeValues.items():
			# Emit attribute name/value pairs.
			# Double quotes are always used as the delimiter, so the
			# characters that cannot appear literally inside such a
			# value are escaped. "&" has to be handled first so that
			# the references produced below are not escaped again.
			aValue = aValue.replace("&","&amp;")
			aValue = aValue.replace("<","&lt;")
			aValue = aValue.replace('"',"&quot;")
			pieces.append(' %s = "%s"' % (aName,aValue))
		# the ">" terminates the start-tag (it is flush with the
		# element type name when there are no attributes)
		pieces.append(">")
		# All children are serialized before the end-tag is emitted.
		# This is a recursive process: repr of a child element
		# serializes that child's own children.
		child = self.Down
		while child is not None:
			pieces.append(repr(child))
			child = child.Right
		pieces.append('</%s>' % self.ElementTypeName)
		return "".join(pieces)

# A node representation of XML character data
# --------------------------------------------
class xData (xNode):
	"""
	xData: a node representation of XML data content.
	The character data is held in the "Data" attribute.
	"""
	def __init__(self,str):
		xNode.__init__(self)
		self.Data = str

	def __str__(self):
		"""
		Return a meaningful string representation of an xData object.
		Returns the first 10 characters
		"""
		return "xData (%s...)" % self.Data[:10]

	def __repr__(self):
		"""
		Return the XML representation of an xData node.
		The XML representation of an xData node is simply the
		data content of the node (it is returned as stored, no
		escaping is applied).
		"""
		return self.Data

	def __setslice__(self,i,j,s):
		"""
		Convenience slicing method to allow assignment to a slice
		of the data content of an xData node
		"""
		chars = list(self.Data)
		chars[i:j] = list(s)
		self.Data = "".join(chars)

	def __setitem__(self,index,s):
		"""
		Item/slice assignment on the data content.
		Interpreters that no longer call __setslice__ deliver slice
		assignments here as a slice object, so this keeps
		"node[i:j] = s" working there as well.
		"""
		chars = list(self.Data)
		if isinstance(index,slice):
			chars[index] = list(s)
		else:
			chars[index] = s
		self.Data = "".join(chars)

# xTree: A representation of an XML tree structure
# ------------------------------------------------
class xTree:
	"""
	xTree : a Python representation on an XML document
	as a hierarchical data structure made up of
	interconnected nodes.

	The tree keeps a "current position" (CurPos). Most methods
	operate relative to it, and attributes that are not found on
	the xTree itself are looked up on the node at the current
	position.
	"""
	def __init__(self,rootnode=None):
		"""
		Construct an xTree. A root node (xElement) can be provided
		if desired. (This is especially useful when doing sparse tree
		building.)
		"""
		# Establish every attribute before anything can fail, so that
		# __getattr__ and __del__ always find them
		self.RootNode = None
		self.CurPos = None
		# Position stack supporting the PushPos
		# and PopPos functionality
		self.__PushStack = []
		if rootnode:
			assert isinstance(rootnode,xNode)
			self.RootNode = rootnode
		self.CurPos = self.RootNode

	def __getitem__(self,n):
		"""
		Allow a tree to be iterated using Python's for loop.
		Index 0 restarts the walk at the root node; any other index
		simply advances from the current position, so the tree must
		be indexed sequentially. The current position follows the
		iteration.
		"""
		if n == 0:
			# Start of iteration, return root node
			self.CurPos = self.RootNode
			if self.CurPos is None:
				# An empty tree has nothing to iterate over
				raise IndexError("No nodes in xTree")
			return self.CurPos
		# We have been around the for loop at least
		# once, so return the "next" node. This is
		# the next node in a downward direction
		# or the next node in an easterly direction
		if self.CurPos.Down:
			self.CurPos = self.CurPos.Down
			return self.CurPos
		if self.CurPos.Right:
			self.CurPos = self.CurPos.Right
			return self.CurPos
		while self.CurPos.Up and (not self.CurPos.Right):
			# backtrack
			self.CurPos = self.CurPos.Up
		if self.CurPos.Right:
			self.CurPos = self.CurPos.Right
			return self.CurPos
		# raising an IndexError terminates the for loop
		raise IndexError("No more nodes in xTree")

	def __getattr__(self,n):
		"""
		Allow attributes of the current xElement or xData node to
		be accessed as attributes of the xTree object. Particularly
		useful for ElementTypeName and Data attributes. i.e.
		Instead of saying:
		  "tree.CurPos.Data" can simply say "tree.Data"
		Instead of saying:
		  "tree.CurPos.ElementTypeName" can simply say
		  "tree.ElementTypeName"
		Raises PyxieException if neither the tree nor the current
		node has the attribute.
		"""
		# Fetch CurPos through the instance dictionary: ordinary
		# attribute access would re-enter __getattr__ without end
		# if CurPos has not been set on this instance
		pos = self.__dict__.get("CurPos")
		if hasattr(pos,n):
			return getattr(pos,n)
		raise PyxieException (
			"No attribute '%s' on xTree or current xNode" % n)

	def __del__(self):
		"""
		When an xTree is garbage collected we need
		to break the circular
		references joining the xNode objects together.
		"""
		self.ZapTree()

	def ZapTree(self):
		"""
		Delete an xTree object completely by deleting the xNode objects
		attached to it. The xNodes are joined in a circular fashion and
		so the links need to be broken to allow Python's reference
		counting garbage collector to process them
		"""
		L = []
		# Create a list of all descendants of the root node
		self.Descendants1(L,self.RootNode)
		for n in L:
			# Iterate the list, breaking all links
			n.Up = n.Down = n.Left = n.Right = None

	def PushPos(self):
		"""
		Push the current position onto a position stack for later
		retrieval via the PopPos method.
		"""
		self.__PushStack.append(self.CurPos)

	def PopPos(self):
		"""
		Pop a position from the position stack and make it
		the current position
		"""
		self.CurPos = self.__PushStack.pop()

	def AtElement(self,etn=None):
		"""
		Predicate method. Is current position an Element?
		The optional argument allows the method to check
		for a particular element type name.
		Returns 1 or 0.
		"""
		if not isinstance (self.CurPos,xElement):
			return 0
		if etn is None:
			return 1
		if self.CurPos.ElementTypeName==etn:
			return 1
		return 0

	def AtData(self):
		"""
		Predicate method. Is current position data?
		"""
		return isinstance (self.CurPos,xData)

	def Home (self):
		"""
		Set current position to root node. Returns the tree.
		"""
		self.CurPos = self.RootNode
		return self

	def Seek (self,Node):
		"""
		Set current position to the specified node
		"""
		self.CurPos = Node

	def Down(self):
		"""
		Set current position to first child of current node
		"""
		self.CurPos = self.CurPos.Down

	def HasDown(self):
		"""
		return true if current position has a child
		"""
		if self.CurPos.Down:
			return 1
		return 0

	def Up(self):
		"""
		Set current position to parent of current node
		"""
		self.CurPos = self.CurPos.Up

	def GetUp(self):
		"""
		Return parent of current node
		"""
		return self.CurPos.Up

	def HasUp(self):
		"""
		return true if current position has a parent
		"""
		if self.CurPos.Up:
			return 1
		return 0

	def Right(self):
		"""
		Set current position to next sibling of current node
		"""
		self.CurPos = self.CurPos.Right

	def HasRight(self):
		"""
		return true if current position has a right sibling
		"""
		if self.CurPos.Right:
			return 1
		return 0

	def Left(self):
		"""
		Set current position to previous sibling of current node
		"""
		self.CurPos = self.CurPos.Left

	def HasLeft(self):
		"""
		return true if current position has left sibling
		"""
		if self.CurPos.Left:
			return 1
		return 0

	def Walk(self,func):
		"""
		Walk the current position and its descendants, calling
		the specified function twice for each node. Once
		"on the way down" (func(tree,1)) and once "on the way up"
		(func(tree,0)). The current position is on the node
		concerned during each call.
		"""
		func(self,1)
		self.PushPos()
		for c in self.Children():
			self.Seek(c)
			# recurse
			self.Walk(func)
		self.PopPos()
		func(self,0)

	def WalkData(self,func):
		"""
		Walk the current position and its descendants, calling
		the specified function twice for each data node. Once
		"on the way down" and once "on the way up"
		"""
		if self.AtData():
			func(self,1)
		for c in self.Children():
			self.PushPos()
			self.Seek(c)
			self.WalkData(func)
			self.PopPos()
		if self.AtData():
			func(self,0)

	def WalkElements(self,func):
		"""
		Walk the current position and its descendants, calling
		the specified function twice for each element node. Once
		"on the way down" and once "on the way up"
		"""
		if self.AtElement():
			func(self,1)
		for c in self.Children():
			self.PushPos()
			self.Seek(c)
			self.WalkElements(func)
			self.PopPos()
		if self.AtElement():
			func(self,0)

	def Dispatch(self,obj):
		"""
		Event dispatch over the current position and its
		descendants, calling handler methods on "obj".

		For an element node, obj.handle_<element type name> is
		called with 1 before its children are processed and with 0
		afterwards; obj.default_handler is used in the same way when
		no specific handler exists. For any other node,
		obj.characters is called with 1 and then with 0.
		Missing handlers are silently skipped. The current position
		is on the node concerned while its handlers run and is
		restored before this method returns.
		"""
		self.PushPos()
		if self.AtElement():
			etn = self.ElementTypeName
			if hasattr(obj,"handle_%s" % etn):
				getattr(obj,"handle_%s" % etn)(1)
			elif hasattr(obj,"default_handler"):
				obj.default_handler(1)
			self.PushPos()
			for c in self.Children():
				self.Seek(c)
				self.Dispatch(obj)
			self.PopPos()
			if hasattr(obj,"handle_%s" % etn):
				getattr(obj,"handle_%s" % etn)(0)
			elif hasattr(obj,"default_handler"):
				obj.default_handler(0)
		else:
			if hasattr(obj,"characters"):
				getattr(obj,"characters")(1)
				getattr(obj,"characters")(0)
		self.PopPos()

	def _Attach(self,node,PasteDown):
		"""
		Internal helper for PYX2xTree: link "node" into the tree as
		first child of the current position (PasteDown true) or as
		its next sibling (PasteDown false), then make it the
		current position.
		"""
		if PasteDown:
			self.CurPos.Down = node
			node.Up = self.CurPos
		else:
			self.CurPos.Right = node
			node.Left = self.CurPos
			node.Up = self.CurPos.Up
		self.CurPos = node

	def PYX2xTree(self,f):
		"""
		Build an xTree from a PYX source.
		"f" is any object with a readline method returning PYX
		lines. Any existing tree is zapped first. Reading stops at
		the end-tag that closes the first node of the stream.
		Raises IOError if an empty line (or the end of the stream)
		is met before that. Returns the tree itself, with the
		current position at the new root node.
		"""
		if self.RootNode:
			self.ZapTree()
		# create a temporary root node - will be zapped after
		# the build
		self.RootNode = xElement("!TEMP")

		self.CurPos = self.RootNode
		# Start off, pasting nodes in a downward direction
		PasteDown = 1
		while 1:
			L = f.readline()[:-1]
			if L=="":
				raise IOError("PYX stream terminated prematurely")
			if L[0] == '(':
				# Start-tag, create an element node and attach it.
				# Whatever follows a start-tag is its first child.
				self._Attach(xElement(L[1:]),PasteDown)
				PasteDown = 1

			elif L[0] == ')':
				# End-tag, next node will be pasted right rather
				# than down.
				if not PasteDown:
					# Current position is the last child of the
					# element being closed
					self.CurPos = self.CurPos.Up
				PasteDown = 0
				if self.CurPos == self.RootNode.Down:
					# Back to root? if so, finished
					break

			elif L[0] == '-':
				# Character data, create an xData node with the
				# decoded data. Data has no children, so the next
				# node is a sibling.
				self._Attach(xData(PYXDecoder(L[1:])),PasteDown)
				PasteDown = 0

			elif L[0] == 'A':
				# An attribute. Up to the first
				# space is the attribute name
				# the rest is the attribute value
				i = L.index(" ")
				aName = L[1:i]
				aValue = L[i+1:]
				self.CurPos.AttributeValues[aName] = PYXDecoder(aValue)

			elif L[0] == '?':
				# A processing instruction. These are stored in
				# the tree as "?pi" pseudo-elements with
				# two attributes called "target" and "data"
				# Up to first space is the PI target, rest is the
				# PI data
				i = L.index(" ")
				element = xElement("?pi")
				element.AttributeValues["target"] = L[1:i]
				element.AttributeValues["data"] = L[i+1:]
				self._Attach(element,PasteDown)
				PasteDown = 0

		# Get rid of temporary root node
		temp = self.RootNode.Down
		self.RootNode.Down = None
		temp.Up = None
		self.RootNode = temp
		# After loading from PYX source, root node is current position
		self.CurPos = self.RootNode
		return self

	def Cut(self):
		"""
		Cut out tree rooted at current position and return it
		as a new tree
		New Current Position is set to parent of current node.
		If the current node has no parent nothing is cut and the
		tree itself is returned.
		"""
		if self.CurPos.Up is None:
			return self
		l = self.CurPos.Left
		r = self.CurPos.Right
		# Close the gap between the neighbouring siblings
		if r:
			r.Left = l
		if l:
			l.Right = r
		self.CurPos.Right = self.CurPos.Left = None
		tree = xTree()
		tree.CurPos = tree.RootNode = self.CurPos
		t = self.CurPos.Up
		self.CurPos.Up = None
		if l is None:
			# The cut node was the first child, so the parent
			# must now point at the following sibling
			t.Down = r
		# New Current Position always becomes parent of current node
		self.CurPos = t
		return tree

	def PasteDown(self,l):
		"""
		Paste the specified tree into this tree as first child
		of current position. The pasted xTree object is left empty.
		"""
		assert isinstance (l,xTree)
		b = self.CurPos.Down
		self.CurPos.Down = l.RootNode
		if b:
			b.Left = l.RootNode
		l.RootNode.Up = self.CurPos
		l.RootNode.Right = b
		l.RootNode.Left = None
		# The nodes now belong to this tree
		l.CurPos = l.RootNode = None

	def PasteRight(self,l):
		"""
		Paste the specified tree into this tree as next sibling
		of current position. The pasted xTree object is left empty.
		"""
		assert isinstance (l,xTree)
		e = self.CurPos.Right
		self.CurPos.Right = l.RootNode
		l.RootNode.Left = self.CurPos
		l.RootNode.Up = self.CurPos.Up
		if e:
			e.Left = l.RootNode
		l.RootNode.Right = e
		# The nodes now belong to this tree
		l.CurPos = l.RootNode = None

	def __repr__(self):
		"""
		Return xml serialization of an xTree: an XML declaration
		followed by the serialization of the node at the current
		position.
		"""
		return '<?xml version="1.0"?>\n' + repr(self.CurPos)


	def Descendants1(self,res,n):
		"""
		Add descendants of node "n" to the result list "res".
		This is an internal recursive method invoked from
		the Descendants method
		"""
		if n is None:
			return
		pos = n.Down
		while pos:
			res.append (pos)
			self.Descendants1(res,pos)
			pos = pos.Right

	def Descendants(self,n=None):
		"""
		Create a list of the descendants of the current node
		or the specified node. Most of the work is done by
		the recursive Descendants1 method
		"""
		if n is None:
			n = self.CurPos
		res = []
		self.Descendants1(res,n)
		return res

	def JoinData(self,sep,n=None):
		"""
		Create a string by concatenating the data content of
		all data nodes below the current node or the specified
		node. The separator string is spliced between adjacent
		data items
		"""
		if n is None:
			n = self.CurPos
		res = []
		for i in self.Descendants(n):
			if isinstance(i,xData):
				res.append(i.Data)
		return sep.join(res)

	def Children (self,n=None):
		"""
		Create a list of the children of the current node
		or the specified node. Most of the work is done by
		the Children1 method.
		"""
		if n is None:
			n = self.CurPos
		res = []
		self.Children1(res,n)
		return res

	def Children1 (self,res,n):
		"""
		Add the children of node "n" to the result list "res"
		and return that list
		"""
		pos = n.Down
		if not pos:
			return res
		res.append (pos)
		while pos.Right:
			pos = pos.Right
			res.append (pos)
		return res

	def Ancestors1(self,res,n):
		"""
		Add the ancestors of node "n", nearest first, to the
		result list "res"
		"""
		while n.Up:
			n = n.Up
			res.append (n)

	def Ancestors(self,n=None):
		"""
		Create a list of the Ancestors of the current node
		or the specified node, nearest ancestor first.
		"""
		if n is None:
			n = self.CurPos
		res = []
		self.Ancestors1(res,n)
		return res


# xDispatch: A class for event-driven XML processing
# ---------------------------------------------------
class xDispatch:
	"""
	xDispatch: a Class supporting event-driven XML processing
	via callback methods

	start_foo    : start of element foo
	end_foo      : end of element foo
	characters   : character data
	default_start: start of element with no specified handler
	default_end  : end of element with no specified handler
	processinginstruction: processing instruction

	Keeps track of the currently open elements in the
	Ancestors instance attribute, a list of
	(element type name, attribute dictionary) pairs.

	Allows PYX events to be pushed back onto the
	stream of events (used to support sparse tree building)

	Can act as a data source for PYX2xTree.
	"""
	def __init__(self,fo=None):
		self.Ancestors = []
		self.PYXSource = fo
		# Stack of PYX lines that readline returns before it goes
		# back to the PYX source
		self.PushedEvents = []

	def Sanitize(self,s):
		"""
		Replace periods with underscores so that an element called
		x.y will have  handler methods called start_x_y and end_x_y
		"""
		return s.replace(".","_")

	def PushElement(self,etn,attrs):
		"""
		Given an element type name and an attribute dictionary,
		push the PYX events necessary to create the element
		onto a stack of events. This is used to support
		sparse tree building.
		The most recent entry is also removed from Ancestors.
		"""
		avs = list(attrs.items())
		avs.sort()
		# The events are popped off the end of the list, so they are
		# pushed in reverse: attributes last-to-first, start-tag last
		avs.reverse()
		for (a,v) in avs:
			self.PushedEvents.append ("A%s %s\n" % (a,v))
		self.PushedEvents.append ("(%s\n" % etn)
		self.Ancestors.pop()

	def readline(self):
		"""
		Return the next line of PYX. Any PYX pushed via
		previous PushElement() calls take precedence.
		"""
		if self.PushedEvents:
			# At least 1 pushed event exists
			return self.PushedEvents.pop()
		# No pushed events
		return self.PYXSource.readline()


	def Dispatch(self,fo=None):
		"""
		Process a PYX source calling any callback methods
		defined in this class.
		"fo", if given, replaces the current PYX source. Returns
		once the end-tag that empties the list of open elements has
		been handled. Raises IOError on an empty line or the end of
		the stream, and PyxieException on an unknown event
		character. Data and attribute values are passed to the
		handlers as they appear in the PYX stream.
		"""
		if fo is not None:
			self.PYXSource = fo
		L = self.readline()[:-1]
		while 1:
			if L=="":
				raise IOError("PYX stream terminated prematurely")
			if L[0] == '(':
				etn = L[1:]
				attrs = {}
				# Accumulate attributes for this element
				L = self.readline()[:-1]
				while L[0] == "A":
					# Name runs up to the first space, the rest
					# is the value
					i = L.index(" ")
					attrs[L[1:i]] = L[i+1:]
					L = self.readline()[:-1]
				# Push the event after the start-tag+attributes
				# back on the event stream. This is important
				# because the handler we are about to call
				# may rely on everything being on the event
				# stream - e.g. for sparse tree building
				self.PushedEvents.append (L+"\n")
				StartMethod = "start_%s" % self.Sanitize(etn)
				# Does a handler exist for this element?
				if hasattr(self,StartMethod):
					getattr(self,StartMethod)(etn,attrs)
				elif hasattr(self,"default_start"):
					# call default start-tag handler
					self.default_start(etn,attrs)
				# Add current element to the list of open elements
				self.Ancestors.append((etn,attrs.copy()))
			elif L[0] == ')':
				etn = L[1:]
				EndMethod = "end_%s" % self.Sanitize(etn)
				# take most recently opened element off the list
				# of open elements
				self.Ancestors.pop()
				# Does the element type have an end-tag handler?
				if hasattr(self,EndMethod):
					getattr(self,EndMethod)(etn)
				elif hasattr(self,"default_end"):
					# call the default end-tag handler
					self.default_end(etn)
				if len(self.Ancestors)==0:
					# Stop dispatching once end-tag for root
					# element is encountered
					return
			elif L[0] == '-':
				# Call character data handler if it exists
				if hasattr(self,"characters"):
					self.characters(L[1:])
			elif L[0] == '?':
				# Call processing instruction handler if it
				# exists
				if hasattr(self,"processinginstruction"):
					# Target runs up to the first space, the rest
					# is the data
					i = L.index(" ")
					self.processinginstruction(L[1:i],L[i+1:])
			else:
				raise PyxieException (
					"Unknown PYX event '%s'" % L[0])
			L = self.readline()[:-1]


# xDispatchMultiplexor: A class for parallel dispatch of XML events
# -----------------------------------------------------------------
class xDispatchMultiplexor(xDispatch):
	"""
	xDispatchMultiplexor: an xDispatch that forwards every event
	it receives to each of a list of registered "sink" objects.
	A sink only needs to define the handlers it is interested in.
	"""
	def __init__(self,fo=None):
		xDispatch.__init__(self,fo)
		# Storage for the list of objects that wish to receive
		# event notification
		self.Sinks=[]

	def RegisterSink (self,object):
		"""
		Register a sink with the Multiplexor
		"""
		self.Sinks.append(object)

	def default_start(self,etn,attrs):
		"""
		For each registered sink, see if it has a start handler
		specifically for this element type. Failing that, see
		if it has a default start handler
		"""
		# Handler names are built the same way xDispatch.Dispatch
		# builds them, so element x.y is handled by start_x_y
		handler = "start_%s" % self.Sanitize(etn)
		for s in self.Sinks:
			if hasattr(s,handler):
				getattr(s,handler)(etn,attrs)
			elif hasattr(s,"default_start"):
				s.default_start(etn,attrs)

	def default_end(self,etn):
		"""
		For each registered sink, see if it has an end handler
		specifically for this element type. Failing that, see
		if it has a default end handler
		"""
		handler = "end_%s" % self.Sanitize(etn)
		for s in self.Sinks:
			if hasattr(s,handler):
				getattr(s,handler)(etn)
			elif hasattr(s,"default_end"):
				s.default_end(etn)

	def characters(self,data):
		"""
		For each registered sink, see if it has a character
		handler.
		"""
		for s in self.Sinks:
			if hasattr(s,"characters"):
				s.characters(data)

	def processinginstruction(self,target,data):
		"""
		For each registered sink, see if it has a processing
		instruction handler.
		"""
		for s in self.Sinks:
			if hasattr(s,"processinginstruction"):
				# Sinks get the same (target, data) pair that
				# xDispatch passes to its own handler
				s.processinginstruction(target,data)

# End of xDispatchMultiplexor Class

def Elements(nodelist,elist=[]):
	"""
	Filter a node list to its xElement nodes. When "elist" is
	a non-empty list, only elements whose element type name
	appears in it are kept.
	"""
	# elist is only ever read here, never modified
	return [node for node in nodelist
		if isinstance(node,xElement)
		and (elist==[] or (node.ElementTypeName in elist))]

def ElementTypeNames(nodelist):
	"""
	Return the element type names of the xElement nodes in a
	node list, in node list order
	"""
	return [node.ElementTypeName for node in nodelist
		if isinstance(node,xElement)]

def DataNodes(nodelist):
	"""
	Return the character data (xData) nodes of a node list,
	in node list order
	"""
	return [node for node in nodelist if isinstance(node,xData)]


def NormalizeWhiteSpaceSMG(t):
	"""
	Normalize white space SMG (Sean McGrath style)

	A SGML-ish white space processing algorithm for xTree objects.

	- A line end immediately after a start-tag is ignored
	- A line end immediately before an end-tag is ignored
	- all other line ends are treated as spaces
	- No white space processing performed anywhere in an element
	  where xml:space=="preserve" anywhere in the ancestry of
	  the element

	Data nodes that end up empty are cut out of the tree.
	The tree is modified in place and returned, with its current
	position restored. The nesting count is kept in the module
	level variable PreserveWhiteSpace.
	"""
	global PreserveWhiteSpace
	PreserveWhiteSpace = 0

	def NWS(t,StartOrEnd):
		# Call-back for xTree.Walk: StartOrEnd is 1 on the way
		# down the tree and 0 on the way up
		global PreserveWhiteSpace
		if t.AtElement():
			if t.AttributeValues.get("xml:space") == "preserve":
				# on way down tree
				if StartOrEnd:
					PreserveWhiteSpace = PreserveWhiteSpace + 1
				# on way up tree
				else:
					PreserveWhiteSpace = PreserveWhiteSpace - 1

		elif t.AtData():
			# maybe nothing to do
			if PreserveWhiteSpace > 0 or StartOrEnd == 0:
				return

			# is xData node first child, or first one after an xElement sibling?
			if (t.CurPos.Left is None or
				t.CurPos.Left.__class__.__name__ == 'xElement'):
				# get rid of leading NL, if any. The slice (rather
				# than an index) copes with a node that is already
				# empty
				if t.CurPos.Data[:1] == "\n":
					t.CurPos.Data = t.CurPos.Data[1:]

			# nothing more to do if no data left
			if not t.CurPos.Data: return

			# is xData node last child, or last one before an xElement sibling?
			if (t.CurPos.Right is None or
				t.CurPos.Right.__class__.__name__ == 'xElement'):
				# get rid of trailing NL, if any
				if t.CurPos.Data[-1:] == "\n":
					t.CurPos.Data = t.CurPos.Data[:-1]

			# nothing more to do if no data left
			if not t.CurPos.Data: return

			# change all NLs to SPACEs
			t.CurPos.Data = t.CurPos.Data.replace("\n"," ")

	t.PushPos()
	t.Home()
	t.Walk (NWS)
	t.Home()
	# Traverse tree for empty data nodes and remove them
	for n in t:
		t.Seek(n)
		if t.AtData() and t.Data == "":
			t.Cut()
	t.PopPos()
	return t

def PYXEncoder(s):
	"""
	Return "s" with backslash, newline and tab characters replaced
	by their escaped PYX forms (backslash backslash, backslash n
	and backslash t)
	"""
	# Backslashes are doubled first, otherwise the backslashes
	# introduced by the other two escapes would be doubled as well
	s = s.replace("\\","\\\\")
	s = s.replace("\n","\\n")
	s = s.replace("\t","\\t")
	return s

def PYXDecoder(s):
	"""
	Return "s" with escaped tab, newline and backslash sequences
	replaced by literal tabs, newlines and backslashes.
	This is the inverse of PYXEncoder. Any other backslash
	sequence is left untouched.
	"""
	# Escapes have to be recognised strictly left to right. Cutting
	# the string at every escaped backslash first guarantees that a
	# backslash belonging to such a pair is never mistaken for the
	# start of an escaped newline or tab.
	pieces = []
	for piece in s.split("\\\\"):
		piece = piece.replace("\\n","\n")
		piece = piece.replace("\\t","\t")
		pieces.append(piece)
	# Each cut stands for one literal backslash
	return "\\".join(pieces)

def PYX2xTree(f):
	"""
	Build a new xTree from a file-like object that
	delivers PYX notation.
	Returns the xTree.
	"""
	tree = xTree()
	return tree.PYX2xTree(f)

def String2xTree (str):
	"""
	Build an xTree from an XML instance held in a string.
	The string is parsed with PyExpat.
	"""
	source = StringIO.StringIO(str)
	pyx = PYExpat2PYX(source)
	return PYX2xTree (pyx)

def String2PYX(str):
	"""
	Build a PYX source from an XML instance held in a string.
	The string is parsed with PyExpat.
	"""
	source = StringIO.StringIO(str)
	return PYExpat2PYX(source)

def File2xTree(filename):
	"""
	Return an xTree built from the XML in the specified file.
	Uses PyExpat as the XML parser
	"""
	fo = open(filename,"r")
	try:
		return PYX2xTree (PYExpat2PYX(fo))
	finally:
		# PYExpat2PYX reads the whole file before it returns, so
		# the input can be closed as soon as the tree is built
		fo.close()

def File2PYX(filename):
	"""
	Return a PYX source built from the XML in the specified file.
	Uses PyExpat as the XML parser
	"""
	fo = open(filename,"r")
	try:
		return PYExpat2PYX(fo)
	finally:
		# PYExpat2PYX reads the whole file before it returns, so
		# the input is no longer needed once the PYX source exists
		fo.close()

def PYExpat2PYX(fo):
	"""
	Utility function to create PYX notation from XML using the
	expat parser.

	"fo" is a file-like object; its whole content is read and
	parsed. The PYX is written to a temporary file and a
	file-like object reading that file is returned; the temporary
	file is removed when the returned object is closed.
	Raises PyxieException if the parser reports failure. If
	parsing fails in any way the temporary file is removed.
	"""
	try:
		from xml.parsers import pyexpat
	except ImportError:
		from xml.parsers import expat
		pyexpat = expat
	import os,tempfile,codecs
	if hasattr(tempfile,"mkstemp"):
		# mkstemp creates the file itself, which avoids the race
		# between choosing a name and opening it
		(fd,tempfilename) = tempfile.mkstemp()
		os.close(fd)
	else:
		# Older Pythons only offer mktemp
		tempfilename = tempfile.mktemp()
	out = codecs.open (tempfilename,encoding='ISO-8859-1',mode="w")

	def StartElementHandler(tfo,name,attrs):
		tfo.write ("(%s\n" % name)
		if isinstance(attrs,list):
			# Early pyexpat uses list for attributes:
			# name, value, name, value ...
			i = 0
			while i < len(attrs):
				tfo.write ("A%s %s\n" % (
					attrs[i],PYXEncoder(attrs[i+1])))
				i = i + 2
		else:
			# Later pyexpat uses dict for attributes
			for (k,v) in attrs.items():
				# Attribute values are escaped like character data:
				# xTree.PYX2xTree decodes them when it reads them
				tfo.write ("A%s %s\n" % (k,PYXEncoder(v)))

	def EndElementHandler(tfo,name):
		tfo.write (")%s\n" % name)

	def CharacterDataHandler(tfo,data):
		tfo.write ("-%s\n" % PYXEncoder(data))

	def ProcessingInstructionHandler(tfo,target,data):
		tfo.write ("?%s %s\n" % (target,data))

	succeeded = 0
	try:
		Parser = pyexpat.ParserCreate( 'ISO-8859-1' )
		Parser.StartElementHandler = lambda n,a: StartElementHandler(out,n,a)
		Parser.EndElementHandler = lambda n: EndElementHandler(out,n)
		Parser.CharacterDataHandler = lambda d: CharacterDataHandler(out,d)
		Parser.ProcessingInstructionHandler = lambda t,d: ProcessingInstructionHandler(out,t,d)
		ParserStatus = Parser.Parse( fo.read(), 1)
		if ParserStatus == 0:
			raise PyxieException("Parse failed")
		succeeded = 1
	finally:
		out.close()
		if not succeeded:
			# Do not leave a half written temporary file behind
			try:
				os.remove(tempfilename)
			except OSError:
				pass
	#
	# Wrap the temporary file so that it is removed automatically
	# when the returned object is closed.
	#
	# Python < 2.3 has tempfile.TemporaryFileWrapper,
	# python >= 2.3 has tempfile._TemporaryFileWrapper
	#
	wrapper = getattr(tempfile,"TemporaryFileWrapper",None)
	if wrapper is None:
		wrapper = tempfile._TemporaryFileWrapper
	return wrapper(open (tempfilename,"r"), tempfilename)


def SAX2PYX(fo,ParserSelection=None):
	"""
	Parse XML with a SAX parser and return the result in PYX notation.

	fo              -- the XML source; it is handed unchanged to the
	                   parse() method of the SAX parser
	ParserSelection -- name of the SAX driver to use; it is appended to
	                   "xml.sax.drivers.drv_" to form the driver module
	                   name. When None (the default) the choice of
	                   driver is left to saxexts.make_parser().

	Returns a file-like object, open for reading, that holds the PYX
	output. It wraps a temporary file using the tempfile module's
	wrapper class, which deletes the file when the object is closed.

	Exceptions raised by the parser propagate to the caller.
	"""
	from xml.sax import saxexts, saxlib, saxutils
	import tempfile
	# NOTE(review): tempfile.mktemp() only returns a name, so another
	# process could create the file first. tempfile.mkstemp() avoids
	# that but needs Python 2.3 or later.
	tempfilename = tempfile.mktemp()

	class myHandler (saxlib.HandlerBase):
		# Document handler that writes each SAX event as a PYX line.
		def __init__(self,tempfilename):
			self.fo = open (tempfilename,"w")

		def startElement(self,Element,Attributes):
			self.fo.write("(%s\n" % Element)
			for i in range (0,Attributes.getLength()):
				self.fo.write("A%s %s\n" % (
					Attributes.getName(i),
					PYXEncoder(Attributes.getValue(i))))

		def characters(self,data,offset,length):
			self.fo.write("-%s\n" % (
				PYXEncoder(data[offset:offset+length])))

		# The missing "self" parameter made every processing
		# instruction raise a TypeError.
		def processingInstruction (self,target,data):
			self.fo.write("?%s %s\n" % (target,data))

		def endElement(self,Element):
			self.fo.write(")%s\n"  % Element)

		def endDocument(self):
			self.fo.close()

	h = myHandler(tempfilename)
	try:
		if ParserSelection is None:
			# No driver requested: let saxexts pick one instead of
			# failing on string + None.
			parser = saxexts.make_parser()
		else:
			parser = saxexts.make_parser("xml.sax.drivers.drv_" + ParserSelection)
		parser.setDocumentHandler(h)
		parser.parse (fo)
	finally:
		# endDocument normally closes the file, but it is not reached
		# when parsing fails. Closing a file twice is harmless.
		h.fo.close()
	#
	# The pyxie library creates a lot of temporary files in the /tmp
	# directory. In the standard version of pyxie.py these temporary
	# files are not removed automatically. Wrapping the file in the
	# tempfile module's wrapper class makes it delete itself on close;
	# see this message on the pyxie mailing list:
	#
	# http://sourceforge.net/mailarchive/forum.php?thread_id=18675&forum_id=775
	#
	# Python < 2.3 has tempfile.TemporaryFileWrapper,
	# python >= 2.3 has tempfile._TemporaryFileWrapper
	#
	try:
		wrapper = tempfile.TemporaryFileWrapper
	except AttributeError:
		wrapper = tempfile._TemporaryFileWrapper
	# Only the attribute lookup is guarded, so an error from open()
	# is reported as itself rather than masked by the fallback.
	return wrapper(open (tempfilename,"r"), tempfilename)

def Envelope(t,SourceElementType,EnvelopeElementType):
	"""
	Wrap every element of type SourceElementType in the xTree t
	inside a new element of type EnvelopeElementType.

	t                   -- the xTree to modify
	SourceElementType   -- element type name of the elements to wrap
	EnvelopeElementType -- element type name of the wrapper elements

	The position of t is pushed on entry and popped again on exit.
	"""
	t.PushPos()
	t.Home()
	# First pass: collect the matching nodes, because the second pass
	# modifies the tree.
	targets = []
	for node in t:
		if t.AtElement(SourceElementType):
			targets.append (node)

	# Second pass: envelope each collected node in turn
	for node in targets:
		t.Seek(node)
		# The envelope starts out as a one-element xTree
		envelopeMarkup = "<%s></%s>"  % (EnvelopeElementType,EnvelopeElementType)
		envelope = String2xTree (envelopeMarkup)
		t.PasteRight (envelope)
		t.Right()
		# Save the envelope's position before stepping back
		t.PushPos()
		# Step back and cut out the original sub-tree
		t.Left()
		subTree = t.Cut()
		# Return to the envelope and paste the sub-tree into it
		t.PopPos()
		t.PasteDown(subTree)
	t.PopPos()

# Runs only when the file is executed as a script: exercises the library
# against a small sample document and prints the results to stdout.
if __name__ == "__main__":
	"""
	Test harness for Pyxie
	"""
	import sys,StringIO,tempfile
	# Sample XML document used by every example below
	tables = """
<test a = "b">
Some data content in foo
<table>
<tr><td>Table 1 r1c1</td><td>Table 1 r1c2</td></tr>
<tr><td>Table 1 r2c1</td><td>Table 1 r2c2</td></tr>
</table>
<B x = "42">
Some data content in B
</B>
<table>
<tr><td>Table 2 r1c1</td><td>Table 2 r1c2</td></tr>
<tr><td>Table 2 r2c1</td><td>Table 2 r2c2</td></tr>
</table>
Some more content in foo
<G></G>
</test>
"""
	try:
		print "-----------------------------"
		print "Pyxie %s Test Harness output:" % __version__
		print "-----------------------------"

		# Build a tree from the sample document and print it
		t = String2xTree (tables)
		print "-------------"
		print "Root element:"
		print t
		
		print "----------------"
		print "Serialized tree:"
		# Backticks are Python 2 shorthand for repr()
		print `t`

		print "-------------------"
		print "Element Type Names:"
		# Iterate over the tree, reporting only positions that are elements.
		# The trailing comma keeps the names on one output line.
		for n in t:
			if t.AtElement():
				print t.ElementTypeName,
		print
				
		print "------------------------"
		print "Summarized Data content:"
		# Same iteration, this time reporting only data positions
		for n in t:
			if t.AtData():
				print PYXEncoder(`t.CurPos`)+"...",
		print
		
		print "---------------------------------"
		print "Tree with normalized white space:"
		# The tree is printed again after the call, so NormalizeWhiteSpaceSMG
		# presumably modifies t in place -- TODO confirm
		NormalizeWhiteSpaceSMG(t)
		print `t`

		print "---------------------------------"
		print "Example of PYX event dispatching:"

		# xDispatch subclass that starts dispatching as soon as it is
		# constructed. The start_table/end_table/characters methods are
		# presumably the callbacks Dispatch() invokes -- verify against
		# xDispatch.
		class myHandler (xDispatch):
			def __init__(self,fo):
				xDispatch.__init__(self,fo)
				self.Dispatch()
			def start_table(self,etn,attrs):
				print "starting ",etn
			def end_table(self,etn):
				print "ending ",etn
			def characters(self,data):
				# Show only the first ten characters of each data chunk
				print "data",data[:10],"...",
		myHandler(String2PYX(tables))
		print

		print "-----------------------------------"
		print "Example of xTree event dispatching:"

		# Start again from a freshly built tree
		t = String2xTree (tables)

		# Plain class that passes itself to the tree's Dispatch() method
		class foo:
			def __init__(self,t):
				self.Tree = t
				self.Tree.Dispatch(self)
				
			def handle_table(self,s):
				# s is true for the start of a table, false for its end
				if s:
					print "table start"
				else:
					print "table end"
					
			def characters(self,s):
				if s:
					# First five characters of the tree's current data
					print PYXEncoder(self.Tree.Data[:5])

		foo (t)
		
		print "--------------------------------"
		print "Example of sparse tree building:"

		# Rebinds the name myHandler to a new class for this example
		class myHandler (xDispatch):
			def __init__(self,fo):
				xDispatch.__init__(self,fo)
				self.Dispatch()
			def start_table(self,etn,attrs):
				print "sparse tree build of table element"
				# Push the table start-tag data back
				# to be re-dispatched
				self.PushElement(etn,attrs)
				# Build a tree using this dispatcher as the PYX source
				t = PYX2xTree(self)
				print `t`
			def end_table(self,etn):
				print "ending table"

			def default_start(self,etn,attrs):
				print "start",etn
				
			def default_end(self,etn):
				print "end",etn
				
		myHandler(String2PYX(tables))
		
		print "--------------------------------"
		print "Example of event multiplexing:"

		# Two sinks that differ only in the label they print
		class Sink1(xDispatch):
			def start_table(self,etn,attrs):
				print "Sink 1 - start table"
			def end_table(self,etn):
				print "Sink 1 - end table"
				
		class Sink2(xDispatch):
			def start_table(self,etn,attrs):
				print "Sink 2 - start table"
			def end_table(self,etn):
				print "Sink 2 - end table"

		# Register both sinks with one multiplexor, then dispatch the
		# sample document's PYX stream through it
		mux = xDispatchMultiplexor()
		mux.RegisterSink (Sink1())
		mux.RegisterSink (Sink2())
		mux.Dispatch(String2PYX(tables))
		
	# Library errors are reported by printing their problem attribute
	except PyxieException,e:
		print e.problem
			

