# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # """ Parsers for SGML and XML to dom. """ import sgmllib, xml.sax.handler from dom import * class Parser: def __init__(self): self.tree = Tree() self.node = self.tree self.nodes = [] def line(self, id, lineno, colno): while self.nodes: n = self.nodes.pop() n._line(id, lineno, colno) def add(self, node): self.node.add(node) self.nodes.append(node) def start(self, name, attrs): tag = Tag(name, *attrs) self.add(tag) self.node = tag def end(self, name): self.balance(name) self.node = self.node.parent def data(self, data): children = self.node.children if children and isinstance(children[-1], Data): children[-1].data += data else: self.add(Data(data)) def comment(self, comment): self.add(Comment(comment)) def entity(self, ref): self.add(Entity(ref)) def character(self, ref): self.add(Character(ref)) def balance(self, name = None): while self.node != self.tree and name != self.node.name: self.node.parent.extend(self.node.children) del self.node.children[:] self.node.singleton = True self.node = self.node.parent class SGMLParser(sgmllib.SGMLParser): def __init__(self, entitydefs = None): sgmllib.SGMLParser.__init__(self) if entitydefs == None: self.entitydefs = {} else: self.entitydefs = entitydefs self.parser = Parser() def unknown_starttag(self, name, attrs): self.parser.start(name, attrs) def handle_data(self, data): self.parser.data(data) def handle_comment(self, comment): self.parser.comment(comment) def unknown_entityref(self, ref): self.parser.entity(ref) def unknown_charref(self, ref): self.parser.character(ref) def unknown_endtag(self, name): self.parser.end(name) def close(self): sgmllib.SGMLParser.close(self) self.parser.balance() assert self.parser.node == self.parser.tree class XMLParser(xml.sax.handler.ContentHandler): def __init__(self): self.parser = Parser() self.locator = None def line(self): if self.locator != None: self.parser.line(self.locator.getSystemId(), self.locator.getLineNumber(), self.locator.getColumnNumber()) def setDocumentLocator(self, locator): self.locator = locator def startElement(self, name, attrs): self.parser.start(name, attrs.items()) self.line() def endElement(self, name): self.parser.end(name) self.line() def characters(self, content): self.parser.data(content) self.line() def skippedEntity(self, name): self.parser.entity(name) self.line()