added completely new version for haslach 2025
This commit is contained in:
244
.venv/lib/python3.7/site-packages/lxml/ElementInclude.py
Normal file
244
.venv/lib/python3.7/site-packages/lxml/ElementInclude.py
Normal file
@@ -0,0 +1,244 @@
|
||||
#
|
||||
# ElementTree
|
||||
# $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $
|
||||
#
|
||||
# limited xinclude support for element trees
|
||||
#
|
||||
# history:
|
||||
# 2003-08-15 fl created
|
||||
# 2003-11-14 fl fixed default loader
|
||||
#
|
||||
# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
|
||||
#
|
||||
# fredrik@pythonware.com
|
||||
# http://www.pythonware.com
|
||||
#
|
||||
# --------------------------------------------------------------------
|
||||
# The ElementTree toolkit is
|
||||
#
|
||||
# Copyright (c) 1999-2004 by Fredrik Lundh
|
||||
#
|
||||
# By obtaining, using, and/or copying this software and/or its
|
||||
# associated documentation, you agree that you have read, understood,
|
||||
# and will comply with the following terms and conditions:
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its associated documentation for any purpose and without fee is
|
||||
# hereby granted, provided that the above copyright notice appears in
|
||||
# all copies, and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of
|
||||
# Secret Labs AB or the author not be used in advertising or publicity
|
||||
# pertaining to distribution of the software without specific, written
|
||||
# prior permission.
|
||||
#
|
||||
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
||||
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
||||
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
||||
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
||||
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
# OF THIS SOFTWARE.
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
"""
|
||||
Limited XInclude support for the ElementTree package.
|
||||
|
||||
While lxml.etree has full support for XInclude (see
|
||||
`etree.ElementTree.xinclude()`), this module provides a simpler, pure
|
||||
Python, ElementTree compatible implementation that supports a simple
|
||||
form of custom URL resolvers.
|
||||
"""
|
||||
|
||||
from lxml import etree
|
||||
try:
|
||||
from urlparse import urljoin
|
||||
from urllib2 import urlopen
|
||||
except ImportError:
|
||||
# Python 3
|
||||
from urllib.parse import urljoin
|
||||
from urllib.request import urlopen
|
||||
|
||||
# Clark-notation namespace prefix shared by every XInclude element name.
XINCLUDE = "{http://www.w3.org/2001/XInclude}"

# Fully qualified tag names, plus the wildcard used to iterate over every
# element in the XInclude namespace.
XINCLUDE_INCLUDE = "%sinclude" % XINCLUDE
XINCLUDE_FALLBACK = "%sfallback" % XINCLUDE
XINCLUDE_ITER_TAG = "%s*" % XINCLUDE

# For security reasons, the inclusion depth is limited to this read-only value by default.
DEFAULT_MAX_INCLUSION_DEPTH = 6
|
||||
|
||||
|
||||
##
|
||||
# Fatal include error.
|
||||
|
||||
class FatalIncludeError(etree.LxmlSyntaxError):
    """Fatal error raised while expanding XInclude directives."""
|
||||
|
||||
|
||||
class LimitedRecursiveIncludeError(FatalIncludeError):
    """Fatal include error signalling that the inclusion depth limit was hit."""
|
||||
|
||||
|
||||
##
|
||||
# ET compatible default loader.
|
||||
# This loader reads an included resource from disk.
|
||||
#
|
||||
# @param href Resource reference.
|
||||
# @param parse Parse mode. Either "xml" or "text".
|
||||
# @param encoding Optional text encoding.
|
||||
# @return The expanded resource. If the parse mode is "xml", this
|
||||
# is an ElementTree instance. If the parse mode is "text", this
|
||||
# is a Unicode string. If the loader fails, it can return None
|
||||
# or raise an IOError exception.
|
||||
# @throws IOError If the loader fails to load the resource.
|
||||
|
||||
def default_loader(href, parse, encoding=None):
    """ElementTree compatible default loader: read an included resource from disk.

    :param href: Path of the resource to load.
    :param parse: Parse mode.  ``"xml"`` parses the file and returns its root
        element; any other value reads the file as text.
    :param encoding: Optional text encoding, used only in text mode.
        Defaults to UTF-8 when empty or ``None``.
    :return: The root element (``"xml"`` mode) or the decoded file content.
    :raises IOError: If the resource cannot be opened or read.
    """
    # The context manager releases the handle even when parsing, reading or
    # decoding fails; the previous open()/close() pair leaked it on errors.
    with open(href, 'rb') as source:
        if parse == "xml":
            return etree.parse(source).getroot()
        data = source.read()
    return data.decode(encoding or 'utf-8')
|
||||
|
||||
|
||||
##
|
||||
# Default loader used by lxml.etree - handles custom resolvers properly
|
||||
#
|
||||
|
||||
def _lxml_default_loader(href, parse, encoding=None, parser=None):
|
||||
if parse == "xml":
|
||||
data = etree.parse(href, parser).getroot()
|
||||
else:
|
||||
if "://" in href:
|
||||
f = urlopen(href)
|
||||
else:
|
||||
f = open(href, 'rb')
|
||||
data = f.read()
|
||||
f.close()
|
||||
if not encoding:
|
||||
encoding = 'utf-8'
|
||||
data = data.decode(encoding)
|
||||
return data
|
||||
|
||||
|
||||
##
|
||||
# Wrapper for ET compatibility - drops the parser
|
||||
|
||||
def _wrap_et_loader(loader):
|
||||
def load(href, parse, encoding=None, parser=None):
|
||||
return loader(href, parse, encoding)
|
||||
return load
|
||||
|
||||
|
||||
##
|
||||
# Expand XInclude directives.
|
||||
#
|
||||
# @param elem Root element.
|
||||
# @param loader Optional resource loader. If omitted, it defaults
|
||||
# to {@link default_loader}. If given, it should be a callable
|
||||
# that implements the same interface as <b>default_loader</b>.
|
||||
# @param base_url The base URL of the original file, to resolve
|
||||
# relative include file references.
|
||||
# @param max_depth The maximum number of recursive inclusions.
|
||||
# Limited to reduce the risk of malicious content explosion.
|
||||
# Pass None to disable the limitation.
|
||||
# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
|
||||
# @throws FatalIncludeError If the function fails to include a given
|
||||
# resource, or if the tree contains malformed XInclude elements.
|
||||
# @throws IOError If the function fails to load a given resource.
|
||||
# @returns None; the XInclude elements are expanded in place within the given tree.
|
||||
|
||||
def include(elem, loader=None, base_url=None,
            max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
    """Expand the XInclude directives found in *elem*, in place.

    :param elem: Root element, or a tree object providing ``getroot()``.
    :param loader: Optional resource loader taking ``(href, parse, encoding)``.
    :param base_url: Base URL used to resolve relative references.  When
        ``None``, it is taken from the tree's ``docinfo.URL`` if available.
    :param max_depth: Maximum number of recursive inclusions, or ``None`` to
        disable the limit.
    :raises ValueError: If *max_depth* is negative.
    """
    if max_depth is None:
        # -1 never reaches the "== 0" stop condition, i.e. unlimited depth.
        max_depth = -1
    elif max_depth < 0:
        raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)

    tree = None
    if hasattr(elem, 'getroot'):
        # A tree object was passed: keep it for the URL lookup, work on its root.
        tree, elem = elem, elem.getroot()
    elif base_url is None:
        tree = elem.getroottree()

    if base_url is None and hasattr(tree, 'docinfo'):
        base_url = tree.docinfo.URL

    _include(elem, loader, base_url, max_depth)
|
||||
|
||||
|
||||
def _include(elem, loader=None, base_url=None,
             max_depth=DEFAULT_MAX_INCLUSION_DEPTH, _parent_hrefs=None):
    """Recursively expand the XInclude elements at and below *elem*.

    :param elem: Element whose subtree is processed.
    :param loader: Optional ElementTree style loader; when ``None`` the lxml
        default loader is used.
    :param base_url: Base URL against which ``href`` attributes are resolved.
    :param max_depth: Remaining number of nested XML inclusions that may be
        followed; ``0`` stops with an error, negative values never stop.
    :param _parent_hrefs: Set of hrefs currently being included, used to
        detect recursive inclusion.
    :return: *elem*, or the loaded node/text if the processed ``xi:include``
        element has no parent (i.e. it was the root).
    :raises LimitedRecursiveIncludeError: If *max_depth* is exhausted.
    :raises FatalIncludeError: On recursive inclusion, a resource that cannot
        be loaded, an unknown parse type or a malformed XInclude element.
    """
    if loader is not None:
        load_include = _wrap_et_loader(loader)
    else:
        load_include = _lxml_default_loader

    if _parent_hrefs is None:
        _parent_hrefs = set()

    parser = elem.getroottree().parser

    # Collect the matches up front: the loop below replaces and removes nodes.
    include_elements = list(
        elem.iter(XINCLUDE_ITER_TAG))

    for e in include_elements:
        if e.tag == XINCLUDE_INCLUDE:
            # process xinclude directive
            href = urljoin(base_url, e.get("href"))
            parse = e.get("parse", "xml")
            parent = e.getparent()
            if parse == "xml":
                if href in _parent_hrefs:
                    raise FatalIncludeError(
                        "recursive include of %r detected" % href
                        )
                if max_depth == 0:
                    raise LimitedRecursiveIncludeError(
                        "maximum xinclude depth reached when including file %s" % href)
                node = load_include(href, parse, parser=parser)
                if node is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                node = _include(node, loader, href, max_depth - 1, {href} | _parent_hrefs)
                if e.tail:
                    node.tail = (node.tail or "") + e.tail
                if parent is None:
                    return node  # replaced the root node!
                parent.replace(e, node)
            elif parse == "text":
                text = load_include(href, parse, encoding=e.get("encoding"))
                if text is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                if parent is None:
                    # Checked first, as in the "xml" branch: without a parent
                    # there is nothing to remove the directive from.
                    return text  # replaced the root node!
                # Removing 'e' discards its tail as well, so the tail has to be
                # carried over explicitly in both cases below (previously it
                # was lost whenever a preceding sibling existed).
                text += e.tail or ""
                predecessor = e.getprevious()
                if predecessor is not None:
                    predecessor.tail = (predecessor.tail or "") + text
                else:
                    parent.text = (parent.text or "") + text
                parent.remove(e)
            else:
                raise FatalIncludeError(
                    "unknown parse type in xi:include tag (%r)" % parse
                )
        elif e.tag == XINCLUDE_FALLBACK:
            parent = e.getparent()
            if parent is not None and parent.tag != XINCLUDE_INCLUDE:
                raise FatalIncludeError(
                    "xi:fallback tag must be child of xi:include (%r)" % e.tag
                    )
        else:
            raise FatalIncludeError(
                "Invalid element found in XInclude namespace (%r)" % e.tag
                )
    return elem
|
||||
23
.venv/lib/python3.7/site-packages/lxml/__init__.py
Normal file
23
.venv/lib/python3.7/site-packages/lxml/__init__.py
Normal file
@@ -0,0 +1,23 @@
|
||||
# this is a package
|
||||
|
||||
__version__ = "4.9.3"
|
||||
|
||||
|
||||
def get_include():
    """
    Returns a list of header include paths (for lxml itself, libxml2
    and libxslt) needed to compile C code against lxml if it was built
    with statically linked libraries.
    """
    import os

    package_dir = __path__[0]
    header_dir = os.path.join(package_dir, 'includes')

    # The 'includes' directory and the package directory always come first,
    # followed by every sub-directory of 'includes' in listing order.
    candidates = (os.path.join(header_dir, entry)
                  for entry in os.listdir(header_dir))
    search_paths = [header_dir, package_dir]
    search_paths.extend(path for path in candidates if os.path.isdir(path))
    return search_paths
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
345
.venv/lib/python3.7/site-packages/lxml/_elementpath.py
Normal file
345
.venv/lib/python3.7/site-packages/lxml/_elementpath.py
Normal file
@@ -0,0 +1,345 @@
|
||||
# cython: language_level=2
|
||||
|
||||
#
|
||||
# ElementTree
|
||||
# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
|
||||
#
|
||||
# limited xpath support for element trees
|
||||
#
|
||||
# history:
|
||||
# 2003-05-23 fl created
|
||||
# 2003-05-28 fl added support for // etc
|
||||
# 2003-08-27 fl fixed parsing of periods in element names
|
||||
# 2007-09-10 fl new selection engine
|
||||
# 2007-09-12 fl fixed parent selector
|
||||
# 2007-09-13 fl added iterfind; changed findall to return a list
|
||||
# 2007-11-30 fl added namespaces support
|
||||
# 2009-10-30 fl added child element value filter
|
||||
#
|
||||
# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
|
||||
#
|
||||
# fredrik@pythonware.com
|
||||
# http://www.pythonware.com
|
||||
#
|
||||
# --------------------------------------------------------------------
|
||||
# The ElementTree toolkit is
|
||||
#
|
||||
# Copyright (c) 1999-2009 by Fredrik Lundh
|
||||
#
|
||||
# By obtaining, using, and/or copying this software and/or its
|
||||
# associated documentation, you agree that you have read, understood,
|
||||
# and will comply with the following terms and conditions:
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its associated documentation for any purpose and without fee is
|
||||
# hereby granted, provided that the above copyright notice appears in
|
||||
# all copies, and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of
|
||||
# Secret Labs AB or the author not be used in advertising or publicity
|
||||
# pertaining to distribution of the software without specific, written
|
||||
# prior permission.
|
||||
#
|
||||
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
||||
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
||||
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
||||
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
||||
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
# OF THIS SOFTWARE.
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
##
|
||||
# Implementation module for XPath support. There's usually no reason
|
||||
# to import this module directly; the <b>ElementTree</b> does this for
|
||||
# you, if needed.
|
||||
##
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import re
|
||||
|
||||
# Group 1 captures operators and quoted strings, group 2 captures names
# (optionally prefixed with a '{uri}' namespace); whitespace matches no group.
xpath_tokenizer_re = re.compile(
    "("
    "'[^']*'|\"[^\"]*\"|"
    "::|"
    "//?|"
    r"\.\.|"
    r"\(\)|"
    r"[/.*:\[\]\(\)@=])|"
    r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
    r"\s+"
    )


def xpath_tokenizer(pattern, namespaces=None):
    """Yield ``(operator, name)`` token pairs for the path expression *pattern*.

    Prefixed names (``prefix:name``) are rewritten to ``{uri}name`` using
    *namespaces*.  Unprefixed names receive the default namespace (key
    ``None`` or ``''``), except for a name directly following ``@``.

    Raises SyntaxError if a prefix is not found in *namespaces*.
    """
    # ElementTree uses '', lxml used None originally.
    if namespaces:
        default_namespace = namespaces.get(None) or namespaces.get('')
    else:
        default_namespace = None

    after_at_sign = False
    for token in xpath_tokenizer_re.findall(pattern):
        ttype, tag = token

        if not tag or tag[0] == "{":
            # Operators, whitespace and already qualified names pass through.
            yield token
            after_at_sign = ttype == '@'
            continue

        if ":" in tag:
            prefix, local_name = tag.split(":", 1)
            try:
                if not namespaces:
                    raise KeyError
                uri = namespaces[prefix]
            except KeyError:
                raise SyntaxError("prefix %r not found in prefix map" % prefix)
            yield ttype, "{%s}%s" % (uri, local_name)
        elif default_namespace and not after_at_sign:
            yield ttype, "{%s}%s" % (default_namespace, tag)
        else:
            yield token
        after_at_sign = False
|
||||
|
||||
|
||||
def prepare_child(next, token):
    """Build a selector yielding the children named ``token[1]`` of each input element."""
    child_tag = token[1]

    def select(result):
        return (child
                for parent in result
                for child in parent.iterchildren(child_tag))

    return select
|
||||
|
||||
def prepare_star(next, token):
    """Build a selector yielding every child element (``'*'``) of each input element."""
    def select(result):
        return (child
                for parent in result
                for child in parent.iterchildren('*'))

    return select
|
||||
|
||||
def prepare_self(next, token):
    """Build the selector for ``.``: it hands its input back unchanged."""
    def select(result):
        # '.' selects the context nodes themselves.
        return result

    return select
|
||||
|
||||
def prepare_descendant(next, token):
    """Build the selector for ``//``, consuming the token that follows it.

    The following token must be ``*`` or a plain name; anything else raises
    SyntaxError.
    """
    following = next()
    kind = following[0]
    if kind == "*":
        tag = "*"
    elif kind:
        raise SyntaxError("invalid descendant")
    else:
        tag = following[1]

    def select(result):
        return (descendant
                for elem in result
                for descendant in elem.iterdescendants(tag))

    return select
|
||||
|
||||
def prepare_parent(next, token):
    """Build the selector for ``..``: yield the parent of each input element."""
    def select(result):
        for node in result:
            owner = node.getparent()
            if owner is None:
                # Elements without a parent contribute nothing.
                continue
            yield owner

    return select
|
||||
|
||||
def prepare_predicate(next, token):
    """Build the selector for a ``[...]`` predicate.

    Tokens are consumed up to the closing ``]`` and condensed into a
    signature string (``-`` stands for a name, ``'`` for a quoted string,
    operators stand for themselves) which decides the predicate type.
    Supported forms: ``[@attr]``, ``[@attr='value']``, ``[tag]``,
    ``[.='value']``, ``[tag='value']``, ``[index]``, ``[last()]`` and
    ``[last()-index]``.  Anything else raises SyntaxError.
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    signature = ''
    predicate = []
    while True:
        token = next()
        kind = token[0]
        if kind == "]":
            break
        if token == ('', ''):
            # ignore whitespace
            continue
        if kind and kind[:1] in "'\"":
            # Quoted string: normalise the type to ' and strip the quotes.
            token = "'", kind[1:-1]
        signature += token[0] or "-"
        predicate.append(token[1])

    def looks_like_integer(text):
        return re.match(r"-?\d+$", text)

    # use signature to determine predicate type
    if signature == "@-":
        # [@attribute] predicate
        key = predicate[1]

        def select(result):
            for elem in result:
                if elem.get(key) is not None:
                    yield elem
        return select

    if signature == "@-='":
        # [@attribute='value']
        key, value = predicate[1], predicate[-1]

        def select(result):
            for elem in result:
                if elem.get(key) == value:
                    yield elem
        return select

    if signature == "-" and not looks_like_integer(predicate[0]):
        # [tag]
        tag = predicate[0]

        def select(result):
            for elem in result:
                # One matching child is enough to select the element.
                for _ in elem.iterchildren(tag):
                    yield elem
                    break
        return select

    if signature == ".='" or (signature == "-='" and not looks_like_integer(predicate[0])):
        # [.='value'] or [tag='value']
        tag, value = predicate[0], predicate[-1]
        if tag:
            def select(result):
                for elem in result:
                    for child in elem.iterchildren(tag):
                        if "".join(child.itertext()) == value:
                            yield elem
                            break
        else:
            def select(result):
                for elem in result:
                    if "".join(elem.itertext()) == value:
                        yield elem
        return select

    if signature in ("-", "-()", "-()-"):
        # [index] or [last()] or [last()-index]
        if signature == "-":
            # [index]
            index = int(predicate[0]) - 1
            if index == -1:
                raise SyntaxError(
                    "indices in path predicates are 1-based, not 0-based")
            if index < 0:
                raise SyntaxError("path index >= 1 expected")
        elif predicate[0] != "last":
            raise SyntaxError("unsupported function")
        elif signature == "-()-":
            try:
                index = int(predicate[2]) - 1
            except ValueError:
                raise SyntaxError("unsupported expression")
        else:
            index = -1

        def select(result):
            for elem in result:
                parent = elem.getparent()
                if parent is None:
                    continue
                try:
                    # FIXME: what if the selector is "*" ?
                    siblings = list(parent.iterchildren(elem.tag))
                    if siblings[index] is elem:
                        yield elem
                except IndexError:
                    pass
        return select

    raise SyntaxError("invalid predicate")
|
||||
|
||||
# Dispatch table: maps the operator part of a token to the factory that
# builds the matching selector.  Plain names carry an empty operator.
ops = dict([
    ("", prepare_child),
    ("*", prepare_star),
    (".", prepare_self),
    ("..", prepare_parent),
    ("//", prepare_descendant),
    ("[", prepare_predicate),
])
|
||||
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
# Compiled selectors, keyed by path and namespace mapping.
_cache = {}


def _build_path_iterator(path, namespaces):
    """Compile *path* into a list of selector callables, with caching.

    Raises ValueError for an ambiguous default namespace and SyntaxError for
    absolute, empty or otherwise invalid paths.
    """
    if path[-1:] == "/":
        path += "*"  # implicit all (FIXME: keep this?)

    cache_key = (path,)
    if namespaces:
        # lxml originally used None for the default namespace but ElementTree uses the
        # more convenient (all-strings-dict) empty string, so we support both here,
        # preferring the more convenient '', as long as they aren't ambiguous.
        if None in namespaces:
            if '' in namespaces and namespaces[None] != namespaces['']:
                raise ValueError("Ambiguous default namespace provided: %r versus %r" % (
                    namespaces[None], namespaces['']))
            # None cannot be sorted against strings, so it is keyed separately.
            named_items = sorted(
                item for item in namespaces.items() if item[0] is not None)
            cache_key += (namespaces[None],) + tuple(named_items)
        else:
            cache_key += tuple(sorted(namespaces.items()))

    cached = _cache.get(cache_key)
    if cached is not None:
        return cached
    if len(_cache) > 100:
        # Crude size bound: drop everything once the cache grows too large.
        _cache.clear()

    if path[:1] == "/":
        raise SyntaxError("cannot use absolute path on element")

    stream = iter(xpath_tokenizer(path, namespaces))
    _next = getattr(stream, "next", None)
    if _next is None:
        # Python 3
        _next = stream.__next__

    try:
        token = _next()
    except StopIteration:
        raise SyntaxError("empty path expression")

    selector = []
    while True:
        try:
            # A factory may pull further tokens; running dry there is an error.
            selector.append(ops[token[0]](_next, token))
        except StopIteration:
            raise SyntaxError("invalid path")
        try:
            token = _next()
            if token[0] == "/":
                token = _next()
        except StopIteration:
            break

    _cache[cache_key] = selector
    return selector
|
||||
|
||||
|
||||
##
|
||||
# Iterate over the matching nodes
|
||||
|
||||
def iterfind(elem, path, namespaces=None):
    """Return an iterator over the nodes matching *path*, starting at *elem*."""
    matches = iter((elem,))
    # Each compiled step consumes the matches of the previous one.
    for step in _build_path_iterator(path, namespaces):
        matches = step(matches)
    return matches
|
||||
|
||||
|
||||
##
|
||||
# Find first matching object.
|
||||
|
||||
def find(elem, path, namespaces=None):
    """Return the first node matching *path*, or None if there is none."""
    return next(iterfind(elem, path, namespaces), None)
|
||||
|
||||
|
||||
##
|
||||
# Find all matching objects.
|
||||
|
||||
def findall(elem, path, namespaces=None):
    """Return a list of all nodes matching *path*."""
    matches = iterfind(elem, path, namespaces)
    return list(matches)
|
||||
|
||||
|
||||
##
|
||||
# Find text for first matching object.
|
||||
|
||||
def findtext(elem, path, default=None, namespaces=None):
    """Return the text of the first node matching *path*.

    Returns *default* when nothing matches, and ``''`` when the matching
    node has no text.
    """
    match = find(elem, path, namespaces)
    return default if match is None else (match.text or '')
|
||||
1819
.venv/lib/python3.7/site-packages/lxml/apihelpers.pxi
Normal file
1819
.venv/lib/python3.7/site-packages/lxml/apihelpers.pxi
Normal file
File diff suppressed because it is too large
Load Diff
Binary file not shown.
233
.venv/lib/python3.7/site-packages/lxml/builder.py
Normal file
233
.venv/lib/python3.7/site-packages/lxml/builder.py
Normal file
@@ -0,0 +1,233 @@
|
||||
# cython: language_level=2
|
||||
|
||||
#
|
||||
# Element generator factory by Fredrik Lundh.
|
||||
#
|
||||
# Source:
|
||||
# http://online.effbot.org/2006_11_01_archive.htm#et-builder
|
||||
# http://effbot.python-hosting.com/file/stuff/sandbox/elementlib/builder.py
|
||||
#
|
||||
# --------------------------------------------------------------------
|
||||
# The ElementTree toolkit is
|
||||
#
|
||||
# Copyright (c) 1999-2004 by Fredrik Lundh
|
||||
#
|
||||
# By obtaining, using, and/or copying this software and/or its
|
||||
# associated documentation, you agree that you have read, understood,
|
||||
# and will comply with the following terms and conditions:
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its associated documentation for any purpose and without fee is
|
||||
# hereby granted, provided that the above copyright notice appears in
|
||||
# all copies, and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of
|
||||
# Secret Labs AB or the author not be used in advertising or publicity
|
||||
# pertaining to distribution of the software without specific, written
|
||||
# prior permission.
|
||||
#
|
||||
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
||||
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
||||
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
||||
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
||||
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
# OF THIS SOFTWARE.
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
"""
|
||||
The ``E`` Element factory for generating XML documents.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import lxml.etree as ET
|
||||
_QName = ET.QName
|
||||
|
||||
from functools import partial
|
||||
|
||||
try:
|
||||
basestring
|
||||
except NameError:
|
||||
basestring = str
|
||||
|
||||
try:
|
||||
unicode
|
||||
except NameError:
|
||||
unicode = str
|
||||
|
||||
|
||||
class ElementMaker(object):
    """Element generator factory.

    Unlike the ordinary Element factory, the E factory allows you to pass in
    more than just a tag and some optional attributes; you can also pass in
    text and other elements.  The text is added as either text or tail
    attributes, and elements are inserted at the right spot.  Some small
    examples::

        >>> from lxml import etree as ET
        >>> from lxml.builder import E

        >>> ET.tostring(E("tag"))
        '<tag/>'
        >>> ET.tostring(E("tag", "text"))
        '<tag>text</tag>'
        >>> ET.tostring(E("tag", "text", key="value"))
        '<tag key="value">text</tag>'
        >>> ET.tostring(E("tag", E("subtag", "text"), "tail"))
        '<tag><subtag>text</subtag>tail</tag>'

    For simple tags, the factory also allows you to write ``E.tag(...)`` instead
    of ``E('tag', ...)``::

        >>> ET.tostring(E.tag())
        '<tag/>'
        >>> ET.tostring(E.tag("text"))
        '<tag>text</tag>'
        >>> ET.tostring(E.tag(E.subtag("text"), "tail"))
        '<tag><subtag>text</subtag>tail</tag>'

    Here's a somewhat larger example; this shows how to generate HTML
    documents, using a mix of prepared factory functions for inline elements,
    nested ``E.tag`` calls, and embedded XHTML fragments::

        # some common inline elements
        A = E.a
        I = E.i
        B = E.b

        def CLASS(v):
            # helper function, 'class' is a reserved word
            return {'class': v}

        page = (
            E.html(
                E.head(
                    E.title("This is a sample document")
                ),
                E.body(
                    E.h1("Hello!", CLASS("title")),
                    E.p("This is a paragraph with ", B("bold"), " text in it!"),
                    E.p("This is another paragraph, with a ",
                        A("link", href="http://www.python.org"), "."),
                    E.p("Here are some reserved characters: <spam&egg>."),
                    ET.XML("<p>And finally, here is an embedded XHTML fragment.</p>"),
                )
            )
        )

        print(ET.tostring(page))

    Here's a prettyprinted version of the output from the above script::

        <html>
          <head>
            <title>This is a sample document</title>
          </head>
          <body>
            <h1 class="title">Hello!</h1>
            <p>This is a paragraph with <b>bold</b> text in it!</p>
            <p>This is another paragraph, with a <a href="http://www.python.org">link</a>.</p>
            <p>Here are some reserved characters: &lt;spam&amp;egg&gt;.</p>
            <p>And finally, here is an embedded XHTML fragment.</p>
          </body>
        </html>

    For namespace support, you can pass a namespace map (``nsmap``)
    and/or a specific target ``namespace`` to the ElementMaker class::

        >>> E = ElementMaker(namespace="http://my.ns/")
        >>> print(ET.tostring( E.test ))
        <test xmlns="http://my.ns/"/>

        >>> E = ElementMaker(namespace="http://my.ns/", nsmap={'p':'http://my.ns/'})
        >>> print(ET.tostring( E.test ))
        <p:test xmlns:p="http://my.ns/"/>
    """

    def __init__(self, typemap=None,
                 namespace=None, nsmap=None, makeelement=None):
        """Set up the factory.

        :param typemap: Optional mapping from argument type to a handler
            ``handler(elem, item)``; it is copied and then completed with
            defaults for text, CDATA and dict (attribute) arguments.
        :param namespace: Optional namespace URI applied to unqualified tags.
        :param nsmap: Optional prefix-to-URI mapping passed to the element
            factory.
        :param makeelement: Optional callable used instead of ``ET.Element``.
        """
        self._namespace = '{' + namespace + '}' if namespace is not None else None
        self._nsmap = dict(nsmap) if nsmap else None

        assert makeelement is None or callable(makeelement)
        self._makeelement = makeelement if makeelement is not None else ET.Element

        # initialize the default type map functions for this element factory
        typemap = dict(typemap) if typemap else {}

        def add_text(elem, item):
            # Text goes to the tail of the last child, or to the element's
            # own text if it has no children yet.
            try:
                last_child = elem[-1]
            except IndexError:
                elem.text = (elem.text or "") + item
            else:
                last_child.tail = (last_child.tail or "") + item

        def add_cdata(elem, cdata):
            if elem.text:
                raise ValueError("Can't add a CDATA section. Element already has some text: %r" % elem.text)
            elem.text = cdata

        if str not in typemap:
            typemap[str] = add_text
        if unicode not in typemap:
            typemap[unicode] = add_text
        if ET.CDATA not in typemap:
            typemap[ET.CDATA] = add_cdata

        def add_dict(elem, item):
            attrib = elem.attrib
            for k, v in item.items():
                if isinstance(v, basestring):
                    attrib[k] = v
                else:
                    # Non-string values are converted by their typemap handler.
                    attrib[k] = typemap[type(v)](None, v)

        if dict not in typemap:
            typemap[dict] = add_dict

        self._typemap = typemap

    def __call__(self, tag, *children, **attrib):
        """Create an element named *tag* from *children* and *attrib*.

        Each child is dispatched through the type map: callables are called
        first, elements are appended, and other values are handled by the
        handler registered for their type or one of its base types.

        :raises TypeError: If a child has no usable handler, or if a handler
            returns a value whose type has no registered handler.
        """
        typemap = self._typemap

        # We'll usually get a 'str', and the compiled type check is very fast.
        if not isinstance(tag, str) and isinstance(tag, _QName):
            # A QName is explicitly qualified, do not look at self._namespace.
            tag = tag.text
        elif self._namespace is not None and tag[0] != '{':
            tag = self._namespace + tag
        elem = self._makeelement(tag, nsmap=self._nsmap)
        if attrib:
            typemap[dict](elem, attrib)

        for item in children:
            if callable(item):
                item = item()
            t = typemap.get(type(item))
            if t is None:
                if ET.iselement(item):
                    elem.append(item)
                    continue
                for basetype in type(item).__mro__:
                    # See if the typemap knows of any of this type's bases.
                    t = typemap.get(basetype)
                    if t is not None:
                        break
                else:
                    raise TypeError("bad argument type: %s(%r)" %
                                    (type(item).__name__, item))
            v = t(elem, item)
            if v:
                # A handler may return a converted value to be added in turn.
                handler = typemap.get(type(v))
                if handler is None:
                    # Previously this surfaced as an opaque
                    # "'NoneType' object is not callable" TypeError.
                    raise TypeError("bad return type from typemap handler: %s(%r)" %
                                    (type(v).__name__, v))
                handler(elem, v)

        return elem

    def __getattr__(self, tag):
        """Return a factory for *tag*, so ``E.tag(...)`` equals ``E('tag', ...)``."""
        return partial(self, tag)
|
||||
|
||||
|
||||
# create factory object
|
||||
E = ElementMaker()
|
||||
580
.venv/lib/python3.7/site-packages/lxml/classlookup.pxi
Normal file
580
.venv/lib/python3.7/site-packages/lxml/classlookup.pxi
Normal file
@@ -0,0 +1,580 @@
|
||||
# Configurable Element class lookup
|
||||
|
||||
################################################################################
|
||||
# Custom Element classes
|
||||
|
||||
cdef public class ElementBase(_Element) [ type LxmlElementBaseType,
                                         object LxmlElementBase ]:
    u"""ElementBase(*children, attrib=None, nsmap=None, **_extra)

    The public Element class.  All custom Element classes must inherit
    from this one.  To create an Element, use the `Element()` factory.

    BIG FAT WARNING: Subclasses *must not* override __init__ or
    __new__ as it is absolutely undefined when these objects will be
    created or destroyed.  All persistent state of Elements must be
    stored in the underlying XML.  If you really need to initialize
    the object after creation, you can implement an ``_init(self)``
    method that will be called directly after object creation.

    Subclasses of this class can be instantiated to create a new
    Element.  By default, the tag name will be the class name and the
    namespace will be empty.  You can modify this with the following
    class attributes:

    * TAG - the tag name, possibly containing a namespace in Clark
      notation

    * NAMESPACE - the default namespace URI, unless provided as part
      of the TAG attribute.

    * HTML - flag if the class is an HTML tag, as opposed to an XML
      tag.  This only applies to un-namespaced tags and defaults to
      false (i.e. XML).

    * PARSER - the parser that provides the configuration for the
      newly created document.  Providing an HTML parser here will
      default to creating an HTML element.

    In user code, the latter three are commonly inherited in class
    hierarchies that implement a common namespace.
    """
    def __init__(self, *children, attrib=None, nsmap=None, **_extra):
        u"""ElementBase(*children, attrib=None, nsmap=None, **_extra)
        """
        cdef bint is_html = 0
        cdef _BaseParser parser
        cdef _Element last_child
        # don't use normal attribute access as it might be overridden
        raw_getattr = object.__getattribute__

        # Namespace: NAMESPACE class attribute, overridden by a namespace in TAG.
        try:
            namespace = _utf8(raw_getattr(self, 'NAMESPACE'))
        except AttributeError:
            namespace = None
        try:
            ns, tag = _getNsTag(raw_getattr(self, 'TAG'))
            if ns is not None:
                namespace = ns
        except AttributeError:
            # No TAG attribute: fall back to the (unqualified) class name.
            tag = _utf8(raw_getattr(raw_getattr(self, '__class__'), '__name__'))
            if b'.' in tag:
                tag = tag.split(b'.')[-1]

        # Parser: PARSER class attribute, else the parser of the first Element child.
        try:
            parser = raw_getattr(self, 'PARSER')
        except AttributeError:
            parser = None
            for child in children:
                if isinstance(child, _Element):
                    parser = (<_Element>child)._doc._parser
                    break
        if isinstance(parser, HTMLParser):
            is_html = 1
        if namespace is None:
            try:
                is_html = raw_getattr(self, 'HTML')
            except AttributeError:
                pass

        _initNewElement(self, is_html, tag, namespace, parser,
                        attrib, nsmap, _extra)

        # Strings become text (before the first child) or tail text (after a
        # child); Elements and ElementBase subclasses are appended as children.
        last_child = None
        for child in children:
            if _isString(child):
                if last_child is None:
                    _setNodeText(self._c_node,
                                 (_collectText(self._c_node.children) or '') + child)
                else:
                    _setTailText(last_child._c_node,
                                 (_collectText(last_child._c_node.next) or '') + child)
            elif isinstance(child, _Element):
                last_child = child
                _appendChild(self, last_child)
            elif isinstance(child, type) and issubclass(child, ElementBase):
                last_child = child()
                _appendChild(self, last_child)
            else:
                raise TypeError(f"Invalid child type: {type(child)!r}")
|
||||
|
||||
cdef class CommentBase(_Comment):
    u"""Base class that every custom Comment class has to derive from.

    Use the ``Comment()`` factory to create an XML Comment instance.

    Subclasses *must not* override __init__ or __new__, because it is
    absolutely undefined when these objects will be created or
    destroyed.  Persistent state of Comments has to live in the
    underlying XML.  For initialisation after creation, implement an
    ``_init(self)`` method; it is called after object creation.
    """
    def __init__(self, text):
        # mirrors the Comment() factory
        cdef _Document new_doc
        cdef xmlDoc* c_new_doc
        text = b'' if text is None else _utf8(text)
        # the comment lives in a fresh document of its own
        c_new_doc = _newXMLDoc()
        new_doc = _documentFactory(c_new_doc, None)
        self._c_node = _createComment(c_new_doc, _xcstr(text))
        if self._c_node is NULL:
            raise MemoryError()
        tree.xmlAddChild(<xmlNode*>c_new_doc, self._c_node)
        _registerProxy(self, new_doc, self._c_node)
        self._init()
|
||||
|
||||
cdef class PIBase(_ProcessingInstruction):
    u"""Base class that every custom Processing Instruction class has to
    derive from.

    Use the ``PI()`` factory to create an XML ProcessingInstruction
    instance.

    Subclasses *must not* override __init__ or __new__, because it is
    absolutely undefined when these objects will be created or
    destroyed.  Persistent state of PIs has to live in the underlying
    XML.  For initialisation after creation, implement an
    ``_init(self)`` method; it is called after object creation.
    """
    def __init__(self, target, text=None):
        # mirrors the PI() factory
        cdef _Document new_doc
        cdef xmlDoc* c_new_doc
        target = _utf8(target)
        text = b'' if text is None else _utf8(text)
        # the PI lives in a fresh document of its own
        c_new_doc = _newXMLDoc()
        new_doc = _documentFactory(c_new_doc, None)
        self._c_node = _createPI(c_new_doc, _xcstr(target), _xcstr(text))
        if self._c_node is NULL:
            raise MemoryError()
        tree.xmlAddChild(<xmlNode*>c_new_doc, self._c_node)
        _registerProxy(self, new_doc, self._c_node)
        self._init()
|
||||
|
||||
cdef class EntityBase(_Entity):
    u"""All custom Entity classes must inherit from this one.

    To create an XML Entity instance, use the ``Entity()`` factory.

    Subclasses *must not* override __init__ or __new__ as it is
    absolutely undefined when these objects will be created or
    destroyed.  All persistent state of Entities must be stored in the
    underlying XML.  If you really need to initialize the object after
    creation, you can implement an ``_init(self)`` method that will be
    called after object creation.
    """
    def __init__(self, name):
        # Creates an entity reference node named 'name' in a new document.
        # Raises ValueError if 'name' is neither a valid character
        # reference ('#...') nor a valid XML name.
        cdef _Document doc
        cdef xmlDoc* c_doc
        name_utf = _utf8(name)
        c_name = _xcstr(name_utf)
        if c_name[0] == c'#':
            # character reference: validate the part after the '#'
            if not _characterReferenceIsValid(c_name + 1):
                raise ValueError, f"Invalid character reference: '{name}'"
        elif not _xmlNameIsValid(c_name):
            raise ValueError, f"Invalid entity reference: '{name}'"
        c_doc = _newXMLDoc()
        doc = _documentFactory(c_doc, None)
        self._c_node = _createEntity(c_doc, c_name)
        if self._c_node is NULL:
            raise MemoryError()
        tree.xmlAddChild(<xmlNode*>c_doc, self._c_node)
        _registerProxy(self, doc, self._c_node)
        self._init()
|
||||
|
||||
|
||||
cdef int _validateNodeClass(xmlNode* c_node, cls) except -1:
    u"""Check that 'cls' is an acceptable proxy class for 'c_node'.

    The class must be a subclass of the base class that corresponds to
    the node type: ElementBase, CommentBase, EntityBase or PIBase.

    Returns 0 on success and raises TypeError otherwise.
    """
    if c_node.type == tree.XML_ELEMENT_NODE:
        expected = ElementBase
    elif c_node.type == tree.XML_COMMENT_NODE:
        expected = CommentBase
    elif c_node.type == tree.XML_ENTITY_REF_NODE:
        expected = EntityBase
    elif c_node.type == tree.XML_PI_NODE:
        expected = PIBase
    else:
        assert False, f"Unknown node type: {c_node.type}"

    if not (isinstance(cls, type) and issubclass(cls, expected)):
        # Report the required base class and the offending class itself.
        # Formatting type(expected) / type(cls) would only show the
        # metaclass ("<class 'type'>") whenever a class is involved.
        # For non-class results, the type of the object is reported.
        got = cls if isinstance(cls, type) else type(cls)
        raise TypeError(
            f"result of class lookup must be subclass of {expected}, got {got}")
    return 0
|
||||
|
||||
|
||||
################################################################################
|
||||
# Element class lookup
|
||||
|
||||
# C signature shared by all lookup functions in this file: they receive the
# lookup state object, the document and the C node, and return a Python object
# (the class to use for the node).
ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*)
|
||||
|
||||
# class to store element class lookup functions
|
||||
cdef public class ElementClassLookup [ type LxmlElementClassLookupType,
                                       object LxmlElementClassLookup ]:
    u"""ElementClassLookup(self)
    Superclass of Element class lookups.
    """
    # C function pointer implementing the lookup.  The subclasses in this
    # file assign it in their __cinit__; users of the pointer check it
    # against NULL and fall back to the default lookup in that case.
    cdef _element_class_lookup_function _lookup_function
|
||||
|
||||
|
||||
cdef public class FallbackElementClassLookup(ElementClassLookup) \
         [ type LxmlFallbackElementClassLookupType,
           object LxmlFallbackElementClassLookup ]:
    u"""FallbackElementClassLookup(self, fallback=None)

    Superclass of Element class lookups with additional fallback.

    The optional ``fallback`` is another ElementClassLookup.  Without
    one, the default class lookup function is used as fallback.
    """
    # the fallback lookup object, passed as state to _fallback_function
    cdef readonly ElementClassLookup fallback
    # function pointer that is called when this lookup does not decide
    cdef _element_class_lookup_function _fallback_function
    def __cinit__(self):
        # fall back to default lookup
        self._fallback_function = _lookupDefaultElementClass

    def __init__(self, ElementClassLookup fallback=None):
        if fallback is not None:
            self._setFallback(fallback)
        else:
            self._fallback_function = _lookupDefaultElementClass

    cdef void _setFallback(self, ElementClassLookup lookup):
        u"""Sets the fallback scheme for this lookup method.

        Stores 'lookup' and its lookup function.  If that function is
        NULL, the default class lookup function is used instead.
        """
        self.fallback = lookup
        self._fallback_function = lookup._lookup_function
        if self._fallback_function is NULL:
            self._fallback_function = _lookupDefaultElementClass

    def set_fallback(self, ElementClassLookup lookup not None):
        u"""set_fallback(self, lookup)

        Sets the fallback scheme for this lookup method.
        """
        self._setFallback(lookup)
|
||||
|
||||
cdef inline object _callLookupFallback(FallbackElementClassLookup lookup,
                                       _Document doc, xmlNode* c_node):
    # Delegate to the fallback function of 'lookup', passing the fallback
    # lookup object (possibly None) as its state.
    return lookup._fallback_function(lookup.fallback, doc, c_node)
|
||||
|
||||
|
||||
################################################################################
|
||||
# default lookup scheme
|
||||
|
||||
cdef class ElementDefaultClassLookup(ElementClassLookup):
    u"""ElementDefaultClassLookup(self, element=None, comment=None, pi=None, entity=None)
    Element class lookup scheme that always returns the default Element
    class.

    The keyword arguments ``element``, ``comment``, ``pi`` and ``entity``
    accept the respective Element classes.
    """
    cdef readonly object element_class
    cdef readonly object comment_class
    cdef readonly object pi_class
    cdef readonly object entity_class
    def __cinit__(self):
        self._lookup_function = _lookupDefaultElementClass

    def __init__(self, element=None, comment=None, pi=None, entity=None):
        # Each argument is validated and stored in turn: element, comment,
        # entity, then pi.  None selects the built-in default class.
        if element is not None and not issubclass(element, ElementBase):
            raise TypeError(u"element class must be subclass of ElementBase")
        self.element_class = _Element if element is None else element

        if comment is not None and not issubclass(comment, CommentBase):
            raise TypeError(u"comment class must be subclass of CommentBase")
        self.comment_class = _Comment if comment is None else comment

        if entity is not None and not issubclass(entity, EntityBase):
            raise TypeError(u"Entity class must be subclass of EntityBase")
        self.entity_class = _Entity if entity is None else entity

        if pi is not None and not issubclass(pi, PIBase):
            raise TypeError(u"PI class must be subclass of PIBase")
        # None is stored as-is here; the lookup function treats a missing
        # PI class as a special case
        self.pi_class = pi
|
||||
|
||||
cdef object _lookupDefaultElementClass(state, _Document _doc, xmlNode* c_node):
    u"""Trivial class lookup function that always returns the default class.

    'state' is either None or an ElementDefaultClassLookup that provides
    the classes to return per node type.
    """
    if c_node.type == tree.XML_ELEMENT_NODE:
        return _Element if state is None else (
            <ElementDefaultClassLookup>state).element_class

    if c_node.type == tree.XML_COMMENT_NODE:
        return _Comment if state is None else (
            <ElementDefaultClassLookup>state).comment_class

    if c_node.type == tree.XML_ENTITY_REF_NODE:
        return _Entity if state is None else (
            <ElementDefaultClassLookup>state).entity_class

    if c_node.type == tree.XML_PI_NODE:
        if state is not None and (<ElementDefaultClassLookup>state).pi_class is not None:
            return (<ElementDefaultClassLookup>state).pi_class
        # special case XSLT-PI: an "xml-stylesheet" PI whose content
        # mentions "text/xsl" or "text/xml"
        if c_node.name is not NULL and c_node.content is not NULL:
            if tree.xmlStrcmp(c_node.name, <unsigned char*>"xml-stylesheet") == 0:
                if tree.xmlStrstr(c_node.content, <unsigned char*>"text/xsl") is not NULL:
                    return _XSLTProcessingInstruction
                if tree.xmlStrstr(c_node.content, <unsigned char*>"text/xml") is not NULL:
                    return _XSLTProcessingInstruction
        return _ProcessingInstruction

    assert False, f"Unknown node type: {c_node.type}"
|
||||
|
||||
|
||||
################################################################################
|
||||
# attribute based lookup scheme
|
||||
|
||||
cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup):
    u"""AttributeBasedElementClassLookup(self, attribute_name, class_mapping, fallback=None)
    Checks an attribute of an Element and looks up the value in a
    class dictionary.

    Arguments:
      - attribute name - '{ns}name' style string
      - class mapping  - Python dict mapping attribute values to Element classes
      - fallback       - optional fallback lookup mechanism

    A None key in the class mapping will be checked if the attribute is
    missing.
    """
    # private copy of the user provided mapping
    cdef object _class_mapping
    # (namespace, name) tuple as returned by _getNsTag()
    cdef tuple _pytag
    # C string views of the namespace (or NULL) and the attribute name.
    # NOTE(review): presumably these point into the objects held by _pytag,
    # which would be what keeps them valid - confirm against _xcstr().
    cdef const_xmlChar* _c_ns
    cdef const_xmlChar* _c_name
    def __cinit__(self):
        self._lookup_function = _attribute_class_lookup

    def __init__(self, attribute_name, class_mapping,
                 ElementClassLookup fallback=None):
        self._pytag = _getNsTag(attribute_name)
        ns, name = self._pytag
        if ns is None:
            self._c_ns = NULL
        else:
            self._c_ns = _xcstr(ns)
        self._c_name = _xcstr(name)
        # copy the mapping so that later changes by the caller have no effect
        self._class_mapping = dict(class_mapping)

        FallbackElementClassLookup.__init__(self, fallback)
|
||||
|
||||
cdef object _attribute_class_lookup(state, _Document doc, xmlNode* c_node):
    # Lookup function of AttributeBasedElementClassLookup ('state').
    # For element nodes, the value of the configured attribute is looked up
    # in the class mapping; a hit is validated and returned.  Everything
    # else is delegated to the fallback.
    cdef AttributeBasedElementClassLookup lookup
    cdef python.PyObject* dict_result

    lookup = <AttributeBasedElementClassLookup>state
    if c_node.type == tree.XML_ELEMENT_NODE:
        value = _attributeValueFromNsName(
            c_node, lookup._c_ns, lookup._c_name)
        # NULL result means: no entry for this value in the mapping
        dict_result = python.PyDict_GetItem(lookup._class_mapping, value)
        if dict_result is not NULL:
            cls = <object>dict_result
            _validateNodeClass(c_node, cls)
            return cls
    return _callLookupFallback(lookup, doc, c_node)
|
||||
|
||||
|
||||
################################################################################
|
||||
# per-parser lookup scheme
|
||||
|
||||
cdef class ParserBasedElementClassLookup(FallbackElementClassLookup):
    u"""ParserBasedElementClassLookup(self, fallback=None)
    Element class lookup based on the XML parser.

    Delegates to the class lookup that is configured on the parser of
    the document, and to the fallback if the parser has none.
    """
    def __cinit__(self):
        self._lookup_function = _parser_class_lookup
|
||||
|
||||
cdef object _parser_class_lookup(state, _Document doc, xmlNode* c_node):
    # Lookup function of ParserBasedElementClassLookup ('state').
    # Prefer the class lookup configured on the document's parser, if any;
    # otherwise use the fallback of 'state'.
    if doc._parser._class_lookup is not None:
        return doc._parser._class_lookup._lookup_function(
            doc._parser._class_lookup, doc, c_node)
    return _callLookupFallback(<FallbackElementClassLookup>state, doc, c_node)
|
||||
|
||||
|
||||
################################################################################
|
||||
# custom class lookup based on node type, namespace, name
|
||||
|
||||
cdef class CustomElementClassLookup(FallbackElementClassLookup):
    u"""CustomElementClassLookup(self, fallback=None)
    Element class lookup based on a subclass method.

    You can inherit from this class and override the method::

        lookup(self, type, doc, namespace, name)

    to lookup the element class for a node. Arguments of the method:
    * type:      one of 'element', 'comment', 'PI', 'entity'
    * doc:       document that the node is in
    * namespace: namespace URI of the node (or None for comments/PIs/entities)
    * name:      name of the element/entity, None for comments, target for PIs

    If you return None from this method, the fallback will be called.
    """
    def __cinit__(self):
        self._lookup_function = _custom_class_lookup

    def lookup(self, type, doc, namespace, name):
        u"""lookup(self, type, doc, namespace, name)

        Default implementation: returns None, i.e. always delegates to
        the fallback.  Override this in subclasses.
        """
        return None
|
||||
|
||||
cdef object _custom_class_lookup(state, _Document doc, xmlNode* c_node):
    # Lookup function of CustomElementClassLookup ('state'): calls the
    # user's lookup() method with the node kind, document, namespace and
    # name, and uses the fallback if that returns None.
    cdef CustomElementClassLookup lookup = <CustomElementClassLookup>state

    # map the C node type to the string passed to lookup(); anything
    # that is not a comment, PI or entity is reported as "element"
    if c_node.type == tree.XML_COMMENT_NODE:
        element_type = u"comment"
    elif c_node.type == tree.XML_PI_NODE:
        element_type = u"PI"
    elif c_node.type == tree.XML_ENTITY_REF_NODE:
        element_type = u"entity"
    else:
        element_type = u"element"

    name = None if c_node.name is NULL else funicode(c_node.name)

    c_href = tree._getNs(c_node)
    ns = None if c_href is NULL else funicode(c_href)

    cls = lookup.lookup(element_type, doc, ns, name)
    if cls is None:
        return _callLookupFallback(lookup, doc, c_node)
    _validateNodeClass(c_node, cls)
    return cls
|
||||
|
||||
|
||||
################################################################################
|
||||
# read-only tree based class lookup
|
||||
|
||||
cdef class PythonElementClassLookup(FallbackElementClassLookup):
    u"""PythonElementClassLookup(self, fallback=None)
    Element class lookup based on a subclass method.

    This class lookup scheme allows access to the entire XML tree in
    read-only mode.  To use it, re-implement the ``lookup(self, doc,
    root)`` method in a subclass::

        from lxml import etree, pyclasslookup

        class MyElementClass(etree.ElementBase):
            honkey = True

        class MyLookup(pyclasslookup.PythonElementClassLookup):
            def lookup(self, doc, root):
                if root.tag == "sometag":
                    return MyElementClass
                else:
                    for child in root:
                        if child.tag == "someothertag":
                            return MyElementClass
                # delegate to default
                return None

    If you return None from this method, the fallback will be called.

    The first argument is the opaque document instance that contains
    the Element.  The second argument is a lightweight Element proxy
    implementation that is only valid during the lookup.  Do not try
    to keep a reference to it.  Once the lookup is done, the proxy
    will be invalid.

    Also, you cannot wrap such a read-only Element in an ElementTree,
    and you must take care not to keep a reference to them outside of
    the `lookup()` method.

    Note that the API of the Element objects is not complete.  It is
    purely read-only and does not support all features of the normal
    `lxml.etree` API (such as XPath, extended slicing or some
    iteration methods).

    See https://lxml.de/element_classes.html
    """
    def __cinit__(self):
        self._lookup_function = _python_class_lookup

    def lookup(self, doc, element):
        u"""lookup(self, doc, element)

        Override this method to implement your own lookup scheme.

        This default implementation returns None, which delegates the
        decision to the fallback.
        """
        return None
|
||||
|
||||
cdef object _python_class_lookup(state, _Document doc, tree.xmlNode* c_node):
    u"""Lookup function of PythonElementClassLookup ('state').

    Calls the user provided ``lookup(doc, proxy)`` method with a
    read-only proxy for 'c_node'.  A non-None result is validated
    against the node type and returned; None delegates to the fallback.

    Exceptions raised by ``lookup()`` or by the validation propagate.
    """
    cdef PythonElementClassLookup lookup
    cdef _ReadOnlyProxy proxy
    lookup = <PythonElementClassLookup>state

    proxy = _newReadOnlyProxy(None, c_node)
    try:
        cls = lookup.lookup(doc, proxy)
    finally:
        # The proxy is only meant to be valid during the lookup.  Release
        # it also when the user code raises, so that a reference kept by
        # the caller (e.g. in a traceback) does not stay attached to the
        # C node.
        _freeReadOnlyProxies(proxy)

    if cls is not None:
        _validateNodeClass(c_node, cls)
        return cls
    return _callLookupFallback(lookup, doc, c_node)
|
||||
|
||||
################################################################################
|
||||
# Global setup
|
||||
|
||||
# Module-wide lookup configuration: the active lookup function and the state
# object that goes with it.  Both are assigned by
# _setElementClassLookupFunction().
cdef _element_class_lookup_function LOOKUP_ELEMENT_CLASS
cdef object ELEMENT_CLASS_LOOKUP_STATE
|
||||
|
||||
cdef void _setElementClassLookupFunction(
    _element_class_lookup_function function, object state):
    # Install 'function' and its 'state' as the global element class
    # lookup.  A NULL function selects the default lookup and its function.
    global LOOKUP_ELEMENT_CLASS, ELEMENT_CLASS_LOOKUP_STATE
    if function is NULL:
        ELEMENT_CLASS_LOOKUP_STATE = DEFAULT_ELEMENT_CLASS_LOOKUP
        LOOKUP_ELEMENT_CLASS = DEFAULT_ELEMENT_CLASS_LOOKUP._lookup_function
        return

    ELEMENT_CLASS_LOOKUP_STATE = state
    LOOKUP_ELEMENT_CLASS = function
|
||||
|
||||
def set_element_class_lookup(ElementClassLookup lookup = None):
    u"""set_element_class_lookup(lookup = None)

    Set the global element class lookup method.

    This is the main entry point for looking up element implementations.
    The standard implementation uses the :class:`ParserBasedElementClassLookup`
    to delegate to different lookup schemes for each parser.

    Passing None (or a lookup without a lookup function) restores the
    default lookup.

    .. warning::

        This should only be changed by applications, not by library packages.
        In most cases, parser specific lookups should be preferred,
        which can be configured via
        :meth:`~lxml.etree.XMLParser.set_element_class_lookup`
        (and the same for HTML parsers).

        Globally replacing the element class lookup by something other than a
        :class:`ParserBasedElementClassLookup` will prevent parser specific lookup
        schemes from working. Several tools rely on parser specific lookups,
        including :mod:`lxml.html` and :mod:`lxml.objectify`.
    """
    if lookup is not None and lookup._lookup_function is not NULL:
        _setElementClassLookupFunction(lookup._lookup_function, lookup)
    else:
        # reset to the default lookup
        _setElementClassLookupFunction(NULL, None)
|
||||
|
||||
# default setup: parser delegation
|
||||
# The default lookup is a parser based lookup without an explicit fallback.
# It is installed as the global lookup when this code is executed.
cdef ParserBasedElementClassLookup DEFAULT_ELEMENT_CLASS_LOOKUP
DEFAULT_ELEMENT_CLASS_LOOKUP = ParserBasedElementClassLookup()

set_element_class_lookup(DEFAULT_ELEMENT_CLASS_LOOKUP)
|
||||
215
.venv/lib/python3.7/site-packages/lxml/cleanup.pxi
Normal file
215
.venv/lib/python3.7/site-packages/lxml/cleanup.pxi
Normal file
@@ -0,0 +1,215 @@
|
||||
# functions for tree cleanup and removing elements from subtrees
|
||||
|
||||
def cleanup_namespaces(tree_or_element, top_nsmap=None, keep_ns_prefixes=None):
    u"""cleanup_namespaces(tree_or_element, top_nsmap=None, keep_ns_prefixes=None)

    Remove all namespace declarations from a subtree that are not used
    by any of the elements or attributes in that tree.

    'top_nsmap', if given, is a mapping from prefixes to namespace URIs.
    These namespaces get declared on the top element of the subtree
    before the cleanup runs, which allows moving namespace declarations
    to the top of the tree.

    'keep_ns_prefixes', if given, is a list of prefixes that the cleanup
    must not remove.
    """
    element = _rootNodeOrRaise(tree_or_element)
    c_element = element._c_node

    if top_nsmap:
        doc = element._doc
        # declare namespaces from nsmap, then apply them to the subtree
        _setNodeNamespaces(c_element, doc, None, top_nsmap)
        moveNodeToDocument(doc, c_element.doc, c_element)

    if keep_ns_prefixes:
        # the prefixes are compared in their UTF-8 encoded form
        keep_ns_prefixes = {_utf8(prefix) for prefix in keep_ns_prefixes}
    else:
        keep_ns_prefixes = None

    _removeUnusedNamespaceDeclarations(c_element, keep_ns_prefixes)
|
||||
|
||||
|
||||
def strip_attributes(tree_or_element, *attribute_names):
    u"""strip_attributes(tree_or_element, *attribute_names)

    Remove every attribute that matches one of the given attribute names
    from an Element (or ElementTree) and from all of its descendants.

    Attribute names can contain wildcards as in `_Element.iter`.

    Example usage::

        strip_attributes(root_element,
                         'simpleattr',
                         '{http://some/ns}attrname',
                         '{http://other/ns}*')
    """
    cdef _MultiTagMatcher matcher
    # resolve (and validate) the root first, even if there is nothing to do
    element = _rootNodeOrRaise(tree_or_element)
    if attribute_names:
        matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, attribute_names)
        matcher.cacheTags(element._doc)
        if not matcher.rejectsAllAttributes():
            _strip_attributes(element._c_node, matcher)
|
||||
|
||||
|
||||
cdef _strip_attributes(xmlNode* c_node, _MultiTagMatcher matcher):
    # Walk the nodes starting at 'c_node' and remove all attributes of
    # element nodes that 'matcher' matches.
    cdef xmlAttr* c_attr
    cdef xmlAttr* c_next_attr
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        c_attr = c_node.properties
        while c_attr is not NULL:
            # read the successor before the attribute may get removed
            c_next_attr = c_attr.next
            if matcher.matchesAttribute(c_attr):
                tree.xmlRemoveProp(c_attr)
            c_attr = c_next_attr
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
|
||||
|
||||
|
||||
def strip_elements(tree_or_element, *tag_names, bint with_tail=True):
    u"""strip_elements(tree_or_element, *tag_names, with_tail=True)

    Delete all elements with the provided tag names from a tree or
    subtree.  This will remove the elements and their entire subtree,
    including all their attributes, text content and descendants.  It
    will also remove the tail text of the element unless you
    explicitly set the ``with_tail`` keyword argument option to False.

    Tag names can contain wildcards as in `_Element.iter`.

    Note that this will not delete the element (or ElementTree root
    element) that you passed even if it matches.  It will only treat
    its descendants.  If you want to include the root element, check
    its tag name directly before even calling this function.

    Example usage::

        strip_elements(some_element,
            'simpletagname',             # non-namespaced tag
            '{http://some/ns}tagname',   # namespaced tag
            '{http://some/other/ns}*',   # any tag from a namespace
            lxml.etree.Comment           # comments
            )
    """
    cdef _MultiTagMatcher matcher
    doc = _documentOrRaise(tree_or_element)
    element = _rootNodeOrRaise(tree_or_element)
    if not tag_names:
        return

    matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag_names)
    matcher.cacheTags(doc)
    if matcher.rejectsAll():
        return

    if isinstance(tree_or_element, _ElementTree):
        # include PIs and comments next to the root node
        if matcher.matchesType(tree.XML_COMMENT_NODE):
            _removeSiblings(element._c_node, tree.XML_COMMENT_NODE, with_tail)
        if matcher.matchesType(tree.XML_PI_NODE):
            _removeSiblings(element._c_node, tree.XML_PI_NODE, with_tail)
    _strip_elements(doc, element._c_node, matcher, with_tail)
|
||||
|
||||
cdef _strip_elements(_Document doc, xmlNode* c_node, _MultiTagMatcher matcher,
                     bint with_tail):
    # Walk the nodes starting at 'c_node' and remove every child node that
    # 'matcher' matches.  'with_tail' selects whether the tail text of a
    # removed node is removed along with it.
    cdef xmlNode* c_child
    cdef xmlNode* c_next

    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        # we run through the children here to prevent any problems
        # with the tree iteration which would occur if we unlinked the
        # c_node itself
        c_child = _findChildForwards(c_node, 0)
        while c_child is not NULL:
            # determine the next sibling before the child gets unlinked
            c_next = _nextElement(c_child)
            if matcher.matches(c_child):
                if c_child.type == tree.XML_ELEMENT_NODE:
                    if not with_tail:
                        tree.xmlUnlinkNode(c_child)
                    _removeNode(doc, c_child)
                else:
                    # non-element match (e.g. comment or PI)
                    if with_tail:
                        _removeText(c_child.next)
                    tree.xmlUnlinkNode(c_child)
                    attemptDeallocation(c_child)
            c_child = c_next
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
|
||||
|
||||
|
||||
def strip_tags(tree_or_element, *tag_names):
    u"""strip_tags(tree_or_element, *tag_names)

    Delete all elements with the provided tag names from a tree or
    subtree.  This will remove the elements and their attributes, but
    *not* their text/tail content or descendants.  Instead, it will
    merge the text content and children of the element into its
    parent.

    Tag names can contain wildcards as in `_Element.iter`.

    Note that this will not delete the element (or ElementTree root
    element) that you passed even if it matches.  It will only treat
    its descendants.

    Example usage::

        strip_tags(some_element,
            'simpletagname',             # non-namespaced tag
            '{http://some/ns}tagname',   # namespaced tag
            '{http://some/other/ns}*',   # any tag from a namespace
            Comment                      # comments (including their text!)
            )
    """
    cdef _MultiTagMatcher matcher
    doc = _documentOrRaise(tree_or_element)
    element = _rootNodeOrRaise(tree_or_element)
    if not tag_names:
        return

    matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag_names)
    matcher.cacheTags(doc)
    if matcher.rejectsAll():
        return

    if isinstance(tree_or_element, _ElementTree):
        # include PIs and comments next to the root node
        if matcher.matchesType(tree.XML_COMMENT_NODE):
            _removeSiblings(element._c_node, tree.XML_COMMENT_NODE, 0)
        if matcher.matchesType(tree.XML_PI_NODE):
            _removeSiblings(element._c_node, tree.XML_PI_NODE, 0)
    _strip_tags(doc, element._c_node, matcher)
|
||||
|
||||
cdef _strip_tags(_Document doc, xmlNode* c_node, _MultiTagMatcher matcher):
    # Walk the nodes starting at 'c_node'.  Matching child elements are
    # replaced by their own children; other matching child nodes are
    # unlinked.
    cdef xmlNode* c_child
    cdef xmlNode* c_next

    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        # we run through the children here to prevent any problems
        # with the tree iteration which would occur if we unlinked the
        # c_node itself
        c_child = _findChildForwards(c_node, 0)
        while c_child is not NULL:
            if not matcher.matches(c_child):
                c_child = _nextElement(c_child)
                continue
            if c_child.type == tree.XML_ELEMENT_NODE:
                # continue with the first child of the stripped element
                # (it moves up into this level), or else its next sibling
                c_next = _findChildForwards(c_child, 0) or _nextElement(c_child)
                _replaceNodeByChildren(doc, c_child)
                if not attemptDeallocation(c_child):
                    if c_child.nsDef is not NULL:
                        # make namespaces absolute
                        moveNodeToDocument(doc, doc._c_doc, c_child)
                c_child = c_next
            else:
                c_next = _nextElement(c_child)
                tree.xmlUnlinkNode(c_child)
                attemptDeallocation(c_child)
                c_child = c_next
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
|
||||
102
.venv/lib/python3.7/site-packages/lxml/cssselect.py
Normal file
102
.venv/lib/python3.7/site-packages/lxml/cssselect.py
Normal file
@@ -0,0 +1,102 @@
|
||||
"""CSS Selectors based on XPath.
|
||||
|
||||
This module supports selecting XML/HTML tags based on CSS selectors.
|
||||
See the `CSSSelector` class for details.
|
||||
|
||||
This is a thin wrapper around cssselect 0.7 or later.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from . import etree
|
||||
try:
|
||||
import cssselect as external_cssselect
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
'cssselect does not seem to be installed. '
|
||||
'See https://pypi.org/project/cssselect/')
|
||||
|
||||
|
||||
# Re-export the exception classes of the external cssselect package under
# this module's namespace.
SelectorSyntaxError = external_cssselect.SelectorSyntaxError
ExpressionError = external_cssselect.ExpressionError
SelectorError = external_cssselect.SelectorError


__all__ = ['SelectorSyntaxError', 'ExpressionError', 'SelectorError',
           'CSSSelector']
|
||||
|
||||
|
||||
class LxmlTranslator(external_cssselect.GenericTranslator):
    """
    CSS selector to XPath translator that adds lxml-specific extensions.
    """
    def xpath_contains_function(self, xpath, function):
        """Translate the ``:contains()`` pseudo-class.

        The match is case-insensitive: both the argument and the string
        value of the node are lower-cased.  Raises ExpressionError unless
        the function has exactly one string or ident argument.
        """
        # Defined there, removed in later drafts:
        # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
        accepted_signatures = (['STRING'], ['IDENT'])
        if function.argument_types() in accepted_signatures:
            needle = function.arguments[0].value
            literal = self.xpath_literal(needle.lower())
            return xpath.add_condition(
                'contains(__lxml_internal_css:lower-case(string(.)), %s)'
                % literal)
        raise ExpressionError(
            "Expected a single string or ident for :contains(), got %r"
            % function.arguments)
|
||||
|
||||
|
||||
class LxmlHTMLTranslator(LxmlTranslator, external_cssselect.HTMLTranslator):
    """
    lxml extensions + HTML support.

    Combines the lxml-specific translator with the HTML translator of the
    external cssselect package; adds no behaviour of its own.
    """
|
||||
|
||||
|
||||
def _make_lower_case(context, s):
|
||||
return s.lower()
|
||||
|
||||
# Register the lower-case helper as an XPath extension function.  The prefix
# set here is the one used in the XPath condition that
# LxmlTranslator.xpath_contains_function() generates.
ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/')
ns.prefix = '__lxml_internal_css'
ns['lower-case'] = _make_lower_case
|
||||
|
||||
|
||||
class CSSSelector(etree.XPath):
    """A CSS selector.

    The selector is translated to XPath once, at construction time, and
    then behaves like the resulting ``etree.XPath`` object.

    Usage::

        >>> from lxml import etree, cssselect
        >>> select = cssselect.CSSSelector("a tag > child")

        >>> root = etree.XML("<a><b><c/><tag><child>TEXT</child></tag></b></a>")
        >>> [ el.tag for el in select(root) ]
        ['child']

    To use CSS namespaces, you need to pass a prefix-to-namespace
    mapping as ``namespaces`` keyword argument::

        >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
        >>> select_ns = cssselect.CSSSelector('root > rdf|Description',
        ...                                   namespaces={'rdf': rdfns})

        >>> rdf = etree.XML((
        ...     '<root xmlns:rdf="%s">'
        ...       '<rdf:Description>blah</rdf:Description>'
        ...     '</root>') % rdfns)
        >>> [(el.tag, el.text) for el in select_ns(rdf)]
        [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')]

    """

    def __init__(self, css, namespaces=None, translator='xml'):
        """Compile the CSS expression ``css``.

        ``translator`` may be ``'xml'`` (default), ``'html'`` or ``'xhtml'``
        to select one of the built-in translators.  Any other value is used
        as the translator object itself and must provide ``css_to_xpath()``.
        """
        if translator == 'xml':
            translator = LxmlTranslator()
        elif translator == 'html':
            translator = LxmlHTMLTranslator()
        elif translator == 'xhtml':
            translator = LxmlHTMLTranslator(xhtml=True)
        xpath_expression = translator.css_to_xpath(css)
        etree.XPath.__init__(self, xpath_expression, namespaces=namespaces)
        # Keep the original selector text around for __repr__().
        self.css = css

    def __repr__(self):
        class_name = self.__class__.__name__
        # id() rendered as hex without the leading '0x'.
        address = hex(abs(id(self)))[2:]
        return '<%s %s for %r>' % (class_name, address, self.css)
|
||||
91
.venv/lib/python3.7/site-packages/lxml/debug.pxi
Normal file
91
.venv/lib/python3.7/site-packages/lxml/debug.pxi
Normal file
@@ -0,0 +1,91 @@
|
||||
|
||||
@cython.final
@cython.internal
cdef class _MemDebug:
    """Debugging support for the memory allocation in libxml2.
    """
    def bytes_used(self):
        """bytes_used(self)

        Returns the total amount of memory (in bytes) currently used by libxml2.
        Note that libxml2 constrains this value to a C int, which limits
        the accuracy on 64 bit systems.
        """
        return tree.xmlMemUsed()

    def blocks_used(self):
        """blocks_used(self)

        Returns the total number of memory blocks currently allocated by libxml2.
        Note that libxml2 constrains this value to a C int, which limits
        the accuracy on 64 bit systems.
        """
        return tree.xmlMemBlocks()

    def dict_size(self):
        """dict_size(self)

        Returns the current size of the global name dictionary used by libxml2
        for the current thread.  Each thread has its own dictionary.

        Raises MemoryError if the thread dictionary cannot be obtained.
        """
        c_dict = __GLOBAL_PARSER_CONTEXT._getThreadDict(NULL)
        if c_dict is NULL:
            raise MemoryError()
        return tree.xmlDictSize(c_dict)

    def dump(self, output_file=None, byte_count=None):
        """dump(self, output_file=None, byte_count=None)

        Dumps the current memory blocks allocated by libxml2 to a file.

        The optional parameter 'output_file' specifies the file path.  It defaults
        to the file ".memorylist" in the current directory.

        The optional parameter 'byte_count' limits the number of bytes in the dump.
        Note that this parameter is ignored when lxml is compiled against a libxml2
        version before 2.7.0.

        Raises IOError if the output file cannot be opened for writing.
        """
        cdef Py_ssize_t c_count
        if output_file is None:
            output_file = b'.memorylist'
        elif isinstance(output_file, unicode):
            # str.encode() returns a new object; the result must be kept so
            # that fopen() below receives a byte string.
            output_file = output_file.encode(sys.getfilesystemencoding())

        f = stdio.fopen(output_file, "w")
        if f is NULL:
            raise IOError(f"Failed to create file {output_file.decode(sys.getfilesystemencoding())}")
        try:
            if byte_count is None:
                tree.xmlMemDisplay(f)
            else:
                c_count = byte_count
                tree.xmlMemDisplayLast(f, c_count)
        finally:
            # Always release the C file handle, even if the dump fails.
            stdio.fclose(f)

    def show(self, output_file=None, block_count=None):
        """show(self, output_file=None, block_count=None)

        Dumps the current memory blocks allocated by libxml2 to a file.
        The output file format is suitable for line diffing.

        The optional parameter 'output_file' specifies the file path.  It defaults
        to the file ".memorydump" in the current directory.

        The optional parameter 'block_count' limits the number of blocks
        in the dump.

        Raises IOError if the output file cannot be opened for writing.
        """
        if output_file is None:
            output_file = b'.memorydump'
        elif isinstance(output_file, unicode):
            # Keep the encoded result (see dump()).
            output_file = output_file.encode(sys.getfilesystemencoding())

        f = stdio.fopen(output_file, "w")
        if f is NULL:
            raise IOError(f"Failed to create file {output_file.decode(sys.getfilesystemencoding())}")
        try:
            # Without an explicit limit, show every currently allocated block.
            tree.xmlMemShow(f, block_count if block_count is not None else tree.xmlMemBlocks())
        finally:
            stdio.fclose(f)

# Module-level singleton instance of the debugging helper.
memory_debugger = _MemDebug()
|
||||
178
.venv/lib/python3.7/site-packages/lxml/docloader.pxi
Normal file
178
.venv/lib/python3.7/site-packages/lxml/docloader.pxi
Normal file
@@ -0,0 +1,178 @@
|
||||
# Custom resolver API
|
||||
|
||||
# Tag stored in _InputDocument._type.  The Resolver.resolve_*() methods set
# it; _InputDocument.__cinit__() initialises it to PARSER_DATA_INVALID.
ctypedef enum _InputDocumentDataType:
    PARSER_DATA_INVALID
    PARSER_DATA_EMPTY       # set by Resolver.resolve_empty()
    PARSER_DATA_STRING      # set by Resolver.resolve_string()
    PARSER_DATA_FILENAME    # set by Resolver.resolve_filename()
    PARSER_DATA_FILE        # set by Resolver.resolve_file()
|
||||
|
||||
@cython.final
@cython.internal
cdef class _InputDocument:
    # Describes one resolved input source, as built by the
    # Resolver.resolve_*() methods.
    cdef _InputDocumentDataType _type   # which kind of input this is
    cdef bytes _data_bytes              # document content (resolve_string)
    cdef object _filename               # encoded file name or base URL
    cdef object _file                   # file-like object (resolve_file)
    cdef bint _close_file               # 'close' flag given to resolve_file

    def __cinit__(self):
        # Start out invalid until a resolve_*() method fills the object in.
        self._type = PARSER_DATA_INVALID
|
||||
|
||||
|
||||
cdef class Resolver:
    u"This is the base class of all resolvers."
    def resolve(self, system_url, public_id, context):
        u"""resolve(self, system_url, public_id, context)

        Override this method to resolve an external source by
        ``system_url`` and ``public_id``.  The third argument is an
        opaque context object.

        Return the result of one of the ``resolve_*()`` methods.
        This base implementation returns None.
        """
        return None

    def resolve_empty(self, context):
        u"""resolve_empty(self, context)

        Return an empty input document.

        Pass context as parameter.
        """
        cdef _InputDocument empty_doc = _InputDocument()
        empty_doc._type = PARSER_DATA_EMPTY
        return empty_doc

    def resolve_string(self, string, context, *, base_url=None):
        u"""resolve_string(self, string, context, base_url=None)

        Return a parsable string as input document.

        Pass data string and context as parameters.  You can pass the
        source URL or filename through the ``base_url`` keyword
        argument.

        Unicode strings are encoded as UTF-8.  Raises TypeError if
        ``string`` is neither a byte string nor a unicode string.
        """
        cdef _InputDocument string_doc
        if isinstance(string, unicode):
            data = (<unicode>string).encode('utf8')
        elif isinstance(string, bytes):
            data = string
        else:
            raise TypeError("argument must be a byte string or unicode string")
        string_doc = _InputDocument()
        string_doc._type = PARSER_DATA_STRING
        string_doc._data_bytes = data
        if base_url is not None:
            string_doc._filename = _encodeFilename(base_url)
        return string_doc

    def resolve_filename(self, filename, context):
        u"""resolve_filename(self, filename, context)

        Return the name of a parsable file as input document.

        Pass filename and context as parameters.  You can also pass a
        URL with an HTTP, FTP or file target.
        """
        cdef _InputDocument filename_doc = _InputDocument()
        filename_doc._type = PARSER_DATA_FILENAME
        filename_doc._filename = _encodeFilename(filename)
        return filename_doc

    def resolve_file(self, f, context, *, base_url=None, bint close=True):
        u"""resolve_file(self, f, context, base_url=None, close=True)

        Return an open file-like object as input document.

        Pass open file and context as parameters.  You can pass the
        base URL or filename of the file through the ``base_url``
        keyword argument.  If the ``close`` flag is True (the
        default), the file will be closed after reading.

        Note that using ``.resolve_filename()`` is more efficient,
        especially in threaded environments.

        Raises TypeError if ``f`` has no ``read`` attribute.
        """
        cdef _InputDocument file_doc
        # Only probe for the attribute here; nothing is read yet.
        try:
            f.read
        except AttributeError:
            raise TypeError(u"Argument is not a file-like object")
        file_doc = _InputDocument()
        file_doc._type = PARSER_DATA_FILE
        if base_url is None:
            file_doc._filename = _getFilenameForFile(f)
        else:
            file_doc._filename = _encodeFilename(base_url)
        file_doc._close_file = close
        file_doc._file = f
        return file_doc
|
||||
|
||||
@cython.final
@cython.internal
cdef class _ResolverRegistry:
    # Set of registered Resolver instances plus an optional default
    # resolver that is consulted last.
    cdef object _resolvers
    cdef Resolver _default_resolver
    def __cinit__(self, Resolver default_resolver=None):
        self._default_resolver = default_resolver
        self._resolvers = set()

    def add(self, Resolver resolver not None):
        u"""add(self, resolver)

        Register a resolver.

        For each requested entity, the 'resolve' method of the resolver will
        be called and the result will be passed to the parser.  If this method
        returns None, the request will be delegated to other resolvers or the
        default resolver.  The resolvers will be tested in an arbitrary order
        until the first match is found.
        """
        self._resolvers.add(resolver)

    def remove(self, resolver):
        u"remove(self, resolver)"
        # discard() makes removing an unregistered resolver a no-op.
        self._resolvers.discard(resolver)

    cdef _ResolverRegistry _copy(self):
        # New registry with the same default resolver and a copy of the
        # resolver set, so later add()/remove() calls do not affect 'self'.
        cdef _ResolverRegistry duplicate = _ResolverRegistry(self._default_resolver)
        duplicate._resolvers = self._resolvers.copy()
        return duplicate

    def copy(self):
        u"copy(self)"
        return self._copy()

    def resolve(self, system_url, public_id, context):
        u"resolve(self, system_url, public_id, context)"
        # The first registered resolver that returns something wins.
        for candidate in self._resolvers:
            resolved = candidate.resolve(system_url, public_id, context)
            if resolved is not None:
                return resolved
        # Nobody matched: fall back to the default resolver, if there is one.
        if self._default_resolver is not None:
            return self._default_resolver.resolve(system_url, public_id, context)
        return None

    def __repr__(self):
        return repr(self._resolvers)
|
||||
|
||||
|
||||
@cython.internal
cdef class _ResolverContext(_ExceptionContext):
    # Exception context that additionally carries a resolver registry and a
    # temporary store; both are assigned by _initResolverContext().
    cdef _ResolverRegistry _resolvers
    cdef _TempStore _storage

    cdef int clear(self) except -1:
        # Clear the inherited exception state first, then empty the
        # temporary store.
        _ExceptionContext.clear(self)
        self._storage.clear()
        return 0
|
||||
|
||||
|
||||
cdef _initResolverContext(_ResolverContext context,
                          _ResolverRegistry resolvers):
    # Attach the given registry to the context, or a fresh empty one when
    # the caller passed None, and give the context a new temporary store.
    context._resolvers = _ResolverRegistry() if resolvers is None else resolvers
    context._storage = _TempStore()
|
||||
507
.venv/lib/python3.7/site-packages/lxml/doctestcompare.py
Normal file
507
.venv/lib/python3.7/site-packages/lxml/doctestcompare.py
Normal file
@@ -0,0 +1,507 @@
|
||||
"""
|
||||
lxml-based doctest output comparison.
|
||||
|
||||
Note: normally, you should just import the `lxml.usedoctest` and
|
||||
`lxml.html.usedoctest` modules from within a doctest, instead of this
|
||||
one::
|
||||
|
||||
>>> import lxml.usedoctest # for XML output
|
||||
|
||||
>>> import lxml.html.usedoctest # for HTML output
|
||||
|
||||
To use this module directly, you must call ``lxmldoctest.install()``,
|
||||
which will cause doctest to use this in all subsequent calls.
|
||||
|
||||
This changes the way output is checked and comparisons are made for
|
||||
XML or HTML-like content.
|
||||
|
||||
XML or HTML content is noticed because the example starts with ``<``
|
||||
(it's HTML if it starts with ``<html``). You can also use the
|
||||
``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.
|
||||
|
||||
Some rough wildcard-like things are allowed. Whitespace is generally
|
||||
ignored (except in attributes). In text (attributes and text in the
|
||||
body) you can use ``...`` as a wildcard. In an example it also
|
||||
matches any trailing tags in the element, though it does not match
|
||||
leading tags. You may create a tag ``<any>`` or include an ``any``
|
||||
attribute in the tag. An ``any`` tag matches any tag, while the
|
||||
attribute matches any and all attributes.
|
||||
|
||||
When a match fails, the reformatted example and gotten text is
|
||||
displayed (indented), and a rough diff-like output is given. Anything
|
||||
marked with ``+`` is in the output but wasn't supposed to be, and
|
||||
similarly ``-`` means it's in the example but wasn't in the output.
|
||||
|
||||
You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP``
|
||||
"""
|
||||
|
||||
from lxml import etree
|
||||
import sys
|
||||
import re
|
||||
import doctest
|
||||
try:
|
||||
from html import escape as html_escape
|
||||
except ImportError:
|
||||
from cgi import escape as html_escape
|
||||
|
||||
__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker',
|
||||
'LHTMLOutputChecker', 'install', 'temp_install']
|
||||
|
||||
try:
|
||||
_basestring = basestring
|
||||
except NameError:
|
||||
_basestring = (str, bytes)
|
||||
|
||||
_IS_PYTHON_3 = sys.version_info[0] >= 3
|
||||
|
||||
PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
|
||||
PARSE_XML = doctest.register_optionflag('PARSE_XML')
|
||||
NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')
|
||||
|
||||
OutputChecker = doctest.OutputChecker
|
||||
|
||||
def strip(v):
    """Return ``v.strip()``, passing ``None`` through unchanged."""
    return None if v is None else v.strip()
|
||||
|
||||
def norm_whitespace(v):
    """Replace each match of ``_norm_whitespace_re`` in ``v`` by one space.

    The pattern matches runs of two or more space/tab/newline characters,
    so a single whitespace character is left as it is.
    """
    return _norm_whitespace_re.sub(' ', v)
|
||||
|
||||
# Shared HTML parser used by html_fromstring(): created with recovery
# turned off and with blank text removal enabled.
_html_parser = etree.HTMLParser(recover=False, remove_blank_text=True)

def html_fromstring(html):
    """Parse ``html`` with the module's shared HTML parser."""
    return etree.fromstring(html, _html_parser)
|
||||
|
||||
# We use this to distinguish repr()s from elements:
# (matches text of the form "<... at " or "<... object ")
_repr_re = re.compile(r'^<[^>]+ (at|object) ')
# Runs of two or more space, tab or newline characters; used by
# norm_whitespace() to collapse them into a single space.
_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
|
||||
|
||||
class LXMLOutputChecker(OutputChecker):
    """Doctest output checker that compares XML/HTML output structurally.

    When both the expected and the actual output look like markup, they are
    parsed and compared element by element, with support for the ``...``
    wildcard in text and attribute values, the ``any`` tag/attribute and the
    ``{...}`` namespace wildcard.  Everything else is delegated to the
    standard doctest ``OutputChecker``.
    """

    # HTML tags that are rendered without an end tag when they are empty.
    empty_tags = (
        'param', 'img', 'area', 'br', 'basefont', 'input',
        'base', 'meta', 'link', 'col')

    def get_default_parser(self):
        """Return the parser used for markup not recognised as HTML."""
        return etree.XML

    def check_output(self, want, got, optionflags):
        """Return True if ``got`` matches ``want``.

        Falls back to the plain doctest comparison when no parser applies.
        Output that fails to parse never matches.
        """
        # NOTE: temp_install() copies this method's code object into
        # doctest's own check function, where it runs with doctest's module
        # globals.  Only use global names that exist there (``OutputChecker``
        # and the injected ``etree``) and keep the ``self`` override below.
        alt_self = getattr(self, '_temp_override_self', None)
        if alt_self is not None:
            super_method = self._temp_call_super_check_output
            self = alt_self
        else:
            super_method = OutputChecker.check_output
        parser = self.get_parser(want, got, optionflags)
        if not parser:
            return super_method(
                self, want, got, optionflags)
        try:
            want_doc = parser(want)
        except etree.XMLSyntaxError:
            return False
        try:
            got_doc = parser(got)
        except etree.XMLSyntaxError:
            return False
        return self.compare_docs(want_doc, got_doc)

    def get_parser(self, want, got, optionflags):
        """Choose the parser for this comparison, or None for plain text.

        Explicit option flags win over content sniffing.  Output is treated
        as HTML when both sides start with ``<html`` (case-insensitively).
        """
        parser = None
        if NOPARSE_MARKUP & optionflags:
            return None
        if PARSE_HTML & optionflags:
            parser = html_fromstring
        elif PARSE_XML & optionflags:
            parser = etree.XML
        elif (want.strip().lower().startswith('<html')
              and got.strip().lower().startswith('<html')):
            # Both sides are sniffed the same way, so that "<HTML>" output
            # is recognised just like a "<HTML>" example.
            parser = html_fromstring
        elif (self._looks_like_markup(want)
              and self._looks_like_markup(got)):
            parser = self.get_default_parser()
        return parser

    def _looks_like_markup(self, s):
        """Return true if ``s`` starts with ``<`` and is not an object repr."""
        s = s.strip()
        return (s.startswith('<')
                and not _repr_re.search(s))

    def compare_docs(self, want, got):
        """Recursively compare two elements; return True if they match."""
        if not self.tag_compare(want.tag, got.tag):
            return False
        if not self.text_compare(want.text, got.text, True):
            return False
        if not self.text_compare(want.tail, got.tail, True):
            return False
        if 'any' not in want.attrib:
            # Without the 'any' attribute, both elements must carry exactly
            # the same attribute names with matching values.
            want_keys = sorted(want.attrib.keys())
            got_keys = sorted(got.attrib.keys())
            if want_keys != got_keys:
                return False
            for key in want_keys:
                if not self.text_compare(want.attrib[key], got.attrib[key], False):
                    return False
        if want.text != '...' or len(want):
            # Children are only skipped when the example element consists
            # of nothing but the '...' wildcard.
            want_children = list(want)
            got_children = list(got)
            while want_children or got_children:
                if not want_children or not got_children:
                    return False
                want_first = want_children.pop(0)
                got_first = got_children.pop(0)
                if not self.compare_docs(want_first, got_first):
                    return False
                if not got_children and want_first.tail == '...':
                    # A trailing '...' stands for the remaining example
                    # children once the actual children are used up.
                    break
        return True

    def text_compare(self, want, got, strip):
        """Compare two text values, honouring the ``...`` wildcard.

        None is treated as the empty string.  With ``strip`` true, both
        values are whitespace-normalised and stripped first.
        """
        want = want or ''
        got = got or ''
        if strip:
            want = norm_whitespace(want).strip()
            got = norm_whitespace(got).strip()
        # Build an anchored regex in which only '...' acts as a wildcard.
        want = '^%s$' % re.escape(want)
        want = want.replace(r'\.\.\.', '.*')
        if re.search(want, got):
            return True
        else:
            return False

    def tag_compare(self, want, got):
        """Compare two tags; ``any`` and a ``{...}`` namespace are wildcards."""
        if want == 'any':
            return True
        if (not isinstance(want, _basestring)
                or not isinstance(got, _basestring)):
            # Non-string tags are compared directly.
            return want == got
        want = want or ''
        got = got or ''
        if want.startswith('{...}'):
            # Ellipsis on the namespace
            return want.split('}')[-1] == got.split('}')[-1]
        else:
            return want == got

    def output_difference(self, example, got, optionflags):
        """Return a description of how ``got`` differs from the example.

        Produces an "Expected / Got / Diff" report for parsed markup and
        falls back to the standard doctest report (prefixed by any parse
        errors) otherwise.
        """
        want = example.want
        parser = self.get_parser(want, got, optionflags)
        errors = []
        if parser is not None:
            try:
                want_doc = parser(want)
            except etree.XMLSyntaxError:
                e = sys.exc_info()[1]
                errors.append('In example: %s' % e)
            try:
                got_doc = parser(got)
            except etree.XMLSyntaxError:
                e = sys.exc_info()[1]
                errors.append('In actual output: %s' % e)
        if parser is None or errors:
            value = OutputChecker.output_difference(
                self, example, got, optionflags)
            if errors:
                errors.append(value)
                return '\n'.join(errors)
            else:
                return value
        html = parser is html_fromstring
        diff_parts = ['Expected:',
                      self.format_doc(want_doc, html, 2),
                      'Got:',
                      self.format_doc(got_doc, html, 2),
                      'Diff:',
                      self.collect_diff(want_doc, got_doc, html, 2)]
        return '\n'.join(diff_parts)

    def html_empty_tag(self, el, html=True):
        """Return True if ``el`` is a content-free HTML empty tag."""
        if not html:
            return False
        if el.tag not in self.empty_tags:
            return False
        if el.text or len(el):
            # This shouldn't happen (contents in an empty tag)
            return False
        return True

    def format_doc(self, doc, html, indent, prefix=''):
        """Return ``doc`` pretty-printed with ``indent`` leading spaces.

        ``prefix`` (e.g. ``+`` or ``-``) is inserted before the start tag.
        """
        parts = []
        if not len(doc):
            # No children...
            parts.append(' '*indent)
            parts.append(prefix)
            parts.append(self.format_tag(doc))
            if not self.html_empty_tag(doc, html):
                if strip(doc.text):
                    parts.append(self.format_text(doc.text))
                parts.append(self.format_end_tag(doc))
            if strip(doc.tail):
                parts.append(self.format_text(doc.tail))
            parts.append('\n')
            return ''.join(parts)
        parts.append(' '*indent)
        parts.append(prefix)
        parts.append(self.format_tag(doc))
        if not self.html_empty_tag(doc, html):
            parts.append('\n')
            if strip(doc.text):
                parts.append(' '*indent)
                parts.append(self.format_text(doc.text))
                parts.append('\n')
            for el in doc:
                parts.append(self.format_doc(el, html, indent+2))
            parts.append(' '*indent)
            parts.append(self.format_end_tag(doc))
            parts.append('\n')
        if strip(doc.tail):
            parts.append(' '*indent)
            parts.append(self.format_text(doc.tail))
            parts.append('\n')
        return ''.join(parts)

    def format_text(self, text, strip=True):
        """Return ``text`` HTML-escaped (quotes included), optionally stripped."""
        if text is None:
            return ''
        if strip:
            text = text.strip()
        return html_escape(text, 1)

    def format_tag(self, el):
        """Return the start tag of ``el`` with its attributes sorted by name."""
        attrs = []
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '<!--'
        for name, value in sorted(el.attrib.items()):
            attrs.append('%s="%s"' % (name, self.format_text(value, False)))
        if not attrs:
            return '<%s>' % el.tag
        return '<%s %s>' % (el.tag, ' '.join(attrs))

    def format_end_tag(self, el):
        """Return the end tag of ``el`` (``-->`` for comments)."""
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '-->'
        return '</%s>' % el.tag

    def collect_diff(self, want, got, html, indent):
        """Return a diff-like rendering of ``want`` versus ``got``.

        Surplus children of ``got`` are prefixed with ``+``, missing
        children of ``want`` with ``-``.
        """
        parts = []
        if not len(want) and not len(got):
            parts.append(' '*indent)
            parts.append(self.collect_diff_tag(want, got))
            if not self.html_empty_tag(got, html):
                parts.append(self.collect_diff_text(want.text, got.text))
                parts.append(self.collect_diff_end_tag(want, got))
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
            return ''.join(parts)
        parts.append(' '*indent)
        parts.append(self.collect_diff_tag(want, got))
        parts.append('\n')
        if strip(want.text) or strip(got.text):
            parts.append(' '*indent)
            parts.append(self.collect_diff_text(want.text, got.text))
            parts.append('\n')
        want_children = list(want)
        got_children = list(got)
        while want_children or got_children:
            if not want_children:
                parts.append(self.format_doc(got_children.pop(0), html, indent+2, '+'))
                continue
            if not got_children:
                parts.append(self.format_doc(want_children.pop(0), html, indent+2, '-'))
                continue
            parts.append(self.collect_diff(
                want_children.pop(0), got_children.pop(0), html, indent+2))
        parts.append(' '*indent)
        parts.append(self.collect_diff_end_tag(want, got))
        parts.append('\n')
        if strip(want.tail) or strip(got.tail):
            parts.append(' '*indent)
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
        return ''.join(parts)

    def collect_diff_tag(self, want, got):
        """Return the start tag for the diff, marking attribute differences."""
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        attrs = []
        # With the 'any' wildcard, attribute names are not compared.
        match_any = want.tag == 'any' or 'any' in want.attrib
        for name, value in sorted(got.attrib.items()):
            if name not in want.attrib and not match_any:
                attrs.append('+%s="%s"' % (name, self.format_text(value, False)))
            else:
                if name in want.attrib:
                    text = self.collect_diff_text(want.attrib[name], value, False)
                else:
                    text = self.format_text(value, False)
                attrs.append('%s="%s"' % (name, text))
        if not match_any:
            for name, value in sorted(want.attrib.items()):
                if name in got.attrib:
                    continue
                attrs.append('-%s="%s"' % (name, self.format_text(value, False)))
        if attrs:
            tag = '<%s %s>' % (tag, ' '.join(attrs))
        else:
            tag = '<%s>' % tag
        return tag

    def collect_diff_end_tag(self, want, got):
        """Return the end tag for the diff, marking a tag mismatch."""
        # Use tag_compare() like collect_diff_tag() does, so that the 'any'
        # and '{...}' wildcards are not reported as end tag mismatches.
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        return '</%s>' % tag

    def collect_diff_text(self, want, got, strip=True):
        """Return the text for the diff, showing both values on mismatch."""
        if self.text_compare(want, got, strip):
            if not got:
                return ''
            return self.format_text(got, strip)
        text = '%s (got: %s)' % (want, got)
        return self.format_text(text, strip)
|
||||
|
||||
class LHTMLOutputChecker(LXMLOutputChecker):
    """Output checker that uses the HTML parser as its default parser."""

    def get_default_parser(self):
        """Return the HTML parsing function ``html_fromstring``."""
        return html_fromstring
|
||||
|
||||
def install(html=False):
    """
    Install doctestcompare for all future doctests.

    If html is true, then by default the HTML parser will be used;
    otherwise the XML parser is used.

    This works by replacing ``doctest.OutputChecker`` with the matching
    checker class.
    """
    doctest.OutputChecker = LHTMLOutputChecker if html else LXMLOutputChecker
|
||||
|
||||
def temp_install(html=False, del_module=None):
    """
    Use this *inside* a doctest to enable this checker for this
    doctest only.

    If html is true, then by default the HTML parser will be used;
    otherwise the XML parser is used.

    ``del_module`` is handed on to ``_RestoreChecker``, which removes the
    module of that name from ``sys.modules`` once the doctest has finished.
    """
    Checker = LHTMLOutputChecker if html else LXMLOutputChecker
    frame = _find_doctest_frame()
    dt_self = frame.f_locals['self']
    checker = Checker()
    old_checker = dt_self._checker
    dt_self._checker = checker
    # The unfortunate thing is that there is a local variable 'check'
    # in the function that runs the doctests, that is a bound method
    # into the output checker.  We have to update that.  We can't
    # modify the frame, so we have to modify the object in place.  The
    # only way to do this is to actually change the func_code
    # attribute of the method.  We change it, and then wait for
    # __record_outcome to be run, which signals the end of the __run
    # method, at which point we restore the previous check_output
    # implementation.
    func_attr = '__func__' if _IS_PYTHON_3 else 'im_func'
    check_func = getattr(frame.f_locals['check'], func_attr)
    checker_check_func = getattr(checker.check_output, func_attr)
    # Because we can't patch up func_globals, this is the only global
    # in check_output that we care about:
    doctest.etree = etree
    # The helper installs itself in its constructor, so the instance does
    # not need to be kept here.
    _RestoreChecker(dt_self, old_checker, checker,
                    check_func, checker_check_func,
                    del_module)
|
||||
|
||||
class _RestoreChecker(object):
    """Temporarily swap in a checker and undo the swap after the doctest.

    On construction, the code of ``clone_func`` is copied into
    ``check_func`` and the instance replaces the runner's
    ``__record_outcome`` hook.  When that hook is called, everything is
    restored and the call is passed on to the original hook.
    """

    def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,
                 del_module):
        self.dt_self = dt_self
        self.checker = old_checker
        # check_output() looks for these two attributes on the old checker
        # to redirect the comparison to the new one.
        self.checker._temp_call_super_check_output = self.call_super
        self.checker._temp_override_self = new_checker
        self.check_func = check_func
        self.clone_func = clone_func
        self.del_module = del_module
        self.install_clone()
        self.install_dt_self()

    def install_clone(self):
        """Save the code of ``check_func`` and replace it with the clone's."""
        if _IS_PYTHON_3:
            code_attr, globals_attr = '__code__', '__globals__'
        else:
            code_attr, globals_attr = 'func_code', 'func_globals'
        self.func_code = getattr(self.check_func, code_attr)
        self.func_globals = getattr(self.check_func, globals_attr)
        setattr(self.check_func, code_attr, getattr(self.clone_func, code_attr))

    def uninstall_clone(self):
        """Put the saved code object back into ``check_func``."""
        code_attr = '__code__' if _IS_PYTHON_3 else 'func_code'
        setattr(self.check_func, code_attr, self.func_code)

    def install_dt_self(self):
        """Hook this object in as the runner's ``__record_outcome``."""
        self.prev_func = self.dt_self._DocTestRunner__record_outcome
        self.dt_self._DocTestRunner__record_outcome = self

    def uninstall_dt_self(self):
        """Restore the runner's original ``__record_outcome``."""
        self.dt_self._DocTestRunner__record_outcome = self.prev_func

    def uninstall_module(self):
        """Remove ``del_module`` from ``sys.modules`` and its parent package."""
        if not self.del_module:
            return
        import sys
        module_name = self.del_module
        del sys.modules[module_name]
        if '.' in module_name:
            package, _, module = module_name.rpartition('.')
            package_mod = sys.modules[package]
            delattr(package_mod, module)

    def __call__(self, *args, **kw):
        # Called in place of __record_outcome: undo all patches first, then
        # pass the call on to the original hook.
        self.uninstall_clone()
        self.uninstall_dt_self()
        del self.checker._temp_override_self
        del self.checker._temp_call_super_check_output
        outcome = self.prev_func(*args, **kw)
        self.uninstall_module()
        return outcome

    def call_super(self, *args, **kw):
        """Call the original ``check_func`` with its own code restored."""
        self.uninstall_clone()
        try:
            return self.check_func(*args, **kw)
        finally:
            # Re-apply the patch whether or not the call succeeded.
            self.install_clone()
|
||||
|
||||
def _find_doctest_frame():
    """Return the nearest calling frame that has a local named ``BOOM``.

    That local is taken as the sign of the doctest runner.  Raises
    ``LookupError`` if no frame on the call stack has it.
    """
    import sys
    frame = sys._getframe(1)
    # Walk outwards from the caller until the marker local shows up.
    while frame is not None and 'BOOM' not in frame.f_locals:
        frame = frame.f_back
    if frame is None:
        raise LookupError(
            "Could not find doctest (only use this function *inside* a doctest)")
    return frame
|
||||
|
||||
__test__ = {
|
||||
'basic': '''
|
||||
>>> temp_install()
|
||||
>>> print """<xml a="1" b="2">stuff</xml>"""
|
||||
<xml b="2" a="1">...</xml>
|
||||
>>> print """<xml xmlns="http://example.com"><tag attr="bar" /></xml>"""
|
||||
<xml xmlns="...">
|
||||
<tag attr="..." />
|
||||
</xml>
|
||||
>>> print """<xml>blahblahblah<foo /></xml>""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS
|
||||
<xml>...foo /></xml>
|
||||
'''}
|
||||
|
||||
if __name__ == '__main__':
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
|
||||
|
||||
478
.venv/lib/python3.7/site-packages/lxml/dtd.pxi
Normal file
478
.venv/lib/python3.7/site-packages/lxml/dtd.pxi
Normal file
@@ -0,0 +1,478 @@
|
||||
# support for DTD validation
|
||||
from lxml.includes cimport dtdvalid
|
||||
|
||||
cdef class DTDError(LxmlError):
    """Base class for DTD errors.

    DTDParseError and DTDValidateError derive from this class.
    """
|
||||
|
||||
cdef class DTDParseError(DTDError):
    """Error while parsing a DTD.

    Subclass of DTDError.
    """
|
||||
|
||||
cdef class DTDValidateError(DTDError):
    """Error while validating an XML document with a DTD.

    Subclass of DTDError.
    """
|
||||
|
||||
|
||||
cdef inline int _assertValidDTDNode(node, void *c_node) except -1:
    # Guard used by the DTD proxy properties: fails with an AssertionError
    # naming the proxy's id() when its C node pointer is NULL.
    assert c_node is not NULL, u"invalid DTD proxy at %s" % id(node)
|
||||
|
||||
|
||||
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDElementContentDecl:
    # Proxy for one libxml2 xmlElementContent node.  '_dtd' is copied to
    # the child proxies created by 'left' and 'right'.
    cdef DTD _dtd
    cdef tree.xmlElementContent* _c_node

    def __repr__(self):
        cls = self.__class__
        return "<%s.%s object name=%r type=%r occur=%r at 0x%x>" % (
            cls.__module__, cls.__name__,
            self.name, self.type, self.occur, id(self))

    @property
    def name(self):
        # Name of the content node as a unicode string, or None.
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.name)

    @property
    def type(self):
        # One of "pcdata", "element", "seq", "or"; None for unknown values.
        _assertValidDTDNode(self, self._c_node)
        cdef int c_type = self._c_node.type
        if c_type == tree.XML_ELEMENT_CONTENT_PCDATA:
            return "pcdata"
        if c_type == tree.XML_ELEMENT_CONTENT_ELEMENT:
            return "element"
        if c_type == tree.XML_ELEMENT_CONTENT_SEQ:
            return "seq"
        if c_type == tree.XML_ELEMENT_CONTENT_OR:
            return "or"
        return None

    @property
    def occur(self):
        # One of "once", "opt", "mult", "plus"; None for unknown values.
        _assertValidDTDNode(self, self._c_node)
        # 'ocur' (sic) is the field name used by the C struct.
        cdef int c_occur = self._c_node.ocur
        if c_occur == tree.XML_ELEMENT_CONTENT_ONCE:
            return "once"
        if c_occur == tree.XML_ELEMENT_CONTENT_OPT:
            return "opt"
        if c_occur == tree.XML_ELEMENT_CONTENT_MULT:
            return "mult"
        if c_occur == tree.XML_ELEMENT_CONTENT_PLUS:
            return "plus"
        return None

    @property
    def left(self):
        # Proxy for the first child node (c1), or None if there is none.
        _assertValidDTDNode(self, self._c_node)
        c1 = self._c_node.c1
        if not c1:
            return None
        node = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
        node._dtd = self._dtd
        node._c_node = <tree.xmlElementContent*>c1
        return node

    @property
    def right(self):
        # Proxy for the second child node (c2), or None if there is none.
        _assertValidDTDNode(self, self._c_node)
        c2 = self._c_node.c2
        if not c2:
            return None
        node = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
        node._dtd = self._dtd
        node._c_node = <tree.xmlElementContent*>c2
        return node
|
||||
|
||||
|
||||
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDAttributeDecl:
    # Read-only proxy around one ``tree.xmlAttribute`` declaration node.
    # Both fields are filled in by whoever creates the proxy.
    cdef DTD _dtd
    cdef tree.xmlAttribute* _c_node

    def __repr__(self):
        cls = self.__class__
        return "<%s.%s object name=%r elemname=%r prefix=%r type=%r default=%r default_value=%r at 0x%x>" % (
            cls.__module__, cls.__name__,
            self.name, self.elemname, self.prefix, self.type,
            self.default, self.default_value, id(self))

    @property
    def name(self):
        # ``name`` field of the C node, converted by funicodeOrNone().
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.name)

    @property
    def elemname(self):
        # ``elem`` field of the C node, converted by funicodeOrNone().
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.elem)

    @property
    def prefix(self):
        # ``prefix`` field of the C node, converted by funicodeOrNone().
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.prefix)

    @property
    def type(self):
        # Maps the C ``atype`` field to its lower-case name; any value not
        # listed below yields None.
        _assertValidDTDNode(self, self._c_node)
        cdef int c_type = self._c_node.atype
        if c_type == tree.XML_ATTRIBUTE_CDATA:
            return "cdata"
        if c_type == tree.XML_ATTRIBUTE_ID:
            return "id"
        if c_type == tree.XML_ATTRIBUTE_IDREF:
            return "idref"
        if c_type == tree.XML_ATTRIBUTE_IDREFS:
            return "idrefs"
        if c_type == tree.XML_ATTRIBUTE_ENTITY:
            return "entity"
        if c_type == tree.XML_ATTRIBUTE_ENTITIES:
            return "entities"
        if c_type == tree.XML_ATTRIBUTE_NMTOKEN:
            return "nmtoken"
        if c_type == tree.XML_ATTRIBUTE_NMTOKENS:
            return "nmtokens"
        if c_type == tree.XML_ATTRIBUTE_ENUMERATION:
            return "enumeration"
        if c_type == tree.XML_ATTRIBUTE_NOTATION:
            return "notation"
        return None

    @property
    def default(self):
        # Maps the C ``def_`` field to "none", "required", "implied" or
        # "fixed"; any other value yields None.
        _assertValidDTDNode(self, self._c_node)
        cdef int c_default = self._c_node.def_
        if c_default == tree.XML_ATTRIBUTE_NONE:
            return "none"
        if c_default == tree.XML_ATTRIBUTE_REQUIRED:
            return "required"
        if c_default == tree.XML_ATTRIBUTE_IMPLIED:
            return "implied"
        if c_default == tree.XML_ATTRIBUTE_FIXED:
            return "fixed"
        return None

    @property
    def default_value(self):
        # ``defaultValue`` field of the C node, converted by funicodeOrNone().
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.defaultValue)

    def itervalues(self):
        # Generator over the ``name`` of every entry in the enumeration list
        # hanging off the C node's ``tree`` field, following ``next`` links.
        _assertValidDTDNode(self, self._c_node)
        cdef tree.xmlEnumeration *c_value = self._c_node.tree
        while c_value is not NULL:
            yield funicode(c_value.name)
            c_value = c_value.next

    def values(self):
        # Same items as itervalues(), collected into a list.
        return list(self.itervalues())
|
||||
|
||||
|
||||
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDElementDecl:
    # Read-only proxy around one ``tree.xmlElement`` declaration node.
    # Both fields are filled in by whoever creates the proxy.
    cdef DTD _dtd
    cdef tree.xmlElement* _c_node

    def __repr__(self):
        cls = self.__class__
        return "<%s.%s object name=%r prefix=%r type=%r at 0x%x>" % (
            cls.__module__, cls.__name__,
            self.name, self.prefix, self.type, id(self))

    @property
    def name(self):
        # ``name`` field of the C node, converted by funicodeOrNone().
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.name)

    @property
    def prefix(self):
        # ``prefix`` field of the C node, converted by funicodeOrNone().
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.prefix)

    @property
    def type(self):
        # Maps the C ``etype`` field to "undefined", "empty", "any", "mixed"
        # or "element"; any other value yields None.
        _assertValidDTDNode(self, self._c_node)
        cdef int c_type = self._c_node.etype
        if c_type == tree.XML_ELEMENT_TYPE_UNDEFINED:
            return "undefined"
        if c_type == tree.XML_ELEMENT_TYPE_EMPTY:
            return "empty"
        if c_type == tree.XML_ELEMENT_TYPE_ANY:
            return "any"
        if c_type == tree.XML_ELEMENT_TYPE_MIXED:
            return "mixed"
        if c_type == tree.XML_ELEMENT_TYPE_ELEMENT:
            return "element"
        return None

    @property
    def content(self):
        # Proxy for the ``content`` node of this declaration, sharing this
        # proxy's DTD reference; None when ``content`` is NULL.
        _assertValidDTDNode(self, self._c_node)
        cdef tree.xmlElementContent *c_content = self._c_node.content
        cdef _DTDElementContentDecl decl
        if c_content is NULL:
            return None
        decl = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
        decl._dtd = self._dtd
        decl._c_node = c_content
        return decl

    def iterattributes(self):
        # Generator yielding one _DTDAttributeDecl proxy per node of the
        # ``attributes`` list, following the ``nexth`` links.
        _assertValidDTDNode(self, self._c_node)
        cdef tree.xmlAttribute *c_attr = self._c_node.attributes
        cdef _DTDAttributeDecl decl
        while c_attr is not NULL:
            decl = <_DTDAttributeDecl>_DTDAttributeDecl.__new__(_DTDAttributeDecl)
            decl._dtd = self._dtd
            decl._c_node = c_attr
            yield decl
            c_attr = c_attr.nexth

    def attributes(self):
        # Same items as iterattributes(), collected into a list.
        return list(self.iterattributes())
|
||||
|
||||
|
||||
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDEntityDecl:
    # Read-only proxy around one ``tree.xmlEntity`` declaration node.
    # Both fields are filled in by whoever creates the proxy.
    cdef DTD _dtd
    cdef tree.xmlEntity* _c_node

    def __repr__(self):
        cls = self.__class__
        return "<%s.%s object name=%r at 0x%x>" % (
            cls.__module__, cls.__name__, self.name, id(self))

    @property
    def name(self):
        # ``name`` field of the C node, converted by funicodeOrNone().
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.name)

    @property
    def orig(self):
        # ``orig`` field of the C node, converted by funicodeOrNone().
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.orig)

    @property
    def content(self):
        # ``content`` field of the C node, converted by funicodeOrNone().
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.content)

    @property
    def system_url(self):
        # ``SystemID`` field of the C node, converted by funicodeOrNone().
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.SystemID)
|
||||
|
||||
|
||||
################################################################################
|
||||
# DTD
|
||||
|
||||
cdef class DTD(_Validator):
    u"""DTD(self, file=None, external_id=None)
    A DTD validator.

    Can load from filesystem directly given a filename or file-like object.
    Alternatively, pass the keyword parameter ``external_id`` to load from a
    catalog.
    """
    # The parsed DTD; released with xmlFreeDtd() in __dealloc__.
    cdef tree.xmlDtd* _c_dtd

    def __init__(self, file=None, *, external_id=None):
        # Parse the DTD from ``file`` (filename, path-like or object with a
        # ``read`` attribute) or, when ``file`` is None, from ``external_id``.
        # Raises DTDParseError when neither is given, when ``file`` is of an
        # unsupported kind, or when parsing produced no DTD.
        _Validator.__init__(self)
        if file is not None:
            file = _getFSPathOrObject(file)
            if _isString(file):
                file = _encodeFilename(file)
                with self._error_log:
                    orig_loader = _register_document_loader()
                    self._c_dtd = xmlparser.xmlParseDTD(NULL, _xcstr(file))
                    _reset_document_loader(orig_loader)
            elif hasattr(file, 'read'):
                orig_loader = _register_document_loader()
                try:
                    # _parseDtdFromFilelike() raises on failure, so the
                    # loader must be restored on that path as well.
                    self._c_dtd = _parseDtdFromFilelike(file)
                finally:
                    _reset_document_loader(orig_loader)
            else:
                raise DTDParseError(u"file must be a filename, file-like or path-like object")
        elif external_id is not None:
            with self._error_log:
                orig_loader = _register_document_loader()
                self._c_dtd = xmlparser.xmlParseDTD(<const_xmlChar*>external_id, NULL)
                _reset_document_loader(orig_loader)
        else:
            raise DTDParseError(u"either filename or external ID required")

        if self._c_dtd is NULL:
            raise DTDParseError(
                self._error_log._buildExceptionMessage(u"error parsing DTD"),
                self._error_log)

    @property
    def name(self):
        # ``name`` of the C DTD via funicodeOrNone(); None without a C DTD.
        if self._c_dtd is NULL:
            return None
        return funicodeOrNone(self._c_dtd.name)

    @property
    def external_id(self):
        # ``ExternalID`` of the C DTD via funicodeOrNone(); None without a
        # C DTD.
        if self._c_dtd is NULL:
            return None
        return funicodeOrNone(self._c_dtd.ExternalID)

    @property
    def system_url(self):
        # ``SystemID`` of the C DTD via funicodeOrNone(); None without a
        # C DTD.
        if self._c_dtd is NULL:
            return None
        return funicodeOrNone(self._c_dtd.SystemID)

    def iterelements(self):
        # Generator yielding a _DTDElementDecl proxy for every child node of
        # type XML_ELEMENT_DECL; yields nothing without a C DTD.
        cdef tree.xmlNode *c_node = self._c_dtd.children if self._c_dtd is not NULL else NULL
        while c_node is not NULL:
            if c_node.type == tree.XML_ELEMENT_DECL:
                node = _DTDElementDecl()
                node._dtd = self
                node._c_node = <tree.xmlElement*>c_node
                yield node
            c_node = c_node.next

    def elements(self):
        # Same items as iterelements(), collected into a list.
        return list(self.iterelements())

    def iterentities(self):
        # Generator yielding a _DTDEntityDecl proxy for every child node of
        # type XML_ENTITY_DECL; yields nothing without a C DTD.
        cdef tree.xmlNode *c_node = self._c_dtd.children if self._c_dtd is not NULL else NULL
        while c_node is not NULL:
            if c_node.type == tree.XML_ENTITY_DECL:
                node = _DTDEntityDecl()
                node._dtd = self
                node._c_node = <tree.xmlEntity*>c_node
                yield node
            c_node = c_node.next

    def entities(self):
        # Same items as iterentities(), collected into a list.
        return list(self.iterentities())

    def __dealloc__(self):
        tree.xmlFreeDtd(self._c_dtd)

    def __call__(self, etree):
        u"""__call__(self, etree)

        Validate doc using the DTD.

        Returns true if the document is valid, false if not.
        """
        cdef _Document doc
        cdef _Element root_node
        cdef xmlDoc* c_doc
        cdef dtdvalid.xmlValidCtxt* valid_ctxt
        cdef int ret = -1

        assert self._c_dtd is not NULL, "DTD not initialised"
        doc = _documentOrRaise(etree)
        root_node = _rootNodeOrRaise(etree)

        valid_ctxt = dtdvalid.xmlNewValidCtxt()
        if valid_ctxt is NULL:
            raise DTDError(u"Failed to create validation context")

        # work around error reporting bug in libxml2 <= 2.9.1 (and later?)
        # https://bugzilla.gnome.org/show_bug.cgi?id=724903
        valid_ctxt.error = <dtdvalid.xmlValidityErrorFunc>_nullGenericErrorFunc
        valid_ctxt.userData = NULL

        try:
            with self._error_log:
                c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
                ret = dtdvalid.xmlValidateDtd(valid_ctxt, c_doc, self._c_dtd)
                _destroyFakeDoc(doc._c_doc, c_doc)
        finally:
            # the validation context is freed on every exit path
            dtdvalid.xmlFreeValidCtxt(valid_ctxt)

        # ret keeps its initial -1 unless xmlValidateDtd() overwrote it
        if ret == -1:
            raise DTDValidateError(u"Internal error in DTD validation",
                                   self._error_log)
        return ret == 1
|
||||
|
||||
|
||||
cdef tree.xmlDtd* _parseDtdFromFilelike(file) except NULL:
    # Read a DTD from the file-like object ``file`` through a
    # _FileReaderContext.  An exception stored in the exception context is
    # re-raised first; otherwise a NULL result raises DTDParseError carrying
    # the error log that was active during the read.
    cdef _ExceptionContext stored_exc = _ExceptionContext()
    cdef _FileReaderContext reader = _FileReaderContext(file, stored_exc, None)
    cdef _ErrorLog parse_log = _ErrorLog()
    cdef tree.xmlDtd* c_result = NULL

    with parse_log:
        c_result = reader._readDtd()

    stored_exc._raise_if_stored()
    if c_result is NULL:
        raise DTDParseError(u"error parsing DTD", parse_log)
    return c_result
|
||||
|
||||
cdef DTD _dtdFactory(tree.xmlDtd* c_dtd):
    # Build a DTD object around a private copy of ``c_dtd``; a NULL input
    # gives None.
    # do not run through DTD.__init__()!
    cdef DTD wrapper
    if c_dtd is NULL:
        return None
    wrapper = DTD.__new__(DTD)
    wrapper._c_dtd = _copyDtd(c_dtd)
    _Validator.__init__(wrapper)
    return wrapper
|
||||
|
||||
|
||||
cdef tree.xmlDtd* _copyDtd(tree.xmlDtd* c_orig_dtd) except NULL:
    """
    Copy a DTD.  libxml2 (currently) fails to set up the element->attributes
    links when copying DTDs, so we have to rebuild them here.

    Raises MemoryError when xmlCopyDtd() returns NULL.
    """
    cdef tree.xmlDtd* c_copy = tree.xmlCopyDtd(c_orig_dtd)
    cdef tree.xmlNode* c_child
    if c_copy is NULL:
        raise MemoryError()
    # re-link every attribute declaration found among the copy's children
    c_child = c_copy.children
    while c_child is not NULL:
        if c_child.type == tree.XML_ATTRIBUTE_DECL:
            _linkDtdAttribute(c_copy, <tree.xmlAttribute*>c_child)
        c_child = c_child.next
    return c_copy
|
||||
|
||||
|
||||
cdef void _linkDtdAttribute(tree.xmlDtd* c_dtd, tree.xmlAttribute* c_attr):
    """
    Create the link to the DTD attribute declaration from the corresponding
    element declaration.
    """
    c_elem = dtdvalid.xmlGetDtdElementDesc(c_dtd, c_attr.elem)
    if c_elem is NULL:
        # no such element? something is wrong with the DTD ...
        return

    c_cursor = c_elem.attributes
    if c_cursor is NULL:
        # first attribute of this element: it becomes the whole list
        c_elem.attributes = c_attr
        c_attr.nexth = NULL
        return

    # libxml2 keeps namespace declarations first, and we need to make
    # sure we don't re-insert attributes that are already there
    if not _isDtdNsDecl(c_attr):
        # append at end
        while c_cursor != c_attr and c_cursor.nexth is not NULL:
            c_cursor = c_cursor.nexth
    elif not _isDtdNsDecl(c_cursor):
        # list holds no namespace declaration yet: put this one at the head
        c_elem.attributes = c_attr
        c_attr.nexth = c_cursor
        return
    else:
        # advance to the last of the leading namespace declarations
        while (c_cursor != c_attr and c_cursor.nexth is not NULL
               and _isDtdNsDecl(c_cursor.nexth)):
            c_cursor = c_cursor.nexth

    if c_cursor == c_attr:
        # the walk ran into c_attr itself: it is already linked
        return
    # splice c_attr in directly after the cursor
    c_attr.nexth = c_cursor.nexth
    c_cursor.nexth = c_attr
|
||||
|
||||
|
||||
cdef bint _isDtdNsDecl(tree.xmlAttribute* c_attr):
    # True when the attribute's name is exactly "xmlns", or when it has a
    # prefix and that prefix is exactly "xmlns".
    if cstring_h.strcmp(<const_char*>c_attr.name, "xmlns") == 0:
        return True
    if c_attr.prefix is NULL:
        return False
    return cstring_h.strcmp(<const_char*>c_attr.prefix, "xmlns") == 0
|
||||
BIN
.venv/lib/python3.7/site-packages/lxml/etree.cpython-37m-arm-linux-gnueabihf.so
Executable file
BIN
.venv/lib/python3.7/site-packages/lxml/etree.cpython-37m-arm-linux-gnueabihf.so
Executable file
Binary file not shown.
224
.venv/lib/python3.7/site-packages/lxml/etree.h
Normal file
224
.venv/lib/python3.7/site-packages/lxml/etree.h
Normal file
@@ -0,0 +1,224 @@
|
||||
/* Generated by Cython 0.29.36 */
|
||||
|
||||
#ifndef __PYX_HAVE__lxml__etree
|
||||
#define __PYX_HAVE__lxml__etree
|
||||
|
||||
#include "Python.h"
|
||||
struct LxmlDocument;
|
||||
struct LxmlElement;
|
||||
struct LxmlElementTree;
|
||||
struct LxmlElementTagMatcher;
|
||||
struct LxmlElementIterator;
|
||||
struct LxmlElementBase;
|
||||
struct LxmlElementClassLookup;
|
||||
struct LxmlFallbackElementClassLookup;
|
||||
|
||||
/* "lxml/etree.pyx":338
|
||||
*
|
||||
* # type of a function that steps from node to node
|
||||
* ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*) # <<<<<<<<<<<<<<
|
||||
*
|
||||
*
|
||||
*/
|
||||
typedef xmlNode *(*_node_to_node_function)(xmlNode *);
|
||||
|
||||
/* "lxml/etree.pyx":354
|
||||
* @cython.final
|
||||
* @cython.freelist(8)
|
||||
* cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: # <<<<<<<<<<<<<<
|
||||
* u"""Internal base class to reference a libxml document.
|
||||
*
|
||||
*/
|
||||
struct LxmlDocument {
|
||||
PyObject_HEAD
|
||||
struct __pyx_vtabstruct_4lxml_5etree__Document *__pyx_vtab;
|
||||
int _ns_counter;
|
||||
PyObject *_prefix_tail;
|
||||
xmlDoc *_c_doc;
|
||||
struct __pyx_obj_4lxml_5etree__BaseParser *_parser;
|
||||
};
|
||||
|
||||
/* "lxml/etree.pyx":703
|
||||
*
|
||||
* @cython.no_gc_clear
|
||||
* cdef public class _Element [ type LxmlElementType, object LxmlElement ]: # <<<<<<<<<<<<<<
|
||||
* u"""Element class.
|
||||
*
|
||||
*/
|
||||
struct LxmlElement {
|
||||
PyObject_HEAD
|
||||
struct LxmlDocument *_doc;
|
||||
xmlNode *_c_node;
|
||||
PyObject *_tag;
|
||||
};
|
||||
|
||||
/* "lxml/etree.pyx":1872
|
||||
*
|
||||
*
|
||||
* cdef public class _ElementTree [ type LxmlElementTreeType, # <<<<<<<<<<<<<<
|
||||
* object LxmlElementTree ]:
|
||||
* cdef _Document _doc
|
||||
*/
|
||||
struct LxmlElementTree {
|
||||
PyObject_HEAD
|
||||
struct __pyx_vtabstruct_4lxml_5etree__ElementTree *__pyx_vtab;
|
||||
struct LxmlDocument *_doc;
|
||||
struct LxmlElement *_context_node;
|
||||
};
|
||||
|
||||
/* "lxml/etree.pyx":2618
|
||||
*
|
||||
*
|
||||
* cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, # <<<<<<<<<<<<<<
|
||||
* type LxmlElementTagMatcherType ]:
|
||||
* """
|
||||
*/
|
||||
struct LxmlElementTagMatcher {
|
||||
PyObject_HEAD
|
||||
struct __pyx_vtabstruct_4lxml_5etree__ElementTagMatcher *__pyx_vtab;
|
||||
PyObject *_pystrings;
|
||||
int _node_type;
|
||||
char *_href;
|
||||
char *_name;
|
||||
};
|
||||
|
||||
/* "lxml/etree.pyx":2649
|
||||
* self._name = NULL
|
||||
*
|
||||
* cdef public class _ElementIterator(_ElementTagMatcher) [ # <<<<<<<<<<<<<<
|
||||
* object LxmlElementIterator, type LxmlElementIteratorType ]:
|
||||
* """
|
||||
*/
|
||||
struct LxmlElementIterator {
|
||||
struct LxmlElementTagMatcher __pyx_base;
|
||||
struct LxmlElement *_node;
|
||||
_node_to_node_function _next_element;
|
||||
};
|
||||
|
||||
/* "src/lxml/classlookup.pxi":6
|
||||
* # Custom Element classes
|
||||
*
|
||||
* cdef public class ElementBase(_Element) [ type LxmlElementBaseType, # <<<<<<<<<<<<<<
|
||||
* object LxmlElementBase ]:
|
||||
* u"""ElementBase(*children, attrib=None, nsmap=None, **_extra)
|
||||
*/
|
||||
struct LxmlElementBase {
|
||||
struct LxmlElement __pyx_base;
|
||||
};
|
||||
|
||||
/* "src/lxml/classlookup.pxi":210
|
||||
* # Element class lookup
|
||||
*
|
||||
* ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) # <<<<<<<<<<<<<<
|
||||
*
|
||||
* # class to store element class lookup functions
|
||||
*/
|
||||
typedef PyObject *(*_element_class_lookup_function)(PyObject *, struct LxmlDocument *, xmlNode *);
|
||||
|
||||
/* "src/lxml/classlookup.pxi":213
|
||||
*
|
||||
* # class to store element class lookup functions
|
||||
* cdef public class ElementClassLookup [ type LxmlElementClassLookupType, # <<<<<<<<<<<<<<
|
||||
* object LxmlElementClassLookup ]:
|
||||
* u"""ElementClassLookup(self)
|
||||
*/
|
||||
struct LxmlElementClassLookup {
|
||||
PyObject_HEAD
|
||||
_element_class_lookup_function _lookup_function;
|
||||
};
|
||||
|
||||
/* "src/lxml/classlookup.pxi":221
|
||||
*
|
||||
*
|
||||
* cdef public class FallbackElementClassLookup(ElementClassLookup) \ # <<<<<<<<<<<<<<
|
||||
* [ type LxmlFallbackElementClassLookupType,
|
||||
* object LxmlFallbackElementClassLookup ]:
|
||||
*/
|
||||
struct LxmlFallbackElementClassLookup {
|
||||
struct LxmlElementClassLookup __pyx_base;
|
||||
struct __pyx_vtabstruct_4lxml_5etree_FallbackElementClassLookup *__pyx_vtab;
|
||||
struct LxmlElementClassLookup *fallback;
|
||||
_element_class_lookup_function _fallback_function;
|
||||
};
|
||||
|
||||
#ifndef __PYX_HAVE_API__lxml__etree
|
||||
|
||||
#ifndef __PYX_EXTERN_C
|
||||
#ifdef __cplusplus
|
||||
#define __PYX_EXTERN_C extern "C"
|
||||
#else
|
||||
#define __PYX_EXTERN_C extern
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef DL_IMPORT
|
||||
#define DL_IMPORT(_T) _T
|
||||
#endif
|
||||
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlDocumentType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTreeType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTagMatcherType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementIteratorType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementBaseType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementClassLookupType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlFallbackElementClassLookupType;
|
||||
|
||||
__PYX_EXTERN_C struct LxmlElement *deepcopyNodeToDocument(struct LxmlDocument *, xmlNode *);
|
||||
__PYX_EXTERN_C struct LxmlElementTree *elementTreeFactory(struct LxmlElement *);
|
||||
__PYX_EXTERN_C struct LxmlElementTree *newElementTree(struct LxmlElement *, PyObject *);
|
||||
__PYX_EXTERN_C struct LxmlElementTree *adoptExternalDocument(xmlDoc *, PyObject *, int);
|
||||
__PYX_EXTERN_C struct LxmlElement *elementFactory(struct LxmlDocument *, xmlNode *);
|
||||
__PYX_EXTERN_C struct LxmlElement *makeElement(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *);
|
||||
__PYX_EXTERN_C struct LxmlElement *makeSubElement(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *);
|
||||
__PYX_EXTERN_C void setElementClassLookupFunction(_element_class_lookup_function, PyObject *);
|
||||
__PYX_EXTERN_C PyObject *lookupDefaultElementClass(PyObject *, PyObject *, xmlNode *);
|
||||
__PYX_EXTERN_C PyObject *lookupNamespaceElementClass(PyObject *, PyObject *, xmlNode *);
|
||||
__PYX_EXTERN_C PyObject *callLookupFallback(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *);
|
||||
__PYX_EXTERN_C int tagMatches(xmlNode *, const xmlChar *, const xmlChar *);
|
||||
__PYX_EXTERN_C struct LxmlDocument *documentOrRaise(PyObject *);
|
||||
__PYX_EXTERN_C struct LxmlElement *rootNodeOrRaise(PyObject *);
|
||||
__PYX_EXTERN_C int hasText(xmlNode *);
|
||||
__PYX_EXTERN_C int hasTail(xmlNode *);
|
||||
__PYX_EXTERN_C PyObject *textOf(xmlNode *);
|
||||
__PYX_EXTERN_C PyObject *tailOf(xmlNode *);
|
||||
__PYX_EXTERN_C int setNodeText(xmlNode *, PyObject *);
|
||||
__PYX_EXTERN_C int setTailText(xmlNode *, PyObject *);
|
||||
__PYX_EXTERN_C PyObject *attributeValue(xmlNode *, xmlAttr *);
|
||||
__PYX_EXTERN_C PyObject *attributeValueFromNsName(xmlNode *, const xmlChar *, const xmlChar *);
|
||||
__PYX_EXTERN_C PyObject *getAttributeValue(struct LxmlElement *, PyObject *, PyObject *);
|
||||
__PYX_EXTERN_C PyObject *iterattributes(struct LxmlElement *, int);
|
||||
__PYX_EXTERN_C PyObject *collectAttributes(xmlNode *, int);
|
||||
__PYX_EXTERN_C int setAttributeValue(struct LxmlElement *, PyObject *, PyObject *);
|
||||
__PYX_EXTERN_C int delAttribute(struct LxmlElement *, PyObject *);
|
||||
__PYX_EXTERN_C int delAttributeFromNsName(xmlNode *, const xmlChar *, const xmlChar *);
|
||||
__PYX_EXTERN_C int hasChild(xmlNode *);
|
||||
__PYX_EXTERN_C xmlNode *findChild(xmlNode *, Py_ssize_t);
|
||||
__PYX_EXTERN_C xmlNode *findChildForwards(xmlNode *, Py_ssize_t);
|
||||
__PYX_EXTERN_C xmlNode *findChildBackwards(xmlNode *, Py_ssize_t);
|
||||
__PYX_EXTERN_C xmlNode *nextElement(xmlNode *);
|
||||
__PYX_EXTERN_C xmlNode *previousElement(xmlNode *);
|
||||
__PYX_EXTERN_C void appendChild(struct LxmlElement *, struct LxmlElement *);
|
||||
__PYX_EXTERN_C int appendChildToElement(struct LxmlElement *, struct LxmlElement *);
|
||||
__PYX_EXTERN_C PyObject *pyunicode(const xmlChar *);
|
||||
__PYX_EXTERN_C PyObject *utf8(PyObject *);
|
||||
__PYX_EXTERN_C PyObject *getNsTag(PyObject *);
|
||||
__PYX_EXTERN_C PyObject *getNsTagWithEmptyNs(PyObject *);
|
||||
__PYX_EXTERN_C PyObject *namespacedName(xmlNode *);
|
||||
__PYX_EXTERN_C PyObject *namespacedNameFromNsName(const xmlChar *, const xmlChar *);
|
||||
__PYX_EXTERN_C void iteratorStoreNext(struct LxmlElementIterator *, struct LxmlElement *);
|
||||
__PYX_EXTERN_C void initTagMatch(struct LxmlElementTagMatcher *, PyObject *);
|
||||
__PYX_EXTERN_C xmlNs *findOrBuildNodeNsPrefix(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *);
|
||||
|
||||
#endif /* !__PYX_HAVE_API__lxml__etree */
|
||||
|
||||
/* WARNING: the interface of the module init function changed in CPython 3.5. */
|
||||
/* It now returns a PyModuleDef instance instead of a PyModule instance. */
|
||||
|
||||
#if PY_MAJOR_VERSION < 3
|
||||
PyMODINIT_FUNC initetree(void);
|
||||
#else
|
||||
PyMODINIT_FUNC PyInit_etree(void);
|
||||
#endif
|
||||
|
||||
#endif /* !__PYX_HAVE__lxml__etree */
|
||||
3683
.venv/lib/python3.7/site-packages/lxml/etree.pyx
Normal file
3683
.venv/lib/python3.7/site-packages/lxml/etree.pyx
Normal file
File diff suppressed because it is too large
Load Diff
219
.venv/lib/python3.7/site-packages/lxml/etree_api.h
Normal file
219
.venv/lib/python3.7/site-packages/lxml/etree_api.h
Normal file
@@ -0,0 +1,219 @@
|
||||
/* Generated by Cython 0.29.36 */
|
||||
|
||||
#ifndef __PYX_HAVE_API__lxml__etree
|
||||
#define __PYX_HAVE_API__lxml__etree
|
||||
#ifdef __MINGW64__
|
||||
#define MS_WIN64
|
||||
#endif
|
||||
#include "Python.h"
|
||||
#include "etree.h"
|
||||
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument)(struct LxmlDocument *, xmlNode *) = 0;
|
||||
#define deepcopyNodeToDocument __pyx_api_f_4lxml_5etree_deepcopyNodeToDocument
|
||||
static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_elementTreeFactory)(struct LxmlElement *) = 0;
|
||||
#define elementTreeFactory __pyx_api_f_4lxml_5etree_elementTreeFactory
|
||||
static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_newElementTree)(struct LxmlElement *, PyObject *) = 0;
|
||||
#define newElementTree __pyx_api_f_4lxml_5etree_newElementTree
|
||||
static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_adoptExternalDocument)(xmlDoc *, PyObject *, int) = 0;
|
||||
#define adoptExternalDocument __pyx_api_f_4lxml_5etree_adoptExternalDocument
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_elementFactory)(struct LxmlDocument *, xmlNode *) = 0;
|
||||
#define elementFactory __pyx_api_f_4lxml_5etree_elementFactory
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeElement)(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0;
|
||||
#define makeElement __pyx_api_f_4lxml_5etree_makeElement
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeSubElement)(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0;
|
||||
#define makeSubElement __pyx_api_f_4lxml_5etree_makeSubElement
|
||||
static void (*__pyx_api_f_4lxml_5etree_setElementClassLookupFunction)(_element_class_lookup_function, PyObject *) = 0;
|
||||
#define setElementClassLookupFunction __pyx_api_f_4lxml_5etree_setElementClassLookupFunction
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_lookupDefaultElementClass)(PyObject *, PyObject *, xmlNode *) = 0;
|
||||
#define lookupDefaultElementClass __pyx_api_f_4lxml_5etree_lookupDefaultElementClass
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass)(PyObject *, PyObject *, xmlNode *) = 0;
|
||||
#define lookupNamespaceElementClass __pyx_api_f_4lxml_5etree_lookupNamespaceElementClass
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_callLookupFallback)(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *) = 0;
|
||||
#define callLookupFallback __pyx_api_f_4lxml_5etree_callLookupFallback
|
||||
static int (*__pyx_api_f_4lxml_5etree_tagMatches)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
||||
#define tagMatches __pyx_api_f_4lxml_5etree_tagMatches
|
||||
static struct LxmlDocument *(*__pyx_api_f_4lxml_5etree_documentOrRaise)(PyObject *) = 0;
|
||||
#define documentOrRaise __pyx_api_f_4lxml_5etree_documentOrRaise
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_rootNodeOrRaise)(PyObject *) = 0;
|
||||
#define rootNodeOrRaise __pyx_api_f_4lxml_5etree_rootNodeOrRaise
|
||||
static int (*__pyx_api_f_4lxml_5etree_hasText)(xmlNode *) = 0;
|
||||
#define hasText __pyx_api_f_4lxml_5etree_hasText
|
||||
static int (*__pyx_api_f_4lxml_5etree_hasTail)(xmlNode *) = 0;
|
||||
#define hasTail __pyx_api_f_4lxml_5etree_hasTail
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_textOf)(xmlNode *) = 0;
|
||||
#define textOf __pyx_api_f_4lxml_5etree_textOf
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_tailOf)(xmlNode *) = 0;
|
||||
#define tailOf __pyx_api_f_4lxml_5etree_tailOf
|
||||
static int (*__pyx_api_f_4lxml_5etree_setNodeText)(xmlNode *, PyObject *) = 0;
|
||||
#define setNodeText __pyx_api_f_4lxml_5etree_setNodeText
|
||||
static int (*__pyx_api_f_4lxml_5etree_setTailText)(xmlNode *, PyObject *) = 0;
|
||||
#define setTailText __pyx_api_f_4lxml_5etree_setTailText
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValue)(xmlNode *, xmlAttr *) = 0;
|
||||
#define attributeValue __pyx_api_f_4lxml_5etree_attributeValue
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValueFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
||||
#define attributeValueFromNsName __pyx_api_f_4lxml_5etree_attributeValueFromNsName
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_getAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0;
|
||||
#define getAttributeValue __pyx_api_f_4lxml_5etree_getAttributeValue
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_iterattributes)(struct LxmlElement *, int) = 0;
|
||||
#define iterattributes __pyx_api_f_4lxml_5etree_iterattributes
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_collectAttributes)(xmlNode *, int) = 0;
|
||||
#define collectAttributes __pyx_api_f_4lxml_5etree_collectAttributes
|
||||
static int (*__pyx_api_f_4lxml_5etree_setAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0;
|
||||
#define setAttributeValue __pyx_api_f_4lxml_5etree_setAttributeValue
|
||||
static int (*__pyx_api_f_4lxml_5etree_delAttribute)(struct LxmlElement *, PyObject *) = 0;
|
||||
#define delAttribute __pyx_api_f_4lxml_5etree_delAttribute
|
||||
static int (*__pyx_api_f_4lxml_5etree_delAttributeFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
||||
#define delAttributeFromNsName __pyx_api_f_4lxml_5etree_delAttributeFromNsName
|
||||
static int (*__pyx_api_f_4lxml_5etree_hasChild)(xmlNode *) = 0;
|
||||
#define hasChild __pyx_api_f_4lxml_5etree_hasChild
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_findChild)(xmlNode *, Py_ssize_t) = 0;
|
||||
#define findChild __pyx_api_f_4lxml_5etree_findChild
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildForwards)(xmlNode *, Py_ssize_t) = 0;
|
||||
#define findChildForwards __pyx_api_f_4lxml_5etree_findChildForwards
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildBackwards)(xmlNode *, Py_ssize_t) = 0;
|
||||
#define findChildBackwards __pyx_api_f_4lxml_5etree_findChildBackwards
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_nextElement)(xmlNode *) = 0;
|
||||
#define nextElement __pyx_api_f_4lxml_5etree_nextElement
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_previousElement)(xmlNode *) = 0;
|
||||
#define previousElement __pyx_api_f_4lxml_5etree_previousElement
|
||||
static void (*__pyx_api_f_4lxml_5etree_appendChild)(struct LxmlElement *, struct LxmlElement *) = 0;
|
||||
#define appendChild __pyx_api_f_4lxml_5etree_appendChild
|
||||
static int (*__pyx_api_f_4lxml_5etree_appendChildToElement)(struct LxmlElement *, struct LxmlElement *) = 0;
|
||||
#define appendChildToElement __pyx_api_f_4lxml_5etree_appendChildToElement
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_pyunicode)(const xmlChar *) = 0;
|
||||
#define pyunicode __pyx_api_f_4lxml_5etree_pyunicode
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_utf8)(PyObject *) = 0;
|
||||
#define utf8 __pyx_api_f_4lxml_5etree_utf8
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTag)(PyObject *) = 0;
|
||||
#define getNsTag __pyx_api_f_4lxml_5etree_getNsTag
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs)(PyObject *) = 0;
|
||||
#define getNsTagWithEmptyNs __pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedName)(xmlNode *) = 0;
|
||||
#define namespacedName __pyx_api_f_4lxml_5etree_namespacedName
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedNameFromNsName)(const xmlChar *, const xmlChar *) = 0;
|
||||
#define namespacedNameFromNsName __pyx_api_f_4lxml_5etree_namespacedNameFromNsName
|
||||
static void (*__pyx_api_f_4lxml_5etree_iteratorStoreNext)(struct LxmlElementIterator *, struct LxmlElement *) = 0;
|
||||
#define iteratorStoreNext __pyx_api_f_4lxml_5etree_iteratorStoreNext
|
||||
static void (*__pyx_api_f_4lxml_5etree_initTagMatch)(struct LxmlElementTagMatcher *, PyObject *) = 0;
|
||||
#define initTagMatch __pyx_api_f_4lxml_5etree_initTagMatch
|
||||
static xmlNs *(*__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix)(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
||||
#define findOrBuildNodeNsPrefix __pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix
|
||||
#if !defined(__Pyx_PyIdentifier_FromString)
|
||||
#if PY_MAJOR_VERSION < 3
|
||||
#define __Pyx_PyIdentifier_FromString(s) PyString_FromString(s)
|
||||
#else
|
||||
#define __Pyx_PyIdentifier_FromString(s) PyUnicode_FromString(s)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef __PYX_HAVE_RT_ImportFunction_0_29_36
|
||||
#define __PYX_HAVE_RT_ImportFunction_0_29_36
|
||||
static int __Pyx_ImportFunction_0_29_36(PyObject *module, const char *funcname, void (**f)(void), const char *sig) {
|
||||
PyObject *d = 0;
|
||||
PyObject *cobj = 0;
|
||||
union {
|
||||
void (*fp)(void);
|
||||
void *p;
|
||||
} tmp;
|
||||
d = PyObject_GetAttrString(module, (char *)"__pyx_capi__");
|
||||
if (!d)
|
||||
goto bad;
|
||||
cobj = PyDict_GetItemString(d, funcname);
|
||||
if (!cobj) {
|
||||
PyErr_Format(PyExc_ImportError,
|
||||
"%.200s does not export expected C function %.200s",
|
||||
PyModule_GetName(module), funcname);
|
||||
goto bad;
|
||||
}
|
||||
#if PY_VERSION_HEX >= 0x02070000
|
||||
if (!PyCapsule_IsValid(cobj, sig)) {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
|
||||
PyModule_GetName(module), funcname, sig, PyCapsule_GetName(cobj));
|
||||
goto bad;
|
||||
}
|
||||
tmp.p = PyCapsule_GetPointer(cobj, sig);
|
||||
#else
|
||||
{const char *desc, *s1, *s2;
|
||||
desc = (const char *)PyCObject_GetDesc(cobj);
|
||||
if (!desc)
|
||||
goto bad;
|
||||
s1 = desc; s2 = sig;
|
||||
while (*s1 != '\0' && *s1 == *s2) { s1++; s2++; }
|
||||
if (*s1 != *s2) {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
|
||||
PyModule_GetName(module), funcname, sig, desc);
|
||||
goto bad;
|
||||
}
|
||||
tmp.p = PyCObject_AsVoidPtr(cobj);}
|
||||
#endif
|
||||
*f = tmp.fp;
|
||||
if (!(*f))
|
||||
goto bad;
|
||||
Py_DECREF(d);
|
||||
return 0;
|
||||
bad:
|
||||
Py_XDECREF(d);
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
static int import_lxml__etree(void) {
|
||||
PyObject *module = 0;
|
||||
module = PyImport_ImportModule("lxml.etree");
|
||||
if (!module) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "deepcopyNodeToDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "elementTreeFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementTreeFactory, "struct LxmlElementTree *(struct LxmlElement *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "newElementTree", (void (**)(void))&__pyx_api_f_4lxml_5etree_newElementTree, "struct LxmlElementTree *(struct LxmlElement *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "adoptExternalDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_adoptExternalDocument, "struct LxmlElementTree *(xmlDoc *, PyObject *, int)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "elementFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementFactory, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "makeElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeElement, "struct LxmlElement *(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "makeSubElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeSubElement, "struct LxmlElement *(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "setElementClassLookupFunction", (void (**)(void))&__pyx_api_f_4lxml_5etree_setElementClassLookupFunction, "void (_element_class_lookup_function, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "lookupDefaultElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupDefaultElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "lookupNamespaceElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "callLookupFallback", (void (**)(void))&__pyx_api_f_4lxml_5etree_callLookupFallback, "PyObject *(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "tagMatches", (void (**)(void))&__pyx_api_f_4lxml_5etree_tagMatches, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "documentOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_documentOrRaise, "struct LxmlDocument *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "rootNodeOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_rootNodeOrRaise, "struct LxmlElement *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "hasText", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasText, "int (xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "hasTail", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasTail, "int (xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "textOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_textOf, "PyObject *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "tailOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_tailOf, "PyObject *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "setNodeText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setNodeText, "int (xmlNode *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "setTailText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setTailText, "int (xmlNode *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "attributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValue, "PyObject *(xmlNode *, xmlAttr *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "attributeValueFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValueFromNsName, "PyObject *(xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "getAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_getAttributeValue, "PyObject *(struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "iterattributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_iterattributes, "PyObject *(struct LxmlElement *, int)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "collectAttributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_collectAttributes, "PyObject *(xmlNode *, int)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "setAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_setAttributeValue, "int (struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "delAttribute", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttribute, "int (struct LxmlElement *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "delAttributeFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttributeFromNsName, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "hasChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasChild, "int (xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "findChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChild, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "findChildForwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildForwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "findChildBackwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildBackwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "nextElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_nextElement, "xmlNode *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "previousElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_previousElement, "xmlNode *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "appendChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChild, "void (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "appendChildToElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChildToElement, "int (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "pyunicode", (void (**)(void))&__pyx_api_f_4lxml_5etree_pyunicode, "PyObject *(const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "utf8", (void (**)(void))&__pyx_api_f_4lxml_5etree_utf8, "PyObject *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "getNsTag", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTag, "PyObject *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "getNsTagWithEmptyNs", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs, "PyObject *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "namespacedName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedName, "PyObject *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "namespacedNameFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedNameFromNsName, "PyObject *(const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "iteratorStoreNext", (void (**)(void))&__pyx_api_f_4lxml_5etree_iteratorStoreNext, "void (struct LxmlElementIterator *, struct LxmlElement *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "initTagMatch", (void (**)(void))&__pyx_api_f_4lxml_5etree_initTagMatch, "void (struct LxmlElementTagMatcher *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "findOrBuildNodeNsPrefix", (void (**)(void))&__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix, "xmlNs *(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
Py_DECREF(module); module = 0;
|
||||
return 0;
|
||||
bad:
|
||||
Py_XDECREF(module);
|
||||
return -1;
|
||||
}
|
||||
|
||||
#endif /* !__PYX_HAVE_API__lxml__etree */
|
||||
871
.venv/lib/python3.7/site-packages/lxml/extensions.pxi
Normal file
871
.venv/lib/python3.7/site-packages/lxml/extensions.pxi
Normal file
@@ -0,0 +1,871 @@
|
||||
# support for extension functions in XPath and XSLT
|
||||
|
||||
cdef class XPathError(LxmlError):
    """Common base class of all XPath related errors.
    """
|
||||
|
||||
cdef class XPathEvalError(XPathError):
    """Raised when evaluating an XPath expression fails.
    """
|
||||
|
||||
cdef class XPathFunctionError(XPathEvalError):
    """Raised on an internal error while looking up an XPath extension
    function.
    """
|
||||
|
||||
cdef class XPathResultError(XPathEvalError):
    """Raised when the result of an XPath evaluation cannot be handled.
    """
|
||||
|
||||
|
||||
# forward declarations

# C callback type accepted by the register*/unregister* function methods of
# _BaseContext, which invoke it as ``func(ctxt, name_utf, ns_utf)``.
ctypedef int (*_register_function)(void* ctxt, name_utf, ns_uri_utf)

# the class body is defined further down in this file
cdef class _ExsltRegExp
|
||||
|
||||
################################################################################
|
||||
# Base class for XSLT and XPath evaluation contexts: functions, namespaces, ...
|
||||
|
||||
@cython.internal
cdef class _BaseContext:
    u"""State shared by the XPath and XSLT evaluation contexts.

    Holds the namespace prefixes and extension functions that get
    registered with a libxml2 XPath context, and the temporary object
    references that are kept while an evaluation is running.
    """
    cdef xpath.xmlXPathContext* _xpathCtxt  # NULL until _set_xpath_context()
    cdef _Document _doc                     # set by _register_context()
    cdef dict _extensions                   # {(ns_utf, name_utf): function} or None
    cdef list _namespaces                   # [(prefix_utf, ns_uri_utf)] or None
    cdef list _global_namespaces            # prefixes added by the global registrations
    cdef dict _utf_refs                     # {string: UTF-8 bytes}, see _to_utf()
    cdef dict _function_cache               # {ns_utf: {name_utf: function}}
    cdef dict _eval_context_dict            # created on demand by 'eval_context'
    cdef bint _build_smart_strings
    # for exception handling and temporary reference keeping:
    cdef _TempStore _temp_refs
    cdef set _temp_documents
    cdef _ExceptionContext _exc
    cdef _ErrorLog _error_log

    def __cinit__(self):
        self._xpathCtxt = NULL

    def __init__(self, namespaces, extensions, error_log, enable_regexp,
                 build_smart_strings):
        u"""Set up the context.

        ``namespaces`` is None, a dict or an iterable of (prefix, URI)
        pairs.  ``extensions`` is None, a dict or an iterable of dicts
        that map (namespace URI, name) to a function.  If
        ``enable_regexp`` is true, the functions of an ``_ExsltRegExp``
        instance are added as local extension functions.

        Raises ValueError for an extension whose name is None and
        TypeError for an empty namespace prefix or namespace URI.
        """
        cdef _ExsltRegExp _regexp
        cdef dict new_extensions
        cdef list ns
        self._utf_refs = {}
        self._global_namespaces = []
        self._function_cache = {}
        self._eval_context_dict = None
        self._error_log = error_log

        if extensions is not None:
            # a single mapping is handled like a sequence of one mapping
            if isinstance(extensions, dict):
                extensions = (extensions,)
            # [ {(ns, name): function} ] -> {(ns_utf, name_utf): function}
            new_extensions = {}
            for extension in extensions:
                for (ns_uri, name), function in extension.items():
                    if name is None:
                        raise ValueError(
                            u"extensions must have non empty names")
                    ns_utf = self._to_utf(ns_uri)
                    name_utf = self._to_utf(name)
                    new_extensions[(ns_utf, name_utf)] = function
            # an empty mapping is stored as None
            extensions = new_extensions or None

        if namespaces is not None:
            if isinstance(namespaces, dict):
                namespaces = namespaces.items()
            if not namespaces:
                namespaces = None
            else:
                ns = []
                for prefix, ns_uri in namespaces:
                    if not prefix:
                        raise TypeError(
                            u"empty namespace prefix is not supported in XPath")
                    if not ns_uri:
                        raise TypeError(
                            u"setting default namespace is not supported in XPath")
                    prefix_utf = self._to_utf(prefix)
                    ns_uri_utf = self._to_utf(ns_uri)
                    ns.append((prefix_utf, ns_uri_utf))
                namespaces = ns

        self._doc = None
        self._exc = _ExceptionContext()
        self._extensions = extensions
        self._namespaces = namespaces
        self._temp_refs = _TempStore()
        self._temp_documents = set()
        self._build_smart_strings = build_smart_strings

        if enable_regexp:
            _regexp = _ExsltRegExp()
            _regexp._register_in_context(self)

    cdef _BaseContext _copy(self):
        u"""Create a new context of the same class that gets copies of the
        local namespaces and extension functions of this one.
        """
        cdef _BaseContext context
        namespaces = None if self._namespaces is None else self._namespaces[:]
        context = self.__class__(namespaces, None, self._error_log, False,
                                 self._build_smart_strings)
        if self._extensions is not None:
            context._extensions = self._extensions.copy()
        return context

    cdef bytes _to_utf(self, s):
        u"""Convert to UTF-8 and keep a reference to the encoded string.

        Returns None for None.  Encoded strings are remembered in
        ``_utf_refs`` and reused for repeated requests.
        """
        cdef python.PyObject* known
        if s is None:
            return None
        known = python.PyDict_GetItem(self._utf_refs, s)
        if known is not NULL:
            return <bytes>known
        utf = _utf8(s)
        self._utf_refs[s] = utf
        if python.IS_PYPY:
            # use C level refs, PyPy refs are not enough!
            # (released again in _cleanup_context())
            python.Py_INCREF(utf)
        return utf

    cdef void _set_xpath_context(self, xpath.xmlXPathContext* xpathCtxt):
        u"""Attach the libxml2 context and make this object its user data
        and its error receiver.
        """
        self._xpathCtxt = xpathCtxt
        xpathCtxt.userData = <void*>self
        xpathCtxt.error = _receiveXPathError

    @cython.final
    cdef _register_context(self, _Document doc):
        u"Remember the document and clear the stored exception state."
        self._doc = doc
        self._exc.clear()

    @cython.final
    cdef _cleanup_context(self):
        u"Drop the encoded string references and the per-evaluation state."
        #xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt)
        #self.unregisterGlobalNamespaces()
        if python.IS_PYPY:
            # clean up double refs in PyPy (see "_to_utf()" method)
            for ref in self._utf_refs.itervalues():
                python.Py_DECREF(ref)
        self._utf_refs.clear()
        self._eval_context_dict = None
        self._doc = None

    @cython.final
    cdef _release_context(self):
        u"Detach from the libxml2 context, if one is attached."
        if self._xpathCtxt is NULL:
            return
        self._xpathCtxt.userData = NULL
        self._xpathCtxt = NULL

    # namespaces (internal UTF-8 methods with leading '_')

    cdef addNamespace(self, prefix, ns_uri):
        u"""Add or replace a local prefix mapping and register it with the
        libxml2 context if one is attached.

        Raises TypeError if the prefix is None.
        """
        cdef list namespaces
        if prefix is None:
            raise TypeError(u"empty prefix is not supported in XPath")
        prefix_utf = self._to_utf(prefix)
        ns_uri_utf = self._to_utf(ns_uri)
        pending = (prefix_utf, ns_uri_utf)
        if self._namespaces is None:
            self._namespaces = [pending]
        else:
            # rebuild the list, replacing an entry that uses this prefix
            namespaces = []
            for entry in self._namespaces:
                if entry[0] == prefix_utf:
                    entry = pending
                    pending = None
                namespaces.append(entry)
            if pending is not None:
                namespaces.append(pending)
            self._namespaces = namespaces
        if self._xpathCtxt is not NULL:
            xpath.xmlXPathRegisterNs(
                self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))

    cdef registerNamespace(self, prefix, ns_uri):
        u"""Register a prefix with the libxml2 context and remember it in
        the list of global prefixes.

        Raises TypeError if the prefix is None.
        """
        if prefix is None:
            raise TypeError(u"empty prefix is not supported in XPath")
        prefix_utf = self._to_utf(prefix)
        ns_uri_utf = self._to_utf(ns_uri)
        self._global_namespaces.append(prefix_utf)
        xpath.xmlXPathRegisterNs(self._xpathCtxt,
                                 _xcstr(prefix_utf), _xcstr(ns_uri_utf))

    cdef registerLocalNamespaces(self):
        u"Register all local prefix mappings with the libxml2 context."
        if self._namespaces is None:
            return
        for prefix_utf, ns_uri_utf in self._namespaces:
            xpath.xmlXPathRegisterNs(
                self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))

    cdef registerGlobalNamespaces(self):
        u"""Register the prefixes returned by
        ``_find_all_extension_prefixes()`` with the libxml2 context.
        """
        cdef list ns_prefixes = _find_all_extension_prefixes()
        if python.PyList_GET_SIZE(ns_prefixes) <= 0:
            return
        for prefix_utf, ns_uri_utf in ns_prefixes:
            self._global_namespaces.append(prefix_utf)
            xpath.xmlXPathRegisterNs(
                self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))

    cdef unregisterGlobalNamespaces(self):
        u"""Unregister all remembered global prefixes (by registering them
        with a NULL URI) and forget them.
        """
        if python.PyList_GET_SIZE(self._global_namespaces) <= 0:
            return
        for prefix_utf in self._global_namespaces:
            xpath.xmlXPathRegisterNs(self._xpathCtxt,
                                     _xcstr(prefix_utf), NULL)
        del self._global_namespaces[:]

    cdef void _unregisterNamespace(self, prefix_utf):
        u"Unregister a single prefix by registering it with a NULL URI."
        xpath.xmlXPathRegisterNs(self._xpathCtxt,
                                 _xcstr(prefix_utf), NULL)

    # extension functions

    cdef int _addLocalExtensionFunction(self, ns_utf, name_utf, function) except -1:
        u"Store a local extension function under (ns_utf, name_utf)."
        if self._extensions is None:
            self._extensions = {}
        self._extensions[(ns_utf, name_utf)] = function
        return 0

    cdef registerGlobalFunctions(self, void* ctxt,
                                 _register_function reg_func):
        u"""Copy all functions of ``__FUNCTION_NAMESPACE_REGISTRIES`` into
        the function cache and call ``reg_func`` for each of them.
        """
        cdef python.PyObject* cached
        cdef dict ns_cache
        for ns_utf, ns_functions in __FUNCTION_NAMESPACE_REGISTRIES.iteritems():
            cached = python.PyDict_GetItem(self._function_cache, ns_utf)
            if cached is NULL:
                ns_cache = {}
                self._function_cache[ns_utf] = ns_cache
            else:
                ns_cache = <dict>cached
            for name_utf, function in ns_functions.iteritems():
                ns_cache[name_utf] = function
                reg_func(ctxt, name_utf, ns_utf)

    cdef registerLocalFunctions(self, void* ctxt,
                                _register_function reg_func):
        u"""Copy the local extension functions into the function cache and
        call ``reg_func`` for each of them.
        """
        cdef python.PyObject* cached
        cdef dict ns_cache
        if self._extensions is None:
            return # done
        last_ns = None
        ns_cache = None
        for (ns_utf, name_utf), function in self._extensions.iteritems():
            # only look up the per-namespace dict when the namespace changes
            if ns_utf is not last_ns or ns_cache is None:
                last_ns = ns_utf
                cached = python.PyDict_GetItem(self._function_cache, ns_utf)
                if cached is NULL:
                    ns_cache = {}
                    self._function_cache[ns_utf] = ns_cache
                else:
                    ns_cache = <dict>cached
            ns_cache[name_utf] = function
            reg_func(ctxt, name_utf, ns_utf)

    cdef unregisterAllFunctions(self, void* ctxt,
                                _register_function unreg_func):
        u"Call ``unreg_func`` for every function in the function cache."
        for ns_utf, functions in self._function_cache.iteritems():
            for name_utf in functions:
                unreg_func(ctxt, name_utf, ns_utf)

    cdef unregisterGlobalFunctions(self, void* ctxt,
                                   _register_function unreg_func):
        u"""Call ``unreg_func`` for every cached function that is not one
        of the local extension functions.
        """
        for ns_utf, functions in self._function_cache.items():
            for name_utf in functions:
                if self._extensions is None or \
                        (ns_utf, name_utf) not in self._extensions:
                    unreg_func(ctxt, name_utf, ns_utf)

    @cython.final
    cdef _find_cached_function(self, const_xmlChar* c_ns_uri, const_xmlChar* c_name):
        u"""Lookup an extension function in the cache and return it.

        Returns None if the function is not cached.

        Parameters: c_ns_uri may be NULL, c_name must not be NULL
        """
        cdef python.PyObject* c_functions
        cdef python.PyObject* c_function
        c_functions = python.PyDict_GetItem(
            self._function_cache, None if c_ns_uri is NULL else c_ns_uri)
        if c_functions is NULL:
            return None
        c_function = python.PyDict_GetItem(
            <object>c_functions, <unsigned char*>c_name)
        if c_function is NULL:
            return None
        return <object>c_function

    # Python access to the XPath context for extension functions

    @property
    def context_node(self):
        u"""The current context node of the XPath evaluation, as an Element.

        Raises XPathError outside of an evaluation, without a context
        node, for a node of a different document, or if the document
        context is missing.
        """
        cdef xmlNode* c_node
        if self._xpathCtxt is NULL:
            raise XPathError(
                u"XPath context is only usable during the evaluation")
        c_node = self._xpathCtxt.node
        if c_node is NULL:
            raise XPathError(u"no context node")
        if c_node.doc != self._xpathCtxt.doc:
            raise XPathError(
                u"document-external context nodes are not supported")
        if self._doc is None:
            raise XPathError(u"document context is missing")
        return _elementFactory(self._doc, c_node)

    @property
    def eval_context(self):
        u"""A dict that is created on first access and discarded again
        when the context is cleaned up.
        """
        if self._eval_context_dict is None:
            self._eval_context_dict = {}
        return self._eval_context_dict

    # Python reference keeping during XPath function evaluation

    @cython.final
    cdef _release_temp_refs(self):
        u"Free temporarily referenced objects from this context."
        self._temp_refs.clear()
        self._temp_documents.clear()

    @cython.final
    cdef _hold(self, obj):
        u"""A way to temporarily hold references to nodes in the evaluator.

        This is needed because otherwise nodes created in XPath extension
        functions would be reference counted too soon, during the XPath
        evaluation.  This is most important in the case of exceptions.

        Accepts a single Element or a sequence; strings and non-sequences
        are ignored, as are the non-Element items of a sequence.
        """
        if isinstance(obj, _Element):
            self._temp_refs.add(obj)
            self._temp_documents.add((<_Element>obj)._doc)
            return
        if _isString(obj) or not python.PySequence_Check(obj):
            return
        for o in obj:
            if isinstance(o, _Element):
                self._temp_refs.add(o)
                self._temp_documents.add((<_Element>o)._doc)

    @cython.final
    cdef _Document _findDocumentForNode(self, xmlNode* c_node):
        u"""If an XPath expression returns an element from a different
        document than the current context document, we call this to
        see if it was possibly created by an extension and is a known
        document instance.

        Returns the matching document or None.
        """
        cdef _Document doc
        for doc in self._temp_documents:
            if doc is not None and doc._c_doc is c_node.doc:
                return doc
        return None
|
||||
|
||||
|
||||
# libxml2 keeps these error messages in a static array in its code
# and doesn't give us access to them, so they are repeated here.
# _forwardXPathError() indexes the tuple with the error code relative
# to XML_XPATH_EXPRESSION_OK.

cdef tuple LIBXML2_XPATH_ERROR_MESSAGES = (
    b"Ok",
    b"Number encoding",
    b"Unfinished literal",
    b"Start of literal",
    b"Expected $ for variable reference",
    b"Undefined variable",
    b"Invalid predicate",
    b"Invalid expression",
    b"Missing closing curly brace",
    b"Unregistered function",
    b"Invalid operand",
    b"Invalid type",
    b"Invalid number of arguments",
    b"Invalid context size",
    b"Invalid context position",
    b"Memory allocation error",
    b"Syntax error",
    b"Resource error",
    b"Sub resource error",
    b"Undefined namespace prefix",
    b"Encoding error",
    b"Char out of XML range",
    b"Invalid or incomplete context",
    b"Stack usage error",
    b"Forbidden variable\n",
    b"?? Unknown error ??\n",
)
|
||||
|
||||
cdef void _forwardXPathError(void* c_ctxt, xmlerror.xmlError* c_error) with gil:
    u"""Pass a copy of a libxml2 XPath error to the error log of the
    _BaseContext behind ``c_ctxt``.

    If the error has no message, one is looked up in
    LIBXML2_XPATH_ERROR_MESSAGES by error code.
    """
    cdef xmlerror.xmlError error
    cdef int xpath_code

    error.domain = c_error.domain
    error.code = c_error.code
    error.level = c_error.level
    error.line = c_error.line
    error.int2 = c_error.int1 # column
    error.file = c_error.file
    error.node = NULL

    if c_error.message is not NULL:
        error.message = c_error.message
    else:
        xpath_code = c_error.code - xmlerror.XML_XPATH_EXPRESSION_OK
        if xpath_code < 0 or xpath_code >= len(LIBXML2_XPATH_ERROR_MESSAGES):
            error.message = b"unknown error"
        else:
            error.message = _cstr(LIBXML2_XPATH_ERROR_MESSAGES[xpath_code])

    (<_BaseContext>c_ctxt)._error_log._receive(&error)
|
||||
|
||||
cdef void _receiveXPathError(void* c_context, xmlerror.xmlError* error) nogil:
    # Error callback that _BaseContext._set_xpath_context() installs on the
    # XPath context.  Does nothing unless __DEBUG is set; errors without a
    # context go to the generic error forwarder.
    if not __DEBUG:
        return
    if c_context is not NULL:
        _forwardXPathError(c_context, error)
        return
    _forwardError(NULL, error)
|
||||
|
||||
|
||||
def Extension(module, function_mapping=None, *, ns=None):
    u"""Extension(module, function_mapping=None, ns=None)

    Collect attributes of a module or object into a dictionary of
    XPath extension functions, keyed by ``(ns, xpath_name)``.

    ``function_mapping`` selects what is taken:

    * a dict maps attribute names to the XPath function names to use
    * any other iterable lists the attribute names, which also become
      the XPath function names
    * ``None`` (the default) takes every name in ``dir(module)`` that
      does not start with an underscore

    The ``ns`` keyword argument accepts a namespace URI for the XPath
    functions.
    """
    cdef dict functions = {}
    if isinstance(function_mapping, dict):
        name_pairs = function_mapping.items()
    else:
        if function_mapping is None:
            function_mapping = [
                name for name in dir(module) if not name.startswith(u'_')]
        # the attribute name doubles as the XPath function name
        name_pairs = ((name, name) for name in function_mapping)
    for attribute_name, xpath_name in name_pairs:
        functions[(ns, xpath_name)] = getattr(module, attribute_name)
    return functions
|
||||
|
||||
################################################################################
|
||||
# EXSLT regexp implementation
|
||||
|
||||
@cython.final
|
||||
@cython.internal
|
||||
cdef class _ExsltRegExp:
|
||||
cdef dict _compile_map
|
||||
def __cinit__(self):
|
||||
self._compile_map = {}
|
||||
|
||||
cdef _make_string(self, value):
|
||||
if _isString(value):
|
||||
return value
|
||||
elif isinstance(value, list):
|
||||
# node set: take recursive text concatenation of first element
|
||||
if python.PyList_GET_SIZE(value) == 0:
|
||||
return u''
|
||||
firstnode = value[0]
|
||||
if _isString(firstnode):
|
||||
return firstnode
|
||||
elif isinstance(firstnode, _Element):
|
||||
c_text = tree.xmlNodeGetContent((<_Element>firstnode)._c_node)
|
||||
if c_text is NULL:
|
||||
raise MemoryError()
|
||||
try:
|
||||
return funicode(c_text)
|
||||
finally:
|
||||
tree.xmlFree(c_text)
|
||||
else:
|
||||
return unicode(firstnode)
|
||||
else:
|
||||
return unicode(value)
|
||||
|
||||
cdef _compile(self, rexp, ignore_case):
|
||||
cdef python.PyObject* c_result
|
||||
rexp = self._make_string(rexp)
|
||||
key = (rexp, ignore_case)
|
||||
c_result = python.PyDict_GetItem(self._compile_map, key)
|
||||
if c_result is not NULL:
|
||||
return <object>c_result
|
||||
py_flags = re.UNICODE
|
||||
if ignore_case:
|
||||
py_flags = py_flags | re.IGNORECASE
|
||||
rexp_compiled = re.compile(rexp, py_flags)
|
||||
self._compile_map[key] = rexp_compiled
|
||||
return rexp_compiled
|
||||
|
||||
def test(self, ctxt, s, rexp, flags=u''):
|
||||
flags = self._make_string(flags)
|
||||
s = self._make_string(s)
|
||||
rexpc = self._compile(rexp, u'i' in flags)
|
||||
if rexpc.search(s) is None:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
def match(self, ctxt, s, rexp, flags=u''):
|
||||
cdef list result_list
|
||||
flags = self._make_string(flags)
|
||||
s = self._make_string(s)
|
||||
rexpc = self._compile(rexp, u'i' in flags)
|
||||
if u'g' in flags:
|
||||
results = rexpc.findall(s)
|
||||
if not results:
|
||||
return ()
|
||||
else:
|
||||
result = rexpc.search(s)
|
||||
if not result:
|
||||
return ()
|
||||
results = [ result.group() ]
|
||||
results.extend( result.groups(u'') )
|
||||
result_list = []
|
||||
root = Element(u'matches')
|
||||
join_groups = u''.join
|
||||
for s_match in results:
|
||||
if python.PyTuple_CheckExact(s_match):
|
||||
s_match = join_groups(s_match)
|
||||
elem = SubElement(root, u'match')
|
||||
elem.text = s_match
|
||||
result_list.append(elem)
|
||||
return result_list
|
||||
|
||||
def replace(self, ctxt, s, rexp, flags, replacement):
    # Substitute matches of rexp in s with replacement.
    # Flags: 'i' = ignore case, 'g' = replace every match (otherwise only
    # the first one).
    replacement = self._make_string(replacement)
    flags = self._make_string(flags)
    s = self._make_string(s)
    pattern = self._compile(rexp, u'i' in flags)
    # re.sub(): count=0 means "replace all occurrences"
    count = 0 if u'g' in flags else 1
    return pattern.sub(replacement, s, count)
|
||||
|
||||
cdef _register_in_context(self, _BaseContext context):
    # Register test(), match() and replace() as local extension functions
    # of the EXSLT regular expressions namespace in the given context.
    ns = b"http://exslt.org/regular-expressions"
    for name, function in ((b"test", self.test),
                           (b"match", self.match),
                           (b"replace", self.replace)):
        context._addLocalExtensionFunction(ns, name, function)
|
||||
|
||||
|
||||
################################################################################
|
||||
# helper functions
|
||||
|
||||
cdef xpath.xmlXPathObject* _wrapXPathObject(object obj, _Document doc,
                                            _BaseContext context) except NULL:
    # Convert the Python value 'obj' into a new libxml2 XPath object.
    #
    #   unicode / bytes  -> XPath string (unicode is UTF-8 encoded first)
    #   bool             -> XPath boolean
    #   other numbers    -> XPath float
    #   None             -> empty node set
    #   _Element         -> node set containing that element
    #   other sequences  -> node set built from the items (elements, and
    #                       strings if both 'context' and 'doc' are given)
    #
    # Raises XPathResultError for unsupported values and MemoryError if
    # libxml2 fails to create a helper node.
    cdef xpath.xmlNodeSet* resultSet
    cdef _Element fake_node = None
    cdef xmlNode* c_node

    if isinstance(obj, unicode):
        obj = _utf8(obj)
    if isinstance(obj, bytes):
        # libxml2 copies the string value
        return xpath.xmlXPathNewCString(_cstr(obj))
    # bool is tested before the generic number check below, which would
    # otherwise turn it into a float
    if isinstance(obj, bool):
        return xpath.xmlXPathNewBoolean(obj)
    if python.PyNumber_Check(obj):
        return xpath.xmlXPathNewFloat(obj)
    if obj is None:
        resultSet = xpath.xmlXPathNodeSetCreate(NULL)
    elif isinstance(obj, _Element):
        resultSet = xpath.xmlXPathNodeSetCreate((<_Element>obj)._c_node)
    elif python.PySequence_Check(obj):
        resultSet = xpath.xmlXPathNodeSetCreate(NULL)
        try:
            for value in obj:
                if isinstance(value, _Element):
                    if context is not None:
                        context._hold(value)
                    xpath.xmlXPathNodeSetAdd(resultSet, (<_Element>value)._c_node)
                else:
                    if context is None or doc is None:
                        raise XPathResultError, \
                              f"Non-Element values not supported at this point - got {value!r}"
                    # support strings by appending text nodes to an Element
                    if isinstance(value, unicode):
                        value = _utf8(value)
                    if isinstance(value, bytes):
                        if fake_node is None:
                            # first string item: create the shared container
                            # element and register it with the context
                            fake_node = _makeElement("text-root", NULL, doc, None,
                                                     None, None, None, None, None)
                            context._hold(fake_node)
                        else:
                            # append a comment node to keep the text nodes separate
                            c_node = tree.xmlNewDocComment(doc._c_doc, <unsigned char*>"")
                            if c_node is NULL:
                                raise MemoryError()
                            tree.xmlAddChild(fake_node._c_node, c_node)
                        context._hold(value)
                        c_node = tree.xmlNewDocText(doc._c_doc, _xcstr(value))
                        if c_node is NULL:
                            raise MemoryError()
                        tree.xmlAddChild(fake_node._c_node, c_node)
                        xpath.xmlXPathNodeSetAdd(resultSet, c_node)
                    else:
                        raise XPathResultError, \
                              f"This is not a supported node-set result: {value!r}"
        except:
            # the node set was not handed over yet: free it before re-raising
            xpath.xmlXPathFreeNodeSet(resultSet)
            raise
    else:
        raise XPathResultError, f"Unknown return type: {python._fqtypename(obj).decode('utf8')}"
    return xpath.xmlXPathWrapNodeSet(resultSet)
|
||||
|
||||
cdef object _unwrapXPathObject(xpath.xmlXPathObject* xpathObj,
                               _Document doc, _BaseContext context):
    # Convert a libxml2 XPath result object into the matching Python value:
    # node sets and XSLT result tree fragments become lists, booleans and
    # numbers are returned directly, strings become unicode (optionally
    # wrapped as "smart strings").  Raises XPathResultError for undefined
    # or unknown result types and NotImplementedError for the XPointer
    # related ones.
    if xpathObj.type == xpath.XPATH_UNDEFINED:
        raise XPathResultError(u"Undefined xpath result")

    if (xpathObj.type == xpath.XPATH_NODESET or
            xpathObj.type == xpath.XPATH_XSLT_TREE):
        return _createNodeSetResult(xpathObj, doc, context)

    if xpathObj.type == xpath.XPATH_BOOLEAN:
        return xpathObj.boolval

    if xpathObj.type == xpath.XPATH_NUMBER:
        return xpathObj.floatval

    if xpathObj.type == xpath.XPATH_STRING:
        text = funicode(xpathObj.stringval)
        if not context._build_smart_strings:
            return text
        return _elementStringResultFactory(text, None, None, 0)

    if xpathObj.type == xpath.XPATH_POINT:
        raise NotImplementedError(u"XPATH_POINT")
    if xpathObj.type == xpath.XPATH_RANGE:
        raise NotImplementedError(u"XPATH_RANGE")
    if xpathObj.type == xpath.XPATH_LOCATIONSET:
        raise NotImplementedError(u"XPATH_LOCATIONSET")
    if xpathObj.type == xpath.XPATH_USERS:
        raise NotImplementedError(u"XPATH_USERS")

    raise XPathResultError(f"Unknown xpath result {xpathObj.type}")
|
||||
|
||||
cdef object _createNodeSetResult(xpath.xmlXPathObject* xpathObj, _Document doc,
                                 _BaseContext context):
    # Build a Python list from the node set stored in xpathObj.
    # A missing node set (NULL) yields an empty list.
    cdef int index
    cdef list entries = []
    cdef xpath.xmlNodeSet* c_nodes = xpathObj.nodesetval
    # document nodes are only unpacked for XSLT result tree fragments
    cdef bint is_fragment = xpathObj.type == xpath.XPATH_XSLT_TREE
    if c_nodes is not NULL:
        for index in range(c_nodes.nodeNr):
            _unpackNodeSetEntry(entries, c_nodes.nodeTab[index], doc, context,
                                is_fragment)
    return entries
|
||||
|
||||
cdef _unpackNodeSetEntry(list results, xmlNode* c_node, _Document doc,
                         _BaseContext context, bint is_fragment):
    # Append the Python representation of the libxml2 node 'c_node' to
    # 'results':
    #   element nodes            -> element proxy
    #   text / CDATA / attribute -> string result
    #   namespace declaration    -> (prefix, href) tuple
    #   document nodes           -> their children, but only if 'is_fragment'
    #   XInclude start/end       -> ignored
    # Any other node type raises NotImplementedError.
    cdef xmlNode* c_child
    if _isElement(c_node):
        if c_node.doc != doc._c_doc and c_node.doc._private is NULL:
            # XXX: works, but maybe not always the right thing to do?
            # XPath: only runs when extensions create or copy trees
            #        -> we store Python refs to these, so that is OK
            # XSLT: can it leak when merging trees from multiple sources?
            # NOTE(review): the result of xmlDocCopyNode() is not checked
            # for NULL here - confirm that _fakeDocElementFactory() copes.
            c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1)
            # FIXME: call _instantiateElementFromXPath() instead?
        results.append(
            _fakeDocElementFactory(doc, c_node))
    elif c_node.type == tree.XML_TEXT_NODE or \
             c_node.type == tree.XML_CDATA_SECTION_NODE or \
             c_node.type == tree.XML_ATTRIBUTE_NODE:
        results.append(
            _buildElementStringResult(doc, c_node, context))
    elif c_node.type == tree.XML_NAMESPACE_DECL:
        # for this node type, c_node is really an xmlNs struct
        results.append( (funicodeOrNone((<xmlNs*>c_node).prefix),
                         funicodeOrNone((<xmlNs*>c_node).href)) )
    elif c_node.type == tree.XML_DOCUMENT_NODE or \
            c_node.type == tree.XML_HTML_DOCUMENT_NODE:
        # ignored for everything but result tree fragments
        if is_fragment:
            c_child = c_node.children
            while c_child is not NULL:
                # children are unpacked as plain nodes (is_fragment=0)
                _unpackNodeSetEntry(results, c_child, doc, context, 0)
                c_child = c_child.next
    elif c_node.type == tree.XML_XINCLUDE_START or \
            c_node.type == tree.XML_XINCLUDE_END:
        pass
    else:
        raise NotImplementedError, \
            f"Not yet implemented result node type: {c_node.type}"
|
||||
|
||||
cdef void _freeXPathObject(xpath.xmlXPathObject* xpathObj):
    u"""Free the XPath object, but *never* free the *content* of node sets.
    Python dealloc will do that for us.
    """
    if xpathObj.nodesetval is not NULL:
        # release only the node set container, then clear the pointer
        # before the object itself is freed below
        xpath.xmlXPathFreeNodeSet(xpathObj.nodesetval)
        xpathObj.nodesetval = NULL
    xpath.xmlXPathFreeObject(xpathObj)
|
||||
|
||||
cdef _Element _instantiateElementFromXPath(xmlNode* c_node, _Document doc,
                                           _BaseContext context):
    # Return an element proxy for 'c_node'.  If the node belongs to
    # neither 'doc' nor a document with a '_private' pointer, the context
    # is asked for the owning document; if it knows none, the node is
    # deep-copied into 'doc' first.
    # NOTE: this may copy the element - only call this when it can't leak
    if c_node.doc != doc._c_doc and c_node.doc._private is NULL:
        # not from the context document and not from a fake document
        # either => may still be from a known document, e.g. one
        # created by an extension function
        node_doc = context._findDocumentForNode(c_node)
        if node_doc is None:
            # not from a known document at all! => can only make a
            # safety copy here
            c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1)
        else:
            doc = node_doc
    return _fakeDocElementFactory(doc, c_node)
|
||||
|
||||
################################################################################
|
||||
# special str/unicode subclasses
|
||||
|
||||
# Unicode string result that additionally carries the element it was
# found on and flags describing where the string came from.
@cython.final
cdef class _ElementUnicodeResult(unicode):
    # element returned by getparent(); not exposed as an attribute
    cdef _Element _parent
    # read-only from Python; all four are set by _elementStringResultFactory()
    cdef readonly object attrname
    cdef readonly bint is_tail
    cdef readonly bint is_text
    cdef readonly bint is_attribute

    def getparent(self):
        # return the stored parent element (may be None)
        return self._parent
|
||||
|
||||
# Plain Python variant of _ElementUnicodeResult; only defined when
# running on PyPy (python.IS_PYPY).
cdef object _PyElementUnicodeResult
if python.IS_PYPY:
    class _PyElementUnicodeResult(unicode):
        # we need to use a Python class here, or PyPy will crash on creation
        # https://bitbucket.org/pypy/pypy/issues/2021/pypy3-pytype_ready-crashes-for-extension
        def getparent(self):
            # '_parent' is assigned by _elementStringResultFactory()
            return self._parent
|
||||
|
||||
# Byte string result with a getparent() method; used by
# _elementStringResultFactory() for values of exact type bytes.
class _ElementStringResult(bytes):
    # we need to use a Python class here, bytes cannot be C-subclassed
    # in Pyrex/Cython
    def getparent(self):
        # '_parent' is assigned by _elementStringResultFactory()
        return self._parent
|
||||
|
||||
cdef object _elementStringResultFactory(string_value, _Element parent,
                                        attrname, bint is_tail):
    # Wrap string_value in a "smart string" that remembers its parent
    # element and whether it is an attribute value, tail text or text.
    # The wrapper class depends on the value type and on the runtime:
    # exact bytes -> _ElementStringResult, PyPy -> _PyElementUnicodeResult,
    # otherwise the C-level _ElementUnicodeResult.
    cdef _ElementUnicodeResult uresult
    cdef bint is_attribute = attrname is not None
    # without a parent, the value is never reported as element text
    cdef bint is_text = parent is not None and not (is_tail or is_attribute)

    if type(string_value) is bytes:
        result = _ElementStringResult(string_value)
    elif python.IS_PYPY:
        result = _PyElementUnicodeResult(string_value)
    else:
        # C-level class: needs the typed variable for attribute access
        uresult = _ElementUnicodeResult(string_value)
        uresult._parent = parent
        uresult.is_attribute = is_attribute
        uresult.is_tail = is_tail
        uresult.is_text = is_text
        uresult.attrname = attrname
        return uresult

    # Python-level classes: plain attribute assignment
    result._parent = parent
    result.is_attribute = is_attribute
    result.is_tail = is_tail
    result.is_text = is_text
    result.attrname = attrname
    return result
|
||||
|
||||
cdef object _buildElementStringResult(_Document doc, xmlNode* c_node,
                                      _BaseContext context):
    # Build the Python string result for an attribute, text or CDATA node.
    # Returns a plain unicode string if the context does not build smart
    # strings; otherwise returns a smart string that knows its parent
    # element and whether it is attribute, tail or text content.
    # Raises MemoryError if libxml2 cannot provide the attribute content.
    cdef _Element parent = None
    cdef object attrname = None
    cdef xmlNode* c_element
    cdef bint is_tail

    if c_node.type == tree.XML_ATTRIBUTE_NODE:
        attrname = _namespacedName(c_node)
        is_tail = 0
        s = tree.xmlNodeGetContent(c_node)
        if s is NULL:
            # xmlNodeGetContent() may return NULL; do not pass that on to
            # funicode()
            raise MemoryError()
        try:
            value = funicode(s)
        finally:
            # the content string was allocated by libxml2 for us
            tree.xmlFree(s)
        c_element = NULL
    else:
        #assert c_node.type == tree.XML_TEXT_NODE or c_node.type == tree.XML_CDATA_SECTION_NODE, "invalid node type"
        # may be tail text or normal text
        value = funicode(c_node.content)
        c_element = _previousElement(c_node)
        is_tail = c_element is not NULL

    if not context._build_smart_strings:
        return value

    if c_element is NULL:
        # non-tail text or attribute text
        c_element = c_node.parent
        while c_element is not NULL and not _isElement(c_element):
            c_element = c_element.parent

    if c_element is not NULL:
        parent = _instantiateElementFromXPath(c_element, doc, context)

    return _elementStringResultFactory(
        value, parent, attrname, is_tail)
|
||||
|
||||
################################################################################
|
||||
# callbacks for XPath/XSLT extension functions
|
||||
|
||||
cdef void _extension_function_call(_BaseContext context, function,
                                   xpath.xmlXPathParserContext* ctxt, int nargs):
    # Call the Python extension function 'function' with the top 'nargs'
    # values of the XPath evaluation stack as arguments (after 'context')
    # and push its wrapped result back onto the stack.
    # Exceptions never leave this function: they are stored on
    # context._exc and reported to libxml2 as XPATH_EXPR_ERROR.
    cdef _Document doc
    cdef xpath.xmlXPathObject* obj
    cdef list args
    cdef int i
    doc = context._doc
    try:
        args = []
        for i in range(nargs):
            obj = xpath.valuePop(ctxt)
            try:
                o = _unwrapXPathObject(obj, doc, context)
            finally:
                # the popped object is ours: free it even if unwrapping fails
                _freeXPathObject(obj)
            args.append(o)
        # arguments were popped last-to-first
        args.reverse()

        res = function(context, *args)
        # wrap result for XPath consumption
        obj = _wrapXPathObject(res, doc, context)
        # prevent Python from deallocating elements handed to libxml2
        context._hold(res)
        xpath.valuePush(ctxt, obj)
    except:
        xpath.xmlXPathErr(ctxt, xpath.XPATH_EXPR_ERROR)
        context._exc._store_raised()
    finally:
        return  # swallow any further exceptions
|
||||
|
||||
# lookup the function by name and call it
|
||||
|
||||
cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt,
                               int nargs) with gil:
    # Look up the Python function registered for the XPath function that
    # is currently being called (rctxt.functionURI / rctxt.function) and
    # run it through _extension_function_call().  An unknown function or
    # any exception is stored on context._exc and reported to libxml2 as
    # XPATH_UNKNOWN_FUNC_ERROR; nothing propagates to the caller.
    cdef _BaseContext context
    cdef xpath.xmlXPathContext* rctxt = ctxt.context
    # the Python-level context is stored in the userData slot
    context = <_BaseContext> rctxt.userData
    try:
        function = context._find_cached_function(rctxt.functionURI, rctxt.function)
        if function is not None:
            _extension_function_call(context, function, ctxt, nargs)
        else:
            xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR)
            context._exc._store_exception(XPathFunctionError(
                f"XPath function '{_namespacedNameFromNsName(rctxt.functionURI, rctxt.function)}' not found"))
    except:
        # may not be the right error, but we need to tell libxml2 *something*
        xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR)
        context._exc._store_raised()
    finally:
        return  # swallow any further exceptions
|
||||
10
.venv/lib/python3.7/site-packages/lxml/html/ElementSoup.py
Normal file
10
.venv/lib/python3.7/site-packages/lxml/html/ElementSoup.py
Normal file
@@ -0,0 +1,10 @@
|
||||
__doc__ = """Legacy interface to the BeautifulSoup HTML parser.
|
||||
"""
|
||||
|
||||
__all__ = ["parse", "convert_tree"]
|
||||
|
||||
from .soupparser import convert_tree, parse as _parse
|
||||
|
||||
def parse(file, beautifulsoup=None, makeelement=None):
    """Parse *file* with the soupparser and return the root element.

    The arguments are passed through to ``soupparser.parse()``; unlike
    that function, this one returns the result of ``getroot()`` rather
    than the tree object.
    """
    return _parse(
        file, beautifulsoup=beautifulsoup, makeelement=makeelement,
    ).getroot()
|
||||
1946
.venv/lib/python3.7/site-packages/lxml/html/__init__.py
Normal file
1946
.venv/lib/python3.7/site-packages/lxml/html/__init__.py
Normal file
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
88
.venv/lib/python3.7/site-packages/lxml/html/_diffcommand.py
Normal file
88
.venv/lib/python3.7/site-packages/lxml/html/_diffcommand.py
Normal file
@@ -0,0 +1,88 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import optparse
|
||||
import sys
|
||||
import re
|
||||
import os
|
||||
from .diff import htmldiff
|
||||
|
||||
description = """\
|
||||
"""
|
||||
|
||||
parser = optparse.OptionParser(
|
||||
usage="%prog [OPTIONS] FILE1 FILE2\n"
|
||||
"%prog --annotate [OPTIONS] INFO1 FILE1 INFO2 FILE2 ...",
|
||||
description=description,
|
||||
)
|
||||
|
||||
parser.add_option(
|
||||
'-o', '--output',
|
||||
metavar="FILE",
|
||||
dest="output",
|
||||
default="-",
|
||||
help="File to write the difference to",
|
||||
)
|
||||
|
||||
parser.add_option(
|
||||
'-a', '--annotation',
|
||||
action="store_true",
|
||||
dest="annotation",
|
||||
help="Do an annotation")
|
||||
|
||||
def main(args=None):
    """Command line entry point: diff the bodies of two HTML files.

    *args* defaults to ``sys.argv[1:]``.  The result consists of everything
    up to and including the ``<body>`` tag of the second file, the HTML diff
    of both bodies, and the rest of the second file.  It is written to
    standard output (``-o -``, the default) or to the given output file.

    Exits with status 1 if not exactly two files are given.
    """
    if args is None:
        args = sys.argv[1:]
    options, args = parser.parse_args(args)
    if options.annotation:
        return annotate(options, args)
    if len(args) != 2:
        print('Error: you must give two files')
        parser.print_help()
        sys.exit(1)
    file1, file2 = args
    input1 = read_file(file1)
    input2 = read_file(file2)
    body1 = split_body(input1)[1]
    pre, body2, post = split_body(input2)
    result = htmldiff(body1, body2)
    result = pre + result + post
    if options.output == '-':
        if not result.endswith('\n'):
            result += '\n'
        sys.stdout.write(result)
    else:
        # The output file is opened in binary mode, which only accepts
        # bytes on Python 3: encode a text result before writing it.
        if not isinstance(result, bytes):
            result = result.encode('utf-8')
        with open(options.output, 'wb') as f:
            f.write(result)
|
||||
|
||||
def read_file(filename):
    """Return the content of *filename* as text.

    ``'-'`` reads from standard input.  File content is read in binary
    mode and, on Python 3, decoded as UTF-8 (falling back to Latin-1 for
    input that is not valid UTF-8), because the callers process the
    content with text regular expressions.  On Python 2, the byte string
    is returned unchanged.

    Raises OSError if the file does not exist.
    """
    if filename == '-':
        return sys.stdin.read()
    if not os.path.exists(filename):
        raise OSError(
            "Input file %s does not exist" % filename)
    with open(filename, 'rb') as f:
        content = f.read()
    if not isinstance(content, str):
        # Python 3: bytes cannot be searched with str patterns or be
        # concatenated with str, so convert to text here.
        try:
            content = content.decode('utf-8')
        except UnicodeDecodeError:
            # Latin-1 maps every byte, so this never fails
            content = content.decode('latin-1')
    return content
|
||||
|
||||
body_start_re = re.compile(
|
||||
r"<body.*?>", re.I|re.S)
|
||||
body_end_re = re.compile(
|
||||
r"</body.*?>", re.I|re.S)
|
||||
|
||||
def split_body(html):
    """Split *html* into ``(pre, body, post)``.

    ``pre`` is everything up to and including the opening ``<body ...>``
    tag, ``post`` is everything from the closing ``</body>`` tag on, and
    ``body`` is what lies in between.  A part whose tag is not found is
    returned as an empty string.
    """
    pre = post = ''
    opening = body_start_re.search(html)
    if opening:
        cut = opening.end()
        pre, html = html[:cut], html[cut:]
    # the closing tag is searched in what is left after the opening tag
    closing = body_end_re.search(html)
    if closing:
        cut = closing.start()
        html, post = html[:cut], html[cut:]
    return pre, html, post
|
||||
|
||||
def annotate(options, args):
    """Placeholder for the annotation mode: report that and exit with 1."""
    print("Not yet implemented")
    raise SystemExit(1)
|
||||
|
||||
100
.venv/lib/python3.7/site-packages/lxml/html/_html5builder.py
Normal file
100
.venv/lib/python3.7/site-packages/lxml/html/_html5builder.py
Normal file
@@ -0,0 +1,100 @@
|
||||
"""
|
||||
Legacy module - don't use in new code!
|
||||
|
||||
html5lib now has its own proper implementation.
|
||||
|
||||
This module implements a tree builder for html5lib that generates lxml
|
||||
html element trees. This module uses camelCase as it follows the
|
||||
html5lib style guide.
|
||||
"""
|
||||
|
||||
from html5lib.treebuilders import _base, etree as etree_builders
|
||||
from lxml import html, etree
|
||||
|
||||
|
||||
class DocumentType(object):
    """Plain record of a doctype: its name, public ID and system ID."""

    def __init__(self, name, publicId, systemId):
        self.name, self.publicId, self.systemId = name, publicId, systemId
|
||||
|
||||
class Document(object):
    """Document wrapper holding an element tree and a list of child nodes."""

    def __init__(self):
        self.childNodes = []
        # assigned from outside once a tree exists
        self._elementTree = None

    def appendChild(self, element):
        """Insert the wrapped element as next sibling of the tree's root."""
        root = self._elementTree.getroot()
        root.addnext(element._element)
|
||||
|
||||
|
||||
class TreeBuilder(_base.TreeBuilder):
    """html5lib tree builder that creates lxml.html element trees."""

    documentClass = Document
    doctypeClass = DocumentType
    elementClass = None
    commentClass = None
    fragmentClass = Document

    def __init__(self, *args, **kwargs):
        # element and comment wrappers come from html5lib's etree builder,
        # instantiated for lxml.html and lxml.etree respectively
        self.elementClass = etree_builders.getETreeModule(
            html, fullTree=False).Element
        self.commentClass = etree_builders.getETreeModule(
            etree, fullTree=False).Comment
        _base.TreeBuilder.__init__(self, *args, **kwargs)

    def reset(self):
        """Reset the base builder and the state kept by this class."""
        _base.TreeBuilder.reset(self)
        self.rootInserted = False
        self.initialComments = []
        self.doctype = None

    def getDocument(self):
        """Return the element tree of the current document."""
        return self.document._elementTree

    def getFragment(self):
        """Return text, children and tail of the first open element as a list."""
        element = self.openElements[0]._element
        fragment = [element.text] if element.text else []
        fragment.extend(element.getchildren())
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, name, publicId, systemId):
        """Remember the doctype; it is serialised later by insertRoot()."""
        self.doctype = self.doctypeClass(name, publicId, systemId)

    def insertComment(self, data, parent=None):
        """Insert a comment, or queue it if the root does not exist yet."""
        if self.rootInserted:
            _base.TreeBuilder.insertComment(self, data, parent)
        else:
            self.initialComments.append(data)

    def insertRoot(self, name):
        """Create the document with an <html> root element.

        The stored doctype (if it has a name) is put in front of the root,
        and the comments collected before the root are added as its
        preceding siblings.
        """
        parts = []
        doctype = self.doctype
        if doctype and doctype.name:
            parts.append('<!DOCTYPE %s' % doctype.name)
            # NOTE(review): if only one of the two IDs is set, the other
            # one is formatted as the literal text "None".
            if doctype.publicId is not None or doctype.systemId is not None:
                parts.append(' PUBLIC "%s" "%s"' % (doctype.publicId,
                                                    doctype.systemId))
            parts.append('>')
        parts.append('<html></html>')
        root = html.fromstring(''.join(parts))

        # Append the initial comments:
        for text in self.initialComments:
            root.addprevious(etree.Comment(text))

        # Create the root document and add the ElementTree to it
        document = self.documentClass()
        document._elementTree = root.getroottree()
        self.document = document

        # Add the root element to the internal child/open data structures
        root_element = self.elementClass(name)
        root_element._element = root
        document.childNodes.append(root_element)
        self.openElements.append(root_element)

        self.rootInserted = True
|
||||
56
.venv/lib/python3.7/site-packages/lxml/html/_setmixin.py
Normal file
56
.venv/lib/python3.7/site-packages/lxml/html/_setmixin.py
Normal file
@@ -0,0 +1,56 @@
|
||||
try:
|
||||
from collections.abc import MutableSet
|
||||
except ImportError:
|
||||
from collections import MutableSet
|
||||
|
||||
|
||||
class SetMixin(MutableSet):
    """
    Mix-in that provides the set API on top of three methods that the
    subclass must define: __iter__, add and remove.
    """

    def __len__(self):
        # no stored size: count the items by iterating
        return sum(1 for _ in self)

    def __contains__(self, item):
        # linear search using equality
        return any(item == member for member in self)

    # named variants of the operators provided by MutableSet
    issubset = MutableSet.__le__
    issuperset = MutableSet.__ge__

    union = MutableSet.__or__
    intersection = MutableSet.__and__
    difference = MutableSet.__sub__
    symmetric_difference = MutableSet.__xor__

    def copy(self):
        """Return the items as a new builtin set."""
        return set(self)

    def update(self, other):
        """Add all items of *other* (in-place ``|=``)."""
        self |= other

    def intersection_update(self, other):
        """Keep only the items also found in *other* (in-place ``&=``)."""
        self &= other

    def difference_update(self, other):
        """Remove the items found in *other* (in-place ``-=``)."""
        self -= other

    def symmetric_difference_update(self, other):
        """Keep the items found in exactly one of both (in-place ``^=``)."""
        self ^= other

    def discard(self, item):
        """Remove *item*, ignoring the KeyError raised if it is missing."""
        try:
            self.remove(item)
        except KeyError:
            return

    @classmethod
    def _from_iterable(cls, it):
        # results of the binary set operations are builtin sets
        return set(it)
|
||||
133
.venv/lib/python3.7/site-packages/lxml/html/builder.py
Normal file
133
.venv/lib/python3.7/site-packages/lxml/html/builder.py
Normal file
@@ -0,0 +1,133 @@
|
||||
# --------------------------------------------------------------------
|
||||
# The ElementTree toolkit is
|
||||
# Copyright (c) 1999-2004 by Fredrik Lundh
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
"""
|
||||
A set of HTML generator tags for building HTML documents.
|
||||
|
||||
Usage::
|
||||
|
||||
>>> from lxml.html.builder import *
|
||||
>>> html = HTML(
|
||||
... HEAD( TITLE("Hello World") ),
|
||||
... BODY( CLASS("main"),
|
||||
... H1("Hello World !")
|
||||
... )
|
||||
... )
|
||||
|
||||
>>> import lxml.etree
|
||||
>>> print(lxml.etree.tostring(html, pretty_print=True, encoding="unicode"))
|
||||
<html>
|
||||
<head>
|
||||
<title>Hello World</title>
|
||||
</head>
|
||||
<body class="main">
|
||||
<h1>Hello World !</h1>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
"""
|
||||
|
||||
from lxml.builder import ElementMaker
|
||||
from lxml.html import html_parser
|
||||
|
||||
E = ElementMaker(makeelement=html_parser.makeelement)
|
||||
|
||||
# elements
|
||||
A = E.a #: anchor
|
||||
ABBR = E.abbr #: abbreviated form (e.g., WWW, HTTP, etc.)
|
||||
ACRONYM = E.acronym #:
|
||||
ADDRESS = E.address #: information on author
|
||||
APPLET = E.applet #: Java applet (DEPRECATED)
|
||||
AREA = E.area #: client-side image map area
|
||||
B = E.b #: bold text style
|
||||
BASE = E.base #: document base URI
|
||||
BASEFONT = E.basefont #: base font size (DEPRECATED)
|
||||
BDO = E.bdo #: I18N BiDi over-ride
|
||||
BIG = E.big #: large text style
|
||||
BLOCKQUOTE = E.blockquote #: long quotation
|
||||
BODY = E.body #: document body
|
||||
BR = E.br #: forced line break
|
||||
BUTTON = E.button #: push button
|
||||
CAPTION = E.caption #: table caption
|
||||
CENTER = E.center #: shorthand for DIV align=center (DEPRECATED)
|
||||
CITE = E.cite #: citation
|
||||
CODE = E.code #: computer code fragment
|
||||
COL = E.col #: table column
|
||||
COLGROUP = E.colgroup #: table column group
|
||||
DD = E.dd #: definition description
|
||||
DEL = getattr(E, 'del') #: deleted text
|
||||
DFN = E.dfn #: instance definition
|
||||
DIR = E.dir #: directory list (DEPRECATED)
|
||||
DIV = E.div #: generic language/style container
|
||||
DL = E.dl #: definition list
|
||||
DT = E.dt #: definition term
|
||||
EM = E.em #: emphasis
|
||||
FIELDSET = E.fieldset #: form control group
|
||||
FONT = E.font #: local change to font (DEPRECATED)
|
||||
FORM = E.form #: interactive form
|
||||
FRAME = E.frame #: subwindow
|
||||
FRAMESET = E.frameset #: window subdivision
|
||||
H1 = E.h1 #: heading
|
||||
H2 = E.h2 #: heading
|
||||
H3 = E.h3 #: heading
|
||||
H4 = E.h4 #: heading
|
||||
H5 = E.h5 #: heading
|
||||
H6 = E.h6 #: heading
|
||||
HEAD = E.head #: document head
|
||||
HR = E.hr #: horizontal rule
|
||||
HTML = E.html #: document root element
|
||||
I = E.i #: italic text style
|
||||
IFRAME = E.iframe #: inline subwindow
|
||||
IMG = E.img #: Embedded image
|
||||
INPUT = E.input #: form control
|
||||
INS = E.ins #: inserted text
|
||||
ISINDEX = E.isindex #: single line prompt (DEPRECATED)
|
||||
KBD = E.kbd #: text to be entered by the user
|
||||
LABEL = E.label #: form field label text
|
||||
LEGEND = E.legend #: fieldset legend
|
||||
LI = E.li #: list item
|
||||
LINK = E.link #: a media-independent link
|
||||
MAP = E.map #: client-side image map
|
||||
MENU = E.menu #: menu list (DEPRECATED)
|
||||
META = E.meta #: generic metainformation
|
||||
NOFRAMES = E.noframes #: alternate content container for non frame-based rendering
|
||||
NOSCRIPT = E.noscript #: alternate content container for non script-based rendering
|
||||
OBJECT = E.object #: generic embedded object
|
||||
OL = E.ol #: ordered list
|
||||
OPTGROUP = E.optgroup #: option group
|
||||
OPTION = E.option #: selectable choice
|
||||
P = E.p #: paragraph
|
||||
PARAM = E.param #: named property value
|
||||
PRE = E.pre #: preformatted text
|
||||
Q = E.q #: short inline quotation
|
||||
S = E.s #: strike-through text style (DEPRECATED)
|
||||
SAMP = E.samp #: sample program output, scripts, etc.
|
||||
SCRIPT = E.script #: script statements
|
||||
SELECT = E.select #: option selector
|
||||
SMALL = E.small #: small text style
|
||||
SPAN = E.span #: generic language/style container
|
||||
STRIKE = E.strike #: strike-through text (DEPRECATED)
|
||||
STRONG = E.strong #: strong emphasis
|
||||
STYLE = E.style #: style info
|
||||
SUB = E.sub #: subscript
|
||||
SUP = E.sup #: superscript
|
||||
TABLE = E.table #:
|
||||
TBODY = E.tbody #: table body
|
||||
TD = E.td #: table data cell
|
||||
TEXTAREA = E.textarea #: multi-line text field
|
||||
TFOOT = E.tfoot #: table footer
|
||||
TH = E.th #: table header cell
|
||||
THEAD = E.thead #: table header
|
||||
TITLE = E.title #: document title
|
||||
TR = E.tr #: table row
|
||||
TT = E.tt #: teletype or monospaced text style
|
||||
U = E.u #: underlined text style (DEPRECATED)
|
||||
UL = E.ul #: unordered list
|
||||
VAR = E.var #: instance of a variable or program argument
|
||||
|
||||
# attributes (only reserved words are included here)
|
||||
ATTR = dict
|
||||
def CLASS(v):
    """Return the attribute dict for a ``class`` attribute with value *v*."""
    return {'class': v}
|
||||
def FOR(v):
    """Return the attribute dict for a ``for`` attribute with value *v*."""
    return {'for': v}
|
||||
Binary file not shown.
785
.venv/lib/python3.7/site-packages/lxml/html/clean.py
Normal file
785
.venv/lib/python3.7/site-packages/lxml/html/clean.py
Normal file
@@ -0,0 +1,785 @@
|
||||
# cython: language_level=3str
|
||||
|
||||
"""A cleanup tool for HTML.
|
||||
|
||||
Removes unwanted tags and content. See the `Cleaner` class for
|
||||
details.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import copy
|
||||
import re
|
||||
import sys
|
||||
try:
|
||||
from urlparse import urlsplit
|
||||
from urllib import unquote_plus
|
||||
except ImportError:
|
||||
# Python 3
|
||||
from urllib.parse import urlsplit, unquote_plus
|
||||
from lxml import etree
|
||||
from lxml.html import defs
|
||||
from lxml.html import fromstring, XHTML_NAMESPACE
|
||||
from lxml.html import xhtml_to_html, _transform_result
|
||||
|
||||
try:
|
||||
unichr
|
||||
except NameError:
|
||||
# Python 3
|
||||
unichr = chr
|
||||
try:
|
||||
unicode
|
||||
except NameError:
|
||||
# Python 3
|
||||
unicode = str
|
||||
try:
|
||||
basestring
|
||||
except NameError:
|
||||
basestring = (str, bytes)
|
||||
|
||||
|
||||
__all__ = ['clean_html', 'clean', 'Cleaner', 'autolink', 'autolink_html',
|
||||
'word_break', 'word_break_html']
|
||||
|
||||
# Look at http://code.sixapart.com/trac/livejournal/browser/trunk/cgi-bin/cleanhtml.pl
|
||||
# Particularly the CSS cleaning; most of the tag cleaning is integrated now
|
||||
# I have multiple kinds of schemes searched; but should schemes be
|
||||
# whitelisted instead?
|
||||
# max height?
|
||||
# remove images? Also in CSS? background attribute?
|
||||
# Some way to whitelist object, iframe, etc (e.g., if you want to
|
||||
# allow *just* embedded YouTube movies)
|
||||
# Log what was deleted and why?
|
||||
# style="behavior: ..." might be bad in IE?
|
||||
# Should we have something for just <meta http-equiv>? That's the worst of the
|
||||
# metas.
|
||||
# UTF-7 detections? Example:
|
||||
# <HEAD><META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=UTF-7"> </HEAD>+ADw-SCRIPT+AD4-alert('XSS');+ADw-/SCRIPT+AD4-
|
||||
# you don't always have to have the charset set, if the page has no charset
|
||||
# and there's UTF7-like code in it.
|
||||
# Look at these tests: http://htmlpurifier.org/live/smoketests/xssAttacks.php
|
||||
|
||||
|
||||
# This is an IE-specific construct you can have in a stylesheet to
|
||||
# run some Javascript:
|
||||
_replace_css_javascript = re.compile(
|
||||
r'expression\s*\(.*?\)', re.S|re.I).sub
|
||||
|
||||
# Do I have to worry about @\nimport?
|
||||
_replace_css_import = re.compile(
|
||||
r'@\s*import', re.I).sub
|
||||
|
||||
_looks_like_tag_content = re.compile(
|
||||
r'</?[a-zA-Z]+|\son[a-zA-Z]+\s*=',
|
||||
*((re.ASCII,) if sys.version_info[0] >= 3 else ())).search
|
||||
|
||||
# All kinds of schemes besides just javascript: that can cause
|
||||
# execution:
|
||||
_find_image_dataurls = re.compile(
|
||||
r'data:image/(.+);base64,', re.I).findall
|
||||
_possibly_malicious_schemes = re.compile(
|
||||
r'(javascript|jscript|livescript|vbscript|data|about|mocha):',
|
||||
re.I).findall
|
||||
# SVG images can contain script content
|
||||
_is_unsafe_image_type = re.compile(r"(xml|svg)", re.I).search
|
||||
|
||||
def _has_javascript_scheme(s):
    """Tell whether *s* contains a possibly malicious URL scheme.

    Base64 ``data:image/...`` URLs are tolerated unless their image type
    looks like XML or SVG: each tolerated data URL offsets one match of the
    possibly malicious schemes (which include ``data:``).
    """
    image_types = _find_image_dataurls(s)
    if any(_is_unsafe_image_type(image_type) for image_type in image_types):
        return True
    # all image data URLs found are considered safe at this point
    return len(_possibly_malicious_schemes(s)) > len(image_types)
|
||||
|
||||
_substitute_whitespace = re.compile(r'[\s\x00-\x08\x0B\x0C\x0E-\x19]+').sub
|
||||
|
||||
# FIXME: check against: http://msdn2.microsoft.com/en-us/library/ms537512.aspx
|
||||
_conditional_comment_re = re.compile(
|
||||
r'\[if[\s\n\r]+.*?][\s\n\r]*>', re.I|re.S)
|
||||
|
||||
_find_styled_elements = etree.XPath(
|
||||
"descendant-or-self::*[@style]")
|
||||
|
||||
_find_external_links = etree.XPath(
|
||||
("descendant-or-self::a [normalize-space(@href) and substring(normalize-space(@href),1,1) != '#'] |"
|
||||
"descendant-or-self::x:a[normalize-space(@href) and substring(normalize-space(@href),1,1) != '#']"),
|
||||
namespaces={'x':XHTML_NAMESPACE})
|
||||
|
||||
|
||||
class Cleaner(object):
    """
    Instances clean the document of each of the possible offending
    elements.  The cleaning is controlled by attributes; you can
    override attributes in a subclass, or set them in the constructor.

    ``scripts``:
        Removes any ``<script>`` tags.

    ``javascript``:
        Removes any Javascript, like an ``onclick`` attribute. Also removes stylesheets
        as they could contain Javascript.

    ``comments``:
        Removes any comments.

    ``style``:
        Removes any style tags.

    ``inline_style``:
        Removes any style attributes.  Defaults to the value of the ``style`` option.

    ``links``:
        Removes any ``<link>`` tags

    ``meta``:
        Removes any ``<meta>`` tags

    ``page_structure``:
        Structural parts of a page: ``<head>``, ``<html>``, ``<title>``.

    ``processing_instructions``:
        Removes any processing instructions.

    ``embedded``:
        Removes any embedded objects (flash, iframes)

    ``frames``:
        Removes any frame-related tags

    ``forms``:
        Removes any form tags

    ``annoying_tags``:
        Tags that aren't *wrong*, but are annoying.  ``<blink>`` and ``<marquee>``

    ``remove_tags``:
        A list of tags to remove.  Only the tags will be removed,
        their content will get pulled up into the parent tag.

    ``kill_tags``:
        A list of tags to kill.  Killing also removes the tag's content,
        i.e. the whole subtree, not just the tag itself.

    ``allow_tags``:
        A list of tags to include (default include all).

    ``remove_unknown_tags``:
        Remove any tags that aren't standard parts of HTML.

    ``safe_attrs_only``:
        If true, only include 'safe' attributes (specifically the list
        from the feedparser HTML sanitisation web site).

    ``safe_attrs``:
        A set of attribute names to override the default list of attributes
        considered 'safe' (when safe_attrs_only=True).

    ``add_nofollow``:
        If true, then any <a> tags will have ``rel="nofollow"`` added to them.

    ``host_whitelist``:
        A list or set of hosts that you can use for embedded content
        (for content like ``<object>``, ``<link rel="stylesheet">``, etc).
        You can also implement/override the method
        ``allow_embedded_url(el, url)`` or ``allow_element(el)`` to
        implement more complex rules for what can be embedded.
        Anything that passes this test will be shown, regardless of
        the value of (for instance) ``embedded``.

        Note that this parameter might not work as intended if you do not
        make the links absolute before doing the cleaning.

        Note that you may also need to set ``whitelist_tags``.

    ``whitelist_tags``:
        A set of tags that can be included with ``host_whitelist``.
        The default is ``iframe`` and ``embed``; you may wish to
        include other tags like ``script``, or you may want to
        implement ``allow_embedded_url`` for more control.  Set to None to
        include all tags.

    This modifies the document *in place*.
    """

    # Class-level defaults; __init__ overrides them per instance from
    # keyword arguments.
    scripts = True
    javascript = True
    comments = True
    style = False
    inline_style = None
    links = True
    meta = True
    page_structure = True
    processing_instructions = True
    embedded = True
    frames = True
    forms = True
    annoying_tags = True
    remove_tags = None
    allow_tags = None
    kill_tags = None
    remove_unknown_tags = True
    safe_attrs_only = True
    safe_attrs = defs.safe_attrs
    add_nofollow = False
    host_whitelist = ()
    whitelist_tags = {'iframe', 'embed'}

    def __init__(self, **kw):
        """
        Override the class-level option defaults with keyword arguments.

        :raises TypeError: if a keyword does not name an existing option,
            i.e. an attribute whose default is None, True, False or a
            set/frozenset/tuple/list.
        :raises ValueError: if both ``allow_tags`` and
            ``remove_unknown_tags`` are passed with true values.
        """
        # Sentinel distinguishing "no such attribute" from a None default.
        not_an_attribute = object()
        for name, value in kw.items():
            default = getattr(self, name, not_an_attribute)
            if (default is not None and default is not True and default is not False
                    and not isinstance(default, (frozenset, set, tuple, list))):
                raise TypeError(
                    "Unknown parameter: %s=%r" % (name, value))
            setattr(self, name, value)
        # inline_style follows style unless it was given explicitly.
        if self.inline_style is None and 'inline_style' not in kw:
            self.inline_style = self.style

        if kw.get("allow_tags"):
            if kw.get("remove_unknown_tags"):
                raise ValueError("It does not make sense to pass in both "
                                 "allow_tags and remove_unknown_tags")
            # An explicit allow list replaces the "standard tags" filter.
            self.remove_unknown_tags = False

    # Used to lookup the primary URL for a given tag that is up for
    # removal:
    _tag_link_attrs = dict(
        script='src',
        link='href',
        # From: http://java.sun.com/j2se/1.4.2/docs/guide/misc/applet.html
        # From what I can tell, both attributes can contain a link:
        applet=['code', 'object'],
        iframe='src',
        embed='src',
        layer='src',
        # FIXME: there doesn't really seem like a general way to figure out what
        # links an <object> tag uses; links often go in <param> tags with values
        # that we don't really know.  You'd have to have knowledge about specific
        # kinds of plugins (probably keyed off classid), and match against those.
        ##object=?,
        # FIXME: not looking at the action currently, because it is more complex
        # than that -- if you keep the form, you should keep the form controls.
        ##form='action',
        a='href',
        )

    def __call__(self, doc):
        """
        Cleans the document.

        ``doc`` may be an element or an object with ``getroot()`` (an
        ElementTree); it is modified in place and nothing is returned.

        :raises ValueError: if ``remove_unknown_tags`` is true while
            ``allow_tags`` is non-empty.
        """
        try:
            getroot = doc.getroot
        except AttributeError:
            pass  # Element instance
        else:
            doc = getroot()  # ElementTree instance, instead of an element
        # convert XHTML to HTML
        xhtml_to_html(doc)
        # Normalize a case that IE treats <image> like <img>, and that
        # can confuse either this step or later steps.
        for el in doc.iter('image'):
            el.tag = 'img'
        if not self.comments:
            # Of course, if we were going to kill comments anyway, we don't
            # need to worry about this
            self.kill_conditional_comments(doc)

        # Work on copies so the configured options are never mutated.
        kill_tags = set(self.kill_tags or ())
        remove_tags = set(self.remove_tags or ())
        allow_tags = set(self.allow_tags or ())

        if self.scripts:
            kill_tags.add('script')
        if self.safe_attrs_only:
            safe_attrs = set(self.safe_attrs)
            for el in doc.iter(etree.Element):
                attrib = el.attrib
                for aname in attrib.keys():
                    if aname not in safe_attrs:
                        del attrib[aname]
        if self.javascript:
            if not (self.safe_attrs_only and
                    self.safe_attrs == defs.safe_attrs):
                # safe_attrs handles events attributes itself
                for el in doc.iter(etree.Element):
                    attrib = el.attrib
                    for aname in attrib.keys():
                        if aname.startswith('on'):
                            del attrib[aname]
            doc.rewrite_links(self._remove_javascript_link,
                              resolve_base_href=False)
            # If we're deleting style then we don't have to remove JS links
            # from styles, otherwise...
            if not self.inline_style:
                for el in _find_styled_elements(doc):
                    old = el.get('style')
                    new = _replace_css_javascript('', old)
                    new = _replace_css_import('', new)
                    if self._has_sneaky_javascript(new):
                        # Something tricky is going on...
                        del el.attrib['style']
                    elif new != old:
                        el.set('style', new)
            if not self.style:
                for el in list(doc.iter('style')):
                    if el.get('type', '').lower().strip() == 'text/javascript':
                        el.drop_tree()
                        continue
                    old = el.text or ''
                    new = _replace_css_javascript('', old)
                    # The imported CSS can do anything; we just can't allow:
                    new = _replace_css_import('', new)
                    if self._has_sneaky_javascript(new):
                        # Something tricky is going on...
                        el.text = '/* deleted */'
                    elif new != old:
                        el.text = new
        if self.comments:
            kill_tags.add(etree.Comment)
        if self.processing_instructions:
            kill_tags.add(etree.ProcessingInstruction)
        if self.style:
            kill_tags.add('style')
        if self.inline_style:
            etree.strip_attributes(doc, 'style')
        if self.links:
            kill_tags.add('link')
        elif self.style or self.javascript:
            # We must get rid of included stylesheets if Javascript is not
            # allowed, as you can put Javascript in them
            for el in list(doc.iter('link')):
                if 'stylesheet' in el.get('rel', '').lower():
                    # Note this kills alternate stylesheets as well
                    if not self.allow_element(el):
                        el.drop_tree()
        if self.meta:
            kill_tags.add('meta')
        if self.page_structure:
            remove_tags.update(('head', 'html', 'title'))
        if self.embedded:
            # FIXME: is <layer> really embedded?
            # We should get rid of any <param> tags not inside <applet>
            # or <object>; these are not really valid anyway.
            for el in list(doc.iter('param')):
                parent = el.getparent()
                while parent is not None and parent.tag not in ('applet', 'object'):
                    parent = parent.getparent()
                if parent is None:
                    el.drop_tree()
            kill_tags.update(('applet',))
            # The alternate contents that are in an iframe are a good fallback:
            remove_tags.update(('iframe', 'embed', 'layer', 'object', 'param'))
        if self.frames:
            # FIXME: ideally we should look at the frame links, but
            # generally frames don't mix properly with an HTML
            # fragment anyway.
            kill_tags.update(defs.frame_tags)
        if self.forms:
            remove_tags.add('form')
            kill_tags.update(('button', 'input', 'select', 'textarea'))
        if self.annoying_tags:
            remove_tags.update(('blink', 'marquee'))

        # Collect first, modify afterwards, so the tree is not changed
        # while it is being iterated.
        _remove = []
        _kill = []
        for el in doc.iter():
            if el.tag in kill_tags:
                if self.allow_element(el):
                    continue
                _kill.append(el)
            elif el.tag in remove_tags:
                if self.allow_element(el):
                    continue
                _remove.append(el)

        if _remove and _remove[0] == doc:
            # We have to drop the parent-most tag, which we can't
            # do.  Instead we'll rewrite it:
            el = _remove.pop(0)
            el.tag = 'div'
            el.attrib.clear()
        elif _kill and _kill[0] == doc:
            # We have to drop the parent-most element, which we can't
            # do.  Instead we'll clear it:
            el = _kill.pop(0)
            if el.tag != 'html':
                el.tag = 'div'
            el.clear()

        _kill.reverse()  # start with innermost tags
        for el in _kill:
            el.drop_tree()
        for el in _remove:
            el.drop_tag()

        if self.remove_unknown_tags:
            if allow_tags:
                raise ValueError(
                    "It does not make sense to pass in both allow_tags and remove_unknown_tags")
            allow_tags = set(defs.tags)
        if allow_tags:
            # make sure we do not remove comments/PIs if users want them (which is rare enough)
            if not self.comments:
                allow_tags.add(etree.Comment)
            if not self.processing_instructions:
                allow_tags.add(etree.ProcessingInstruction)

            bad = []
            for el in doc.iter():
                if el.tag not in allow_tags:
                    bad.append(el)
            if bad:
                if bad[0] is doc:
                    # The root cannot be dropped; turn it into a bare <div>.
                    el = bad.pop(0)
                    el.tag = 'div'
                    el.attrib.clear()
                for el in bad:
                    el.drop_tag()
        if self.add_nofollow:
            for el in _find_external_links(doc):
                if not self.allow_follow(el):
                    rel = el.get('rel')
                    if rel:
                        # Cheap substring test first, then an exact
                        # whole-word check on the space-padded value.
                        if ('nofollow' in rel
                                and ' nofollow ' in (' %s ' % rel)):
                            continue
                        rel = '%s nofollow' % rel
                    else:
                        rel = 'nofollow'
                    el.set('rel', rel)

    def allow_follow(self, anchor):
        """
        Override to suppress rel="nofollow" on some anchors.

        The default returns False, so every external link gets
        ``nofollow`` when ``add_nofollow`` is set.
        """
        return False

    def allow_element(self, el):
        """
        Decide whether an element is configured to be accepted or rejected.

        Tags without an entry in ``_tag_link_attrs`` are rejected.  For
        the others, every listed link attribute must be present and pass
        ``allow_embedded_url``.

        :param el: an element.
        :return: true to accept the element or false to reject/discard it.
        """
        if el.tag not in self._tag_link_attrs:
            return False
        attr = self._tag_link_attrs[el.tag]
        if isinstance(attr, (list, tuple)):
            for one_attr in attr:
                url = el.get(one_attr)
                if not url:
                    return False
                if not self.allow_embedded_url(el, url):
                    return False
            return True
        else:
            url = el.get(attr)
            if not url:
                return False
            return self.allow_embedded_url(el, url)

    def allow_embedded_url(self, el, url):
        """
        Decide whether a URL that was found in an element's attributes or text
        is configured to be accepted or rejected.

        The URL is accepted only if the element's tag is in
        ``whitelist_tags`` (or that is None), the scheme is http or https,
        and the host name is in ``host_whitelist``.

        :param el: an element.
        :param url: a URL found on the element.
        :return: true to accept the URL and false to reject it.
        """
        if self.whitelist_tags is not None and el.tag not in self.whitelist_tags:
            return False
        parts = urlsplit(url)
        if parts.scheme not in ('http', 'https'):
            return False
        if parts.hostname in self.host_whitelist:
            return True
        return False

    def kill_conditional_comments(self, doc):
        """
        IE conditional comments basically embed HTML that the parser
        doesn't normally see.  We can't allow anything like that, so
        we'll kill any comments that could be conditional.
        """
        has_conditional_comment = _conditional_comment_re.search
        self._kill_elements(
            doc, lambda el: has_conditional_comment(el.text),
            etree.Comment)

    def _kill_elements(self, doc, condition, iterate=None):
        """
        Drop (with ``drop_tree``) every node yielded by ``doc.iter(iterate)``
        for which ``condition(node)`` is true.
        """
        # Collect first so the tree is not modified during iteration.
        bad = []
        for el in doc.iter(iterate):
            if condition(el):
                bad.append(el)
        for el in bad:
            el.drop_tree()

    def _remove_javascript_link(self, link):
        """
        Link rewriter: return '' for links that look like they carry a
        scriptable scheme, and the link unchanged otherwise.
        """
        # links like "j a v a s c r i p t:" might be interpreted in IE
        new = _substitute_whitespace('', unquote_plus(link))
        if _has_javascript_scheme(new):
            # FIXME: should this be None to delete?
            return ''
        return link

    # Bound ``sub`` that matches CSS ``/* ... */`` comments.
    _substitute_comments = re.compile(r'/\*.*?\*/', re.S).sub

    def _has_sneaky_javascript(self, style):
        """
        Depending on the browser, stuff like ``e x p r e s s i o n(...)``
        can get interpreted, or ``expre/* stuff */ssion(...)``.  This
        checks for attempt to do stuff like this.

        Typically the response will be to kill the entire style; if you
        have just a bit of Javascript in the style another rule will catch
        that and remove only the Javascript from the style; this catches
        more sneaky attempts.
        """
        # Normalise first: strip CSS comments, backslashes and whitespace,
        # then lower-case, so obfuscated spellings collapse to plain ones.
        style = self._substitute_comments('', style)
        style = style.replace('\\', '')
        style = _substitute_whitespace('', style)
        style = style.lower()
        if _has_javascript_scheme(style):
            return True
        if 'expression(' in style:
            return True
        if '@import' in style:
            return True
        if '</noscript' in style:
            # e.g. '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">'
            return True
        if _looks_like_tag_content(style):
            # e.g. '<math><style><img src=x onerror=alert(1)></style></math>'
            return True
        return False

    def clean_html(self, html):
        """
        Clean ``html`` and return the result in the same type as the input.

        Strings are parsed with ``fromstring``; anything else is deep
        copied first, so the argument itself is left untouched.
        """
        result_type = type(html)
        if isinstance(html, basestring):
            doc = fromstring(html)
        else:
            doc = copy.deepcopy(html)
        self(doc)
        return _transform_result(result_type, doc)
|
||||
|
||||
# Module-level Cleaner built with the default options, and a shortcut to
# its clean_html() method.
clean = Cleaner()
clean_html = clean.clean_html
|
||||
|
||||
############################################################
|
||||
## Autolinking
|
||||
############################################################
|
||||
|
||||
# Default patterns for autolink().  Each pattern must define the named
# groups ``body`` (used as the link text) and ``host`` (checked against
# the avoid_hosts patterns).
_link_regexes = [
    re.compile(r'(?P<body>https?://(?P<host>[a-z0-9._-]+)(?:/[/\-_.,a-z0-9%&?;=~]*)?(?:\([/\-_.,a-z0-9%&?;=~]*\))?)', re.I),
    # This is conservative, but autolinking can be a bit conservative:
    re.compile(r'mailto:(?P<body>[a-z0-9._-]+@(?P<host>[a-z0-9_.-]+[a-z]))', re.I),
    ]

# Tags whose content autolink() leaves alone.
_avoid_elements = ['textarea', 'pre', 'code', 'head', 'select', 'a']

# Host patterns that are never turned into links.
_avoid_hosts = [
    re.compile(r'^localhost', re.I),
    re.compile(r'\bexample\.(?:com|org|net)$', re.I),
    re.compile(r'^127\.0\.0\.1$'),
    ]

# Elements carrying one of these classes are skipped by autolink().
_avoid_classes = ['nolink']
|
||||
|
||||
def autolink(el, link_regexes=_link_regexes,
             avoid_elements=_avoid_elements,
             avoid_hosts=_avoid_hosts,
             avoid_classes=_avoid_classes):
    """
    Turn any URLs into links.

    Text is searched with the given regular expressions (by default
    http(s) and mailto links) and every hit is wrapped in an ``<a>``
    element.

    Elements whose tag is in avoid_elements, or that carry a class listed
    in avoid_classes, are left untouched together with their content.
    Hits whose host matches one of the avoid_hosts expressions are not
    linked (by default localhost, 127.0.0.1 and example.com/org/net).

    If you pass in an element, the element's tail will not be
    substituted, only the contents of the element.
    """
    if el.tag in avoid_elements:
        return
    class_attr = el.get('class')
    if class_attr:
        class_names = class_attr.split()
        if any(avoided in class_names for avoided in avoid_classes):
            return

    # Iterate over a snapshot: new anchors are spliced into ``el`` below.
    for child in list(el):
        autolink(child, link_regexes=link_regexes,
                 avoid_elements=avoid_elements,
                 avoid_hosts=avoid_hosts,
                 avoid_classes=avoid_classes)
        if not child.tail:
            continue
        remaining, tail_links = _link_text(
            child.tail, link_regexes, avoid_hosts, factory=el.makeelement)
        if tail_links:
            child.tail = remaining
            insert_at = el.index(child) + 1
            el[insert_at:insert_at] = tail_links

    if el.text:
        remaining, leading_links = _link_text(
            el.text, link_regexes, avoid_hosts, factory=el.makeelement)
        if leading_links:
            el.text = remaining
            el[:0] = leading_links
|
||||
|
||||
def _link_text(text, link_regexes, avoid_hosts, factory):
    """
    Split ``text`` into leading text plus a list of new anchor elements.

    :param text: the string to scan.
    :param link_regexes: patterns defining ``body`` and ``host`` groups.
    :param avoid_hosts: patterns; a match whose host matches any of them
        is skipped.
    :param factory: callable creating an element from a tag name.
    :return: ``(leading_text, links)``.  The text between and after the
        links is stored on each anchor's ``tail``.  When nothing matched,
        ``links`` is empty and ``leading_text`` is the whole input.
    """
    leading_text = ''
    links = []
    last_pos = 0
    while 1:
        # Find the earliest acceptable match among all patterns.
        best_match, best_pos = None, None
        for regex in link_regexes:
            regex_pos = last_pos
            while 1:
                match = regex.search(text, pos=regex_pos)
                if match is None:
                    break
                host = match.group('host')
                for host_regex in avoid_hosts:
                    if host_regex.search(host):
                        # Avoided host: retry this pattern after the match.
                        regex_pos = match.end()
                        break
                else:
                    # No avoid pattern hit: this match is usable.
                    break
            if match is None:
                continue
            if best_pos is None or match.start() < best_pos:
                best_match = match
                best_pos = match.start()
        if best_match is None:
            # No more matches
            if links:
                assert not links[-1].tail
                links[-1].tail = text
            else:
                assert not leading_text
                leading_text = text
            break
        link = best_match.group(0)
        end = best_match.end()
        if link.endswith('.') or link.endswith(','):
            # These punctuation marks shouldn't end a link
            end -= 1
            link = link[:-1]
        prev_text = text[:best_match.start()]
        if links:
            assert not links[-1].tail
            links[-1].tail = prev_text
        else:
            assert not leading_text
            leading_text = prev_text
        anchor = factory('a')
        anchor.set('href', link)
        body = best_match.group('body')
        if not body:
            body = link
        if body.endswith('.') or body.endswith(','):
            body = body[:-1]
        anchor.text = body
        links.append(anchor)
        # Continue scanning in the text that follows the link.
        text = text[end:]
    return leading_text, links
|
||||
|
||||
def autolink_html(html, *args, **kw):
    # The docstring is copied from autolink() right after the definition.
    # Strings are parsed; any other input is deep copied so that the
    # caller's tree is not modified.
    result_type = type(html)
    doc = fromstring(html) if isinstance(html, basestring) else copy.deepcopy(html)
    autolink(doc, *args, **kw)
    return _transform_result(result_type, doc)


autolink_html.__doc__ = autolink.__doc__
|
||||
|
||||
############################################################
|
||||
## Word wrapping
|
||||
############################################################
|
||||
|
||||
# Defaults for word_break(): tags and classes whose elements are skipped.
_avoid_word_break_elements = ['pre', 'textarea', 'code']
_avoid_word_break_classes = ['nobreak']
|
||||
|
||||
def word_break(el, max_width=40,
               avoid_elements=_avoid_word_break_elements,
               avoid_classes=_avoid_word_break_classes,
               break_character=unichr(0x200b)):
    """
    Breaks any long words found in the body of the text (not attributes).

    Doesn't affect any of the tags in avoid_elements (by default
    ``<pre>``, ``<textarea>`` and ``<code>``), nor elements that carry
    one of the classes in avoid_classes (by default ``nobreak``).

    Breaks words by inserting &#8203;, which is a unicode character
    for Zero Width Space character.  This generally takes up no space
    in rendering, but does copy as a space, and in monospace contexts
    usually takes up space.

    The element is modified in place; nothing is returned.  The tail of
    a skipped child is still processed, because it belongs to the
    parent's text flow.

    See http://www.cs.tut.fi/~jkorpela/html/nobr.html for a discussion
    """
    # Character suggestion of &#8203; comes from:
    #   http://www.cs.tut.fi/~jkorpela/html/nobr.html
    # Use the caller-supplied list here, not the module default, so that
    # a custom ``avoid_elements`` actually takes effect.
    if el.tag in avoid_elements:
        return
    class_name = el.get('class')
    if class_name:
        class_names = class_name.split()
        if any(avoid in class_names for avoid in avoid_classes):
            return
    if el.text:
        el.text = _break_text(el.text, max_width, break_character)
    for child in el:
        word_break(child, max_width=max_width,
                   avoid_elements=avoid_elements,
                   avoid_classes=avoid_classes,
                   break_character=break_character)
        if child.tail:
            child.tail = _break_text(child.tail, max_width, break_character)
|
||||
|
||||
def word_break_html(html, *args, **kw):
    """
    Apply word_break() to ``html`` and return the result in the same
    type as the input.

    Strings are parsed with ``fromstring``.  Any other input is deep
    copied, as clean_html() and autolink_html() do, so the caller's
    tree is left untouched.  Extra arguments are passed on to
    word_break().
    """
    result_type = type(html)
    if isinstance(html, basestring):
        doc = fromstring(html)
    else:
        # Non-string input used to be handed to fromstring() directly.
        doc = copy.deepcopy(html)
    word_break(doc, *args, **kw)
    return _transform_result(result_type, doc)
|
||||
|
||||
def _break_text(text, max_width, break_character):
    """
    Return ``text`` with break_character inserted into every
    whitespace-separated word that is longer than max_width.
    """
    # The word list is taken once, from the unmodified text.
    for candidate in text.split():
        if len(candidate) <= max_width:
            continue
        broken = _insert_break(candidate, max_width, break_character)
        text = text.replace(candidate, broken)
    return text
|
||||
|
||||
# Any non-letter is a preferred place to break a word.
_break_prefer_re = re.compile(r'[^a-z]', re.I)


def _insert_break(word, width, break_character):
    """
    Return ``word`` cut into chunks of at most ``width`` characters,
    joined by ``break_character``.

    A chunk ends just after the last non-letter inside the window if
    that position lies within the final 10 characters of the window;
    otherwise it is cut at exactly ``width`` characters.

    :raises ValueError: if ``width`` is smaller than 1.  Such a width
        can never consume any input, and would otherwise loop forever.
    """
    if width < 1:
        raise ValueError("width must be at least 1, got %r" % (width,))
    pieces = []
    while len(word) > width:
        start = word[:width]
        breaks = list(_break_prefer_re.finditer(start))
        if breaks:
            last_break = breaks[-1]
            # Only walk back up to 10 characters to find a nice break:
            if last_break.end() > width-10:
                # FIXME: should the break character be at the end of the
                # chunk, or the beginning of the next chunk?
                start = word[:last_break.end()]
        pieces.append(start + break_character)
        word = word[len(start):]
    pieces.append(word)
    return ''.join(pieces)
|
||||
|
||||
135
.venv/lib/python3.7/site-packages/lxml/html/defs.py
Normal file
135
.venv/lib/python3.7/site-packages/lxml/html/defs.py
Normal file
@@ -0,0 +1,135 @@
|
||||
# FIXME: this should all be confirmed against what a DTD says
|
||||
# (probably in a test; this may not match the DTD exactly, but we
|
||||
# should document just how it differs).
|
||||
|
||||
"""
|
||||
Data taken from https://www.w3.org/TR/html401/index/elements.html
|
||||
and https://www.w3.org/community/webed/wiki/HTML/New_HTML5_Elements
|
||||
for html5_tags.
|
||||
"""
|
||||
|
||||
empty_tags = frozenset([
    'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
    'img', 'input', 'isindex', 'link', 'meta', 'param', 'source', 'track'])

deprecated_tags = frozenset([
    'applet', 'basefont', 'center', 'dir', 'font', 'isindex',
    'menu', 's', 'strike', 'u'])

# archive actually takes a space-separated list of URIs
link_attrs = frozenset([
    'action', 'archive', 'background', 'cite', 'classid',
    'codebase', 'data', 'href', 'longdesc', 'profile', 'src',
    'usemap',
    # Not standard:
    'dynsrc', 'lowsrc',
    # HTML5 formaction
    'formaction'
    ])

# Not in the HTML 4 spec:
# onerror, onresize
event_attrs = frozenset([
    'onblur', 'onchange', 'onclick', 'ondblclick', 'onerror',
    'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onload',
    'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover',
    'onmouseup', 'onreset', 'onresize', 'onselect', 'onsubmit',
    'onunload',
    ])

# Default value of Cleaner.safe_attrs: the attributes kept when
# safe_attrs_only is enabled.  None of the event_attrs names are listed.
safe_attrs = frozenset([
    'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align',
    'alt', 'axis', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff',
    'charset', 'checked', 'cite', 'class', 'clear', 'cols', 'colspan',
    'color', 'compact', 'coords', 'datetime', 'dir', 'disabled', 'enctype',
    'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace', 'id',
    'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method',
    'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly',
    'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape',
    'size', 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
    'type', 'usemap', 'valign', 'value', 'vspace', 'width'])

# From http://htmlhelp.com/reference/html40/olist.html
top_level_tags = frozenset([
    'html', 'head', 'body', 'frameset',
    ])

head_tags = frozenset([
    'base', 'isindex', 'link', 'meta', 'script', 'style', 'title',
    ])

general_block_tags = frozenset([
    'address',
    'blockquote',
    'center',
    'del',
    'div',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'hr',
    'ins',
    'isindex',
    'noscript',
    'p',
    'pre',
    ])

list_tags = frozenset([
    'dir', 'dl', 'dt', 'dd', 'li', 'menu', 'ol', 'ul',
    ])

table_tags = frozenset([
    'table', 'caption', 'colgroup', 'col',
    'thead', 'tfoot', 'tbody', 'tr', 'td', 'th',
    ])

# just this one from
# http://www.georgehernandez.com/h/XComputers/HTML/2BlockLevel.htm
block_tags = general_block_tags | list_tags | table_tags | frozenset([
    # Partial form tags
    'fieldset', 'form', 'legend', 'optgroup', 'option',
    ])

form_tags = frozenset([
    'form', 'button', 'fieldset', 'legend', 'input', 'label',
    'select', 'optgroup', 'option', 'textarea',
    ])

special_inline_tags = frozenset([
    'a', 'applet', 'basefont', 'bdo', 'br', 'embed', 'font', 'iframe',
    'img', 'map', 'area', 'object', 'param', 'q', 'script',
    'span', 'sub', 'sup',
    ])

phrase_tags = frozenset([
    'abbr', 'acronym', 'cite', 'code', 'del', 'dfn', 'em',
    'ins', 'kbd', 'samp', 'strong', 'var',
    ])

font_style_tags = frozenset([
    'b', 'big', 'i', 's', 'small', 'strike', 'tt', 'u',
    ])

# The Cleaner adds these to its kill list when its ``frames`` option is set.
frame_tags = frozenset([
    'frameset', 'frame', 'noframes',
    ])

html5_tags = frozenset([
    'article', 'aside', 'audio', 'canvas', 'command', 'datalist',
    'details', 'embed', 'figcaption', 'figure', 'footer', 'header',
    'hgroup', 'keygen', 'mark', 'math', 'meter', 'nav', 'output',
    'progress', 'rp', 'rt', 'ruby', 'section', 'source', 'summary',
    'svg', 'time', 'track', 'video', 'wbr'
    ])

# These tags aren't standard
nonstandard_tags = frozenset(['blink', 'marquee'])


# Union of the tag groups below.  The Cleaner uses it as the allow list
# when ``remove_unknown_tags`` is enabled.
tags = (top_level_tags | head_tags | general_block_tags | list_tags
        | table_tags | form_tags | special_inline_tags | phrase_tags
        | font_style_tags | nonstandard_tags | html5_tags)
|
||||
Binary file not shown.
884
.venv/lib/python3.7/site-packages/lxml/html/diff.py
Normal file
884
.venv/lib/python3.7/site-packages/lxml/html/diff.py
Normal file
@@ -0,0 +1,884 @@
|
||||
# cython: language_level=3
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import difflib
|
||||
from lxml import etree
|
||||
from lxml.html import fragment_fromstring
|
||||
import re
|
||||
|
||||
# Public API of this module.
__all__ = ['html_annotate', 'htmldiff']

# Python 2/3 compatibility shims.
try:
    from html import escape as html_escape
except ImportError:
    # Fallback for interpreters without html.escape
    from cgi import escape as html_escape
try:
    _unicode = unicode
except NameError:
    # Python 3
    _unicode = str
try:
    basestring
except NameError:
    # Python 3
    basestring = str
|
||||
|
||||
############################################################
|
||||
## Annotation
|
||||
############################################################
|
||||
|
||||
def default_markup(text, version):
    """
    Wrap ``text`` in a ``<span>`` whose title is the escaped ``version``.

    ``text`` is inserted as given; only the version label is escaped.
    """
    title = html_escape(_unicode(version), 1)
    return '<span title="%s">%s</span>' % (title, text)
|
||||
|
||||
def html_annotate(doclist, markup=default_markup):
    """
    doclist should be ordered from oldest to newest, like::

        >>> version1 = 'Hello World'
        >>> version2 = 'Goodbye World'
        >>> print(html_annotate([(version1, 'version 1'),
        ...                      (version2, 'version 2')]))
        <span title="version 2">Goodbye</span> <span title="version 1">World</span>

    The documents must be *fragments* (str/UTF8 or unicode), not
    complete documents

    The markup argument is a function to markup the spans of words.
    This function is called like markup('Hello', 'version 2'), and
    returns HTML.  The first argument is text and never includes any
    markup.  The default uses a span with a title:

        >>> print(default_markup('Some Text', 'by Joe'))
        <span title="by Joe">Some Text</span>
    """
    # Strategy: tokenize every version (a token is a word plus its
    # attached markup) and tag each token with its own version.  Then
    # walk consecutive versions; a token that already existed in the
    # older one inherits the older annotation, so each token ends up
    # labelled with the version in which it first appeared.
    token_versions = [tokenize_annotated(doc, version)
                      for doc, version in doclist]
    for older, newer in zip(token_versions, token_versions[1:]):
        html_annotate_merge_annotations(older, newer)

    # Only the newest version is rendered.  Adjacent tokens sharing an
    # annotation are merged before the markup is added.
    newest = compress_tokens(token_versions[-1])
    chunks = markup_serialize_tokens(newest, markup)
    return ''.join(chunks).strip()
|
||||
|
||||
def tokenize_annotated(doc, annotation):
    """Tokenize ``doc`` (hrefs excluded) and set ``annotation`` on every
    resulting token; the token list is returned.
    """
    annotated = tokenize(doc, include_hrefs=False)
    for item in annotated:
        item.annotation = annotation
    return annotated
|
||||
|
||||
def html_annotate_merge_annotations(tokens_old, tokens_new):
    """Carry annotations over from tokens_old to tokens_new for every run
    of tokens that the sequence matcher reports as equal in both lists.
    """
    matcher = InsensitiveSequenceMatcher(a=tokens_old, b=tokens_new)
    for opcode, old_start, old_end, new_start, new_end in matcher.get_opcodes():
        if opcode != 'equal':
            continue
        copy_annotations(tokens_old[old_start:old_end],
                         tokens_new[new_start:new_end])
|
||||
|
||||
def copy_annotations(src, dest):
    """
    Set the ``annotation`` of each token in dest to the annotation of the
    token at the same position in src.  Both sequences must have the same
    length.
    """
    assert len(src) == len(dest)
    for origin, target in zip(src, dest):
        target.annotation = origin.annotation
|
||||
|
||||
def compress_tokens(tokens):
    """
    Combine adjacent tokens when there is no HTML between the tokens,
    and they share an annotation.

    Two neighbours are merged (via compress_merge_back) only when the
    earlier one has no post_tags, the later one has no pre_tags, and their
    annotations compare equal; otherwise the later token is appended
    unchanged.  Returns a new list; an empty input yields an empty list.
    """
    if not tokens:
        # Nothing to compress; avoid indexing into an empty sequence.
        return []
    result = [tokens[0]]
    for tok in tokens[1:]:
        if (not result[-1].post_tags and
                not tok.pre_tags and
                result[-1].annotation == tok.annotation):
            compress_merge_back(result, tok)
        else:
            result.append(tok)
    return result
|
||||
|
||||
def compress_merge_back(tokens, tok):
    """ Fold tok into the final entry of tokens, changing the list in place.

    Merging only happens when both the last entry and tok are exactly of
    type ``token`` (subclasses are excluded by the ``type(...) is`` test);
    otherwise tok is simply appended.  The merged token takes its pre_tags
    and annotation from the last entry and its post_tags and
    trailing_whitespace from tok. """
    last = tokens[-1]
    if type(last) is token and type(tok) is token:
        combined = _unicode(last)
        if last.trailing_whitespace:
            # Keep the whitespace that separated the two words.
            combined = combined + last.trailing_whitespace
        combined = combined + tok
        replacement = token(combined,
                            pre_tags=last.pre_tags,
                            post_tags=tok.post_tags,
                            trailing_whitespace=tok.trailing_whitespace)
        replacement.annotation = last.annotation
        tokens[-1] = replacement
    else:
        tokens.append(tok)
|
||||
|
||||
def markup_serialize_tokens(tokens, markup_func):
    """
    Generate the text chunks for a list of annotated tokens.

    For every token this yields its pre_tags, then the token's html()
    passed through ``markup_func(html, annotation)`` (with any trailing
    whitespace appended after the markup), then its post_tags.
    """
    for tok in tokens:
        for opening in tok.pre_tags:
            yield opening
        marked = markup_func(tok.html(), tok.annotation)
        if tok.trailing_whitespace:
            marked = marked + tok.trailing_whitespace
        yield marked
        for closing in tok.post_tags:
            yield closing
|
||||
|
||||
|
||||
############################################################
|
||||
## HTML Diffs
|
||||
############################################################
|
||||
|
||||
def htmldiff(old_html, new_html):
    """ Do a diff of the old and new document.  The documents are HTML
    *fragments* (str/UTF8 or unicode), they are not complete documents
    (i.e., no <html> tag).

    Returns HTML with <ins> and <del> tags added around the
    appropriate text.

    Markup is generally ignored, with the markup from new_html
    preserved, and possibly some markup from old_html (though it is
    considered acceptable to lose some of the old markup).  Only the
    words in the HTML are diffed.  The exception is <img> tags, which
    are treated like words, and the href attribute of <a> tags, which
    are noted inside the tag itself when there are changes.
    """
    ## FIXME: this should take parsed documents too, and use their body
    ## or other content.
    old_tokens = tokenize(old_html)
    new_tokens = tokenize(new_html)
    chunks = htmldiff_tokens(old_tokens, new_tokens)
    # Join the chunks, trim the edges, then push <ins>/<del> inside
    # block-level elements.
    return fixup_ins_del_tags(''.join(chunks).strip())
|
||||
|
||||
def htmldiff_tokens(html1_tokens, html2_tokens):
    """ Diff two token lists and return the merged document as a list of
    text chunks (not tokens).
    """
    # The tokens isolate the part of the content worth diffing; difflib
    # does the actual comparison.  Afterwards a single valid document has
    # to be assembled from both inputs.  Markup from the new document is
    # preferred; markup from the old document is kept on a best-effort
    # basis and dropped when it cannot be reconciled.  Deleted text is
    # placed as close as possible to where it used to be, which is
    # necessarily approximate because only the new markup is kept.
    matcher = InsensitiveSequenceMatcher(a=html1_tokens, b=html2_tokens)
    result = []
    for command, i1, i2, j1, j2 in matcher.get_opcodes():
        if command == 'equal':
            result.extend(expand_tokens(html2_tokens[j1:j2], equal=True))
            continue
        # A 'replace' is handled as an insert followed by a delete.
        if command in ('insert', 'replace'):
            merge_insert(expand_tokens(html2_tokens[j1:j2]), result)
        if command in ('delete', 'replace'):
            merge_delete(expand_tokens(html1_tokens[i1:i2]), result)
    # Deletes are only marked with sentinels so far; emitting <del>
    # directly could produce an invalid document.  cleanup_delete moves
    # them around and turns them into real markup.
    return cleanup_delete(result)
|
||||
|
||||
def expand_tokens(tokens, equal=False):
    """Generate the text chunks for a list of tokens.

    Each token contributes its pre_tags, its html() (plus trailing
    whitespace, if any) and its post_tags.  When ``equal`` is true, tokens
    whose ``hide_when_equal`` flag is set contribute only their tags.
    """
    for tok in tokens:
        for opening in tok.pre_tags:
            yield opening
        if not (equal and tok.hide_when_equal):
            if tok.trailing_whitespace:
                yield tok.html() + tok.trailing_whitespace
            else:
                yield tok.html()
        for closing in tok.post_tags:
            yield closing
|
||||
|
||||
def merge_insert(ins_chunks, doc):
    """ Append ins_chunks to doc (the list of text chunks handled so far),
    wrapping the balanced part of them in <ins>...</ins>. """
    # Unbalanced start/end tags are kept (their counterparts are assumed
    # to exist elsewhere in the document) but stay outside the <ins>.
    leading, inner, trailing = split_unbalanced(ins_chunks)
    doc.extend(leading)
    if doc and not doc[-1].endswith(' '):
        # The chunk before the insert had no trailing space; add one.
        doc[-1] = doc[-1] + ' '
    doc.append('<ins>')
    if inner and inner[-1].endswith(' '):
        # The trailing space is emitted after </ins> instead.
        inner[-1] = inner[-1][:-1]
    doc.extend(inner)
    doc.append('</ins> ')
    doc.extend(trailing)
|
||||
|
||||
# These are sentinels to represent the start and end of a <del>
# segment, until we do the cleanup phase to turn them into proper
# markup:
# (The classes themselves, not instances, are placed in the chunk list by
# merge_delete and located again by split_delete.)
class DEL_START:
    pass
class DEL_END:
    pass

class NoDeletes(Exception):
    """ Raised when the document no longer contains any pending deletes
    (DEL_START/DEL_END) """
|
||||
|
||||
def merge_delete(del_chunks, doc):
    """ Append del_chunks to doc (a list of text chunks), bracketed by the
    DEL_START and DEL_END sentinels so the span is recognisable as a
    delete.  cleanup_delete later turns the sentinels into <del> tags."""
    doc.append(DEL_START)
    for piece in del_chunks:
        doc.append(piece)
    doc.append(DEL_END)
|
||||
|
||||
def cleanup_delete(chunks):
    """ Replace every DEL_START/DEL_END pair in chunks with real
    <del></del> markup and return the resulting chunk list.

    To keep the document valid, some tags inside the deleted span (start
    or end tags) may be dropped.  The <del> may also be shifted into an
    adjacent tag so that it ends up near where the text used to be (e.g.
    moved into a preceding <div> when the delete looks like (DEL_START,
    'Text</div>', DEL_END))."""
    while True:
        # Split around the first pending delete: what precedes DEL_START,
        # what lies inside, and what follows DEL_END.
        try:
            before, deleted, after = split_delete(chunks)
        except NoDeletes:
            # No sentinels left: the whole document has been cleaned up.
            return chunks
        # The deleted span may not be well-balanced markup; separate the
        # dangling tags from the balanced core.
        dangling_open, core, dangling_close = split_unbalanced(deleted)
        # Shift the insertion point forward and/or backward according to
        # those dangling tags.
        locate_unbalanced_start(dangling_open, before, after)
        locate_unbalanced_end(dangling_close, before, after)
        rebuilt = before
        if rebuilt and not rebuilt[-1].endswith(' '):
            # The preceding chunk had no trailing space; add one.
            rebuilt[-1] += ' '
        rebuilt.append('<del>')
        if core and core[-1].endswith(' '):
            # The trailing space is emitted after </del> instead.
            core[-1] = core[-1][:-1]
        rebuilt.extend(core)
        rebuilt.append('</del> ')
        rebuilt.extend(after)
        chunks = rebuilt
|
||||
|
||||
def split_unbalanced(chunks):
    """Return (unbalanced_start, balanced, unbalanced_end), where each is
    a list of text and tag chunks.

    unbalanced_start is a list of all the tags that are opened, but
    not closed in this span.  Similarly, unbalanced_end is a list of
    tags that are closed but were not opened.  Extracting these might
    mean some reordering of the chunks."""
    start = []
    end = []
    # Stack of (tag name, index of its placeholder in balanced, chunk) for
    # start tags that have not been closed yet.
    tag_stack = []
    balanced = []
    for chunk in chunks:
        if not chunk.startswith('<'):
            # Plain text always belongs to the balanced part.
            balanced.append(chunk)
            continue
        endtag = chunk[1] == '/'
        name = chunk.split()[0].strip('<>/')
        if name in empty_tags:
            # Tags listed in empty_tags never need a partner.
            balanced.append(chunk)
            continue
        if endtag:
            if tag_stack and tag_stack[-1][0] == name:
                # Closes the innermost open tag: keep both, filling in the
                # placeholder that was reserved for the start tag.
                balanced.append(chunk)
                name, pos, tag = tag_stack.pop()
                balanced[pos] = tag
            elif tag_stack:
                # Mismatched close: every tag still open is treated as
                # unbalanced, and so is this end tag.
                start.extend([tag for name, pos, tag in tag_stack])
                tag_stack = []
                end.append(chunk)
            else:
                end.append(chunk)
        else:
            # Start tag: reserve its position with a None placeholder until
            # a matching end tag is seen.
            tag_stack.append((name, len(balanced), chunk))
            balanced.append(None)
    # Whatever is still open at the end was never closed in this span.
    start.extend(
        [chunk for name, pos, chunk in tag_stack])
    # Drop placeholders of start tags that turned out to be unbalanced.
    balanced = [chunk for chunk in balanced if chunk is not None]
    return start, balanced, end
|
||||
|
||||
def split_delete(chunks):
    """ Split chunks around the first DEL_START/DEL_END pair.

    Returns (stuff_before_DEL_START, stuff_inside_DEL_START_END,
    stuff_after_DEL_END); the last part may itself contain further
    DEL_STARTs.  Raises NoDeletes when chunks holds no DEL_START. """
    try:
        opening = chunks.index(DEL_START)
    except ValueError:
        raise NoDeletes
    closing = chunks.index(DEL_END)
    before = chunks[:opening]
    inside = chunks[opening+1:closing]
    after = chunks[closing+1:]
    return before, inside, after
|
||||
|
||||
def locate_unbalanced_start(unbalanced_start, pre_delete, post_delete):
    """ pre_delete and post_delete implicitly point to a place in the
    document (where the two were split).  This moves that point (by
    popping items from one and pushing them onto the other).  It moves
    the point to try to find a place where unbalanced_start applies.

    As an example::

        >>> unbalanced_start = ['<div>']
        >>> doc = ['<p>', 'Text', '</p>', '<div>', 'More Text', '</div>']
        >>> pre, post = doc[:3], doc[3:]
        >>> pre, post
        (['<p>', 'Text', '</p>'], ['<div>', 'More Text', '</div>'])
        >>> locate_unbalanced_start(unbalanced_start, pre, post)
        >>> pre, post
        (['<p>', 'Text', '</p>', '<div>'], ['More Text', '</div>'])

    As you can see, we moved the point so that the dangling <div> that
    we found will be effectively replaced by the div in the original
    document.  If this doesn't work out, we just throw away
    unbalanced_start without doing anything.
    """
    # Loop until every dangling start tag has been matched, or until the
    # point cannot be moved any further.
    while unbalanced_start:
        wanted = unbalanced_start[0].split()[0].strip('<>')
        if not post_delete:
            return
        candidate = post_delete[0]
        if candidate is DEL_START or not candidate.startswith('<'):
            # Reached a word (or another delete); cannot move forward.
            return
        if candidate[1] == '/':
            # Reached a closing tag; do not try to go past it.
            return
        tag_name = candidate.split()[0].strip('<>')
        if tag_name == 'ins':
            # Can't move into an insert
            return
        assert tag_name != 'del', (
            "Unexpected delete tag: %r" % candidate)
        if tag_name != wanted:
            # Found a tag that doesn't match
            return
        unbalanced_start.pop(0)
        pre_delete.append(post_delete.pop(0))
|
||||
|
||||
def locate_unbalanced_end(unbalanced_end, pre_delete, post_delete):
    """ Counterpart of locate_unbalanced_start for dangling end tags: the
    split point may be moved earlier in the document by shifting matching
    end tags from the tail of pre_delete to the head of post_delete. """
    # Loop until every dangling end tag has been matched, or until the
    # point cannot be moved any further.
    while unbalanced_end:
        wanted = unbalanced_end[-1].split()[0].strip('<>/')
        if not pre_delete:
            return
        candidate = pre_delete[-1]
        if candidate is DEL_END or not candidate.startswith('</'):
            # A word or a start tag
            return
        tag_name = candidate.split()[0].strip('<>/')
        if tag_name == 'ins' or tag_name == 'del':
            # Can't move into an insert or delete
            return
        if tag_name != wanted:
            # Found a tag that doesn't match
            return
        unbalanced_end.pop()
        post_delete.insert(0, pre_delete.pop())
|
||||
|
||||
class token(_unicode):
    """ A diffable token: generally one word as shown to the user.

    Adjacent opening tags are attached to the token as pre_tags and the
    closing tags that follow the word as post_tags.  Because of empty
    tags next to a word, pre_tags may occasionally hold close tags and
    post_tags open tags.

    trailing_whitespace records the whitespace that originally followed
    the word.  It is stored separately from the string value, so a word
    compares equal to the same word without a trailing space."""

    # When this is true, the token will be eliminated from the
    # displayed diff if no change has occurred:
    hide_when_equal = False

    def __new__(cls, text, pre_tags=None, post_tags=None, trailing_whitespace=""):
        self = _unicode.__new__(cls, text)
        # A fresh list per token when the caller supplies none.
        self.pre_tags = [] if pre_tags is None else pre_tags
        self.post_tags = [] if post_tags is None else post_tags
        self.trailing_whitespace = trailing_whitespace
        return self

    def __repr__(self):
        return 'token(%s, %r, %r, %r)' % (
            _unicode.__repr__(self),
            self.pre_tags,
            self.post_tags,
            self.trailing_whitespace)

    def html(self):
        # The plain string value, without any of the attached tags.
        return _unicode(self)
|
||||
|
||||
class tag_token(token):

    """ Represents a token that is actually a tag.  Currently this is just
    the <img> tag, which takes up visible space just like a word but
    is only represented in a document by a tag.

    The string value of the token (used when tokens are compared) is
    "<tag>: <data>"; html() returns html_repr, the markup to emit. """

    def __new__(cls, tag, data, html_repr, pre_tags=None,
                post_tags=None, trailing_whitespace=""):
        # Use the tag argument here; the builtin ``type`` was formatted
        # into the text before, which produced "<class 'type'>: ...".
        obj = token.__new__(cls, "%s: %s" % (tag, data),
                            pre_tags=pre_tags,
                            post_tags=post_tags,
                            trailing_whitespace=trailing_whitespace)
        obj.tag = tag
        obj.data = data
        obj.html_repr = html_repr
        return obj

    def __repr__(self):
        # Labels follow the order of the values below (pre_tags first).
        return 'tag_token(%s, %s, html_repr=%s, pre_tags=%r, post_tags=%r, trailing_whitespace=%r)' % (
            self.tag,
            self.data,
            self.html_repr,
            self.pre_tags,
            self.post_tags,
            self.trailing_whitespace)

    def html(self):
        return self.html_repr
|
||||
|
||||
class href_token(token):

    """ Token holding the href of an anchor tag.  It is flagged
    hide_when_equal, so unlike ordinary words it is left out of the
    displayed diff when it is unchanged. """

    hide_when_equal = True

    def html(self):
        return ' Link: %s' % (self,)
|
||||
|
||||
def tokenize(html, include_hrefs=True):
    """
    Parse the given HTML and return token objects (words with attached tags).

    ``html`` may be an element (used as-is) or markup, which is parsed
    with ``parse_html(html, cleanup=True)``: only the content of the page
    is used, <head>/<body> are optional, and <ins>/<del> tags are removed
    because they would be confusing in a diff.  Parsing is done by lxml,
    which may guess wrongly when the markup is particularly bad.

    If include_hrefs is true, then the href attribute of <a> tags is
    included as a special kind of diffable token."""
    body_el = html if etree.iselement(html) else parse_html(html, cleanup=True)
    # Split the document into a chunk per start tag, word and end tag,
    # then re-join those chunks into token objects.
    return fixup_chunks(
        flatten_el(body_el, skip_tag=True, include_hrefs=include_hrefs))
|
||||
|
||||
def parse_html(html, cleanup=True):
    """
    Parse an HTML fragment and return an lxml element.  The fragment is
    parsed with ``create_parent=True``, so it ends up wrapped in a <div>
    that was not part of the original document.

    If cleanup is true, the markup is first run through cleanup_html,
    which strips everything outside <body> and removes <ins>/<del> tags.
    """
    source = cleanup_html(html) if cleanup else html
    return fragment_fromstring(source, create_parent=True)
|
||||
|
||||
# Opening <body ...> tag, closing </body> tag, and any <ins>/<del>
# start or end tag (case-insensitive, may span lines).
_body_re = re.compile(r'<body.*?>', re.I|re.S)
_end_body_re = re.compile(r'</body.*?>', re.I|re.S)
_ins_del_re = re.compile(r'</?(ins|del).*?>', re.I|re.S)

def cleanup_html(html):
    """ 'Clean' the HTML: drop everything up to and including the opening
    <body> tag and everything from the closing </body> tag on (when those
    tags are present), then remove all <ins> and <del> tags (their
    content is kept). """
    opening = _body_re.search(html)
    if opening:
        html = html[opening.end():]
    closing = _end_body_re.search(html)
    if closing:
        html = html[:closing.start()]
    return _ins_del_re.sub('', html)
|
||||
|
||||
|
||||
# Matches a single space, tab, newline or carriage return at the very end
# of a string.  NOTE(review): not referenced in the visible part of this
# module; confirm whether it is still needed.
end_whitespace_re = re.compile(r'[ \t\n\r]$')
|
||||
|
||||
def split_trailing_whitespace(word):
    """
    Split a word into (text, trailing whitespace); for example 'test\\n\\n'
    becomes ('test', '\\n\\n').
    """
    cut = len(word.rstrip())
    head = word[:cut]
    tail = word[cut:]
    return head, tail
|
||||
|
||||
|
||||
def fixup_chunks(chunks):
    """
    This function takes a list of chunks and produces a list of tokens.

    Chunks are either strings (words, start tags, end tags) or tuples
    starting with 'img' or 'href'.  Start tags are collected and attached
    as pre_tags to the next token created; end tags are attached as
    post_tags to the most recent token, unless start tags are currently
    being collected, in which case they are collected along with them.
    """
    # Tags seen since the last token; they become the next token's pre_tags.
    tag_accum = []
    cur_word = None
    result = []
    for chunk in chunks:
        if isinstance(chunk, tuple):
            if chunk[0] == 'img':
                # ('img', src, start tag text): becomes a tag_token.
                src = chunk[1]
                tag, trailing_whitespace = split_trailing_whitespace(chunk[2])
                cur_word = tag_token('img', src, html_repr=tag,
                                     pre_tags=tag_accum,
                                     trailing_whitespace=trailing_whitespace)
                tag_accum = []
                result.append(cur_word)

            elif chunk[0] == 'href':
                # ('href', url): becomes an href_token.
                href = chunk[1]
                cur_word = href_token(href, pre_tags=tag_accum, trailing_whitespace=" ")
                tag_accum = []
                result.append(cur_word)
            # Tuples of any other kind are ignored.
            continue

        if is_word(chunk):
            chunk, trailing_whitespace = split_trailing_whitespace(chunk)
            cur_word = token(chunk, pre_tags=tag_accum, trailing_whitespace=trailing_whitespace)
            tag_accum = []
            result.append(cur_word)

        elif is_start_tag(chunk):
            tag_accum.append(chunk)

        elif is_end_tag(chunk):
            if tag_accum:
                # Keep the end tag together with the pending start tags.
                tag_accum.append(chunk)
            else:
                assert cur_word, (
                    "Weird state, cur_word=%r, result=%r, chunks=%r of %r"
                    % (cur_word, result, chunk, chunks))
                cur_word.post_tags.append(chunk)
        else:
            assert False

    if not result:
        # No words at all: return one empty token carrying the tags.
        return [token('', pre_tags=tag_accum)]
    else:
        # Tags left over after the last word are attached to that word.
        result[-1].post_tags.extend(tag_accum)

    return result
|
||||
|
||||
|
||||
# All the tags in HTML that don't require end tags:
# (split_unbalanced treats these as always balanced, and flatten_el emits
# no end tag for them when they have no text, children or tail.)
empty_tags = (
    'param', 'img', 'area', 'br', 'basefont', 'input',
    'base', 'meta', 'link', 'col')

# Tag names that _contains_block_level_tag treats as block-level elements.
block_level_tags = (
    'address',
    'blockquote',
    'center',
    'dir',
    'div',
    'dl',
    'fieldset',
    'form',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'hr',
    'isindex',
    'menu',
    'noframes',
    'noscript',
    'ol',
    'p',
    'pre',
    'table',
    'ul',
    )

# Further tag names that _contains_block_level_tag also counts as
# block-level (list items, definition-list parts, table parts, frameset).
block_level_container_tags = (
    'dd',
    'dt',
    'frameset',
    'li',
    'tbody',
    'td',
    'tfoot',
    'th',
    'thead',
    'tr',
    )
|
||||
|
||||
|
||||
def flatten_el(el, include_hrefs, skip_tag=False):
    """ Generate all the text chunks for the lxml element el: one chunk
    per start tag, per word and per end tag.  <img> start tags are yielded
    as ('img', src, start_tag) tuples and, when include_hrefs is true, the
    href of an <a> element as an ('href', href) tuple just before its end
    tag.

    If skip_tag is true, then the outermost container tag is
    not returned (just its contents)."""
    if not skip_tag:
        opening = start_tag(el)
        if el.tag == 'img':
            yield ('img', el.get('src'), opening)
        else:
            yield opening
    is_bare_empty_tag = (el.tag in empty_tags and not el.text
                         and not len(el) and not el.tail)
    if is_bare_empty_tag:
        # Nothing inside and nothing after: no end tag is emitted.
        return
    for word in split_words(el.text):
        yield html_escape(word)
    for child in el:
        for piece in flatten_el(child, include_hrefs=include_hrefs):
            yield piece
    if el.tag == 'a' and el.get('href') and include_hrefs:
        yield ('href', el.get('href'))
    if not skip_tag:
        yield end_tag(el)
        for word in split_words(el.tail):
            yield html_escape(word)
|
||||
|
||||
# A run of non-whitespace followed by whitespace or the end of the text.
split_words_re = re.compile(r'\S+(?:\s+|$)', re.U)

def split_words(text):
    """ Break text into a list of words, each keeping the whitespace that
    follows it.  Empty, None or whitespace-only text gives an empty list. """
    if text and text.strip():
        return split_words_re.findall(text)
    return []
|
||||
|
||||
# Matches a single space, tab, newline or carriage return at the very start
# of a string; end_tag uses it to detect whitespace following an element.
start_whitespace_re = re.compile(r'^[ \t\n\r]')
|
||||
|
||||
def start_tag(el):
    """
    Build the text of the start tag for el: the tag name followed by
    each attribute as name="value", with the value HTML-escaped.
    """
    attributes = []
    for name, value in el.attrib.items():
        attributes.append(' %s="%s"' % (name, html_escape(value, True)))
    return '<%s%s>' % (el.tag, ''.join(attributes))
|
||||
|
||||
def end_tag(el):
    """ Build the text of the end tag for el.  A single space is appended
    when the element's tail begins with whitespace. """
    suffix = ''
    if el.tail and start_whitespace_re.search(el.tail):
        suffix = ' '
    return '</%s>%s' % (el.tag, suffix)
|
||||
|
||||
def is_word(tok):
    # A chunk is a word when it does not begin like a tag.
    return tok[:1] != '<'
|
||||
|
||||
def is_end_tag(tok):
    # End tags begin with '</'.
    return tok[:2] == '</'
|
||||
|
||||
def is_start_tag(tok):
    # Start tags begin with '<' but not with '</'.
    return tok[:1] == '<' and tok[:2] != '</'
|
||||
|
||||
def fixup_ins_del_tags(html):
    """ Take an html string and push any <ins> or <del> tags inside the
    block-level elements they wrap, e.g. <ins><p>word</p></ins> becomes
    <p><ins>word</ins></p>.  Returns the rewritten html string. """
    tree = parse_html(html, cleanup=False)
    _fixup_ins_del_tags(tree)
    # parse_html wrapped the fragment in an extra parent; leave it out.
    return serialize_html_fragment(tree, skip_outer=True)
|
||||
|
||||
def serialize_html_fragment(el, skip_outer=False):
    """ Serialize a single lxml element as HTML.  The serialized form
    includes the element's tail.

    If skip_outer is true, the outermost start and end tags are cut off
    and the remaining text is returned stripped of surrounding whitespace.
    """
    assert not isinstance(el, basestring), (
        "You should pass in an element, not a string like %r" % el)
    serialized = etree.tostring(el, method="html", encoding=_unicode)
    if not skip_outer:
        return serialized
    # Drop everything up to the end of the first tag ...
    inner = serialized[serialized.find('>')+1:]
    # ... and everything from the start of the last tag.
    inner = inner[:inner.rfind('<')]
    return inner.strip()
|
||||
|
||||
def _fixup_ins_del_tags(doc):
    """In-place variant of fixup_ins_del_tags operating on an lxml document.

    Every <ins>/<del> element that contains a block-level tag has its
    marker moved inside the blocks, after which the outer element's own
    tag is dropped.
    """
    for tag in ('ins', 'del'):
        for el in doc.xpath('descendant-or-self::%s' % tag):
            if _contains_block_level_tag(el):
                _move_el_inside_block(el, tag=tag)
                el.drop_tag()
                #_merge_element_contents(el)
|
||||
|
||||
def _contains_block_level_tag(el):
    """True if el itself, or any of its descendants, is a block-level
    element (a tag listed in block_level_tags or block_level_container_tags).
    """
    if el.tag in block_level_tags or el.tag in block_level_container_tags:
        return True
    return any(_contains_block_level_tag(child) for child in el)
|
||||
|
||||
def _move_el_inside_block(el, tag):
    """ helper for _fixup_ins_del_tags; actually takes the <ins> etc tags
    and moves them inside any block-level tags.

    el is modified in place: its text, its non-block children and the
    tails of its block children each end up wrapped in a new ``tag``
    element, while children containing block-level tags are processed
    recursively.  """
    for child in el:
        if _contains_block_level_tag(child):
            break
    else:
        # No block-level tags in any child
        # Wrap the whole content (text and children) in one new element.
        children_tag = etree.Element(tag)
        children_tag.text = el.text
        el.text = None
        children_tag.extend(list(el))
        el[:] = [children_tag]
        return
    # Iterate over a copy, since el is modified inside the loop.
    for child in list(el):
        if _contains_block_level_tag(child):
            _move_el_inside_block(child, tag)
            if child.tail:
                # Text following the block child gets its own wrapper,
                # inserted right after that child.
                tail_tag = etree.Element(tag)
                tail_tag.text = child.tail
                child.tail = None
                el.insert(el.index(child)+1, tail_tag)
        else:
            # Children without block-level content are wrapped whole.
            child_tag = etree.Element(tag)
            el.replace(child, child_tag)
            child_tag.append(child)
    if el.text:
        # Leading text of el is wrapped and becomes the first child.
        text_tag = etree.Element(tag)
        text_tag.text = el.text
        el.text = None
        el.insert(0, text_tag)
|
||||
|
||||
def _merge_element_contents(el):
    """
    Removes an element, but merges its contents into its place, e.g.,
    given <p>Hi <i>there!</i></p>, if you remove the <i> element you get
    <p>Hi there!</p>

    The parent of el is modified in place.
    """
    parent = el.getparent()
    text = el.text or ''
    if el.tail:
        if not len(el):
            # No children: the tail simply follows the element's text.
            text += el.tail
        else:
            # Otherwise the tail is appended to the last child's tail.
            if el[-1].tail:
                el[-1].tail += el.tail
            else:
                el[-1].tail = el.tail
    index = parent.index(el)
    if text:
        # The element's text is attached to whatever precedes it: the
        # previous sibling's tail, or the parent's text for a first child.
        if index == 0:
            previous = None
        else:
            previous = parent[index-1]
        if previous is None:
            if parent.text:
                parent.text += text
            else:
                parent.text = text
        else:
            if previous.tail:
                previous.tail += text
            else:
                previous.tail = text
    # Replace el in its parent with el's own children.
    parent[index:index+1] = el.getchildren()
|
||||
|
||||
class InsensitiveSequenceMatcher(difflib.SequenceMatcher):
    """
    Acts like SequenceMatcher, but tries not to find very small equal
    blocks amidst large spans of changes
    """

    # Matching blocks of this many items or fewer are discarded (the
    # effective cut-off is lowered for short sequences, see below).
    threshold = 2

    def get_matching_blocks(self):
        """Return the matching blocks of SequenceMatcher, minus small ones.

        Blocks whose length does not exceed the threshold are dropped.
        The threshold is ``self.threshold`` capped at a quarter of the
        shorter sequence's length, so short inputs can still match.  The
        zero-length terminating block is always kept.
        """
        # Consider both sequences: the shorter one bounds the block size.
        size = min(len(self.a), len(self.b))
        threshold = min(self.threshold, size / 4)
        actual = difflib.SequenceMatcher.get_matching_blocks(self)
        return [item for item in actual
                if item[2] > threshold
                or not item[2]]
|
||||
|
||||
# When this module is executed directly, hand over to the main() function
# of lxml.html._diffcommand (imported here, only when needed).
if __name__ == '__main__':
    from lxml.html import _diffcommand
    _diffcommand.main()
|
||||
|
||||
299
.venv/lib/python3.7/site-packages/lxml/html/formfill.py
Normal file
299
.venv/lib/python3.7/site-packages/lxml/html/formfill.py
Normal file
@@ -0,0 +1,299 @@
|
||||
from lxml.etree import XPath, ElementBase
|
||||
from lxml.html import fromstring, XHTML_NAMESPACE
|
||||
from lxml.html import _forms_xpath, _options_xpath, _nons, _transform_result
|
||||
from lxml.html import defs
|
||||
import copy
|
||||
|
||||
try:
|
||||
basestring
|
||||
except NameError:
|
||||
# Python 3
|
||||
basestring = str
|
||||
|
||||
__all__ = ['FormNotFound', 'fill_form', 'fill_form_html',
|
||||
'insert_errors', 'insert_errors_html',
|
||||
'DefaultErrorCreator']
|
||||
|
||||
class FormNotFound(LookupError):
    """
    Raised when no form can be found: the page has no forms, or no form
    matches the requested id/name or index.
    """
|
||||
|
||||
# Finds <form> elements (plain or XHTML-namespaced) whose ``name`` attribute
# equals $name.  The predicate must use @name: a bare ``name`` would test
# for a child element called <name> instead of the attribute.
_form_name_xpath = XPath('descendant-or-self::form[@name=$name]|descendant-or-self::x:form[@name=$name]', namespaces={'x':XHTML_NAMESPACE})
|
||||
_input_xpath = XPath('|'.join(['descendant-or-self::'+_tag for _tag in ('input','select','textarea','x:input','x:select','x:textarea')]),
|
||||
namespaces={'x':XHTML_NAMESPACE})
|
||||
_label_for_xpath = XPath('//label[@for=$for_id]|//x:label[@for=$for_id]',
|
||||
namespaces={'x':XHTML_NAMESPACE})
|
||||
_name_xpath = XPath('descendant-or-self::*[@name=$name]')
|
||||
|
||||
def fill_form(el, values, form_id=None, form_index=None):
    # Locate the form inside el (by id/name, by index, or the first form
    # when neither is given) and fill its fields from values, in place.
    form = _find_form(el, form_id=form_id, form_index=form_index)
    _fill_form(form, values)
|
||||
|
||||
def fill_form_html(html, values, form_id=None, form_index=None):
    # Like fill_form, but works on a copy: strings are parsed, anything
    # else is deep-copied.  The filled document is converted back with
    # _transform_result according to the type of the input.
    original_type = type(html)
    doc = fromstring(html) if isinstance(html, basestring) else copy.deepcopy(html)
    fill_form(doc, values, form_id=form_id, form_index=form_index)
    return _transform_result(original_type, doc)
|
||||
|
||||
def _fill_form(el, values):
    # Fill every named input/select/textarea found under el from values.
    # Number of single-value fields seen so far, per field name.
    seen = {}
    if hasattr(values, 'mixed'):
        # For Paste request parameters
        values = values.mixed()
    for field in _input_xpath(el):
        name = field.get('name')
        if not name:
            continue
        if _takes_multiple(field):
            # Checkboxes, radios and multi-selects receive a list of values.
            chosen = values.get(name, [])
            if not isinstance(chosen, (list, tuple)):
                chosen = [chosen]
            _fill_multiple(field, chosen)
            continue
        if name not in values:
            continue
        position = seen.get(name, 0)
        seen[name] = position + 1
        chosen = values[name]
        if isinstance(chosen, (list, tuple)):
            # Repeated field names consume the list items in order.
            try:
                chosen = chosen[position]
            except IndexError:
                continue
        elif position > 0:
            # A scalar value only fills the first field of that name.
            continue
        _fill_single(field, chosen)
|
||||
|
||||
def _takes_multiple(input):
    # True for fields filled from a list of values: <select> elements with
    # a truthy ``multiple`` attribute, radio buttons and checkboxes.
    if _nons(input.tag) == 'select' and input.get('multiple'):
        # FIXME: multiple="0"?
        return True
    return input.get('type', '').lower() in ('radio', 'checkbox')
|
||||
|
||||
def _fill_multiple(input, value):
    # Set the checked/selected state of a checkbox, radio button or
    # <select> according to value (a collection of submitted values).
    kind = input.get('type', '').lower()
    if kind == 'checkbox':
        own_value = input.get('value')
        if own_value is not None:
            _check(input, own_value in value)
            return
        # Checkbox without a value attribute.
        if value:
            state = value[0]
            # NOTE(review): this tests ``value`` (the collection), not the
            # item just taken from it, so the branch is skipped whenever
            # value is a list -- confirm whether ``state`` was intended.
            if isinstance(value, basestring):
                # The only valid "on" value for an unnamed checkbox is 'on'
                state = state == 'on'
        else:
            state = False
        _check(input, state)
    elif kind == 'radio':
        _check(input, input.get('value') in value)
    else:
        assert _nons(input.tag) == 'select'
        for option in _options_xpath(input):
            option_value = option.get('value')
            if option_value is None:
                # This seems to be the default, at least on IE
                # FIXME: but I'm not sure
                option_value = option.text_content()
            _select(option, option_value in value)
|
||||
|
||||
def _check(el, check):
|
||||
if check:
|
||||
el.set('checked', '')
|
||||
else:
|
||||
if 'checked' in el.attrib:
|
||||
del el.attrib['checked']
|
||||
|
||||
def _select(el, select):
|
||||
if select:
|
||||
el.set('selected', '')
|
||||
else:
|
||||
if 'selected' in el.attrib:
|
||||
del el.attrib['selected']
|
||||
|
||||
def _fill_single(input, value):
    """Store ``value`` in a single-valued control.

    A ``<textarea>`` receives it as element text; every other element
    receives it as its ``value`` attribute.
    """
    is_textarea = _nons(input.tag) == 'textarea'
    if not is_textarea:
        input.set('value', value)
        return
    input.text = value
|
||||
|
||||
def _find_form(el, form_id=None, form_index=None):
    """Locate a form below ``el``.

    With neither ``form_id`` nor ``form_index`` the first form found is
    returned.  With ``form_id`` the element with that id is returned,
    falling back to the first form whose name matches.  Otherwise
    ``form_index`` indexes the list of forms found.

    Raises ``FormNotFound`` when no matching form exists.
    """
    if form_id is None and form_index is None:
        forms = _forms_xpath(el)
        for form in forms:
            return form
        raise FormNotFound(
            "No forms in page")
    if form_id is not None:
        # NOTE(review): if get_element_by_id() raises on a miss rather than
        # returning None, the name fallback below is never reached -- confirm.
        form = el.get_element_by_id(form_id)
        if form is not None:
            return form
        forms = _form_name_xpath(el, name=form_id)
        if forms:
            return forms[0]
        else:
            # Report the requested identifier; the previous code interpolated
            # the builtin ``id`` function instead of ``form_id``.
            raise FormNotFound(
                "No form with the name or id of %r (forms: %s)"
                % (form_id, ', '.join(_find_form_ids(el))))
    if form_index is not None:
        forms = _forms_xpath(el)
        try:
            return forms[form_index]
        except IndexError:
            raise FormNotFound(
                "There is no form with the index %r (%i forms found)"
                % (form_index, len(forms)))
|
||||
|
||||
def _find_form_ids(el):
    """Yield a human-readable label for every form found below ``el``.

    Yields the single string ``'(no forms)'`` when there are none.  A
    form is labelled ``'<id> or <name>'``, its id, its name, or
    ``'(unnamed form <index>)'`` depending on which attributes it has.
    """
    forms = _forms_xpath(el)
    if not forms:
        yield '(no forms)'
        return
    for position, form in enumerate(forms):
        ident = form.get('id')
        name = form.get('name')
        if ident and name:
            label = '%s or %s' % (ident, name)
        elif ident:
            label = ident
        elif name:
            label = name
        else:
            label = '(unnamed form %s)' % position
        yield label
|
||||
|
||||
############################################################
|
||||
## Error filling
|
||||
############################################################
|
||||
|
||||
class DefaultErrorCreator(object):
    """Callable that builds an error element and inserts it near a field.

    Behaviour is configured through the class attributes below, each of
    which may be overridden per instance via a keyword argument.
    """

    # Put the error before (True) or after (False) the target.
    insert_before = True
    # For block targets, insert inside the element rather than next to it.
    block_inside = True
    # Tag name of the generated error element.
    error_container_tag = 'div'
    # Class given to every generated error element (skipped when empty).
    error_message_class = 'error-message'
    # Extra class appended for block targets (skipped when empty).
    error_block_class = 'error-block'
    # Text used when the message is None or the empty string.
    default_message = "Invalid"

    def __init__(self, **kw):
        """Override class-level settings; unknown names raise TypeError."""
        for option, setting in kw.items():
            if hasattr(self, option):
                setattr(self, option, setting)
                continue
            raise TypeError(
                "Unexpected keyword argument: %s" % option)

    def __call__(self, el, is_block, message):
        """Create the error element for ``message`` and attach it to ``el``.

        ``message`` may be a string, an element, or None/'' (which
        selects ``default_message``).
        """
        container = el.makeelement(self.error_container_tag)
        if self.error_message_class:
            container.set('class', self.error_message_class)
        if is_block and self.error_block_class:
            combined = container.get('class', '')+' '+self.error_block_class
            container.set('class', combined)

        if message is None or message == '':
            message = self.default_message
        if not isinstance(message, ElementBase):
            assert isinstance(message, basestring), (
                "Bad message; should be a string or element: %r" % message)
            container.text = message or self.default_message
        else:
            container.append(message)

        place_inside = is_block and self.block_inside
        if not place_inside:
            # Insert as a sibling of ``el`` within its parent.
            parent = el.getparent()
            position = parent.index(el)
            if not self.insert_before:
                # The error takes over the text that followed ``el``.
                container.tail = el.tail
                el.tail = None
                parent.insert(position+1, container)
            else:
                parent.insert(position, container)
            return

        if not self.insert_before:
            el.append(container)
            return
        # Become the first child; the element's leading text moves after us.
        container.tail = el.text
        el.text = None
        el.insert(0, container)
|
||||
|
||||
default_error_creator = DefaultErrorCreator()
|
||||
|
||||
|
||||
def insert_errors(
    el,
    errors,
    form_id=None,
    form_index=None,
    error_class="error",
    error_creator=default_error_creator,
    ):
    """Insert the messages in ``errors`` into the form found below ``el``.

    ``errors`` maps a field name to a message; entries whose value is
    None are skipped.  The form is chosen via ``form_id`` /
    ``form_index``, and each (element, message) pair resolved for a name
    is passed on with ``error_class`` and ``error_creator``.
    """
    form = _find_form(el, form_id=form_id, form_index=form_index)
    for name, error in errors.items():
        if error is not None:
            for target, message in _find_elements_for_name(form, name, error):
                assert isinstance(message, (basestring, type(None), ElementBase)), (
                    "Bad message: %r" % message)
                _insert_error(target, message, error_class, error_creator)
|
||||
|
||||
def insert_errors_html(html, values, **kw):
    """Run ``insert_errors`` on ``html`` without modifying the input.

    A string is parsed first; any other input is deep-copied.  The
    result is converted back according to the type of the original
    ``html`` argument.  Extra keyword arguments go to ``insert_errors``.
    """
    original_type = type(html)
    doc = fromstring(html) if isinstance(html, basestring) else copy.deepcopy(html)
    insert_errors(doc, values, **kw)
    return _transform_result(original_type, doc)
|
||||
|
||||
def _insert_error(el, error, error_class, error_creator):
    """Mark ``el`` (and its labels) as erroneous and create the message.

    Empty tags and ``<textarea>`` are treated as non-block targets.
    ``error_class`` is added to ``el`` unless it is the form itself, and
    to every label whose ``for`` refers to the element's id.  Finally
    ``error_creator(el, is_block, error)`` is called.
    """
    tag = _nons(el.tag)
    is_block = not (tag in defs.empty_tags or tag == 'textarea')
    if tag != 'form' and error_class:
        _add_class(el, error_class)
    el_id = el.get('id')
    if el_id:
        for label in _label_for_xpath(el, for_id=el_id) or ():
            _add_class(label, error_class)
    error_creator(el, is_block, error)
|
||||
|
||||
def _add_class(el, class_name):
|
||||
if el.get('class'):
|
||||
el.set('class', el.get('class')+' '+class_name)
|
||||
else:
|
||||
el.set('class', class_name)
|
||||
|
||||
def _find_elements_for_name(form, name, error):
|
||||
if name is None:
|
||||
# An error for the entire form
|
||||
yield form, error
|
||||
return
|
||||
if name.startswith('#'):
|
||||
# By id
|
||||
el = form.get_element_by_id(name[1:])
|
||||
if el is not None:
|
||||
yield el, error
|
||||
return
|
||||
els = _name_xpath(form, name=name)
|
||||
if not els:
|
||||
# FIXME: should this raise an exception?
|
||||
return
|
||||
if not isinstance(error, (list, tuple)):
|
||||
yield els[0], error
|
||||
return
|
||||
# FIXME: if error is longer than els, should it raise an error?
|
||||
for el, err in zip(els, error):
|
||||
if err is None:
|
||||
continue
|
||||
yield el, err
|
||||
260
.venv/lib/python3.7/site-packages/lxml/html/html5parser.py
Normal file
260
.venv/lib/python3.7/site-packages/lxml/html/html5parser.py
Normal file
@@ -0,0 +1,260 @@
|
||||
"""
|
||||
An interface to html5lib that mimics the lxml.html interface.
|
||||
"""
|
||||
import sys
|
||||
import string
|
||||
|
||||
from html5lib import HTMLParser as _HTMLParser
|
||||
from html5lib.treebuilders.etree_lxml import TreeBuilder
|
||||
from lxml import etree
|
||||
from lxml.html import Element, XHTML_NAMESPACE, _contains_block_level_tag
|
||||
|
||||
# python3 compatibility
|
||||
try:
|
||||
_strings = basestring
|
||||
except NameError:
|
||||
_strings = (bytes, str)
|
||||
try:
|
||||
from urllib2 import urlopen
|
||||
except ImportError:
|
||||
from urllib.request import urlopen
|
||||
try:
|
||||
from urlparse import urlparse
|
||||
except ImportError:
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
||||
class HTMLParser(_HTMLParser):
    """An html5lib HTML parser with lxml as tree."""

    def __init__(self, strict=False, **kwargs):
        # Always build with the lxml TreeBuilder; ``strict`` and any extra
        # keyword arguments are forwarded to the html5lib base class.
        _HTMLParser.__init__(self, strict=strict, tree=TreeBuilder, **kwargs)
|
||||
|
||||
|
||||
# XHTML support is optional: XHTMLParser and the module-level
# ``xhtml_parser`` instance are only defined when html5lib provides
# an XHTMLParser class to import.
try:
    from html5lib import XHTMLParser as _XHTMLParser
except ImportError:
    pass
else:
    class XHTMLParser(_XHTMLParser):
        """An html5lib XHTML Parser with lxml as tree."""

        def __init__(self, strict=False, **kwargs):
            # Same wiring as HTMLParser: force the lxml TreeBuilder.
            _XHTMLParser.__init__(self, strict=strict, tree=TreeBuilder, **kwargs)

    xhtml_parser = XHTMLParser()
|
||||
|
||||
|
||||
def _find_tag(tree, tag):
|
||||
elem = tree.find(tag)
|
||||
if elem is not None:
|
||||
return elem
|
||||
return tree.find('{%s}%s' % (XHTML_NAMESPACE, tag))
|
||||
|
||||
|
||||
def document_fromstring(html, guess_charset=None, parser=None):
    """
    Parse a whole document from a string and return its root element.

    If `guess_charset` is true, or if the input is not Unicode but a
    byte string, the `chardet` library will perform charset guessing
    on the string.

    Raises TypeError when `html` is not a string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    active_parser = html_parser if parser is None else parser

    if guess_charset is None and isinstance(html, bytes):
        # html5lib does not accept useChardet as an argument, if it
        # detected the html argument would produce unicode objects.
        guess_charset = True
    extra = {} if guess_charset is None else {'useChardet': guess_charset}

    tree = active_parser.parse(html, **extra)
    return tree.getroot()
|
||||
|
||||
|
||||
def fragments_fromstring(html, no_leading_text=False,
                         guess_charset=None, parser=None):
    """Parse several HTML elements and return them as a list.

    The first item of the list may be a string.  When `no_leading_text`
    is true, non-whitespace leading text raises ``etree.ParserError``
    and whitespace-only leading text is dropped, so the list holds
    elements only.

    If `guess_charset` is true, the `chardet` library will perform charset
    guessing on the string.

    Raises TypeError when `html` is not a string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    active_parser = html_parser if parser is None else parser

    if guess_charset is None and isinstance(html, bytes):
        # html5lib does not accept useChardet as an argument, if it
        # detected the html argument would produce unicode objects.
        guess_charset = False
    extra = {} if guess_charset is None else {'useChardet': guess_charset}

    children = active_parser.parseFragment(html, 'div', **extra)
    has_leading_text = bool(children) and isinstance(children[0], _strings)
    if has_leading_text and no_leading_text:
        leading = children[0]
        if leading.strip():
            raise etree.ParserError('There is leading text: %r' %
                                    leading)
        del children[0]
    return children
|
||||
|
||||
|
||||
def fragment_fromstring(html, create_parent=False,
                        guess_charset=None, parser=None):
    """Parse a single HTML element.

    Without `create_parent` it is an error (``etree.ParserError``) if the
    input holds no element, more than one element, or anything but
    whitespace before or after the element.

    If `create_parent` is true (or is a tag name) a parent element
    ('div' unless a name is given) wraps the parsed content; leading or
    trailing text is allowed in that case.

    If `guess_charset` is true, the `chardet` library will perform charset
    guessing on the string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    wants_parent = bool(create_parent)
    elements = fragments_fromstring(
        html, guess_charset=guess_charset, parser=parser,
        no_leading_text=not wants_parent)

    if not wants_parent:
        if not elements:
            raise etree.ParserError('No elements found')
        if len(elements) > 1:
            raise etree.ParserError('Multiple elements found')
        only = elements[0]
        trailing = only.tail
        if trailing and trailing.strip():
            raise etree.ParserError('Element followed by text: %r' % trailing)
        only.tail = None
        return only

    parent_tag = create_parent if isinstance(create_parent, _strings) else 'div'
    new_root = Element(parent_tag)
    if elements:
        first = elements[0]
        if isinstance(first, _strings):
            # Leading text becomes the text of the new parent.
            new_root.text = first
            del elements[0]
        new_root.extend(elements)
    return new_root
|
||||
|
||||
|
||||
def fromstring(html, guess_charset=None, parser=None):
    """Parse the html, returning a single element/document.

    This tries to minimally parse the chunk of text, without knowing if it
    is a fragment or a document: the whole document is returned when the
    input starts with ``<html`` or a doctype, or when the parsed head is
    not empty; a lone body child is returned on its own; otherwise the
    body is renamed to ``div`` or ``span`` and returned as a container.

    If `guess_charset` is true, or if the input is not Unicode but a
    byte string, the `chardet` library will perform charset guessing
    on the string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')
    doc = document_fromstring(html, parser=parser,
                              guess_charset=guess_charset)

    # document starts with doctype or <html>, full document!
    prefix = html[:50]
    if isinstance(prefix, bytes):
        # Allow text comparison in python3.
        # Decode as ascii, that also covers latin-1 and utf-8 for the
        # characters we need.
        prefix = prefix.decode('ascii', 'replace')
    prefix = prefix.lstrip().lower()
    if prefix.startswith(('<html', '<!doctype')):
        return doc

    # if the head is not empty we have a full document
    head = _find_tag(doc, 'head')
    if len(head):
        return doc

    body = _find_tag(doc, 'body')

    # The body has just one element, so it was probably a single
    # element passed in
    if len(body) == 1:
        leading = body.text
        trailing = body[-1].tail
        no_leading = not (leading and leading.strip())
        no_trailing = not (trailing and trailing.strip())
        if no_leading and no_trailing:
            return body[0]

    # Now we have a body which represents a bunch of tags which have the
    # content that was passed in.  We will create a fake container, which
    # is the body tag, except <body> implies too much structure.
    body.tag = 'div' if _contains_block_level_tag(body) else 'span'
    return body
|
||||
|
||||
|
||||
def parse(filename_url_or_file, guess_charset=None, parser=None):
    """Parse a filename, URL, or file-like object into an HTML document
    tree.  Note: this returns a tree, not an element.  Use
    ``parse(...).getroot()`` to get the document root.

    If ``guess_charset`` is true, the ``useChardet`` option is passed into
    html5lib to enable character detection.  This option is on by default
    when parsing from URLs, off by default when parsing from file(-like)
    objects (which tend to return Unicode more often than not), and on by
    default when parsing from a file path (which is read in binary mode).

    A file or URL handle opened by this function is closed before it
    returns; a file-like object passed in by the caller is left open.
    """
    if parser is None:
        parser = html_parser

    # Only handles we open ourselves are ours to close.
    opened_here = False
    if not isinstance(filename_url_or_file, _strings):
        fp = filename_url_or_file
        if guess_charset is None:
            # assume that file-like objects return Unicode more often than bytes
            guess_charset = False
    elif _looks_like_url(filename_url_or_file):
        fp = urlopen(filename_url_or_file)
        opened_here = True
        if guess_charset is None:
            # assume that URLs return bytes
            guess_charset = True
    else:
        fp = open(filename_url_or_file, 'rb')
        opened_here = True
        if guess_charset is None:
            guess_charset = True

    options = {}
    # html5lib does not accept useChardet as an argument, if it
    # detected the html argument would produce unicode objects.
    if guess_charset:
        options['useChardet'] = guess_charset
    try:
        return parser.parse(fp, **options)
    finally:
        # Previously the handle was leaked; release it even if parsing fails.
        if opened_here:
            fp.close()
|
||||
|
||||
|
||||
def _looks_like_url(str):
|
||||
scheme = urlparse(str)[0]
|
||||
if not scheme:
|
||||
return False
|
||||
elif (sys.platform == 'win32' and
|
||||
scheme in string.ascii_letters
|
||||
and len(scheme) == 1):
|
||||
# looks like a 'normal' absolute path
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
|
||||
html_parser = HTMLParser()
|
||||
314
.venv/lib/python3.7/site-packages/lxml/html/soupparser.py
Normal file
314
.venv/lib/python3.7/site-packages/lxml/html/soupparser.py
Normal file
@@ -0,0 +1,314 @@
|
||||
"""External interface to the BeautifulSoup HTML parser.
|
||||
"""
|
||||
|
||||
__all__ = ["fromstring", "parse", "convert_tree"]
|
||||
|
||||
import re
|
||||
from lxml import etree, html
|
||||
|
||||
try:
|
||||
from bs4 import (
|
||||
BeautifulSoup, Tag, Comment, ProcessingInstruction, NavigableString,
|
||||
Declaration, Doctype)
|
||||
_DECLARATION_OR_DOCTYPE = (Declaration, Doctype)
|
||||
except ImportError:
|
||||
from BeautifulSoup import (
|
||||
BeautifulSoup, Tag, Comment, ProcessingInstruction, NavigableString,
|
||||
Declaration)
|
||||
_DECLARATION_OR_DOCTYPE = Declaration
|
||||
|
||||
|
||||
def fromstring(data, beautifulsoup=None, makeelement=None, **bsargs):
    """Parse a string of HTML data into an Element tree using the
    BeautifulSoup parser.

    Returns the root ``<html>`` Element of the tree.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a different Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.

    Any further keyword arguments are forwarded to the parser call.
    """
    return _parse(data, beautifulsoup, makeelement, **bsargs)
|
||||
|
||||
|
||||
def parse(file, beautifulsoup=None, makeelement=None, **bsargs):
    """Parse a file into an ElementTree using the BeautifulSoup parser.

    ``file`` is either an object with a ``read`` attribute or something
    accepted by ``open()``.  A file opened here is closed again before
    returning; a file object passed in by the caller is left open.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a different Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.
    """
    opened_here = not hasattr(file, 'read')
    if opened_here:
        file = open(file)
    try:
        root = _parse(file, beautifulsoup, makeelement, **bsargs)
    finally:
        # Previously the handle opened above was never closed.
        if opened_here:
            file.close()
    return etree.ElementTree(root)
|
||||
|
||||
|
||||
def convert_tree(beautiful_soup_tree, makeelement=None):
    """Convert a BeautifulSoup tree to a list of Element trees.

    The converted root's children are detached from it and returned as
    a list, which supports HTML-like soup with more than one root
    element.

    You can pass a different Element factory through the `makeelement`
    keyword.
    """
    converted_root = _convert_tree(beautiful_soup_tree, makeelement)
    # Snapshot the children first so removal does not disturb iteration.
    detached = list(converted_root)
    for node in detached:
        converted_root.remove(node)
    return detached
|
||||
|
||||
|
||||
# helpers
|
||||
|
||||
def _parse(source, beautifulsoup, makeelement, **bsargs):
    """Run the soup parser on ``source`` and return an ``html`` root element.

    Defaults are filled into ``bsargs`` depending on which attributes
    the parser class exposes (bs3 vs. bs4).  If the converted root has
    exactly one child tagged ``html`` that child is returned; otherwise
    the root itself is renamed to ``html``.
    """
    soup_class = BeautifulSoup if beautifulsoup is None else beautifulsoup
    if hasattr(soup_class, "HTML_ENTITIES"):  # bs3
        bsargs.setdefault('convertEntities', 'html')
    if hasattr(soup_class, "DEFAULT_BUILDER_FEATURES"):  # bs4
        bsargs.setdefault('features', 'html.parser')  # use Python html parser

    soup = soup_class(source, **bsargs)
    root = _convert_tree(soup, makeelement)

    # from ET: wrap the document in a html root element, if necessary
    if len(root) != 1 or root[0].tag != "html":
        root.tag = "html"
        return root
    return root[0]
|
||||
|
||||
|
||||
# Bound ``match`` of a case-insensitive pattern for an HTML DOCTYPE
# declaration.  After optional whitespace/'<'/'!' characters and
# "DOCTYPE HTML" (optionally followed by "PUBLIC") it captures up to two
# quoted strings, quotes included; a group is None when its string is absent.
_parse_doctype_declaration = re.compile(
    r'(?:\s|[<!])*DOCTYPE\s*HTML'
    r'(?:\s+PUBLIC)?(?:\s+(\'[^\']*\'|"[^"]*"))?'
    r'(?:\s+(\'[^\']*\'|"[^"]*"))?',
    re.IGNORECASE).match
|
||||
|
||||
|
||||
class _PseudoTag:
|
||||
# Minimal imitation of BeautifulSoup.Tag
|
||||
def __init__(self, contents):
|
||||
self.name = 'html'
|
||||
self.attrs = []
|
||||
self.contents = contents
|
||||
|
||||
def __iter__(self):
|
||||
return self.contents.__iter__()
|
||||
|
||||
|
||||
def _convert_tree(beautiful_soup_tree, makeelement):
    """Convert a parsed soup into an element tree and return its root.

    The top-level nodes are split into what precedes the first Tag, the
    Tag range itself, and what follows the last Tag.  The Tag range is
    folded into a single ``html`` root (an existing top-level one, or a
    ``_PseudoTag``), the surrounding nodes are attached as siblings of
    the converted root, and a matching DOCTYPE declaration is copied
    into the result's docinfo.

    ``makeelement`` defaults to ``html.html_parser.makeelement``.
    """
    if makeelement is None:
        makeelement = html.html_parser.makeelement

    # Split the tree into three parts:
    # i) everything before the root element: document type
    # declaration, comments, processing instructions, whitespace
    # ii) the root(s),
    # iii) everything after the root: comments, processing
    # instructions, whitespace
    first_element_idx = last_element_idx = None
    html_root = declaration = None
    for i, e in enumerate(beautiful_soup_tree):
        if isinstance(e, Tag):
            if first_element_idx is None:
                first_element_idx = i
            last_element_idx = i
            # Only the first top-level <html> tag is remembered.
            if html_root is None and e.name and e.name.lower() == 'html':
                html_root = e
        elif declaration is None and isinstance(e, _DECLARATION_OR_DOCTYPE):
            # Only the first declaration/doctype is remembered.
            declaration = e

    # For a nice, well-formatted document, the variable roots below is
    # a list consisting of a single <html> element. However, the document
    # may be a soup like '<meta><head><title>Hello</head><body>Hi
    # all<\p>'. In this example roots is a list containing meta, head
    # and body elements.
    if first_element_idx is None:
        # No Tag at top level: everything counts as root content.
        pre_root = post_root = []
        roots = beautiful_soup_tree.contents
    else:
        pre_root = beautiful_soup_tree.contents[:first_element_idx]
        roots = beautiful_soup_tree.contents[first_element_idx:last_element_idx+1]
        post_root = beautiful_soup_tree.contents[last_element_idx+1:]

    # Reorganize so that there is one <html> root...
    if html_root is not None:
        # ... use existing one if possible, ...
        # Its top-level siblings are spliced around its own contents,
        # keeping document order.
        i = roots.index(html_root)
        html_root.contents = roots[:i] + html_root.contents + roots[i+1:]
    else:
        # ... otherwise create a new one.
        html_root = _PseudoTag(roots)

    convert_node = _init_node_converters(makeelement)

    # Process pre_root
    res_root = convert_node(html_root)
    prev = res_root
    # Walk backwards so each converted node lands directly before the
    # previously inserted one.
    for e in reversed(pre_root):
        converted = convert_node(e)
        if converted is not None:
            prev.addprevious(converted)
            prev = converted

    # ditto for post_root
    prev = res_root
    for e in post_root:
        converted = convert_node(e)
        if converted is not None:
            prev.addnext(converted)
            prev = converted

    if declaration is not None:
        try:
            # bs4 provides full Doctype string
            doctype_string = declaration.output_ready()
        except AttributeError:
            doctype_string = declaration.string

        match = _parse_doctype_declaration(doctype_string)
        if not match:
            # Something is wrong if we end up in here. Since soupparser should
            # tolerate errors, do not raise Exception, just let it pass.
            pass
        else:
            external_id, sys_uri = match.groups()
            docinfo = res_root.getroottree().docinfo
            # strip quotes and update DOCTYPE values (any of None, '', '...')
            docinfo.public_id = external_id and external_id[1:-1]
            docinfo.system_url = sys_uri and sys_uri[1:-1]

    return res_root
|
||||
|
||||
|
||||
def _init_node_converters(makeelement):
    """Build and return the ``convert_node(bs_node, parent=None)`` function.

    Handlers are registered per soup node type in ``converters``;
    ``ordered_node_types`` records registration order for the
    ``isinstance`` fallback used on types not registered directly.
    ``makeelement`` creates elements that have no parent.
    """
    converters = {}
    ordered_node_types = []

    def converter(*types):
        # Decorator factory: register the decorated handler for each type.
        def add(handler):
            for t in types:
                converters[t] = handler
                ordered_node_types.append(t)
            return handler
        return add

    def find_best_converter(node):
        # First registered type that ``node`` is an instance of wins.
        for t in ordered_node_types:
            if isinstance(node, t):
                return converters[t]
        return None

    def convert_node(bs_node, parent=None):
        # duplicated in convert_tag() below
        try:
            handler = converters[type(bs_node)]
        except KeyError:
            # Cache the fallback result (possibly None) for this exact type.
            handler = converters[type(bs_node)] = find_best_converter(bs_node)
        if handler is None:
            return None
        return handler(bs_node, parent)

    def map_attrs(bs_attrs):
        # Returns a dict of unescaped attribute values; list values from
        # a dict input are joined with single spaces.
        if isinstance(bs_attrs, dict):  # bs4
            attribs = {}
            for k, v in bs_attrs.items():
                if isinstance(v, list):
                    v = " ".join(v)
                attribs[k] = unescape(v)
        else:
            attribs = dict((k, unescape(v)) for k, v in bs_attrs)
        return attribs

    def append_text(parent, text):
        # Text goes into ``parent.text`` while it has no children yet,
        # otherwise into the tail of its last child.
        if len(parent) == 0:
            parent.text = (parent.text or '') + text
        else:
            parent[-1].tail = (parent[-1].tail or '') + text

    # converters are tried in order of their definition

    @converter(Tag, _PseudoTag)
    def convert_tag(bs_node, parent):
        attrs = bs_node.attrs
        if parent is not None:
            attribs = map_attrs(attrs) if attrs else None
            res = etree.SubElement(parent, bs_node.name, attrib=attribs)
        else:
            attribs = map_attrs(attrs) if attrs else {}
            res = makeelement(bs_node.name, attrib=attribs)

        for child in bs_node:
            # avoid double recursion by inlining convert_node(), see above
            try:
                handler = converters[type(child)]
            except KeyError:
                pass
            else:
                # A cached None means no handler exists: skip the child.
                if handler is not None:
                    handler(child, res)
                continue
            convert_node(child, res)
        return res

    @converter(Comment)
    def convert_comment(bs_node, parent):
        res = html.HtmlComment(bs_node)
        if parent is not None:
            parent.append(res)
        return res

    @converter(ProcessingInstruction)
    def convert_pi(bs_node, parent):
        if bs_node.endswith('?'):
            # The PI is of XML style (<?as df?>) but BeautifulSoup
            # interpreted it as being SGML style (<?as df>).  Fix.
            bs_node = bs_node[:-1]
        # Split at the first space into target and (optional) text.
        res = etree.ProcessingInstruction(*bs_node.split(' ', 1))
        if parent is not None:
            parent.append(res)
        return res

    @converter(NavigableString)
    def convert_text(bs_node, parent):
        # Text never becomes a node of its own; without a parent it is dropped.
        if parent is not None:
            append_text(parent, unescape(bs_node))
        return None

    return convert_node
|
||||
|
||||
|
||||
# copied from ET's ElementSoup
|
||||
|
||||
try:
|
||||
from html.entities import name2codepoint # Python 3
|
||||
except ImportError:
|
||||
from htmlentitydefs import name2codepoint
|
||||
|
||||
|
||||
handle_entities = re.compile(r"&(\w+);").sub
|
||||
|
||||
|
||||
try:
|
||||
unichr
|
||||
except NameError:
|
||||
# Python 3
|
||||
unichr = chr
|
||||
|
||||
|
||||
def unescape(string):
    """Replace named entities (``&name;``) in ``string`` by their characters.

    Names missing from ``name2codepoint`` are left untouched.  A falsy
    ``string`` yields ``''``.
    """
    if not string:
        return ''

    # work around oddities in BeautifulSoup's entity handling
    def _replace(match):
        codepoint = name2codepoint.get(match.group(1))
        if codepoint is None:
            return match.group(0)  # use as is
        return unichr(codepoint)

    return handle_entities(_replace, string)
|
||||
13
.venv/lib/python3.7/site-packages/lxml/html/usedoctest.py
Normal file
13
.venv/lib/python3.7/site-packages/lxml/html/usedoctest.py
Normal file
@@ -0,0 +1,13 @@
|
||||
"""Doctest module for HTML comparison.
|
||||
|
||||
Usage::
|
||||
|
||||
>>> import lxml.html.usedoctest
|
||||
>>> # now do your HTML doctests ...
|
||||
|
||||
See `lxml.doctestcompare`.
|
||||
"""
|
||||
|
||||
from lxml import doctestcompare
|
||||
|
||||
doctestcompare.temp_install(html=True, del_module=__name__)
|
||||
Binary file not shown.
26
.venv/lib/python3.7/site-packages/lxml/includes/c14n.pxd
Normal file
26
.venv/lib/python3.7/site-packages/lxml/includes/c14n.pxd
Normal file
@@ -0,0 +1,26 @@
|
||||
from lxml.includes.tree cimport xmlDoc, xmlOutputBuffer, xmlChar
|
||||
from lxml.includes.xpath cimport xmlNodeSet
|
||||
|
||||
# External declarations for the C14N functions of "libxml/c14n.h".
# Each function is declared ``nogil``; the three variants differ only in
# their final output argument(s): a memory pointer, a filename plus
# compression level, or an xmlOutputBuffer.
cdef extern from "libxml/c14n.h":
    cdef int xmlC14NDocDumpMemory(xmlDoc* doc,
                                  xmlNodeSet* nodes,
                                  int exclusive,
                                  xmlChar** inclusive_ns_prefixes,
                                  int with_comments,
                                  xmlChar** doc_txt_ptr) nogil

    cdef int xmlC14NDocSave(xmlDoc* doc,
                            xmlNodeSet* nodes,
                            int exclusive,
                            xmlChar** inclusive_ns_prefixes,
                            int with_comments,
                            char* filename,
                            int compression) nogil

    cdef int xmlC14NDocSaveTo(xmlDoc* doc,
                              xmlNodeSet* nodes,
                              int exclusive,
                              xmlChar** inclusive_ns_prefixes,
                              int with_comments,
                              xmlOutputBuffer* buffer) nogil
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
# Boolean build-configuration flags taken from "etree_defs.h".
cdef extern from "etree_defs.h":
    cdef bint ENABLE_THREADING
    cdef bint ENABLE_SCHEMATRON
|
||||
18
.venv/lib/python3.7/site-packages/lxml/includes/dtdvalid.pxd
Normal file
18
.venv/lib/python3.7/site-packages/lxml/includes/dtdvalid.pxd
Normal file
@@ -0,0 +1,18 @@
|
||||
from lxml.includes cimport tree
|
||||
from lxml.includes.tree cimport xmlDoc, xmlDtd
|
||||
|
||||
# External declarations from "libxml/valid.h" (whole block is ``nogil``):
# the validation context struct with its error/warning callbacks, its
# constructor/destructor, and the DTD validation / element lookup calls.
cdef extern from "libxml/valid.h" nogil:
    ctypedef void (*xmlValidityErrorFunc)(void * ctx, const char * msg, ...)
    ctypedef void (*xmlValidityWarningFunc)(void * ctx, const char * msg, ...)

    ctypedef struct xmlValidCtxt:
        void *userData
        xmlValidityErrorFunc error
        xmlValidityWarningFunc warning

    cdef xmlValidCtxt* xmlNewValidCtxt()
    cdef void xmlFreeValidCtxt(xmlValidCtxt* cur)

    cdef int xmlValidateDtd(xmlValidCtxt* ctxt, xmlDoc* doc, xmlDtd* dtd)
    cdef tree.xmlElement* xmlGetDtdElementDesc(
        xmlDtd* dtd, tree.const_xmlChar* name)
|
||||
427
.venv/lib/python3.7/site-packages/lxml/includes/etree_defs.h
Normal file
427
.venv/lib/python3.7/site-packages/lxml/includes/etree_defs.h
Normal file
@@ -0,0 +1,427 @@
|
||||
#ifndef HAS_ETREE_DEFS_H
|
||||
#define HAS_ETREE_DEFS_H
|
||||
|
||||
/* quick check for Python/libxml2/libxslt devel setup */
|
||||
#include "Python.h"
|
||||
#ifndef PY_VERSION_HEX
|
||||
# error the development package of Python (header files etc.) is not installed correctly
|
||||
#else
|
||||
# if PY_VERSION_HEX < 0x02070000 || PY_MAJOR_VERSION >= 3 && PY_VERSION_HEX < 0x03050000
|
||||
# error this version of lxml requires Python 2.7, 3.5 or later
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include "libxml/xmlversion.h"
|
||||
#ifndef LIBXML_VERSION
|
||||
# error the development package of libxml2 (header files etc.) is not installed correctly
|
||||
#else
|
||||
#if LIBXML_VERSION < 20700
|
||||
# error minimum required version of libxml2 is 2.7.0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "libxslt/xsltconfig.h"
|
||||
#ifndef LIBXSLT_VERSION
|
||||
# error the development package of libxslt (header files etc.) is not installed correctly
|
||||
#else
|
||||
#if LIBXSLT_VERSION < 10123
|
||||
# error minimum required version of libxslt is 1.1.23
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/* v_arg functions */
|
||||
#define va_int(ap) va_arg(ap, int)
|
||||
#define va_charptr(ap) va_arg(ap, char *)
|
||||
|
||||
#ifdef PYPY_VERSION
|
||||
# define IS_PYPY 1
|
||||
#else
|
||||
# define IS_PYPY 0
|
||||
#endif
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
# define IS_PYTHON2 0 /* prefer for special casing Python 2.x */
|
||||
# define IS_PYTHON3 1 /* avoid */
|
||||
#else
|
||||
# define IS_PYTHON2 1
|
||||
# define IS_PYTHON3 0
|
||||
#endif
|
||||
|
||||
#if IS_PYTHON2
|
||||
#ifndef LXML_UNICODE_STRINGS
|
||||
#define LXML_UNICODE_STRINGS 0
|
||||
#endif
|
||||
#else
|
||||
#undef LXML_UNICODE_STRINGS
|
||||
#define LXML_UNICODE_STRINGS 1
|
||||
#endif
|
||||
|
||||
#if !IS_PYPY
|
||||
# define PyWeakref_LockObject(obj) (NULL)
|
||||
#endif
|
||||
|
||||
/* Threading is not currently supported by PyPy */
|
||||
#if IS_PYPY
|
||||
# ifndef WITHOUT_THREADING
|
||||
# define WITHOUT_THREADING
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if IS_PYPY
|
||||
# undef PyFile_AsFile
|
||||
# define PyFile_AsFile(o) (NULL)
|
||||
# undef PyByteArray_Check
|
||||
# define PyByteArray_Check(o) (0)
|
||||
#elif !IS_PYTHON2
|
||||
/* Python 3+ doesn't have PyFile_*() anymore */
|
||||
# define PyFile_AsFile(o) (NULL)
|
||||
#endif
|
||||
|
||||
#if IS_PYPY
|
||||
# ifndef PyUnicode_FromFormat
|
||||
# define PyUnicode_FromFormat PyString_FromFormat
|
||||
# endif
|
||||
# if !IS_PYTHON2 && !defined(PyBytes_FromFormat)
|
||||
# ifdef PyString_FromFormat
|
||||
# define PyBytes_FromFormat PyString_FromFormat
|
||||
# else
|
||||
#include <stdarg.h>
|
||||
/* Fallback PyBytes_FromFormat(): formats with PyUnicode_FromFormatV(),
 * then converts a unicode result to a UTF-8 encoded object.
 * Returns NULL if formatting or encoding fails; if the final object is
 * not exactly a bytes object it is released and TypeError is set. */
static PyObject* PyBytes_FromFormat(const char* format, ...) {
    PyObject *string;
    va_list vargs;
#ifdef HAVE_STDARG_PROTOTYPES
    va_start(vargs, format);
#else
    va_start(vargs);
#endif
    string = PyUnicode_FromFormatV(format, vargs);
    va_end(vargs);
    if (string && PyUnicode_Check(string)) {
        /* Swap the unicode object for its UTF-8 encoding (may be NULL). */
        PyObject *bstring = PyUnicode_AsUTF8String(string);
        Py_DECREF(string);
        string = bstring;
    }
    if (string && !PyBytes_CheckExact(string)) {
        Py_DECREF(string);
        string = NULL;
        PyErr_SetString(PyExc_TypeError, "String formatting and encoding failed to return bytes object");
    }
    return string;
}
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* PySlice_GetIndicesEx() has wrong signature in Py<=3.1 */
|
||||
#if PY_VERSION_HEX >= 0x03020000
|
||||
# define _lx_PySlice_GetIndicesEx(o, l, b, e, s, sl) PySlice_GetIndicesEx(o, l, b, e, s, sl)
|
||||
#else
|
||||
# define _lx_PySlice_GetIndicesEx(o, l, b, e, s, sl) PySlice_GetIndicesEx(((PySliceObject*)o), l, b, e, s, sl)
|
||||
#endif
|
||||
|
||||
#if PY_VERSION_HEX >= 0x030B00A1
|
||||
/* Python 3.12 doesn't have wstr Unicode strings any more; this guard (PY_VERSION_HEX >= 0x030B00A1, i.e. 3.11.0a1) already stubs out the wstr accessors. */
|
||||
#undef PyUnicode_GET_DATA_SIZE
|
||||
#define PyUnicode_GET_DATA_SIZE(ustr) (0)
|
||||
#undef PyUnicode_AS_DATA
|
||||
#define PyUnicode_AS_DATA(ustr) (NULL)
|
||||
#undef PyUnicode_IS_READY
|
||||
#define PyUnicode_IS_READY(ustr) (1)
|
||||
#endif
|
||||
|
||||
#ifdef WITHOUT_THREADING
|
||||
# undef PyEval_SaveThread
|
||||
# define PyEval_SaveThread() (NULL)
|
||||
# undef PyEval_RestoreThread
|
||||
# define PyEval_RestoreThread(state) if (state); else {}
|
||||
# undef PyGILState_Ensure
|
||||
# define PyGILState_Ensure() (PyGILState_UNLOCKED)
|
||||
# undef PyGILState_Release
|
||||
# define PyGILState_Release(state) if (state); else {}
|
||||
# undef Py_UNBLOCK_THREADS
|
||||
# define Py_UNBLOCK_THREADS _save = NULL;
|
||||
# undef Py_BLOCK_THREADS
|
||||
# define Py_BLOCK_THREADS if (_save); else {}
|
||||
#endif
|
||||
|
||||
#ifdef WITHOUT_THREADING
|
||||
# define ENABLE_THREADING 0
|
||||
#else
|
||||
# define ENABLE_THREADING 1
|
||||
#endif
|
||||
|
||||
#if LIBXML_VERSION < 20704
|
||||
/* FIXME: hack to make new error reporting compile in old libxml2 versions */
|
||||
# define xmlStructuredErrorContext NULL
|
||||
# define xmlXIncludeProcessTreeFlagsData(n,o,d) xmlXIncludeProcessTreeFlags(n,o)
|
||||
#endif
|
||||
|
||||
/* schematron was added in libxml2 2.6.21 */
|
||||
#ifdef LIBXML_SCHEMATRON_ENABLED
|
||||
# define ENABLE_SCHEMATRON 1
|
||||
#else
|
||||
# define ENABLE_SCHEMATRON 0
|
||||
# define XML_SCHEMATRON_OUT_QUIET 0
|
||||
# define XML_SCHEMATRON_OUT_XML 0
|
||||
# define XML_SCHEMATRON_OUT_ERROR 0
|
||||
typedef void xmlSchematron;
|
||||
typedef void xmlSchematronParserCtxt;
|
||||
typedef void xmlSchematronValidCtxt;
|
||||
# define xmlSchematronNewDocParserCtxt(doc) NULL
|
||||
# define xmlSchematronNewParserCtxt(file) NULL
|
||||
# define xmlSchematronParse(ctxt) NULL
|
||||
# define xmlSchematronFreeParserCtxt(ctxt)
|
||||
# define xmlSchematronFree(schema)
|
||||
# define xmlSchematronNewValidCtxt(schema, options) NULL
|
||||
# define xmlSchematronValidateDoc(ctxt, doc) 0
|
||||
# define xmlSchematronFreeValidCtxt(ctxt)
|
||||
# define xmlSchematronSetValidStructuredErrors(ctxt, errorfunc, data)
|
||||
#endif
|
||||
|
||||
#if LIBXML_VERSION < 20708
|
||||
# define HTML_PARSE_NODEFDTD 4
|
||||
#endif
|
||||
#if LIBXML_VERSION < 20900
|
||||
# define XML_PARSE_BIG_LINES 4194304
|
||||
#endif
|
||||
|
||||
#include "libxml/tree.h"
|
||||
#ifndef LIBXML2_NEW_BUFFER
|
||||
typedef xmlBuffer xmlBuf;
|
||||
# define xmlBufContent(buf) xmlBufferContent(buf)
|
||||
# define xmlBufUse(buf) xmlBufferLength(buf)
|
||||
#endif
|
||||
|
||||
/* libexslt 1.1.25+ supports EXSLT functions in XPath */
|
||||
#if LIBXSLT_VERSION < 10125
|
||||
#define exsltDateXpathCtxtRegister(ctxt, prefix)
|
||||
#define exsltSetsXpathCtxtRegister(ctxt, prefix)
|
||||
#define exsltMathXpathCtxtRegister(ctxt, prefix)
|
||||
#define exsltStrXpathCtxtRegister(ctxt, prefix)
|
||||
#endif
|
||||
|
||||
#define LXML_GET_XSLT_ENCODING(result_var, style) XSLT_GET_IMPORT_PTR(result_var, style, encoding)
|
||||
|
||||
/* work around MSDEV 6.0 */
|
||||
#if (_MSC_VER == 1200) && (WINVER < 0x0500)
|
||||
long _ftol( double ); //defined by VC6 C libs
|
||||
long _ftol2( double dblSource ) { return _ftol( dblSource ); }
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
/* Test for GCC > 2.95 */
|
||||
#if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
|
||||
#define unlikely_condition(x) __builtin_expect((x), 0)
|
||||
#else /* __GNUC__ > 2 ... */
|
||||
#define unlikely_condition(x) (x)
|
||||
#endif /* __GNUC__ > 2 ... */
|
||||
#else /* __GNUC__ */
|
||||
#define unlikely_condition(x) (x)
|
||||
#endif /* __GNUC__ */
|
||||
|
||||
#ifndef Py_TYPE
|
||||
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
|
||||
#endif
|
||||
|
||||
#define PY_NEW(T) \
|
||||
(((PyTypeObject*)(T))->tp_new( \
|
||||
(PyTypeObject*)(T), __pyx_empty_tuple, NULL))
|
||||
|
||||
#define _fqtypename(o) ((Py_TYPE(o))->tp_name)
|
||||
|
||||
/* Overflow-checked allocation of 'count' items of 'item_size' bytes each.
 *
 * Evaluates to NULL, without calling PyMem_Malloc(), when 'count' is larger
 * than PY_SSIZE_T_MAX / item_size; otherwise to PyMem_Malloc(count * item_size).
 *
 * Both arguments are fully parenthesised so that compound expressions such as
 * "a + b" expand with the intended precedence.
 * NOTE: 'count' and 'item_size' are each evaluated twice, and 'item_size'
 * is used as a divisor, so it must be non-zero.
 */
#define lxml_malloc(count, item_size) \
    (unlikely_condition((size_t)(count) > (size_t) (PY_SSIZE_T_MAX / (item_size))) ? NULL : \
     (PyMem_Malloc((count) * (item_size))))
|
||||
|
||||
/* Overflow-checked reallocation of 'mem' to hold 'count' items of
 * 'item_size' bytes each.
 *
 * Evaluates to NULL, without calling PyMem_Realloc(), when 'count' is larger
 * than PY_SSIZE_T_MAX / item_size; otherwise to
 * PyMem_Realloc(mem, count * item_size).
 *
 * All arguments are fully parenthesised so that compound expressions such as
 * "a + b" expand with the intended precedence.
 * NOTE: 'count' and 'item_size' are each evaluated twice, and 'item_size'
 * is used as a divisor, so it must be non-zero.
 */
#define lxml_realloc(mem, count, item_size) \
    (unlikely_condition((size_t)(count) > (size_t) (PY_SSIZE_T_MAX / (item_size))) ? NULL : \
     (PyMem_Realloc((mem), (count) * (item_size))))
|
||||
|
||||
#define lxml_free(mem) PyMem_Free(mem)
|
||||
|
||||
#if PY_MAJOR_VERSION < 3
|
||||
#define _isString(obj) (PyString_CheckExact(obj) || \
|
||||
PyUnicode_CheckExact(obj) || \
|
||||
PyType_IsSubtype(Py_TYPE(obj), &PyBaseString_Type))
|
||||
#else
|
||||
/* builtin subtype type checks are almost as fast as exact checks in Py2.7+
|
||||
* and Unicode is more common in Py3 */
|
||||
#define _isString(obj) (PyUnicode_Check(obj) || PyBytes_Check(obj))
|
||||
#endif
|
||||
|
||||
#if PY_VERSION_HEX >= 0x03060000
|
||||
#define lxml_PyOS_FSPath(obj) (PyOS_FSPath(obj))
|
||||
#else
|
||||
#define lxml_PyOS_FSPath(obj) (NULL)
|
||||
#endif
|
||||
|
||||
/* True if c_node has one of the node types that this header treats like an
 * element: element, comment, entity reference or processing instruction.
 * NOTE: c_node is evaluated once per comparison, so pass an expression
 * without side effects.
 */
#define _isElement(c_node) \
|
||||
(((c_node)->type == XML_ELEMENT_NODE) || \
|
||||
((c_node)->type == XML_COMMENT_NODE) || \
|
||||
((c_node)->type == XML_ENTITY_REF_NODE) || \
|
||||
((c_node)->type == XML_PI_NODE))
|
||||
|
||||
/* Like _isElement(), but additionally true for the XInclude start/end
 * marker nodes (XML_XINCLUDE_START / XML_XINCLUDE_END).
 * NOTE: c_node is evaluated several times.
 */
#define _isElementOrXInclude(c_node) \
|
||||
(_isElement(c_node) || \
|
||||
((c_node)->type == XML_XINCLUDE_START) || \
|
||||
((c_node)->type == XML_XINCLUDE_END))
|
||||
|
||||
/* Evaluates to the namespace href of c_node (c_node->ns->href), or to 0
 * when the node has no namespace attached (c_node->ns is 0).
 */
#define _getNs(c_node) \
|
||||
(((c_node)->ns == 0) ? 0 : ((c_node)->ns->href))
|
||||
|
||||
|
||||
#include "string.h"
|
||||
/* Unpack the xmlDoc pointer stored in a capsule named "libxml2:xmlDoc".
 *
 * capsule:  the capsule object to unpack.
 * is_owned: output flag; set to 1 only when ownership of the document was
 *           taken over from the capsule, otherwise left at 0.
 *
 * Returns the document pointer, or NULL on failure.  A TypeError is set for
 * an invalid capsule and a ValueError for a pointer that is neither an XML
 * nor an HTML document node.
 *
 * Ownership is taken only if the capsule context is the string
 * "destructor:xmlFreeDoc": the capsule destructor is cleared and the capsule
 * is invalidated by clearing its name.
 */
static void* lxml_unpack_xmldoc_capsule(PyObject* capsule, int* is_owned) {
    const char *doc_capsule_name = "libxml2:xmlDoc";
    xmlDoc *doc;
    void *capsule_context;

    *is_owned = 0;

    if (unlikely_condition(!PyCapsule_IsValid(capsule, doc_capsule_name))) {
        PyErr_SetString(
            PyExc_TypeError,
            "Not a valid capsule. The capsule argument must be a capsule object with name libxml2:xmlDoc");
        return NULL;
    }

    doc = (xmlDoc*) PyCapsule_GetPointer(capsule, doc_capsule_name);
    if (unlikely_condition(doc == NULL))
        return NULL;

    if (unlikely_condition(!(doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE))) {
        PyErr_Format(
            PyExc_ValueError,
            "Illegal document provided: expected XML or HTML, found %d", (int)doc->type);
        return NULL;
    }

    capsule_context = PyCapsule_GetContext(capsule);
    if (capsule_context == NULL) {
        /* a NULL context is only a failure if an exception is pending */
        if (unlikely_condition(PyErr_Occurred() != NULL))
            return NULL;
        return doc;
    }

    /* any other context string means the capsule keeps ownership */
    if (strcmp((const char*) capsule_context, "destructor:xmlFreeDoc") != 0)
        return doc;

    /* take ownership by setting destructor to NULL */
    if (PyCapsule_SetDestructor(capsule, NULL) != 0)
        return doc;

    /* ownership transferred => invalidate capsule by clearing its name */
    if (unlikely_condition(PyCapsule_SetName(capsule, NULL))) {
        /* this should never happen since everything above succeeded */
        xmlFreeDoc(doc);
        return NULL;
    }

    *is_owned = 1;
    return doc;
}
|
||||
|
||||
/* Macro pair implementation of a depth first tree walker
|
||||
*
|
||||
* Calls the code block between the BEGIN and END macros for all elements
|
||||
* below c_tree_top (exclusively), starting at c_node (inclusively iff
|
||||
* 'inclusive' is 1). The _ELEMENT_ variants will only stop on nodes
|
||||
* that match _isElement(), the normal variant will stop on every node
|
||||
* except text nodes.
|
||||
*
|
||||
* To traverse the node and all of its children and siblings in Pyrex, call
|
||||
* cdef xmlNode* some_node
|
||||
* BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 1)
|
||||
* # do something with some_node
|
||||
* END_FOR_EACH_ELEMENT_FROM(some_node)
|
||||
*
|
||||
* To traverse only the children and siblings of a node, call
|
||||
* cdef xmlNode* some_node
|
||||
* BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 0)
|
||||
* # do something with some_node
|
||||
* END_FOR_EACH_ELEMENT_FROM(some_node)
|
||||
*
|
||||
* To traverse only the children, do:
|
||||
* cdef xmlNode* some_node
|
||||
* some_node = parent_node.children
|
||||
* BEGIN_FOR_EACH_ELEMENT_FROM(parent_node, some_node, 1)
|
||||
* # do something with some_node
|
||||
* END_FOR_EACH_ELEMENT_FROM(some_node)
|
||||
*
|
||||
* NOTE: 'some_node' MUST be a plain 'xmlNode*' !
|
||||
*
|
||||
* NOTE: parent modification during the walk can divert the iterator, but
|
||||
* should not segfault !
|
||||
*/
|
||||
|
||||
/* Node filter used by the tree walker: evaluates to _isElement(c_node) when
 * only_elements is true, and to 1 (every node matches) otherwise.
 */
#define _LX__ELEMENT_MATCH(c_node, only_elements) \
|
||||
((only_elements) ? (_isElement(c_node)) : 1)
|
||||
|
||||
/* Moves c_node along its ->next chain until it is 0 or matches
 * _LX__ELEMENT_MATCH(); a node that already matches is left unchanged.
 * Expands to a complete while statement including the trailing semicolon,
 * and assigns to c_node, which therefore must be an lvalue.
 */
#define _LX__ADVANCE_TO_NEXT(c_node, only_elements) \
|
||||
while ((c_node != 0) && (!_LX__ELEMENT_MATCH(c_node, only_elements))) \
|
||||
c_node = c_node->next;
|
||||
|
||||
/* Single step of the depth-first walk: replaces c_node by the node to visit
 * next, or by 0 when the walk is finished.
 *
 * Candidates are tried in this order:
 *  1. the first matching child of c_node (the children of entity reference
 *     and DTD nodes are never entered);
 *  2. unless c_node is c_stop_node: the next matching sibling of c_node;
 *  3. the next matching sibling of the closest ancestor that has one.  The
 *     climb stops when the parent pointer is 0, when c_stop_node is reached,
 *     or (with only_elements) when a parent is not an _isElement() node.
 *
 * Expands to a brace-enclosed block that declares the local _lx__next and
 * assigns to c_node, so c_node must be an lvalue.
 */
#define _LX__TRAVERSE_TO_NEXT(c_stop_node, c_node, only_elements) \
|
||||
{ \
|
||||
/* walk through children first */ \
|
||||
xmlNode* _lx__next = c_node->children; \
|
||||
if (_lx__next != 0) { \
|
||||
if (c_node->type == XML_ENTITY_REF_NODE || c_node->type == XML_DTD_NODE) { \
|
||||
_lx__next = 0; \
|
||||
} else { \
|
||||
_LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
|
||||
} \
|
||||
} \
|
||||
if ((_lx__next == 0) && (c_node != c_stop_node)) { \
|
||||
/* try siblings */ \
|
||||
_lx__next = c_node->next; \
|
||||
_LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
|
||||
/* back off through parents */ \
|
||||
while (_lx__next == 0) { \
|
||||
c_node = c_node->parent; \
|
||||
if (c_node == 0) \
|
||||
break; \
|
||||
if (c_node == c_stop_node) \
|
||||
break; \
|
||||
if ((only_elements) && !_isElement(c_node)) \
|
||||
break; \
|
||||
/* we already traversed the parents -> siblings */ \
|
||||
_lx__next = c_node->next; \
|
||||
_LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
|
||||
} \
|
||||
} \
|
||||
c_node = _lx__next; \
|
||||
}
|
||||
|
||||
/* Opening half of the tree walker loop.
 *
 * Opens a brace scope, an "if (c_node != 0)" and a "while (c_node != 0)"
 * which are all left open on purpose; they must be closed by a matching
 * _LX__END_FOR_EACH_FROM().  The values of c_tree_top and only_elements are
 * captured once in the locals _lx__tree_top and _lx__only_elements, which
 * the closing macro relies on.
 *
 * Start position: a start node that does not match the filter is skipped
 * (the walk continues at its next matching sibling, or ends immediately if
 * it is the tree top).  A matching start node is visited only if 'inclusive'
 * is true; otherwise the walk begins at the node that follows it.
 */
#define _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, only_elements) \
|
||||
{ \
|
||||
if (c_node != 0) { \
|
||||
const xmlNode* _lx__tree_top = (c_tree_top); \
|
||||
const int _lx__only_elements = (only_elements); \
|
||||
/* make sure we start at an element */ \
|
||||
if (!_LX__ELEMENT_MATCH(c_node, _lx__only_elements)) { \
|
||||
/* we skip the node, so 'inclusive' is irrelevant */ \
|
||||
if (c_node == _lx__tree_top) \
|
||||
c_node = 0; /* nothing to traverse */ \
|
||||
else { \
|
||||
c_node = c_node->next; \
|
||||
_LX__ADVANCE_TO_NEXT(c_node, _lx__only_elements) \
|
||||
} \
|
||||
} else if (! (inclusive)) { \
|
||||
/* skip the first node */ \
|
||||
_LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \
|
||||
} \
|
||||
\
|
||||
/* now run the user code on the elements we find */ \
|
||||
while (c_node != 0) { \
|
||||
/* here goes the code to be run for each element */
|
||||
|
||||
/* Closing half of the tree walker loop: advances c_node with
 * _LX__TRAVERSE_TO_NEXT(), using the _lx__tree_top / _lx__only_elements
 * locals declared by _LX__BEGIN_FOR_EACH_FROM(), and then closes the while
 * loop, the if block and the outer brace scope opened by that macro.
 */
#define _LX__END_FOR_EACH_FROM(c_node) \
|
||||
_LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
/* Element-only walker pair: same as the generic walker with only_elements
 * fixed to 1, so the loop body runs only for nodes matching _isElement().
 */
#define BEGIN_FOR_EACH_ELEMENT_FROM(c_tree_top, c_node, inclusive) \
|
||||
_LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 1)
|
||||
|
||||
#define END_FOR_EACH_ELEMENT_FROM(c_node) \
|
||||
_LX__END_FOR_EACH_FROM(c_node)
|
||||
|
||||
/* Unfiltered walker pair: same as the generic walker with only_elements
 * fixed to 0, so the node filter accepts every node it is applied to.
 */
#define BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive) \
|
||||
_LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 0)
|
||||
|
||||
#define END_FOR_EACH_FROM(c_node) \
|
||||
_LX__END_FOR_EACH_FROM(c_node)
|
||||
|
||||
|
||||
#endif /* HAS_ETREE_DEFS_H */
|
||||
237
.venv/lib/python3.7/site-packages/lxml/includes/etreepublic.pxd
Normal file
237
.venv/lib/python3.7/site-packages/lxml/includes/etreepublic.pxd
Normal file
@@ -0,0 +1,237 @@
|
||||
# public Cython/C interface to lxml.etree
|
||||
|
||||
from lxml.includes cimport tree
|
||||
from lxml.includes.tree cimport const_xmlChar
|
||||
|
||||
cdef extern from "lxml-version.h":
|
||||
cdef char* LXML_VERSION_STRING
|
||||
|
||||
cdef extern from "etree_defs.h":
|
||||
# test if c_node is considered an Element (i.e. Element, Comment, etc.)
|
||||
cdef bint _isElement(tree.xmlNode* c_node) nogil
|
||||
|
||||
# return the namespace URI of the node or NULL
|
||||
cdef const_xmlChar* _getNs(tree.xmlNode* node) nogil
|
||||
|
||||
# pair of macros for tree traversal
|
||||
cdef void BEGIN_FOR_EACH_ELEMENT_FROM(tree.xmlNode* tree_top,
|
||||
tree.xmlNode* start_node,
|
||||
int start_node_inclusive) nogil
|
||||
cdef void END_FOR_EACH_ELEMENT_FROM(tree.xmlNode* start_node) nogil
|
||||
|
||||
cdef extern from "etree_api.h":
|
||||
|
||||
# first function to call!
|
||||
cdef int import_lxml__etree() except -1
|
||||
|
||||
##########################################################################
|
||||
# public ElementTree API classes
|
||||
|
||||
cdef class lxml.etree._Document [ object LxmlDocument ]:
|
||||
cdef tree.xmlDoc* _c_doc
|
||||
|
||||
cdef class lxml.etree._Element [ object LxmlElement ]:
|
||||
cdef _Document _doc
|
||||
cdef tree.xmlNode* _c_node
|
||||
|
||||
cdef class lxml.etree.ElementBase(_Element) [ object LxmlElementBase ]:
|
||||
pass
|
||||
|
||||
cdef class lxml.etree._ElementTree [ object LxmlElementTree ]:
|
||||
cdef _Document _doc
|
||||
cdef _Element _context_node
|
||||
|
||||
cdef class lxml.etree.ElementClassLookup [ object LxmlElementClassLookup ]:
|
||||
cdef object (*_lookup_function)(object, _Document, tree.xmlNode*)
|
||||
|
||||
cdef class lxml.etree.FallbackElementClassLookup(ElementClassLookup) \
|
||||
[ object LxmlFallbackElementClassLookup ]:
|
||||
cdef ElementClassLookup fallback
|
||||
cdef object (*_fallback_function)(object, _Document, tree.xmlNode*)
|
||||
|
||||
##########################################################################
|
||||
# creating Element objects
|
||||
|
||||
# create an Element for a C-node in the Document
|
||||
cdef _Element elementFactory(_Document doc, tree.xmlNode* c_node)
|
||||
|
||||
# create an ElementTree for an Element
|
||||
cdef _ElementTree elementTreeFactory(_Element context_node)
|
||||
|
||||
# create an ElementTree subclass for an Element
|
||||
cdef _ElementTree newElementTree(_Element context_node, object subclass)
|
||||
|
||||
# create an ElementTree from an external document
|
||||
cdef _ElementTree adoptExternalDocument(tree.xmlDoc* c_doc, parser, bint is_owned)
|
||||
|
||||
# create a new Element for an existing or new document (doc = None)
|
||||
# builds Python object after setting text, tail, namespaces and attributes
|
||||
cdef _Element makeElement(tag, _Document doc, parser,
|
||||
text, tail, attrib, nsmap)
|
||||
|
||||
# create a new SubElement for an existing parent
|
||||
# builds Python object after setting text, tail, namespaces and attributes
|
||||
cdef _Element makeSubElement(_Element parent, tag, text, tail,
|
||||
attrib, nsmap)
|
||||
|
||||
# deep copy a node to include it in the Document
|
||||
cdef _Element deepcopyNodeToDocument(_Document doc, tree.xmlNode* c_root)
|
||||
|
||||
# set the internal lookup function for Element/Comment/PI classes
|
||||
# use setElementClassLookupFunction(NULL, None) to reset it
|
||||
# note that the lookup function *must always* return an _Element subclass!
|
||||
cdef void setElementClassLookupFunction(
|
||||
object (*function)(object, _Document, tree.xmlNode*), object state)
|
||||
|
||||
# lookup function that always returns the default Element class
|
||||
# note that the first argument is expected to be None!
|
||||
cdef object lookupDefaultElementClass(_1, _Document _2,
|
||||
tree.xmlNode* c_node)
|
||||
|
||||
# lookup function for namespace/tag specific Element classes
|
||||
# note that the first argument is expected to be None!
|
||||
cdef object lookupNamespaceElementClass(_1, _Document _2,
|
||||
tree.xmlNode* c_node)
|
||||
|
||||
# call the fallback lookup function of a FallbackElementClassLookup
|
||||
cdef object callLookupFallback(FallbackElementClassLookup lookup,
|
||||
_Document doc, tree.xmlNode* c_node)
|
||||
|
||||
##########################################################################
|
||||
# XML attribute access
|
||||
|
||||
# return an attribute value for a C attribute on a C element node
|
||||
cdef object attributeValue(tree.xmlNode* c_element,
|
||||
tree.xmlAttr* c_attrib_node)
|
||||
|
||||
# return the value of the attribute with 'ns' and 'name' (or None)
|
||||
cdef object attributeValueFromNsName(tree.xmlNode* c_element,
|
||||
const_xmlChar* c_ns, const_xmlChar* c_name)
|
||||
|
||||
# return the value of attribute "{ns}name", or the default value
|
||||
cdef object getAttributeValue(_Element element, key, default)
|
||||
|
||||
# return an iterator over attribute names (1), values (2) or items (3)
|
||||
# attributes must not be removed during iteration!
|
||||
cdef object iterattributes(_Element element, int keysvalues)
|
||||
|
||||
# return the list of all attribute names (1), values (2) or items (3)
|
||||
cdef list collectAttributes(tree.xmlNode* c_element, int keysvalues)
|
||||
|
||||
# set an attribute value on an element
|
||||
# on failure, sets an exception and returns -1
|
||||
cdef int setAttributeValue(_Element element, key, value) except -1
|
||||
|
||||
# delete an attribute
|
||||
# on failure, sets an exception and returns -1
|
||||
cdef int delAttribute(_Element element, key) except -1
|
||||
|
||||
# delete an attribute based on name and namespace URI
|
||||
# returns -1 if the attribute was not found (no exception)
|
||||
cdef int delAttributeFromNsName(tree.xmlNode* c_element,
|
||||
const_xmlChar* c_href, const_xmlChar* c_name)
|
||||
|
||||
##########################################################################
|
||||
# XML node helper functions
|
||||
|
||||
# check if the element has at least one child
|
||||
cdef bint hasChild(tree.xmlNode* c_node) nogil
|
||||
|
||||
# find child element number 'index' (supports negative indexes)
|
||||
cdef tree.xmlNode* findChild(tree.xmlNode* c_node,
|
||||
Py_ssize_t index) nogil
|
||||
|
||||
# find child element number 'index' starting at first one
|
||||
cdef tree.xmlNode* findChildForwards(tree.xmlNode* c_node,
|
||||
Py_ssize_t index) nogil
|
||||
|
||||
# find child element number 'index' starting at last one
|
||||
cdef tree.xmlNode* findChildBackwards(tree.xmlNode* c_node,
|
||||
Py_ssize_t index) nogil
|
||||
|
||||
# return next/previous sibling element of the node
|
||||
cdef tree.xmlNode* nextElement(tree.xmlNode* c_node) nogil
|
||||
cdef tree.xmlNode* previousElement(tree.xmlNode* c_node) nogil
|
||||
|
||||
##########################################################################
|
||||
# iterators (DEPRECATED API, don't use in new code!)
|
||||
|
||||
cdef class lxml.etree._ElementTagMatcher [ object LxmlElementTagMatcher ]:
|
||||
cdef char* _href
|
||||
cdef char* _name
|
||||
|
||||
# store "{ns}tag" (or None) filter for this matcher or element iterator
|
||||
# ** unless _href *and* _name are set up 'by hand', this function *must*
|
||||
# ** be called when subclassing the iterator below!
|
||||
cdef void initTagMatch(_ElementTagMatcher matcher, tag)
|
||||
|
||||
cdef class lxml.etree._ElementIterator(_ElementTagMatcher) [
|
||||
object LxmlElementIterator ]:
|
||||
cdef _Element _node
|
||||
cdef tree.xmlNode* (*_next_element)(tree.xmlNode*)
|
||||
|
||||
# store the initial node of the iterator if it matches the required tag
|
||||
# or its next matching sibling if not
|
||||
cdef void iteratorStoreNext(_ElementIterator iterator, _Element node)
|
||||
|
||||
##########################################################################
|
||||
# other helper functions
|
||||
|
||||
# check if a C node matches a tag name and namespace
|
||||
# (NULL allowed for each => always matches)
|
||||
cdef int tagMatches(tree.xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name)
|
||||
|
||||
# convert a UTF-8 char* to a Python string or unicode string
|
||||
cdef object pyunicode(const_xmlChar* s)
|
||||
|
||||
# convert the string to UTF-8 using the normal lxml.etree semantics
|
||||
cdef bytes utf8(object s)
|
||||
|
||||
# split a tag into a (URI, name) tuple, return None as URI for '{}tag'
|
||||
cdef tuple getNsTag(object tag)
|
||||
|
||||
# split a tag into a (URI, name) tuple, return b'' as URI for '{}tag'
|
||||
cdef tuple getNsTagWithEmptyNs(object tag)
|
||||
|
||||
# get the "{ns}tag" string for a C node
|
||||
cdef object namespacedName(tree.xmlNode* c_node)
|
||||
|
||||
# get the "{ns}tag" string for a href/tagname pair (c_ns may be NULL)
|
||||
cdef object namespacedNameFromNsName(const_xmlChar* c_ns, const_xmlChar* c_tag)
|
||||
|
||||
# check if the node has a text value (which may be '')
|
||||
cdef bint hasText(tree.xmlNode* c_node) nogil
|
||||
|
||||
# check if the node has a tail value (which may be '')
|
||||
cdef bint hasTail(tree.xmlNode* c_node) nogil
|
||||
|
||||
# get the text content of an element (or None)
|
||||
cdef object textOf(tree.xmlNode* c_node)
|
||||
|
||||
# get the tail content of an element (or None)
|
||||
cdef object tailOf(tree.xmlNode* c_node)
|
||||
|
||||
# set the text value of an element
|
||||
cdef int setNodeText(tree.xmlNode* c_node, text) except -1
|
||||
|
||||
# set the tail text value of an element
|
||||
cdef int setTailText(tree.xmlNode* c_node, text) except -1
|
||||
|
||||
# append an element to the children of a parent element
|
||||
# deprecated: don't use, does not propagate exceptions!
|
||||
# use appendChildToElement() instead
|
||||
cdef void appendChild(_Element parent, _Element child)
|
||||
|
||||
# added in lxml 3.3 as a safe replacement for appendChild()
|
||||
# return -1 for exception, 0 for ok
|
||||
cdef int appendChildToElement(_Element parent, _Element child) except -1
|
||||
|
||||
# recursively lookup a namespace in element or ancestors, or create it
|
||||
cdef tree.xmlNs* findOrBuildNodeNsPrefix(
|
||||
_Document doc, tree.xmlNode* c_node, const_xmlChar* href, const_xmlChar* prefix)
|
||||
|
||||
# find the Document of an Element, ElementTree or Document (itself!)
|
||||
cdef _Document documentOrRaise(object input)
|
||||
|
||||
# find the root Element of an Element (itself!), ElementTree or Document
|
||||
cdef _Element rootNodeOrRaise(object input)
|
||||
@@ -0,0 +1,56 @@
|
||||
from libc.string cimport const_char
|
||||
|
||||
from lxml.includes.tree cimport xmlDoc
|
||||
from lxml.includes.tree cimport xmlInputReadCallback, xmlInputCloseCallback
|
||||
from lxml.includes.xmlparser cimport xmlParserCtxt, xmlSAXHandler, xmlSAXHandlerV1
|
||||
|
||||
cdef extern from "libxml/HTMLparser.h":
|
||||
ctypedef enum htmlParserOption:
|
||||
HTML_PARSE_NOERROR # suppress error reports
|
||||
HTML_PARSE_NOWARNING # suppress warning reports
|
||||
HTML_PARSE_PEDANTIC # pedantic error reporting
|
||||
HTML_PARSE_NOBLANKS # remove blank nodes
|
||||
HTML_PARSE_NONET # Forbid network access
|
||||
# libxml2 2.6.21+ only:
|
||||
HTML_PARSE_RECOVER # Relaxed parsing
|
||||
HTML_PARSE_COMPACT # compact small text nodes
|
||||
# libxml2 2.7.7+ only:
|
||||
HTML_PARSE_NOIMPLIED # Do not add implied html/body... elements
|
||||
# libxml2 2.7.8+ only:
|
||||
HTML_PARSE_NODEFDTD # do not default a doctype if not found
|
||||
# libxml2 2.8.0+ only:
|
||||
XML_PARSE_IGNORE_ENC # ignore internal document encoding hint
|
||||
|
||||
xmlSAXHandlerV1 htmlDefaultSAXHandler
|
||||
|
||||
cdef xmlParserCtxt* htmlCreateMemoryParserCtxt(
|
||||
char* buffer, int size) nogil
|
||||
cdef xmlParserCtxt* htmlCreateFileParserCtxt(
|
||||
char* filename, char* encoding) nogil
|
||||
cdef xmlParserCtxt* htmlCreatePushParserCtxt(xmlSAXHandler* sax,
|
||||
void* user_data,
|
||||
char* chunk, int size,
|
||||
char* filename, int enc) nogil
|
||||
cdef void htmlFreeParserCtxt(xmlParserCtxt* ctxt) nogil
|
||||
cdef void htmlCtxtReset(xmlParserCtxt* ctxt) nogil
|
||||
cdef int htmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) nogil
|
||||
cdef int htmlParseDocument(xmlParserCtxt* ctxt) nogil
|
||||
cdef int htmlParseChunk(xmlParserCtxt* ctxt,
|
||||
char* chunk, int size, int terminate) nogil
|
||||
|
||||
cdef xmlDoc* htmlCtxtReadFile(xmlParserCtxt* ctxt,
|
||||
char* filename, const_char* encoding,
|
||||
int options) nogil
|
||||
cdef xmlDoc* htmlCtxtReadDoc(xmlParserCtxt* ctxt,
|
||||
char* buffer, char* URL, const_char* encoding,
|
||||
int options) nogil
|
||||
cdef xmlDoc* htmlCtxtReadIO(xmlParserCtxt* ctxt,
|
||||
xmlInputReadCallback ioread,
|
||||
xmlInputCloseCallback ioclose,
|
||||
void* ioctx,
|
||||
char* URL, const_char* encoding,
|
||||
int options) nogil
|
||||
cdef xmlDoc* htmlCtxtReadMemory(xmlParserCtxt* ctxt,
|
||||
char* buffer, int size,
|
||||
char* filename, const_char* encoding,
|
||||
int options) nogil
|
||||
@@ -0,0 +1,3 @@
|
||||
#ifndef LXML_VERSION_STRING
|
||||
#define LXML_VERSION_STRING "4.9.3"
|
||||
#endif
|
||||
64
.venv/lib/python3.7/site-packages/lxml/includes/relaxng.pxd
Normal file
64
.venv/lib/python3.7/site-packages/lxml/includes/relaxng.pxd
Normal file
@@ -0,0 +1,64 @@
|
||||
from lxml.includes.tree cimport xmlDoc
|
||||
from lxml.includes.xmlerror cimport xmlStructuredErrorFunc
|
||||
|
||||
cdef extern from "libxml/relaxng.h":
|
||||
ctypedef struct xmlRelaxNG
|
||||
ctypedef struct xmlRelaxNGParserCtxt
|
||||
|
||||
ctypedef struct xmlRelaxNGValidCtxt
|
||||
|
||||
ctypedef enum xmlRelaxNGValidErr:
|
||||
XML_RELAXNG_OK = 0
|
||||
XML_RELAXNG_ERR_MEMORY = 1
|
||||
XML_RELAXNG_ERR_TYPE = 2
|
||||
XML_RELAXNG_ERR_TYPEVAL = 3
|
||||
XML_RELAXNG_ERR_DUPID = 4
|
||||
XML_RELAXNG_ERR_TYPECMP = 5
|
||||
XML_RELAXNG_ERR_NOSTATE = 6
|
||||
XML_RELAXNG_ERR_NODEFINE = 7
|
||||
XML_RELAXNG_ERR_LISTEXTRA = 8
|
||||
XML_RELAXNG_ERR_LISTEMPTY = 9
|
||||
XML_RELAXNG_ERR_INTERNODATA = 10
|
||||
XML_RELAXNG_ERR_INTERSEQ = 11
|
||||
XML_RELAXNG_ERR_INTEREXTRA = 12
|
||||
XML_RELAXNG_ERR_ELEMNAME = 13
|
||||
XML_RELAXNG_ERR_ATTRNAME = 14
|
||||
XML_RELAXNG_ERR_ELEMNONS = 15
|
||||
XML_RELAXNG_ERR_ATTRNONS = 16
|
||||
XML_RELAXNG_ERR_ELEMWRONGNS = 17
|
||||
XML_RELAXNG_ERR_ATTRWRONGNS = 18
|
||||
XML_RELAXNG_ERR_ELEMEXTRANS = 19
|
||||
XML_RELAXNG_ERR_ATTREXTRANS = 20
|
||||
XML_RELAXNG_ERR_ELEMNOTEMPTY = 21
|
||||
XML_RELAXNG_ERR_NOELEM = 22
|
||||
XML_RELAXNG_ERR_NOTELEM = 23
|
||||
XML_RELAXNG_ERR_ATTRVALID = 24
|
||||
XML_RELAXNG_ERR_CONTENTVALID = 25
|
||||
XML_RELAXNG_ERR_EXTRACONTENT = 26
|
||||
XML_RELAXNG_ERR_INVALIDATTR = 27
|
||||
XML_RELAXNG_ERR_DATAELEM = 28
|
||||
XML_RELAXNG_ERR_VALELEM = 29
|
||||
XML_RELAXNG_ERR_LISTELEM = 30
|
||||
XML_RELAXNG_ERR_DATATYPE = 31
|
||||
XML_RELAXNG_ERR_VALUE = 32
|
||||
XML_RELAXNG_ERR_LIST = 33
|
||||
XML_RELAXNG_ERR_NOGRAMMAR = 34
|
||||
XML_RELAXNG_ERR_EXTRADATA = 35
|
||||
XML_RELAXNG_ERR_LACKDATA = 36
|
||||
XML_RELAXNG_ERR_INTERNAL = 37
|
||||
XML_RELAXNG_ERR_ELEMWRONG = 38
|
||||
XML_RELAXNG_ERR_TEXTWRONG = 39
|
||||
|
||||
cdef xmlRelaxNGValidCtxt* xmlRelaxNGNewValidCtxt(xmlRelaxNG* schema) nogil
|
||||
cdef int xmlRelaxNGValidateDoc(xmlRelaxNGValidCtxt* ctxt, xmlDoc* doc) nogil
|
||||
cdef xmlRelaxNG* xmlRelaxNGParse(xmlRelaxNGParserCtxt* ctxt) nogil
|
||||
cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewParserCtxt(char* URL) nogil
|
||||
cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewDocParserCtxt(xmlDoc* doc) nogil
|
||||
cdef void xmlRelaxNGFree(xmlRelaxNG* schema) nogil
|
||||
cdef void xmlRelaxNGFreeParserCtxt(xmlRelaxNGParserCtxt* ctxt) nogil
|
||||
cdef void xmlRelaxNGFreeValidCtxt(xmlRelaxNGValidCtxt* ctxt) nogil
|
||||
|
||||
cdef void xmlRelaxNGSetValidStructuredErrors(
|
||||
xmlRelaxNGValidCtxt* ctxt, xmlStructuredErrorFunc serror, void *ctx) nogil
|
||||
cdef void xmlRelaxNGSetParserStructuredErrors(
|
||||
xmlRelaxNGParserCtxt* ctxt, xmlStructuredErrorFunc serror, void *ctx) nogil
|
||||
@@ -0,0 +1,34 @@
|
||||
from lxml.includes cimport xmlerror
|
||||
from lxml.includes.tree cimport xmlDoc
|
||||
|
||||
cdef extern from "libxml/schematron.h":
|
||||
ctypedef struct xmlSchematron
|
||||
ctypedef struct xmlSchematronParserCtxt
|
||||
ctypedef struct xmlSchematronValidCtxt
|
||||
|
||||
ctypedef enum xmlSchematronValidOptions:
|
||||
XML_SCHEMATRON_OUT_QUIET = 1 # quiet no report
|
||||
XML_SCHEMATRON_OUT_TEXT = 2 # build a textual report
|
||||
XML_SCHEMATRON_OUT_XML = 4 # output SVRL
|
||||
XML_SCHEMATRON_OUT_ERROR = 8 # output via xmlStructuredErrorFunc
|
||||
XML_SCHEMATRON_OUT_FILE = 256 # output to a file descriptor
|
||||
XML_SCHEMATRON_OUT_BUFFER = 512 # output to a buffer
|
||||
XML_SCHEMATRON_OUT_IO = 1024 # output to I/O mechanism
|
||||
|
||||
cdef xmlSchematronParserCtxt* xmlSchematronNewDocParserCtxt(
|
||||
xmlDoc* doc) nogil
|
||||
cdef xmlSchematronParserCtxt* xmlSchematronNewParserCtxt(
|
||||
char* filename) nogil
|
||||
cdef xmlSchematronValidCtxt* xmlSchematronNewValidCtxt(
|
||||
xmlSchematron* schema, int options) nogil
|
||||
|
||||
cdef xmlSchematron* xmlSchematronParse(xmlSchematronParserCtxt* ctxt) nogil
|
||||
cdef int xmlSchematronValidateDoc(xmlSchematronValidCtxt* ctxt,
|
||||
xmlDoc* instance) nogil
|
||||
|
||||
cdef void xmlSchematronFreeParserCtxt(xmlSchematronParserCtxt* ctxt) nogil
|
||||
cdef void xmlSchematronFreeValidCtxt(xmlSchematronValidCtxt* ctxt) nogil
|
||||
cdef void xmlSchematronFree(xmlSchematron* schema) nogil
|
||||
# declared 'nogil' like every other function in this extern block, so that
# it can also be called from code that has released the GIL
cdef void xmlSchematronSetValidStructuredErrors(
        xmlSchematronValidCtxt* ctxt,
        xmlerror.xmlStructuredErrorFunc error_func, void *data) nogil
|
||||
480
.venv/lib/python3.7/site-packages/lxml/includes/tree.pxd
Normal file
480
.venv/lib/python3.7/site-packages/lxml/includes/tree.pxd
Normal file
@@ -0,0 +1,480 @@
|
||||
from libc cimport stdio
|
||||
from libc.string cimport const_char, const_uchar
|
||||
|
||||
cdef extern from "lxml-version.h":
|
||||
# deprecated declaration, use etreepublic.pxd instead
|
||||
cdef char* LXML_VERSION_STRING
|
||||
|
||||
cdef extern from "libxml/xmlversion.h":
|
||||
cdef const_char* xmlParserVersion
|
||||
cdef int LIBXML_VERSION
|
||||
|
||||
cdef extern from "libxml/xmlstring.h":
|
||||
ctypedef unsigned char xmlChar
|
||||
ctypedef const xmlChar const_xmlChar "const xmlChar"
|
||||
cdef int xmlStrlen(const_xmlChar* str) nogil
|
||||
cdef xmlChar* xmlStrdup(const_xmlChar* cur) nogil
|
||||
cdef int xmlStrncmp(const_xmlChar* str1, const_xmlChar* str2, int length) nogil
|
||||
cdef int xmlStrcmp(const_xmlChar* str1, const_xmlChar* str2) nogil
|
||||
cdef int xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) nogil
|
||||
cdef const_xmlChar* xmlStrstr(const_xmlChar* str1, const_xmlChar* str2) nogil
|
||||
cdef const_xmlChar* xmlStrchr(const_xmlChar* str1, xmlChar ch) nogil
|
||||
cdef const_xmlChar* _xcstr "(const xmlChar*)PyBytes_AS_STRING" (object s)
|
||||
|
||||
cdef extern from "libxml/encoding.h":
    # Declarations taken from libxml2's encoding header: the encoding enum,
    # an opaque handler type and the functions that operate on them.

    # Numeric identifiers for the character encodings listed in the header.
    ctypedef enum xmlCharEncoding:
        XML_CHAR_ENCODING_ERROR = -1      # No char encoding detected
        XML_CHAR_ENCODING_NONE = 0        # No char encoding detected
        XML_CHAR_ENCODING_UTF8 = 1        # UTF-8
        XML_CHAR_ENCODING_UTF16LE = 2     # UTF-16 little endian
        XML_CHAR_ENCODING_UTF16BE = 3     # UTF-16 big endian
        XML_CHAR_ENCODING_UCS4LE = 4      # UCS-4 little endian
        XML_CHAR_ENCODING_UCS4BE = 5      # UCS-4 big endian
        XML_CHAR_ENCODING_EBCDIC = 6      # EBCDIC uh!
        XML_CHAR_ENCODING_UCS4_2143 = 7   # UCS-4 unusual ordering
        XML_CHAR_ENCODING_UCS4_3412 = 8   # UCS-4 unusual ordering
        XML_CHAR_ENCODING_UCS2 = 9        # UCS-2
        XML_CHAR_ENCODING_8859_1 = 10     # ISO-8859-1 ISO Latin 1
        XML_CHAR_ENCODING_8859_2 = 11     # ISO-8859-2 ISO Latin 2
        XML_CHAR_ENCODING_8859_3 = 12     # ISO-8859-3
        XML_CHAR_ENCODING_8859_4 = 13     # ISO-8859-4
        XML_CHAR_ENCODING_8859_5 = 14     # ISO-8859-5
        XML_CHAR_ENCODING_8859_6 = 15     # ISO-8859-6
        XML_CHAR_ENCODING_8859_7 = 16     # ISO-8859-7
        XML_CHAR_ENCODING_8859_8 = 17     # ISO-8859-8
        XML_CHAR_ENCODING_8859_9 = 18     # ISO-8859-9
        XML_CHAR_ENCODING_2022_JP = 19    # ISO-2022-JP
        XML_CHAR_ENCODING_SHIFT_JIS = 20  # Shift_JIS
        XML_CHAR_ENCODING_EUC_JP = 21     # EUC-JP
        XML_CHAR_ENCODING_ASCII = 22      # pure ASCII

    # Declared without fields: only ever handled through pointers here.
    ctypedef struct xmlCharEncodingHandler

    # All functions below are declared `nogil`.
    cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name) nogil
    cdef xmlCharEncodingHandler* xmlGetCharEncodingHandler(xmlCharEncoding enc) nogil
    cdef int xmlCharEncCloseFunc(xmlCharEncodingHandler* handler) nogil
    cdef xmlCharEncoding xmlDetectCharEncoding(const_xmlChar* text, int len) nogil
    cdef const_char* xmlGetCharEncodingName(xmlCharEncoding enc) nogil
    cdef xmlCharEncoding xmlParseCharEncoding(char* name) nogil

    # Function-pointer type taking an output buffer/length pair and an
    # input buffer/length pair; used as the `escapefunc` parameter type of
    # xmlOutputBufferWriteEscape.
    ctypedef int (*xmlCharEncodingOutputFunc)(
        unsigned char* out_buf, int* outlen, const_uchar* in_buf, int* inlen)
|
||||
|
||||
cdef extern from "libxml/chvalid.h":
    # Two character-check entry points from libxml2's chvalid header:
    # one takes a single `char`, the other an `int`.
    cdef int xmlIsChar_ch(char c) nogil
    cdef int xmlIsCharQ(int ch) nogil
|
||||
|
||||
cdef extern from "libxml/hash.h":
    # Hash table API. The table type is declared without fields and is only
    # passed around by pointer.
    ctypedef struct xmlHashTable

    # Callback type accepted by xmlHashScan.
    ctypedef void (*xmlHashScanner)(void* payload, void* data, const_xmlChar* name) # may require GIL!

    # Declared `nogil`.
    void xmlHashScan(xmlHashTable* table, xmlHashScanner f, void* data) nogil
    void* xmlHashLookup(xmlHashTable* table, const_xmlChar* name) nogil

    # Callback type accepted by xmlHashFree.
    ctypedef void (*xmlHashDeallocator)(void* payload, xmlChar* name)

    # NOTE: unlike the two functions above, these carry no `nogil` marker.
    cdef xmlHashTable* xmlHashCreate(int size)
    cdef xmlHashTable* xmlHashCreateDict(int size, xmlDict* dict)
    cdef int xmlHashSize(xmlHashTable* table)
    cdef void xmlHashFree(xmlHashTable* table, xmlHashDeallocator f)
|
||||
|
||||
cdef extern from *: # actually "libxml/dict.h"
    # libxml/dict.h appears to be broken to include in C, which is why this
    # block uses `extern from *` and names no header.

    # Declared without fields; used through pointers only.
    ctypedef struct xmlDict

    # Dictionary functions, all declared `nogil`.
    cdef const_xmlChar* xmlDictLookup(
        xmlDict* dict, const_xmlChar* name, int len) nogil
    cdef const_xmlChar* xmlDictExists(
        xmlDict* dict, const_xmlChar* name, int len) nogil
    cdef int xmlDictOwns(xmlDict* dict, const_xmlChar* name) nogil
    cdef size_t xmlDictSize(xmlDict* dict) nogil
|
||||
|
||||
cdef extern from "libxml/tree.h":
    # Declarations for libxml2's tree header: node/attribute/DTD enums,
    # the struct fields this project accesses, and the tree functions.

    # Declared up front because they are referenced before (or without)
    # a field listing: xmlDoc and xmlAttr get their fields further down,
    # xmlNotationTable stays field-less in this block.
    ctypedef struct xmlDoc
    ctypedef struct xmlAttr
    ctypedef struct xmlNotationTable

    # Values stored in the `type` field of the node-like structs below.
    ctypedef enum xmlElementType:
        XML_ELEMENT_NODE = 1
        XML_ATTRIBUTE_NODE = 2
        XML_TEXT_NODE = 3
        XML_CDATA_SECTION_NODE = 4
        XML_ENTITY_REF_NODE = 5
        XML_ENTITY_NODE = 6
        XML_PI_NODE = 7
        XML_COMMENT_NODE = 8
        XML_DOCUMENT_NODE = 9
        XML_DOCUMENT_TYPE_NODE = 10
        XML_DOCUMENT_FRAG_NODE = 11
        XML_NOTATION_NODE = 12
        XML_HTML_DOCUMENT_NODE = 13
        XML_DTD_NODE = 14
        XML_ELEMENT_DECL = 15
        XML_ATTRIBUTE_DECL = 16
        XML_ENTITY_DECL = 17
        XML_NAMESPACE_DECL = 18
        XML_XINCLUDE_START = 19
        XML_XINCLUDE_END = 20

    # Type of xmlElement.etype.
    ctypedef enum xmlElementTypeVal:
        XML_ELEMENT_TYPE_UNDEFINED = 0
        XML_ELEMENT_TYPE_EMPTY = 1
        XML_ELEMENT_TYPE_ANY = 2
        XML_ELEMENT_TYPE_MIXED = 3
        XML_ELEMENT_TYPE_ELEMENT = 4

    # Type of xmlElementContent.type.
    ctypedef enum xmlElementContentType:
        XML_ELEMENT_CONTENT_PCDATA = 1
        XML_ELEMENT_CONTENT_ELEMENT = 2
        XML_ELEMENT_CONTENT_SEQ = 3
        XML_ELEMENT_CONTENT_OR = 4

    # Type of xmlElementContent.ocur.
    ctypedef enum xmlElementContentOccur:
        XML_ELEMENT_CONTENT_ONCE = 1
        XML_ELEMENT_CONTENT_OPT = 2
        XML_ELEMENT_CONTENT_MULT = 3
        XML_ELEMENT_CONTENT_PLUS = 4

    # Type of the `atype` field of xmlAttribute and xmlAttr.
    ctypedef enum xmlAttributeType:
        XML_ATTRIBUTE_CDATA = 1
        XML_ATTRIBUTE_ID = 2
        XML_ATTRIBUTE_IDREF = 3
        XML_ATTRIBUTE_IDREFS = 4
        XML_ATTRIBUTE_ENTITY = 5
        XML_ATTRIBUTE_ENTITIES = 6
        XML_ATTRIBUTE_NMTOKEN = 7
        XML_ATTRIBUTE_NMTOKENS = 8
        XML_ATTRIBUTE_ENUMERATION = 9
        XML_ATTRIBUTE_NOTATION = 10

    # Type of xmlAttribute.def_.
    ctypedef enum xmlAttributeDefault:
        XML_ATTRIBUTE_NONE = 1
        XML_ATTRIBUTE_REQUIRED = 2
        XML_ATTRIBUTE_IMPLIED = 3
        XML_ATTRIBUTE_FIXED = 4

    # Type of xmlEntity.etype.
    ctypedef enum xmlEntityType:
        XML_INTERNAL_GENERAL_ENTITY = 1
        XML_EXTERNAL_GENERAL_PARSED_ENTITY = 2
        XML_EXTERNAL_GENERAL_UNPARSED_ENTITY = 3
        XML_INTERNAL_PARAMETER_ENTITY = 4
        XML_EXTERNAL_PARAMETER_ENTITY = 5
        XML_INTERNAL_PREDEFINED_ENTITY = 6

    # Namespace record; `next` points to another xmlNs.
    ctypedef struct xmlNs:
        const_xmlChar* href
        const_xmlChar* prefix
        xmlNs* next

    # Generic tree node with child/sibling/parent links.
    ctypedef struct xmlNode:
        void* _private
        xmlElementType type
        const_xmlChar* name
        xmlNode* children
        xmlNode* last
        xmlNode* parent
        xmlNode* next
        xmlNode* prev
        xmlDoc* doc
        xmlChar* content
        xmlAttr* properties
        xmlNs* ns
        xmlNs* nsDef
        unsigned short line

    # Content-model node; c1/c2/parent point to other xmlElementContent.
    ctypedef struct xmlElementContent:
        xmlElementContentType type
        xmlElementContentOccur ocur
        const_xmlChar* name
        xmlElementContent* c1
        xmlElementContent* c2
        xmlElementContent* parent
        const_xmlChar* prefix

    # Singly linked list of names.
    ctypedef struct xmlEnumeration:
        xmlEnumeration* next
        const_xmlChar* name

    # Attribute declaration record; its parent is an xmlDtd.
    ctypedef struct xmlAttribute:
        void* _private
        xmlElementType type
        const_xmlChar* name
        xmlNode* children
        xmlNode* last
        xmlDtd* parent
        xmlNode* next
        xmlNode* prev
        xmlDoc* doc
        xmlAttribute* nexth
        xmlAttributeType atype
        # The C field is called "def", a reserved word in Python/Cython,
        # so it is exposed under the name def_.
        xmlAttributeDefault def_ "def"
        const_xmlChar* defaultValue
        xmlEnumeration* tree
        const_xmlChar* prefix
        const_xmlChar* elem

    # Element declaration record.
    ctypedef struct xmlElement:
        void* _private
        xmlElementType type
        const_xmlChar* name
        xmlNode* children
        xmlNode* last
        xmlNode* parent
        xmlNode* next
        xmlNode* prev
        xmlDoc* doc
        xmlElementTypeVal etype
        xmlElementContent* content
        xmlAttribute* attributes
        const_xmlChar* prefix
        void* contModel

    # Entity record; its parent is an xmlDtd.
    ctypedef struct xmlEntity:
        void* _private
        xmlElementType type
        const_xmlChar* name
        xmlNode* children
        xmlNode* last
        xmlDtd* parent
        xmlNode* next
        xmlNode* prev
        xmlDoc* doc
        xmlChar* orig
        xmlChar* content
        int length
        xmlEntityType etype
        const_xmlChar* ExternalID
        const_xmlChar* SystemID
        xmlEntity* nexte
        const_xmlChar* URI
        int owner
        int checked

    # DTD record; the five table fields are exposed as untyped void*.
    ctypedef struct xmlDtd:
        const_xmlChar* name
        const_xmlChar* ExternalID
        const_xmlChar* SystemID
        void* notations
        void* entities
        void* pentities
        void* attributes
        void* elements
        xmlNode* children
        xmlNode* last
        xmlDoc* doc

    # Document record (field listing for the forward declaration above).
    ctypedef struct xmlDoc:
        xmlElementType type
        char* name
        xmlNode* children
        xmlNode* last
        xmlNode* parent
        xmlNode* next
        xmlNode* prev
        xmlDoc* doc
        xmlDict* dict
        xmlHashTable* ids
        int standalone
        const_xmlChar* version
        const_xmlChar* encoding
        const_xmlChar* URL
        void* _private
        xmlDtd* intSubset
        xmlDtd* extSubset

    # Attribute node (field listing for the forward declaration above);
    # next/prev link to other xmlAttr.
    ctypedef struct xmlAttr:
        void* _private
        xmlElementType type
        const_xmlChar* name
        xmlNode* children
        xmlNode* last
        xmlNode* parent
        xmlAttr* next
        xmlAttr* prev
        xmlDoc* doc
        xmlNs* ns
        xmlAttributeType atype

    # ID record tying a value/name to an attribute and its document.
    ctypedef struct xmlID:
        const_xmlChar* value
        const_xmlChar* name
        xmlAttr* attr
        xmlDoc* doc

    # Buffer types, declared without fields.
    ctypedef struct xmlBuffer

    ctypedef struct xmlBuf # new in libxml2 2.9

    # Output buffer; only these three fields are exposed.
    ctypedef struct xmlOutputBuffer:
        xmlBuf* buffer
        xmlBuf* conv
        int error

    const_xmlChar* XML_XML_NAMESPACE

    # --- xmlFree* declarations (all `nogil`) ---
    cdef void xmlFreeDoc(xmlDoc* cur) nogil
    cdef void xmlFreeDtd(xmlDtd* cur) nogil
    cdef void xmlFreeNode(xmlNode* cur) nogil
    cdef void xmlFreeNsList(xmlNs* ns) nogil
    cdef void xmlFreeNs(xmlNs* ns) nogil
    cdef void xmlFree(void* buf) nogil

    # --- node/attribute construction, linking and lookup (all `nogil`) ---
    cdef xmlNode* xmlNewNode(xmlNs* ns, const_xmlChar* name) nogil
    cdef xmlNode* xmlNewDocText(xmlDoc* doc, const_xmlChar* content) nogil
    cdef xmlNode* xmlNewDocComment(xmlDoc* doc, const_xmlChar* content) nogil
    cdef xmlNode* xmlNewDocPI(xmlDoc* doc, const_xmlChar* name, const_xmlChar* content) nogil
    cdef xmlNode* xmlNewReference(xmlDoc* doc, const_xmlChar* name) nogil
    cdef xmlNode* xmlNewCDataBlock(xmlDoc* doc, const_xmlChar* text, int len) nogil
    cdef xmlNs* xmlNewNs(xmlNode* node, const_xmlChar* href, const_xmlChar* prefix) nogil
    cdef xmlNode* xmlAddChild(xmlNode* parent, xmlNode* cur) nogil
    cdef xmlNode* xmlReplaceNode(xmlNode* old, xmlNode* cur) nogil
    cdef xmlNode* xmlAddPrevSibling(xmlNode* cur, xmlNode* elem) nogil
    cdef xmlNode* xmlAddNextSibling(xmlNode* cur, xmlNode* elem) nogil
    cdef xmlNode* xmlNewDocNode(
        xmlDoc* doc,
        xmlNs* ns,
        const_xmlChar* name,
        const_xmlChar* content) nogil
    cdef xmlDoc* xmlNewDoc(const_xmlChar* version) nogil
    cdef xmlAttr* xmlNewProp(xmlNode* node, const_xmlChar* name, const_xmlChar* value) nogil
    cdef xmlAttr* xmlNewNsProp(
        xmlNode* node,
        xmlNs* ns,
        const_xmlChar* name,
        const_xmlChar* value) nogil
    cdef xmlChar* xmlGetNoNsProp(xmlNode* node, const_xmlChar* name) nogil
    cdef xmlChar* xmlGetNsProp(xmlNode* node, const_xmlChar* name, const_xmlChar* nameSpace) nogil
    cdef void xmlSetNs(xmlNode* node, xmlNs* ns) nogil
    cdef xmlAttr* xmlSetProp(xmlNode* node, const_xmlChar* name, const_xmlChar* value) nogil
    cdef xmlAttr* xmlSetNsProp(
        xmlNode* node,
        xmlNs* ns,
        const_xmlChar* name,
        const_xmlChar* value) nogil
    cdef int xmlRemoveID(xmlDoc* doc, xmlAttr* cur) nogil
    cdef int xmlRemoveProp(xmlAttr* cur) nogil
    cdef void xmlFreePropList(xmlAttr* cur) nogil
    cdef xmlChar* xmlGetNodePath(xmlNode* node) nogil
    cdef void xmlDocDumpMemory(xmlDoc* cur, char** mem, int* size) nogil
    cdef void xmlDocDumpMemoryEnc(
        xmlDoc* cur,
        char** mem,
        int* size,
        char* encoding) nogil
    cdef int xmlSaveFileTo(
        xmlOutputBuffer* out,
        xmlDoc* cur,
        char* encoding) nogil

    # --- tree manipulation, content access, copying, buffers (all `nogil`) ---
    cdef void xmlUnlinkNode(xmlNode* cur) nogil
    cdef xmlNode* xmlDocSetRootElement(xmlDoc* doc, xmlNode* root) nogil
    cdef xmlNode* xmlDocGetRootElement(xmlDoc* doc) nogil
    cdef void xmlSetTreeDoc(xmlNode* tree, xmlDoc* doc) nogil
    cdef xmlAttr* xmlHasProp(xmlNode* node, const_xmlChar* name) nogil
    cdef xmlAttr* xmlHasNsProp(xmlNode* node, const_xmlChar* name, const_xmlChar* nameSpace) nogil
    cdef xmlChar* xmlNodeGetContent(xmlNode* cur) nogil
    cdef int xmlNodeBufGetContent(xmlBuffer* buffer, xmlNode* cur) nogil
    cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, const_xmlChar* prefix) nogil
    cdef xmlNs* xmlSearchNsByHref(xmlDoc* doc, xmlNode* node, const_xmlChar* href) nogil
    cdef int xmlIsBlankNode(xmlNode* node) nogil
    cdef long xmlGetLineNo(xmlNode* node) nogil
    cdef void xmlElemDump(stdio.FILE* f, xmlDoc* doc, xmlNode* cur) nogil
    cdef void xmlNodeDumpOutput(
        xmlOutputBuffer* buf,
        xmlDoc* doc,
        xmlNode* cur,
        int level,
        int format,
        const_char* encoding) nogil
    cdef void xmlBufAttrSerializeTxtContent(
        xmlOutputBuffer* buf,
        xmlDoc* doc,
        xmlAttr* attr,
        const_xmlChar* string) nogil
    cdef void xmlNodeSetName(xmlNode* cur, const_xmlChar* name) nogil
    cdef void xmlNodeSetContent(xmlNode* cur, const_xmlChar* content) nogil
    cdef xmlDtd* xmlCopyDtd(xmlDtd* dtd) nogil
    cdef xmlDoc* xmlCopyDoc(xmlDoc* doc, int recursive) nogil
    cdef xmlNode* xmlCopyNode(xmlNode* node, int extended) nogil
    cdef xmlNode* xmlDocCopyNode(xmlNode* node, xmlDoc* doc, int extended) nogil
    cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree) nogil
    cdef xmlNs* xmlNewReconciliedNs(xmlDoc* doc, xmlNode* tree, xmlNs* ns) nogil
    cdef xmlBuffer* xmlBufferCreate() nogil
    cdef void xmlBufferWriteChar(xmlBuffer* buf, char* string) nogil
    cdef void xmlBufferFree(xmlBuffer* buf) nogil
    cdef const_xmlChar* xmlBufferContent(xmlBuffer* buf) nogil
    cdef int xmlBufferLength(xmlBuffer* buf) nogil
    cdef const_xmlChar* xmlBufContent(xmlBuf* buf) nogil # new in libxml2 2.9
    cdef size_t xmlBufUse(xmlBuf* buf) nogil # new in libxml2 2.9
    cdef int xmlKeepBlanksDefault(int val) nogil
    cdef xmlChar* xmlNodeGetBase(xmlDoc* doc, xmlNode* node) nogil
    cdef xmlDtd* xmlCreateIntSubset(
        xmlDoc* doc,
        const_xmlChar* name,
        const_xmlChar* ExternalID,
        const_xmlChar* SystemID) nogil
    cdef void xmlNodeSetBase(xmlNode* node, const_xmlChar* uri) nogil
    cdef int xmlValidateNCName(const_xmlChar* value, int space) nogil
|
||||
|
||||
cdef extern from "libxml/uri.h":
    # Only xmlBuildURI is declared from the URI header in this block;
    # it takes an href and a base and is declared `nogil`.
    cdef const_xmlChar* xmlBuildURI(
        const_xmlChar* href, const_xmlChar* base) nogil
|
||||
|
||||
cdef extern from "libxml/HTMLtree.h":
    # HTML counterparts of two tree functions: a node dump that writes to
    # an xmlOutputBuffer and a document constructor. Both are `nogil`.
    cdef void htmlNodeDumpFormatOutput(
        xmlOutputBuffer* buf,
        xmlDoc* doc,
        xmlNode* cur,
        char* encoding,
        int format) nogil
    cdef xmlDoc* htmlNewDoc(const_xmlChar* uri, const_xmlChar* externalID) nogil
|
||||
|
||||
cdef extern from "libxml/valid.h":
    # Three declarations from the validation header, all `nogil`:
    # an ID lookup returning an xmlAttr*, a notation-table dump into an
    # xmlBuffer, and a name-value check returning int.
    cdef xmlAttr* xmlGetID(xmlDoc* doc, const_xmlChar* ID) nogil
    cdef void xmlDumpNotationTable(
        xmlBuffer* buffer, xmlNotationTable* table) nogil
    cdef int xmlValidateNameValue(const_xmlChar* value) nogil
|
||||
|
||||
cdef extern from "libxml/xmlIO.h":
    # Output-buffer writing functions, the I/O callback signatures, and
    # the output-buffer constructors. Every function here is `nogil`.

    # --- writing to / finishing an output buffer ---
    cdef int xmlOutputBufferWrite(
        xmlOutputBuffer* out, int len, const_char* str) nogil
    cdef int xmlOutputBufferWriteString(xmlOutputBuffer* out, const_char* str) nogil
    cdef int xmlOutputBufferWriteEscape(
        xmlOutputBuffer* out,
        const_xmlChar* str,
        xmlCharEncodingOutputFunc escapefunc) nogil
    cdef int xmlOutputBufferFlush(xmlOutputBuffer* out) nogil
    cdef int xmlOutputBufferClose(xmlOutputBuffer* out) nogil

    # --- callback signatures (context pointer, plus buffer/length for
    #     the read and write variants) ---
    ctypedef int (*xmlInputReadCallback)(void* context, char* buffer, int len)
    ctypedef int (*xmlInputCloseCallback)(void* context)

    ctypedef int (*xmlOutputWriteCallback)(void* context, char* buffer, int len)
    ctypedef int (*xmlOutputCloseCallback)(void* context)

    # --- output-buffer constructors; each takes an encoding handler ---
    cdef xmlOutputBuffer* xmlAllocOutputBuffer(xmlCharEncodingHandler* encoder) nogil
    cdef xmlOutputBuffer* xmlOutputBufferCreateIO(
        xmlOutputWriteCallback iowrite,
        xmlOutputCloseCallback ioclose,
        void* ioctx,
        xmlCharEncodingHandler* encoder) nogil
    cdef xmlOutputBuffer* xmlOutputBufferCreateFile(
        stdio.FILE* file,
        xmlCharEncodingHandler* encoder) nogil
    cdef xmlOutputBuffer* xmlOutputBufferCreateFilename(
        char* URI,
        xmlCharEncodingHandler* encoder,
        int compression) nogil
|
||||
|
||||
cdef extern from "libxml/xmlsave.h":
    # Save-context API: a field-less context type, the option flags and
    # the functions operating on a context. All functions are `nogil`.
    ctypedef struct xmlSaveCtxt

    # Option values are distinct powers of two; the version in
    # parentheses is copied from the original per-member comments.
    ctypedef enum xmlSaveOption:
        XML_SAVE_FORMAT = 1     # format save output (2.6.17)
        XML_SAVE_NO_DECL = 2    # drop the xml declaration (2.6.21)
        XML_SAVE_NO_EMPTY = 4   # no empty tags (2.6.22)
        XML_SAVE_NO_XHTML = 8   # disable XHTML1 specific rules (2.6.22)
        XML_SAVE_XHTML = 16     # force XHTML1 specific rules (2.7.2)
        XML_SAVE_AS_XML = 32    # force XML serialization on HTML doc (2.7.2)
        XML_SAVE_AS_HTML = 64   # force HTML serialization on XML doc (2.7.2)

    # Context constructors.
    cdef xmlSaveCtxt* xmlSaveToFilename(
        char* filename, char* encoding, int options) nogil
    cdef xmlSaveCtxt* xmlSaveToBuffer(
        xmlBuffer* buffer, char* encoding, int options) nogil # libxml2 2.6.23

    # Operations on an existing context.
    cdef long xmlSaveDoc(xmlSaveCtxt* ctxt, xmlDoc* doc) nogil
    cdef long xmlSaveTree(xmlSaveCtxt* ctxt, xmlNode* node) nogil
    cdef int xmlSaveClose(xmlSaveCtxt* ctxt) nogil
    cdef int xmlSaveFlush(xmlSaveCtxt* ctxt) nogil
    # The escape callbacks are passed as untyped void* here.
    cdef int xmlSaveSetAttrEscape(xmlSaveCtxt* ctxt, void* escape_func) nogil
    cdef int xmlSaveSetEscape(xmlSaveCtxt* ctxt, void* escape_func) nogil
|
||||
|
||||
cdef extern from "libxml/globals.h":
    # Three xmlThrDef* functions, each taking an int `onoff` argument and
    # returning int; all declared `nogil`.
    cdef int xmlThrDefKeepBlanksDefaultValue(int onoff) nogil
    cdef int xmlThrDefLineNumbersDefaultValue(int onoff) nogil
    cdef int xmlThrDefIndentTreeOutput(int onoff) nogil
|
||||
|
||||
cdef extern from "libxml/xmlmemory.h" nogil:
    # `nogil` is set on the block header here, so it is not repeated on
    # the individual declarations.
    cdef void* xmlMalloc(size_t size)
    cdef int xmlMemBlocks()
    cdef int xmlMemUsed()
    # The three reporting functions each take a stdio.FILE* as first argument.
    cdef void xmlMemDisplay(stdio.FILE* file)
    cdef void xmlMemDisplayLast(stdio.FILE* file, long num_bytes)
    cdef void xmlMemShow(stdio.FILE* file, int count)
|
||||
|
||||
cdef extern from "etree_defs.h":
    # Helpers declared in the project's own C header, all `nogil`.

    # Node predicates returning bint, and a namespace accessor.
    cdef bint _isElement(xmlNode* node) nogil
    cdef bint _isElementOrXInclude(xmlNode* node) nogil
    cdef const_xmlChar* _getNs(xmlNode* node) nogil

    # Paired BEGIN_/END_ traversal helpers.
    # NOTE(review): the upper-case names suggest C macros declared here
    # with function signatures -- confirm in etree_defs.h.
    cdef void BEGIN_FOR_EACH_ELEMENT_FROM(
        xmlNode* tree_top,
        xmlNode* start_node,
        bint inclusive) nogil
    cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node) nogil
    cdef void BEGIN_FOR_EACH_FROM(
        xmlNode* tree_top,
        xmlNode* start_node,
        bint inclusive) nogil
    cdef void END_FOR_EACH_FROM(xmlNode* start_node) nogil
|
||||
5
.venv/lib/python3.7/site-packages/lxml/includes/uri.pxd
Normal file
5
.venv/lib/python3.7/site-packages/lxml/includes/uri.pxd
Normal file
@@ -0,0 +1,5 @@
|
||||
cdef extern from "libxml/uri.h":
    # URI type declared without fields, plus the parse/free pair for it.
    # NOTE: neither function is marked `nogil` in this file.
    ctypedef struct xmlURI

    cdef xmlURI* xmlParseURI(char* str)
    cdef void xmlFreeURI(xmlURI* uri)
|
||||
22
.venv/lib/python3.7/site-packages/lxml/includes/xinclude.pxd
Normal file
22
.venv/lib/python3.7/site-packages/lxml/includes/xinclude.pxd
Normal file
@@ -0,0 +1,22 @@
|
||||
from lxml.includes.tree cimport xmlDoc, xmlNode
|
||||
|
||||
cdef extern from "libxml/xinclude.h":
    # XInclude processing entry points; every function is `nogil`.

    # Context type declared without fields.
    ctypedef struct xmlXIncludeCtxt

    # Document-level and tree-level processing, with and without flags.
    cdef int xmlXIncludeProcess(xmlDoc* doc) nogil
    cdef int xmlXIncludeProcessFlags(xmlDoc* doc, int parser_opts) nogil
    cdef int xmlXIncludeProcessTree(xmlNode* doc) nogil
    cdef int xmlXIncludeProcessTreeFlags(xmlNode* doc, int parser_opts) nogil

    # libxml2 >= 2.7.4
    cdef int xmlXIncludeProcessTreeFlagsData(
        xmlNode* doc,
        int parser_opts,
        void* data) nogil

    # Context-based interface.
    cdef xmlXIncludeCtxt* xmlXIncludeNewContext(xmlDoc* doc) nogil
    cdef int xmlXIncludeProcessNode(xmlXIncludeCtxt* ctxt, xmlNode* node) nogil
    cdef int xmlXIncludeSetFlags(xmlXIncludeCtxt* ctxt, int flags) nogil

    # libxml2 >= 2.6.27
    cdef int xmlXIncludeProcessFlagsData(
        xmlDoc* doc,
        int flags,
        void* data) nogil
|
||||
852
.venv/lib/python3.7/site-packages/lxml/includes/xmlerror.pxd
Normal file
852
.venv/lib/python3.7/site-packages/lxml/includes/xmlerror.pxd
Normal file
@@ -0,0 +1,852 @@
|
||||
|
||||
# --- BEGIN: GENERATED CONSTANTS ---
|
||||
|
||||
# This section is generated by the script 'update-error-constants.py'.
|
||||
|
||||
cdef extern from "libxml/xmlerror.h":
|
||||
ctypedef enum xmlErrorLevel:
    # Error levels, numbered 0 (none) to 3 (fatal).
    XML_ERR_NONE = 0
    XML_ERR_WARNING = 1  # A simple warning
    XML_ERR_ERROR = 2    # A recoverable error
    XML_ERR_FATAL = 3    # A fatal error
|
||||
|
||||
ctypedef enum xmlErrorDomain:
    # One value per component that an error is attributed to; the
    # per-member comments name the component.
    XML_FROM_NONE = 0
    XML_FROM_PARSER = 1        # The XML parser
    XML_FROM_TREE = 2          # The tree module
    XML_FROM_NAMESPACE = 3     # The XML Namespace module
    XML_FROM_DTD = 4           # The XML DTD validation with parser context
    XML_FROM_HTML = 5          # The HTML parser
    XML_FROM_MEMORY = 6        # The memory allocator
    XML_FROM_OUTPUT = 7        # The serialization code
    XML_FROM_IO = 8            # The Input/Output stack
    XML_FROM_FTP = 9           # The FTP module
    XML_FROM_HTTP = 10         # The HTTP module
    XML_FROM_XINCLUDE = 11     # The XInclude processing
    XML_FROM_XPATH = 12        # The XPath module
    XML_FROM_XPOINTER = 13     # The XPointer module
    XML_FROM_REGEXP = 14       # The regular expressions module
    XML_FROM_DATATYPE = 15     # The W3C XML Schemas Datatype module
    XML_FROM_SCHEMASP = 16     # The W3C XML Schemas parser module
    XML_FROM_SCHEMASV = 17     # The W3C XML Schemas validation module
    XML_FROM_RELAXNGP = 18     # The Relax-NG parser module
    XML_FROM_RELAXNGV = 19     # The Relax-NG validator module
    XML_FROM_CATALOG = 20      # The Catalog module
    XML_FROM_C14N = 21         # The Canonicalization module
    XML_FROM_XSLT = 22         # The XSLT engine from libxslt
    XML_FROM_VALID = 23        # The XML DTD validation with valid context
    XML_FROM_CHECK = 24        # The error checking module
    XML_FROM_WRITER = 25       # The xmlwriter module
    XML_FROM_MODULE = 26       # The module for dynamically loaded modules
    XML_FROM_I18N = 27         # The module handling character conversion
    XML_FROM_SCHEMATRONV = 28  # The Schematron validator module
    XML_FROM_BUFFER = 29       # The buffers module
    XML_FROM_URI = 30          # The URI module
|
||||
|
||||
ctypedef enum xmlParserErrors:
|
||||
XML_ERR_OK = 0
|
||||
XML_ERR_INTERNAL_ERROR = 1
|
||||
XML_ERR_NO_MEMORY = 2
|
||||
XML_ERR_DOCUMENT_START = 3
|
||||
XML_ERR_DOCUMENT_EMPTY = 4
|
||||
XML_ERR_DOCUMENT_END = 5
|
||||
XML_ERR_INVALID_HEX_CHARREF = 6
|
||||
XML_ERR_INVALID_DEC_CHARREF = 7
|
||||
XML_ERR_INVALID_CHARREF = 8
|
||||
XML_ERR_INVALID_CHAR = 9
|
||||
XML_ERR_CHARREF_AT_EOF = 10
|
||||
XML_ERR_CHARREF_IN_PROLOG = 11
|
||||
XML_ERR_CHARREF_IN_EPILOG = 12
|
||||
XML_ERR_CHARREF_IN_DTD = 13
|
||||
XML_ERR_ENTITYREF_AT_EOF = 14
|
||||
XML_ERR_ENTITYREF_IN_PROLOG = 15
|
||||
XML_ERR_ENTITYREF_IN_EPILOG = 16
|
||||
XML_ERR_ENTITYREF_IN_DTD = 17
|
||||
XML_ERR_PEREF_AT_EOF = 18
|
||||
XML_ERR_PEREF_IN_PROLOG = 19
|
||||
XML_ERR_PEREF_IN_EPILOG = 20
|
||||
XML_ERR_PEREF_IN_INT_SUBSET = 21
|
||||
XML_ERR_ENTITYREF_NO_NAME = 22
|
||||
XML_ERR_ENTITYREF_SEMICOL_MISSING = 23
|
||||
XML_ERR_PEREF_NO_NAME = 24
|
||||
XML_ERR_PEREF_SEMICOL_MISSING = 25
|
||||
XML_ERR_UNDECLARED_ENTITY = 26
|
||||
XML_WAR_UNDECLARED_ENTITY = 27
|
||||
XML_ERR_UNPARSED_ENTITY = 28
|
||||
XML_ERR_ENTITY_IS_EXTERNAL = 29
|
||||
XML_ERR_ENTITY_IS_PARAMETER = 30
|
||||
XML_ERR_UNKNOWN_ENCODING = 31
|
||||
XML_ERR_UNSUPPORTED_ENCODING = 32
|
||||
XML_ERR_STRING_NOT_STARTED = 33
|
||||
XML_ERR_STRING_NOT_CLOSED = 34
|
||||
XML_ERR_NS_DECL_ERROR = 35
|
||||
XML_ERR_ENTITY_NOT_STARTED = 36
|
||||
XML_ERR_ENTITY_NOT_FINISHED = 37
|
||||
XML_ERR_LT_IN_ATTRIBUTE = 38
|
||||
XML_ERR_ATTRIBUTE_NOT_STARTED = 39
|
||||
XML_ERR_ATTRIBUTE_NOT_FINISHED = 40
|
||||
XML_ERR_ATTRIBUTE_WITHOUT_VALUE = 41
|
||||
XML_ERR_ATTRIBUTE_REDEFINED = 42
|
||||
XML_ERR_LITERAL_NOT_STARTED = 43
|
||||
XML_ERR_LITERAL_NOT_FINISHED = 44
|
||||
XML_ERR_COMMENT_NOT_FINISHED = 45
|
||||
XML_ERR_PI_NOT_STARTED = 46
|
||||
XML_ERR_PI_NOT_FINISHED = 47
|
||||
XML_ERR_NOTATION_NOT_STARTED = 48
|
||||
XML_ERR_NOTATION_NOT_FINISHED = 49
|
||||
XML_ERR_ATTLIST_NOT_STARTED = 50
|
||||
XML_ERR_ATTLIST_NOT_FINISHED = 51
|
||||
XML_ERR_MIXED_NOT_STARTED = 52
|
||||
XML_ERR_MIXED_NOT_FINISHED = 53
|
||||
XML_ERR_ELEMCONTENT_NOT_STARTED = 54
|
||||
XML_ERR_ELEMCONTENT_NOT_FINISHED = 55
|
||||
XML_ERR_XMLDECL_NOT_STARTED = 56
|
||||
XML_ERR_XMLDECL_NOT_FINISHED = 57
|
||||
XML_ERR_CONDSEC_NOT_STARTED = 58
|
||||
XML_ERR_CONDSEC_NOT_FINISHED = 59
|
||||
XML_ERR_EXT_SUBSET_NOT_FINISHED = 60
|
||||
XML_ERR_DOCTYPE_NOT_FINISHED = 61
|
||||
XML_ERR_MISPLACED_CDATA_END = 62
|
||||
XML_ERR_CDATA_NOT_FINISHED = 63
|
||||
XML_ERR_RESERVED_XML_NAME = 64
|
||||
XML_ERR_SPACE_REQUIRED = 65
|
||||
XML_ERR_SEPARATOR_REQUIRED = 66
|
||||
XML_ERR_NMTOKEN_REQUIRED = 67
|
||||
XML_ERR_NAME_REQUIRED = 68
|
||||
XML_ERR_PCDATA_REQUIRED = 69
|
||||
XML_ERR_URI_REQUIRED = 70
|
||||
XML_ERR_PUBID_REQUIRED = 71
|
||||
XML_ERR_LT_REQUIRED = 72
|
||||
XML_ERR_GT_REQUIRED = 73
|
||||
XML_ERR_LTSLASH_REQUIRED = 74
|
||||
XML_ERR_EQUAL_REQUIRED = 75
|
||||
XML_ERR_TAG_NAME_MISMATCH = 76
|
||||
XML_ERR_TAG_NOT_FINISHED = 77
|
||||
XML_ERR_STANDALONE_VALUE = 78
|
||||
XML_ERR_ENCODING_NAME = 79
|
||||
XML_ERR_HYPHEN_IN_COMMENT = 80
|
||||
XML_ERR_INVALID_ENCODING = 81
|
||||
XML_ERR_EXT_ENTITY_STANDALONE = 82
|
||||
XML_ERR_CONDSEC_INVALID = 83
|
||||
XML_ERR_VALUE_REQUIRED = 84
|
||||
XML_ERR_NOT_WELL_BALANCED = 85
|
||||
XML_ERR_EXTRA_CONTENT = 86
|
||||
XML_ERR_ENTITY_CHAR_ERROR = 87
|
||||
XML_ERR_ENTITY_PE_INTERNAL = 88
|
||||
XML_ERR_ENTITY_LOOP = 89
|
||||
XML_ERR_ENTITY_BOUNDARY = 90
|
||||
XML_ERR_INVALID_URI = 91
|
||||
XML_ERR_URI_FRAGMENT = 92
|
||||
XML_WAR_CATALOG_PI = 93
|
||||
XML_ERR_NO_DTD = 94
|
||||
XML_ERR_CONDSEC_INVALID_KEYWORD = 95
|
||||
XML_ERR_VERSION_MISSING = 96
|
||||
XML_WAR_UNKNOWN_VERSION = 97
|
||||
XML_WAR_LANG_VALUE = 98
|
||||
XML_WAR_NS_URI = 99
|
||||
XML_WAR_NS_URI_RELATIVE = 100
|
||||
XML_ERR_MISSING_ENCODING = 101
|
||||
XML_WAR_SPACE_VALUE = 102
|
||||
XML_ERR_NOT_STANDALONE = 103
|
||||
XML_ERR_ENTITY_PROCESSING = 104
|
||||
XML_ERR_NOTATION_PROCESSING = 105
|
||||
XML_WAR_NS_COLUMN = 106
|
||||
XML_WAR_ENTITY_REDEFINED = 107
|
||||
XML_ERR_UNKNOWN_VERSION = 108
|
||||
XML_ERR_VERSION_MISMATCH = 109
|
||||
XML_ERR_NAME_TOO_LONG = 110
|
||||
XML_ERR_USER_STOP = 111
|
||||
XML_ERR_COMMENT_ABRUPTLY_ENDED = 112
|
||||
XML_NS_ERR_XML_NAMESPACE = 200
|
||||
XML_NS_ERR_UNDEFINED_NAMESPACE = 201
|
||||
XML_NS_ERR_QNAME = 202
|
||||
XML_NS_ERR_ATTRIBUTE_REDEFINED = 203
|
||||
XML_NS_ERR_EMPTY = 204
|
||||
XML_NS_ERR_COLON = 205
|
||||
XML_DTD_ATTRIBUTE_DEFAULT = 500
|
||||
XML_DTD_ATTRIBUTE_REDEFINED = 501
|
||||
XML_DTD_ATTRIBUTE_VALUE = 502
|
||||
XML_DTD_CONTENT_ERROR = 503
|
||||
XML_DTD_CONTENT_MODEL = 504
|
||||
XML_DTD_CONTENT_NOT_DETERMINIST = 505
|
||||
XML_DTD_DIFFERENT_PREFIX = 506
|
||||
XML_DTD_ELEM_DEFAULT_NAMESPACE = 507
|
||||
XML_DTD_ELEM_NAMESPACE = 508
|
||||
XML_DTD_ELEM_REDEFINED = 509
|
||||
XML_DTD_EMPTY_NOTATION = 510
|
||||
XML_DTD_ENTITY_TYPE = 511
|
||||
XML_DTD_ID_FIXED = 512
|
||||
XML_DTD_ID_REDEFINED = 513
|
||||
XML_DTD_ID_SUBSET = 514
|
||||
XML_DTD_INVALID_CHILD = 515
|
||||
XML_DTD_INVALID_DEFAULT = 516
|
||||
XML_DTD_LOAD_ERROR = 517
|
||||
XML_DTD_MISSING_ATTRIBUTE = 518
|
||||
XML_DTD_MIXED_CORRUPT = 519
|
||||
XML_DTD_MULTIPLE_ID = 520
|
||||
XML_DTD_NO_DOC = 521
|
||||
XML_DTD_NO_DTD = 522
|
||||
XML_DTD_NO_ELEM_NAME = 523
|
||||
XML_DTD_NO_PREFIX = 524
|
||||
XML_DTD_NO_ROOT = 525
|
||||
XML_DTD_NOTATION_REDEFINED = 526
|
||||
XML_DTD_NOTATION_VALUE = 527
|
||||
XML_DTD_NOT_EMPTY = 528
|
||||
XML_DTD_NOT_PCDATA = 529
|
||||
XML_DTD_NOT_STANDALONE = 530
|
||||
XML_DTD_ROOT_NAME = 531
|
||||
XML_DTD_STANDALONE_WHITE_SPACE = 532
|
||||
XML_DTD_UNKNOWN_ATTRIBUTE = 533
|
||||
XML_DTD_UNKNOWN_ELEM = 534
|
||||
XML_DTD_UNKNOWN_ENTITY = 535
|
||||
XML_DTD_UNKNOWN_ID = 536
|
||||
XML_DTD_UNKNOWN_NOTATION = 537
|
||||
XML_DTD_STANDALONE_DEFAULTED = 538
|
||||
XML_DTD_XMLID_VALUE = 539
|
||||
XML_DTD_XMLID_TYPE = 540
|
||||
XML_DTD_DUP_TOKEN = 541
|
||||
XML_HTML_STRUCURE_ERROR = 800
|
||||
XML_HTML_UNKNOWN_TAG = 801
|
||||
XML_RNGP_ANYNAME_ATTR_ANCESTOR = 1000
|
||||
XML_RNGP_ATTR_CONFLICT = 1001
|
||||
XML_RNGP_ATTRIBUTE_CHILDREN = 1002
|
||||
XML_RNGP_ATTRIBUTE_CONTENT = 1003
|
||||
XML_RNGP_ATTRIBUTE_EMPTY = 1004
|
||||
XML_RNGP_ATTRIBUTE_NOOP = 1005
|
||||
XML_RNGP_CHOICE_CONTENT = 1006
|
||||
XML_RNGP_CHOICE_EMPTY = 1007
|
||||
XML_RNGP_CREATE_FAILURE = 1008
|
||||
XML_RNGP_DATA_CONTENT = 1009
|
||||
XML_RNGP_DEF_CHOICE_AND_INTERLEAVE = 1010
|
||||
XML_RNGP_DEFINE_CREATE_FAILED = 1011
|
||||
XML_RNGP_DEFINE_EMPTY = 1012
|
||||
XML_RNGP_DEFINE_MISSING = 1013
|
||||
XML_RNGP_DEFINE_NAME_MISSING = 1014
|
||||
XML_RNGP_ELEM_CONTENT_EMPTY = 1015
|
||||
XML_RNGP_ELEM_CONTENT_ERROR = 1016
|
||||
XML_RNGP_ELEMENT_EMPTY = 1017
|
||||
XML_RNGP_ELEMENT_CONTENT = 1018
|
||||
XML_RNGP_ELEMENT_NAME = 1019
|
||||
XML_RNGP_ELEMENT_NO_CONTENT = 1020
|
||||
XML_RNGP_ELEM_TEXT_CONFLICT = 1021
|
||||
XML_RNGP_EMPTY = 1022
|
||||
XML_RNGP_EMPTY_CONSTRUCT = 1023
|
||||
XML_RNGP_EMPTY_CONTENT = 1024
|
||||
XML_RNGP_EMPTY_NOT_EMPTY = 1025
|
||||
XML_RNGP_ERROR_TYPE_LIB = 1026
|
||||
XML_RNGP_EXCEPT_EMPTY = 1027
|
||||
XML_RNGP_EXCEPT_MISSING = 1028
|
||||
XML_RNGP_EXCEPT_MULTIPLE = 1029
|
||||
XML_RNGP_EXCEPT_NO_CONTENT = 1030
|
||||
XML_RNGP_EXTERNALREF_EMTPY = 1031
|
||||
XML_RNGP_EXTERNAL_REF_FAILURE = 1032
|
||||
XML_RNGP_EXTERNALREF_RECURSE = 1033
|
||||
XML_RNGP_FORBIDDEN_ATTRIBUTE = 1034
|
||||
XML_RNGP_FOREIGN_ELEMENT = 1035
|
||||
XML_RNGP_GRAMMAR_CONTENT = 1036
|
||||
XML_RNGP_GRAMMAR_EMPTY = 1037
|
||||
XML_RNGP_GRAMMAR_MISSING = 1038
|
||||
XML_RNGP_GRAMMAR_NO_START = 1039
|
||||
XML_RNGP_GROUP_ATTR_CONFLICT = 1040
|
||||
XML_RNGP_HREF_ERROR = 1041
|
||||
XML_RNGP_INCLUDE_EMPTY = 1042
|
||||
XML_RNGP_INCLUDE_FAILURE = 1043
|
||||
XML_RNGP_INCLUDE_RECURSE = 1044
|
||||
XML_RNGP_INTERLEAVE_ADD = 1045
|
||||
XML_RNGP_INTERLEAVE_CREATE_FAILED = 1046
|
||||
XML_RNGP_INTERLEAVE_EMPTY = 1047
|
||||
XML_RNGP_INTERLEAVE_NO_CONTENT = 1048
|
||||
XML_RNGP_INVALID_DEFINE_NAME = 1049
|
||||
XML_RNGP_INVALID_URI = 1050
|
||||
XML_RNGP_INVALID_VALUE = 1051
|
||||
XML_RNGP_MISSING_HREF = 1052
|
||||
XML_RNGP_NAME_MISSING = 1053
|
||||
XML_RNGP_NEED_COMBINE = 1054
|
||||
XML_RNGP_NOTALLOWED_NOT_EMPTY = 1055
|
||||
XML_RNGP_NSNAME_ATTR_ANCESTOR = 1056
|
||||
XML_RNGP_NSNAME_NO_NS = 1057
|
||||
XML_RNGP_PARAM_FORBIDDEN = 1058
|
||||
XML_RNGP_PARAM_NAME_MISSING = 1059
|
||||
XML_RNGP_PARENTREF_CREATE_FAILED = 1060
|
||||
XML_RNGP_PARENTREF_NAME_INVALID = 1061
|
||||
XML_RNGP_PARENTREF_NO_NAME = 1062
|
||||
XML_RNGP_PARENTREF_NO_PARENT = 1063
|
||||
XML_RNGP_PARENTREF_NOT_EMPTY = 1064
|
||||
XML_RNGP_PARSE_ERROR = 1065
|
||||
XML_RNGP_PAT_ANYNAME_EXCEPT_ANYNAME = 1066
|
||||
XML_RNGP_PAT_ATTR_ATTR = 1067
|
||||
XML_RNGP_PAT_ATTR_ELEM = 1068
|
||||
XML_RNGP_PAT_DATA_EXCEPT_ATTR = 1069
|
||||
XML_RNGP_PAT_DATA_EXCEPT_ELEM = 1070
|
||||
XML_RNGP_PAT_DATA_EXCEPT_EMPTY = 1071
|
||||
XML_RNGP_PAT_DATA_EXCEPT_GROUP = 1072
|
||||
XML_RNGP_PAT_DATA_EXCEPT_INTERLEAVE = 1073
|
||||
XML_RNGP_PAT_DATA_EXCEPT_LIST = 1074
|
||||
XML_RNGP_PAT_DATA_EXCEPT_ONEMORE = 1075
|
||||
XML_RNGP_PAT_DATA_EXCEPT_REF = 1076
|
||||
XML_RNGP_PAT_DATA_EXCEPT_TEXT = 1077
|
||||
XML_RNGP_PAT_LIST_ATTR = 1078
|
||||
XML_RNGP_PAT_LIST_ELEM = 1079
|
||||
XML_RNGP_PAT_LIST_INTERLEAVE = 1080
|
||||
XML_RNGP_PAT_LIST_LIST = 1081
|
||||
XML_RNGP_PAT_LIST_REF = 1082
|
||||
XML_RNGP_PAT_LIST_TEXT = 1083
|
||||
XML_RNGP_PAT_NSNAME_EXCEPT_ANYNAME = 1084
|
||||
XML_RNGP_PAT_NSNAME_EXCEPT_NSNAME = 1085
|
||||
XML_RNGP_PAT_ONEMORE_GROUP_ATTR = 1086
|
||||
XML_RNGP_PAT_ONEMORE_INTERLEAVE_ATTR = 1087
|
||||
XML_RNGP_PAT_START_ATTR = 1088
|
||||
XML_RNGP_PAT_START_DATA = 1089
|
||||
XML_RNGP_PAT_START_EMPTY = 1090
|
||||
XML_RNGP_PAT_START_GROUP = 1091
|
||||
XML_RNGP_PAT_START_INTERLEAVE = 1092
|
||||
XML_RNGP_PAT_START_LIST = 1093
|
||||
XML_RNGP_PAT_START_ONEMORE = 1094
|
||||
XML_RNGP_PAT_START_TEXT = 1095
|
||||
XML_RNGP_PAT_START_VALUE = 1096
|
||||
XML_RNGP_PREFIX_UNDEFINED = 1097
|
||||
XML_RNGP_REF_CREATE_FAILED = 1098
|
||||
XML_RNGP_REF_CYCLE = 1099
|
||||
XML_RNGP_REF_NAME_INVALID = 1100
|
||||
XML_RNGP_REF_NO_DEF = 1101
|
||||
XML_RNGP_REF_NO_NAME = 1102
|
||||
XML_RNGP_REF_NOT_EMPTY = 1103
|
||||
XML_RNGP_START_CHOICE_AND_INTERLEAVE = 1104
|
||||
XML_RNGP_START_CONTENT = 1105
|
||||
XML_RNGP_START_EMPTY = 1106
|
||||
XML_RNGP_START_MISSING = 1107
|
||||
XML_RNGP_TEXT_EXPECTED = 1108
|
||||
XML_RNGP_TEXT_HAS_CHILD = 1109
|
||||
XML_RNGP_TYPE_MISSING = 1110
|
||||
XML_RNGP_TYPE_NOT_FOUND = 1111
|
||||
XML_RNGP_TYPE_VALUE = 1112
|
||||
XML_RNGP_UNKNOWN_ATTRIBUTE = 1113
|
||||
XML_RNGP_UNKNOWN_COMBINE = 1114
|
||||
XML_RNGP_UNKNOWN_CONSTRUCT = 1115
|
||||
XML_RNGP_UNKNOWN_TYPE_LIB = 1116
|
||||
XML_RNGP_URI_FRAGMENT = 1117
|
||||
XML_RNGP_URI_NOT_ABSOLUTE = 1118
|
||||
XML_RNGP_VALUE_EMPTY = 1119
|
||||
XML_RNGP_VALUE_NO_CONTENT = 1120
|
||||
XML_RNGP_XMLNS_NAME = 1121
|
||||
XML_RNGP_XML_NS = 1122
|
||||
XML_XPATH_EXPRESSION_OK = 1200
|
||||
XML_XPATH_NUMBER_ERROR = 1201
|
||||
XML_XPATH_UNFINISHED_LITERAL_ERROR = 1202
|
||||
XML_XPATH_START_LITERAL_ERROR = 1203
|
||||
XML_XPATH_VARIABLE_REF_ERROR = 1204
|
||||
XML_XPATH_UNDEF_VARIABLE_ERROR = 1205
|
||||
XML_XPATH_INVALID_PREDICATE_ERROR = 1206
|
||||
XML_XPATH_EXPR_ERROR = 1207
|
||||
XML_XPATH_UNCLOSED_ERROR = 1208
|
||||
XML_XPATH_UNKNOWN_FUNC_ERROR = 1209
|
||||
XML_XPATH_INVALID_OPERAND = 1210
|
||||
XML_XPATH_INVALID_TYPE = 1211
|
||||
XML_XPATH_INVALID_ARITY = 1212
|
||||
XML_XPATH_INVALID_CTXT_SIZE = 1213
|
||||
XML_XPATH_INVALID_CTXT_POSITION = 1214
|
||||
XML_XPATH_MEMORY_ERROR = 1215
|
||||
XML_XPTR_SYNTAX_ERROR = 1216
|
||||
XML_XPTR_RESOURCE_ERROR = 1217
|
||||
XML_XPTR_SUB_RESOURCE_ERROR = 1218
|
||||
XML_XPATH_UNDEF_PREFIX_ERROR = 1219
|
||||
XML_XPATH_ENCODING_ERROR = 1220
|
||||
XML_XPATH_INVALID_CHAR_ERROR = 1221
|
||||
XML_TREE_INVALID_HEX = 1300
|
||||
XML_TREE_INVALID_DEC = 1301
|
||||
XML_TREE_UNTERMINATED_ENTITY = 1302
|
||||
XML_TREE_NOT_UTF8 = 1303
|
||||
XML_SAVE_NOT_UTF8 = 1400
|
||||
XML_SAVE_CHAR_INVALID = 1401
|
||||
XML_SAVE_NO_DOCTYPE = 1402
|
||||
XML_SAVE_UNKNOWN_ENCODING = 1403
|
||||
XML_REGEXP_COMPILE_ERROR = 1450
|
||||
XML_IO_UNKNOWN = 1500
|
||||
XML_IO_EACCES = 1501
|
||||
XML_IO_EAGAIN = 1502
|
||||
XML_IO_EBADF = 1503
|
||||
XML_IO_EBADMSG = 1504
|
||||
XML_IO_EBUSY = 1505
|
||||
XML_IO_ECANCELED = 1506
|
||||
XML_IO_ECHILD = 1507
|
||||
XML_IO_EDEADLK = 1508
|
||||
XML_IO_EDOM = 1509
|
||||
XML_IO_EEXIST = 1510
|
||||
XML_IO_EFAULT = 1511
|
||||
XML_IO_EFBIG = 1512
|
||||
XML_IO_EINPROGRESS = 1513
|
||||
XML_IO_EINTR = 1514
|
||||
XML_IO_EINVAL = 1515
|
||||
XML_IO_EIO = 1516
|
||||
XML_IO_EISDIR = 1517
|
||||
XML_IO_EMFILE = 1518
|
||||
XML_IO_EMLINK = 1519
|
||||
XML_IO_EMSGSIZE = 1520
|
||||
XML_IO_ENAMETOOLONG = 1521
|
||||
XML_IO_ENFILE = 1522
|
||||
XML_IO_ENODEV = 1523
|
||||
XML_IO_ENOENT = 1524
|
||||
XML_IO_ENOEXEC = 1525
|
||||
XML_IO_ENOLCK = 1526
|
||||
XML_IO_ENOMEM = 1527
|
||||
XML_IO_ENOSPC = 1528
|
||||
XML_IO_ENOSYS = 1529
|
||||
XML_IO_ENOTDIR = 1530
|
||||
XML_IO_ENOTEMPTY = 1531
|
||||
XML_IO_ENOTSUP = 1532
|
||||
XML_IO_ENOTTY = 1533
|
||||
XML_IO_ENXIO = 1534
|
||||
XML_IO_EPERM = 1535
|
||||
XML_IO_EPIPE = 1536
|
||||
XML_IO_ERANGE = 1537
|
||||
XML_IO_EROFS = 1538
|
||||
XML_IO_ESPIPE = 1539
|
||||
XML_IO_ESRCH = 1540
|
||||
XML_IO_ETIMEDOUT = 1541
|
||||
XML_IO_EXDEV = 1542
|
||||
XML_IO_NETWORK_ATTEMPT = 1543
|
||||
XML_IO_ENCODER = 1544
|
||||
XML_IO_FLUSH = 1545
|
||||
XML_IO_WRITE = 1546
|
||||
XML_IO_NO_INPUT = 1547
|
||||
XML_IO_BUFFER_FULL = 1548
|
||||
XML_IO_LOAD_ERROR = 1549
|
||||
XML_IO_ENOTSOCK = 1550
|
||||
XML_IO_EISCONN = 1551
|
||||
XML_IO_ECONNREFUSED = 1552
|
||||
XML_IO_ENETUNREACH = 1553
|
||||
XML_IO_EADDRINUSE = 1554
|
||||
XML_IO_EALREADY = 1555
|
||||
XML_IO_EAFNOSUPPORT = 1556
|
||||
XML_XINCLUDE_RECURSION = 1600
|
||||
XML_XINCLUDE_PARSE_VALUE = 1601
|
||||
XML_XINCLUDE_ENTITY_DEF_MISMATCH = 1602
|
||||
XML_XINCLUDE_NO_HREF = 1603
|
||||
XML_XINCLUDE_NO_FALLBACK = 1604
|
||||
XML_XINCLUDE_HREF_URI = 1605
|
||||
XML_XINCLUDE_TEXT_FRAGMENT = 1606
|
||||
XML_XINCLUDE_TEXT_DOCUMENT = 1607
|
||||
XML_XINCLUDE_INVALID_CHAR = 1608
|
||||
XML_XINCLUDE_BUILD_FAILED = 1609
|
||||
XML_XINCLUDE_UNKNOWN_ENCODING = 1610
|
||||
XML_XINCLUDE_MULTIPLE_ROOT = 1611
|
||||
XML_XINCLUDE_XPTR_FAILED = 1612
|
||||
XML_XINCLUDE_XPTR_RESULT = 1613
|
||||
XML_XINCLUDE_INCLUDE_IN_INCLUDE = 1614
|
||||
XML_XINCLUDE_FALLBACKS_IN_INCLUDE = 1615
|
||||
XML_XINCLUDE_FALLBACK_NOT_IN_INCLUDE = 1616
|
||||
XML_XINCLUDE_DEPRECATED_NS = 1617
|
||||
XML_XINCLUDE_FRAGMENT_ID = 1618
|
||||
XML_CATALOG_MISSING_ATTR = 1650
|
||||
XML_CATALOG_ENTRY_BROKEN = 1651
|
||||
XML_CATALOG_PREFER_VALUE = 1652
|
||||
XML_CATALOG_NOT_CATALOG = 1653
|
||||
XML_CATALOG_RECURSION = 1654
|
||||
XML_SCHEMAP_PREFIX_UNDEFINED = 1700
|
||||
XML_SCHEMAP_ATTRFORMDEFAULT_VALUE = 1701
|
||||
XML_SCHEMAP_ATTRGRP_NONAME_NOREF = 1702
|
||||
XML_SCHEMAP_ATTR_NONAME_NOREF = 1703
|
||||
XML_SCHEMAP_COMPLEXTYPE_NONAME_NOREF = 1704
|
||||
XML_SCHEMAP_ELEMFORMDEFAULT_VALUE = 1705
|
||||
XML_SCHEMAP_ELEM_NONAME_NOREF = 1706
|
||||
XML_SCHEMAP_EXTENSION_NO_BASE = 1707
|
||||
XML_SCHEMAP_FACET_NO_VALUE = 1708
|
||||
XML_SCHEMAP_FAILED_BUILD_IMPORT = 1709
|
||||
XML_SCHEMAP_GROUP_NONAME_NOREF = 1710
|
||||
XML_SCHEMAP_IMPORT_NAMESPACE_NOT_URI = 1711
|
||||
XML_SCHEMAP_IMPORT_REDEFINE_NSNAME = 1712
|
||||
XML_SCHEMAP_IMPORT_SCHEMA_NOT_URI = 1713
|
||||
XML_SCHEMAP_INVALID_BOOLEAN = 1714
|
||||
XML_SCHEMAP_INVALID_ENUM = 1715
|
||||
XML_SCHEMAP_INVALID_FACET = 1716
|
||||
XML_SCHEMAP_INVALID_FACET_VALUE = 1717
|
||||
XML_SCHEMAP_INVALID_MAXOCCURS = 1718
|
||||
XML_SCHEMAP_INVALID_MINOCCURS = 1719
|
||||
XML_SCHEMAP_INVALID_REF_AND_SUBTYPE = 1720
|
||||
XML_SCHEMAP_INVALID_WHITE_SPACE = 1721
|
||||
XML_SCHEMAP_NOATTR_NOREF = 1722
|
||||
XML_SCHEMAP_NOTATION_NO_NAME = 1723
|
||||
XML_SCHEMAP_NOTYPE_NOREF = 1724
|
||||
XML_SCHEMAP_REF_AND_SUBTYPE = 1725
|
||||
XML_SCHEMAP_RESTRICTION_NONAME_NOREF = 1726
|
||||
XML_SCHEMAP_SIMPLETYPE_NONAME = 1727
|
||||
XML_SCHEMAP_TYPE_AND_SUBTYPE = 1728
|
||||
XML_SCHEMAP_UNKNOWN_ALL_CHILD = 1729
|
||||
XML_SCHEMAP_UNKNOWN_ANYATTRIBUTE_CHILD = 1730
|
||||
XML_SCHEMAP_UNKNOWN_ATTR_CHILD = 1731
|
||||
XML_SCHEMAP_UNKNOWN_ATTRGRP_CHILD = 1732
|
||||
XML_SCHEMAP_UNKNOWN_ATTRIBUTE_GROUP = 1733
|
||||
XML_SCHEMAP_UNKNOWN_BASE_TYPE = 1734
|
||||
XML_SCHEMAP_UNKNOWN_CHOICE_CHILD = 1735
|
||||
XML_SCHEMAP_UNKNOWN_COMPLEXCONTENT_CHILD = 1736
|
||||
XML_SCHEMAP_UNKNOWN_COMPLEXTYPE_CHILD = 1737
|
||||
XML_SCHEMAP_UNKNOWN_ELEM_CHILD = 1738
|
||||
XML_SCHEMAP_UNKNOWN_EXTENSION_CHILD = 1739
|
||||
XML_SCHEMAP_UNKNOWN_FACET_CHILD = 1740
|
||||
XML_SCHEMAP_UNKNOWN_FACET_TYPE = 1741
|
||||
XML_SCHEMAP_UNKNOWN_GROUP_CHILD = 1742
|
||||
XML_SCHEMAP_UNKNOWN_IMPORT_CHILD = 1743
|
||||
XML_SCHEMAP_UNKNOWN_LIST_CHILD = 1744
|
||||
XML_SCHEMAP_UNKNOWN_NOTATION_CHILD = 1745
|
||||
XML_SCHEMAP_UNKNOWN_PROCESSCONTENT_CHILD = 1746
|
||||
XML_SCHEMAP_UNKNOWN_REF = 1747
|
||||
XML_SCHEMAP_UNKNOWN_RESTRICTION_CHILD = 1748
|
||||
XML_SCHEMAP_UNKNOWN_SCHEMAS_CHILD = 1749
|
||||
XML_SCHEMAP_UNKNOWN_SEQUENCE_CHILD = 1750
|
||||
XML_SCHEMAP_UNKNOWN_SIMPLECONTENT_CHILD = 1751
|
||||
XML_SCHEMAP_UNKNOWN_SIMPLETYPE_CHILD = 1752
|
||||
XML_SCHEMAP_UNKNOWN_TYPE = 1753
|
||||
XML_SCHEMAP_UNKNOWN_UNION_CHILD = 1754
|
||||
XML_SCHEMAP_ELEM_DEFAULT_FIXED = 1755
|
||||
XML_SCHEMAP_REGEXP_INVALID = 1756
|
||||
XML_SCHEMAP_FAILED_LOAD = 1757
|
||||
XML_SCHEMAP_NOTHING_TO_PARSE = 1758
|
||||
XML_SCHEMAP_NOROOT = 1759
|
||||
XML_SCHEMAP_REDEFINED_GROUP = 1760
|
||||
XML_SCHEMAP_REDEFINED_TYPE = 1761
|
||||
XML_SCHEMAP_REDEFINED_ELEMENT = 1762
|
||||
XML_SCHEMAP_REDEFINED_ATTRGROUP = 1763
|
||||
XML_SCHEMAP_REDEFINED_ATTR = 1764
|
||||
XML_SCHEMAP_REDEFINED_NOTATION = 1765
|
||||
XML_SCHEMAP_FAILED_PARSE = 1766
|
||||
XML_SCHEMAP_UNKNOWN_PREFIX = 1767
|
||||
XML_SCHEMAP_DEF_AND_PREFIX = 1768
|
||||
XML_SCHEMAP_UNKNOWN_INCLUDE_CHILD = 1769
|
||||
XML_SCHEMAP_INCLUDE_SCHEMA_NOT_URI = 1770
|
||||
XML_SCHEMAP_INCLUDE_SCHEMA_NO_URI = 1771
|
||||
XML_SCHEMAP_NOT_SCHEMA = 1772
|
||||
XML_SCHEMAP_UNKNOWN_MEMBER_TYPE = 1773
|
||||
XML_SCHEMAP_INVALID_ATTR_USE = 1774
|
||||
XML_SCHEMAP_RECURSIVE = 1775
|
||||
XML_SCHEMAP_SUPERNUMEROUS_LIST_ITEM_TYPE = 1776
|
||||
XML_SCHEMAP_INVALID_ATTR_COMBINATION = 1777
|
||||
XML_SCHEMAP_INVALID_ATTR_INLINE_COMBINATION = 1778
|
||||
XML_SCHEMAP_MISSING_SIMPLETYPE_CHILD = 1779
|
||||
XML_SCHEMAP_INVALID_ATTR_NAME = 1780
|
||||
XML_SCHEMAP_REF_AND_CONTENT = 1781
|
||||
XML_SCHEMAP_CT_PROPS_CORRECT_1 = 1782
|
||||
XML_SCHEMAP_CT_PROPS_CORRECT_2 = 1783
|
||||
XML_SCHEMAP_CT_PROPS_CORRECT_3 = 1784
|
||||
XML_SCHEMAP_CT_PROPS_CORRECT_4 = 1785
|
||||
XML_SCHEMAP_CT_PROPS_CORRECT_5 = 1786
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_1 = 1787
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_1 = 1788
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_2 = 1789
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_2 = 1790
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_3 = 1791
|
||||
XML_SCHEMAP_WILDCARD_INVALID_NS_MEMBER = 1792
|
||||
XML_SCHEMAP_INTERSECTION_NOT_EXPRESSIBLE = 1793
|
||||
XML_SCHEMAP_UNION_NOT_EXPRESSIBLE = 1794
|
||||
XML_SCHEMAP_SRC_IMPORT_3_1 = 1795
|
||||
XML_SCHEMAP_SRC_IMPORT_3_2 = 1796
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_4_1 = 1797
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_4_2 = 1798
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_4_3 = 1799
|
||||
XML_SCHEMAP_COS_CT_EXTENDS_1_3 = 1800
|
||||
XML_SCHEMAV_NOROOT = 1801
|
||||
XML_SCHEMAV_UNDECLAREDELEM = 1802
|
||||
XML_SCHEMAV_NOTTOPLEVEL = 1803
|
||||
XML_SCHEMAV_MISSING = 1804
|
||||
XML_SCHEMAV_WRONGELEM = 1805
|
||||
XML_SCHEMAV_NOTYPE = 1806
|
||||
XML_SCHEMAV_NOROLLBACK = 1807
|
||||
XML_SCHEMAV_ISABSTRACT = 1808
|
||||
XML_SCHEMAV_NOTEMPTY = 1809
|
||||
XML_SCHEMAV_ELEMCONT = 1810
|
||||
XML_SCHEMAV_HAVEDEFAULT = 1811
|
||||
XML_SCHEMAV_NOTNILLABLE = 1812
|
||||
XML_SCHEMAV_EXTRACONTENT = 1813
|
||||
XML_SCHEMAV_INVALIDATTR = 1814
|
||||
XML_SCHEMAV_INVALIDELEM = 1815
|
||||
XML_SCHEMAV_NOTDETERMINIST = 1816
|
||||
XML_SCHEMAV_CONSTRUCT = 1817
|
||||
XML_SCHEMAV_INTERNAL = 1818
|
||||
XML_SCHEMAV_NOTSIMPLE = 1819
|
||||
XML_SCHEMAV_ATTRUNKNOWN = 1820
|
||||
XML_SCHEMAV_ATTRINVALID = 1821
|
||||
XML_SCHEMAV_VALUE = 1822
|
||||
XML_SCHEMAV_FACET = 1823
|
||||
XML_SCHEMAV_CVC_DATATYPE_VALID_1_2_1 = 1824
|
||||
XML_SCHEMAV_CVC_DATATYPE_VALID_1_2_2 = 1825
|
||||
XML_SCHEMAV_CVC_DATATYPE_VALID_1_2_3 = 1826
|
||||
XML_SCHEMAV_CVC_TYPE_3_1_1 = 1827
|
||||
XML_SCHEMAV_CVC_TYPE_3_1_2 = 1828
|
||||
XML_SCHEMAV_CVC_FACET_VALID = 1829
|
||||
XML_SCHEMAV_CVC_LENGTH_VALID = 1830
|
||||
XML_SCHEMAV_CVC_MINLENGTH_VALID = 1831
|
||||
XML_SCHEMAV_CVC_MAXLENGTH_VALID = 1832
|
||||
XML_SCHEMAV_CVC_MININCLUSIVE_VALID = 1833
|
||||
XML_SCHEMAV_CVC_MAXINCLUSIVE_VALID = 1834
|
||||
XML_SCHEMAV_CVC_MINEXCLUSIVE_VALID = 1835
|
||||
XML_SCHEMAV_CVC_MAXEXCLUSIVE_VALID = 1836
|
||||
XML_SCHEMAV_CVC_TOTALDIGITS_VALID = 1837
|
||||
XML_SCHEMAV_CVC_FRACTIONDIGITS_VALID = 1838
|
||||
XML_SCHEMAV_CVC_PATTERN_VALID = 1839
|
||||
XML_SCHEMAV_CVC_ENUMERATION_VALID = 1840
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_2_1 = 1841
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_2_2 = 1842
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_2_3 = 1843
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_2_4 = 1844
|
||||
XML_SCHEMAV_CVC_ELT_1 = 1845
|
||||
XML_SCHEMAV_CVC_ELT_2 = 1846
|
||||
XML_SCHEMAV_CVC_ELT_3_1 = 1847
|
||||
XML_SCHEMAV_CVC_ELT_3_2_1 = 1848
|
||||
XML_SCHEMAV_CVC_ELT_3_2_2 = 1849
|
||||
XML_SCHEMAV_CVC_ELT_4_1 = 1850
|
||||
XML_SCHEMAV_CVC_ELT_4_2 = 1851
|
||||
XML_SCHEMAV_CVC_ELT_4_3 = 1852
|
||||
XML_SCHEMAV_CVC_ELT_5_1_1 = 1853
|
||||
XML_SCHEMAV_CVC_ELT_5_1_2 = 1854
|
||||
XML_SCHEMAV_CVC_ELT_5_2_1 = 1855
|
||||
XML_SCHEMAV_CVC_ELT_5_2_2_1 = 1856
|
||||
XML_SCHEMAV_CVC_ELT_5_2_2_2_1 = 1857
|
||||
XML_SCHEMAV_CVC_ELT_5_2_2_2_2 = 1858
|
||||
XML_SCHEMAV_CVC_ELT_6 = 1859
|
||||
XML_SCHEMAV_CVC_ELT_7 = 1860
|
||||
XML_SCHEMAV_CVC_ATTRIBUTE_1 = 1861
|
||||
XML_SCHEMAV_CVC_ATTRIBUTE_2 = 1862
|
||||
XML_SCHEMAV_CVC_ATTRIBUTE_3 = 1863
|
||||
XML_SCHEMAV_CVC_ATTRIBUTE_4 = 1864
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_3_1 = 1865
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_3_2_1 = 1866
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_3_2_2 = 1867
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_4 = 1868
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_5_1 = 1869
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_5_2 = 1870
|
||||
XML_SCHEMAV_ELEMENT_CONTENT = 1871
|
||||
XML_SCHEMAV_DOCUMENT_ELEMENT_MISSING = 1872
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_1 = 1873
|
||||
XML_SCHEMAV_CVC_AU = 1874
|
||||
XML_SCHEMAV_CVC_TYPE_1 = 1875
|
||||
XML_SCHEMAV_CVC_TYPE_2 = 1876
|
||||
XML_SCHEMAV_CVC_IDC = 1877
|
||||
XML_SCHEMAV_CVC_WILDCARD = 1878
|
||||
XML_SCHEMAV_MISC = 1879
|
||||
XML_XPTR_UNKNOWN_SCHEME = 1900
|
||||
XML_XPTR_CHILDSEQ_START = 1901
|
||||
XML_XPTR_EVAL_FAILED = 1902
|
||||
XML_XPTR_EXTRA_OBJECTS = 1903
|
||||
XML_C14N_CREATE_CTXT = 1950
|
||||
XML_C14N_REQUIRES_UTF8 = 1951
|
||||
XML_C14N_CREATE_STACK = 1952
|
||||
XML_C14N_INVALID_NODE = 1953
|
||||
XML_C14N_UNKNOW_NODE = 1954
|
||||
XML_C14N_RELATIVE_NAMESPACE = 1955
|
||||
XML_FTP_PASV_ANSWER = 2000
|
||||
XML_FTP_EPSV_ANSWER = 2001
|
||||
XML_FTP_ACCNT = 2002
|
||||
XML_FTP_URL_SYNTAX = 2003
|
||||
XML_HTTP_URL_SYNTAX = 2020
|
||||
XML_HTTP_USE_IP = 2021
|
||||
XML_HTTP_UNKNOWN_HOST = 2022
|
||||
XML_SCHEMAP_SRC_SIMPLE_TYPE_1 = 3000
|
||||
XML_SCHEMAP_SRC_SIMPLE_TYPE_2 = 3001
|
||||
XML_SCHEMAP_SRC_SIMPLE_TYPE_3 = 3002
|
||||
XML_SCHEMAP_SRC_SIMPLE_TYPE_4 = 3003
|
||||
XML_SCHEMAP_SRC_RESOLVE = 3004
|
||||
XML_SCHEMAP_SRC_RESTRICTION_BASE_OR_SIMPLETYPE = 3005
|
||||
XML_SCHEMAP_SRC_LIST_ITEMTYPE_OR_SIMPLETYPE = 3006
|
||||
XML_SCHEMAP_SRC_UNION_MEMBERTYPES_OR_SIMPLETYPES = 3007
|
||||
XML_SCHEMAP_ST_PROPS_CORRECT_1 = 3008
|
||||
XML_SCHEMAP_ST_PROPS_CORRECT_2 = 3009
|
||||
XML_SCHEMAP_ST_PROPS_CORRECT_3 = 3010
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_1_1 = 3011
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_1_2 = 3012
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_1_3_1 = 3013
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_1_3_2 = 3014
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_2_1 = 3015
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_2_3_1_1 = 3016
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_2_3_1_2 = 3017
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_1 = 3018
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_2 = 3019
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_3 = 3020
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_4 = 3021
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_5 = 3022
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_3_1 = 3023
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_3_3_1 = 3024
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_3_3_1_2 = 3025
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_2 = 3026
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_1 = 3027
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_3 = 3028
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_4 = 3029
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_5 = 3030
|
||||
XML_SCHEMAP_COS_ST_DERIVED_OK_2_1 = 3031
|
||||
XML_SCHEMAP_COS_ST_DERIVED_OK_2_2 = 3032
|
||||
XML_SCHEMAP_S4S_ELEM_NOT_ALLOWED = 3033
|
||||
XML_SCHEMAP_S4S_ELEM_MISSING = 3034
|
||||
XML_SCHEMAP_S4S_ATTR_NOT_ALLOWED = 3035
|
||||
XML_SCHEMAP_S4S_ATTR_MISSING = 3036
|
||||
XML_SCHEMAP_S4S_ATTR_INVALID_VALUE = 3037
|
||||
XML_SCHEMAP_SRC_ELEMENT_1 = 3038
|
||||
XML_SCHEMAP_SRC_ELEMENT_2_1 = 3039
|
||||
XML_SCHEMAP_SRC_ELEMENT_2_2 = 3040
|
||||
XML_SCHEMAP_SRC_ELEMENT_3 = 3041
|
||||
XML_SCHEMAP_P_PROPS_CORRECT_1 = 3042
|
||||
XML_SCHEMAP_P_PROPS_CORRECT_2_1 = 3043
|
||||
XML_SCHEMAP_P_PROPS_CORRECT_2_2 = 3044
|
||||
XML_SCHEMAP_E_PROPS_CORRECT_2 = 3045
|
||||
XML_SCHEMAP_E_PROPS_CORRECT_3 = 3046
|
||||
XML_SCHEMAP_E_PROPS_CORRECT_4 = 3047
|
||||
XML_SCHEMAP_E_PROPS_CORRECT_5 = 3048
|
||||
XML_SCHEMAP_E_PROPS_CORRECT_6 = 3049
|
||||
XML_SCHEMAP_SRC_INCLUDE = 3050
|
||||
XML_SCHEMAP_SRC_ATTRIBUTE_1 = 3051
|
||||
XML_SCHEMAP_SRC_ATTRIBUTE_2 = 3052
|
||||
XML_SCHEMAP_SRC_ATTRIBUTE_3_1 = 3053
|
||||
XML_SCHEMAP_SRC_ATTRIBUTE_3_2 = 3054
|
||||
XML_SCHEMAP_SRC_ATTRIBUTE_4 = 3055
|
||||
XML_SCHEMAP_NO_XMLNS = 3056
|
||||
XML_SCHEMAP_NO_XSI = 3057
|
||||
XML_SCHEMAP_COS_VALID_DEFAULT_1 = 3058
|
||||
XML_SCHEMAP_COS_VALID_DEFAULT_2_1 = 3059
|
||||
XML_SCHEMAP_COS_VALID_DEFAULT_2_2_1 = 3060
|
||||
XML_SCHEMAP_COS_VALID_DEFAULT_2_2_2 = 3061
|
||||
XML_SCHEMAP_CVC_SIMPLE_TYPE = 3062
|
||||
XML_SCHEMAP_COS_CT_EXTENDS_1_1 = 3063
|
||||
XML_SCHEMAP_SRC_IMPORT_1_1 = 3064
|
||||
XML_SCHEMAP_SRC_IMPORT_1_2 = 3065
|
||||
XML_SCHEMAP_SRC_IMPORT_2 = 3066
|
||||
XML_SCHEMAP_SRC_IMPORT_2_1 = 3067
|
||||
XML_SCHEMAP_SRC_IMPORT_2_2 = 3068
|
||||
XML_SCHEMAP_INTERNAL = 3069 # 3069 non-W3C
|
||||
XML_SCHEMAP_NOT_DETERMINISTIC = 3070 # 3070 non-W3C
|
||||
XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_1 = 3071
|
||||
XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_2 = 3072
|
||||
XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_3 = 3073
|
||||
XML_SCHEMAP_MG_PROPS_CORRECT_1 = 3074
|
||||
XML_SCHEMAP_MG_PROPS_CORRECT_2 = 3075
|
||||
XML_SCHEMAP_SRC_CT_1 = 3076
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_3 = 3077
|
||||
XML_SCHEMAP_AU_PROPS_CORRECT_2 = 3078
|
||||
XML_SCHEMAP_A_PROPS_CORRECT_2 = 3079
|
||||
XML_SCHEMAP_C_PROPS_CORRECT = 3080
|
||||
XML_SCHEMAP_SRC_REDEFINE = 3081
|
||||
XML_SCHEMAP_SRC_IMPORT = 3082
|
||||
XML_SCHEMAP_WARN_SKIP_SCHEMA = 3083
|
||||
XML_SCHEMAP_WARN_UNLOCATED_SCHEMA = 3084
|
||||
XML_SCHEMAP_WARN_ATTR_REDECL_PROH = 3085
|
||||
XML_SCHEMAP_WARN_ATTR_POINTLESS_PROH = 3086 # 3085
|
||||
XML_SCHEMAP_AG_PROPS_CORRECT = 3087 # 3086
|
||||
XML_SCHEMAP_COS_CT_EXTENDS_1_2 = 3088 # 3087
|
||||
XML_SCHEMAP_AU_PROPS_CORRECT = 3089 # 3088
|
||||
XML_SCHEMAP_A_PROPS_CORRECT_3 = 3090 # 3089
|
||||
XML_SCHEMAP_COS_ALL_LIMITED = 3091 # 3090
|
||||
XML_SCHEMATRONV_ASSERT = 4000
|
||||
XML_SCHEMATRONV_REPORT = 4001
|
||||
XML_MODULE_OPEN = 4900
|
||||
XML_MODULE_CLOSE = 4901
|
||||
XML_CHECK_FOUND_ELEMENT = 5000
|
||||
XML_CHECK_FOUND_ATTRIBUTE = 5001
|
||||
XML_CHECK_FOUND_TEXT = 5002
|
||||
XML_CHECK_FOUND_CDATA = 5003
|
||||
XML_CHECK_FOUND_ENTITYREF = 5004
|
||||
XML_CHECK_FOUND_ENTITY = 5005
|
||||
XML_CHECK_FOUND_PI = 5006
|
||||
XML_CHECK_FOUND_COMMENT = 5007
|
||||
XML_CHECK_FOUND_DOCTYPE = 5008
|
||||
XML_CHECK_FOUND_FRAGMENT = 5009
|
||||
XML_CHECK_FOUND_NOTATION = 5010
|
||||
XML_CHECK_UNKNOWN_NODE = 5011
|
||||
XML_CHECK_ENTITY_TYPE = 5012
|
||||
XML_CHECK_NO_PARENT = 5013
|
||||
XML_CHECK_NO_DOC = 5014
|
||||
XML_CHECK_NO_NAME = 5015
|
||||
XML_CHECK_NO_ELEM = 5016
|
||||
XML_CHECK_WRONG_DOC = 5017
|
||||
XML_CHECK_NO_PREV = 5018
|
||||
XML_CHECK_WRONG_PREV = 5019
|
||||
XML_CHECK_NO_NEXT = 5020
|
||||
XML_CHECK_WRONG_NEXT = 5021
|
||||
XML_CHECK_NOT_DTD = 5022
|
||||
XML_CHECK_NOT_ATTR = 5023
|
||||
XML_CHECK_NOT_ATTR_DECL = 5024
|
||||
XML_CHECK_NOT_ELEM_DECL = 5025
|
||||
XML_CHECK_NOT_ENTITY_DECL = 5026
|
||||
XML_CHECK_NOT_NS_DECL = 5027
|
||||
XML_CHECK_NO_HREF = 5028
|
||||
XML_CHECK_WRONG_PARENT = 5029
|
||||
XML_CHECK_NS_SCOPE = 5030
|
||||
XML_CHECK_NS_ANCESTOR = 5031
|
||||
XML_CHECK_NOT_UTF8 = 5032
|
||||
XML_CHECK_NO_DICT = 5033
|
||||
XML_CHECK_NOT_NCNAME = 5034
|
||||
XML_CHECK_OUTSIDE_DICT = 5035
|
||||
XML_CHECK_WRONG_NAME = 5036
|
||||
XML_CHECK_NAME_NOT_NULL = 5037
|
||||
XML_I18N_NO_NAME = 6000
|
||||
XML_I18N_NO_HANDLER = 6001
|
||||
XML_I18N_EXCESS_HANDLER = 6002
|
||||
XML_I18N_CONV_FAILED = 6003
|
||||
XML_I18N_NO_OUTPUT = 6004
|
||||
XML_BUF_OVERFLOW = 7000
|
||||
|
||||
# Relax NG validation result codes (enum xmlRelaxNGValidErr).
# Members are numbered consecutively from 0 (XML_RELAXNG_OK) to 39
# (XML_RELAXNG_ERR_TEXTWRONG).  This enum is the last entry before the
# "END: GENERATED CONSTANTS" marker, so it is presumably machine-generated;
# regenerate rather than hand-edit (TODO confirm the generator).
ctypedef enum xmlRelaxNGValidErr:
|
||||
XML_RELAXNG_OK = 0
|
||||
XML_RELAXNG_ERR_MEMORY = 1
|
||||
XML_RELAXNG_ERR_TYPE = 2
|
||||
XML_RELAXNG_ERR_TYPEVAL = 3
|
||||
XML_RELAXNG_ERR_DUPID = 4
|
||||
XML_RELAXNG_ERR_TYPECMP = 5
|
||||
XML_RELAXNG_ERR_NOSTATE = 6
|
||||
XML_RELAXNG_ERR_NODEFINE = 7
|
||||
XML_RELAXNG_ERR_LISTEXTRA = 8
|
||||
XML_RELAXNG_ERR_LISTEMPTY = 9
|
||||
XML_RELAXNG_ERR_INTERNODATA = 10
|
||||
XML_RELAXNG_ERR_INTERSEQ = 11
|
||||
XML_RELAXNG_ERR_INTEREXTRA = 12
|
||||
XML_RELAXNG_ERR_ELEMNAME = 13
|
||||
XML_RELAXNG_ERR_ATTRNAME = 14
|
||||
XML_RELAXNG_ERR_ELEMNONS = 15
|
||||
XML_RELAXNG_ERR_ATTRNONS = 16
|
||||
XML_RELAXNG_ERR_ELEMWRONGNS = 17
|
||||
XML_RELAXNG_ERR_ATTRWRONGNS = 18
|
||||
XML_RELAXNG_ERR_ELEMEXTRANS = 19
|
||||
XML_RELAXNG_ERR_ATTREXTRANS = 20
|
||||
XML_RELAXNG_ERR_ELEMNOTEMPTY = 21
|
||||
XML_RELAXNG_ERR_NOELEM = 22
|
||||
XML_RELAXNG_ERR_NOTELEM = 23
|
||||
XML_RELAXNG_ERR_ATTRVALID = 24
|
||||
XML_RELAXNG_ERR_CONTENTVALID = 25
|
||||
XML_RELAXNG_ERR_EXTRACONTENT = 26
|
||||
XML_RELAXNG_ERR_INVALIDATTR = 27
|
||||
XML_RELAXNG_ERR_DATAELEM = 28
|
||||
XML_RELAXNG_ERR_VALELEM = 29
|
||||
XML_RELAXNG_ERR_LISTELEM = 30
|
||||
XML_RELAXNG_ERR_DATATYPE = 31
|
||||
XML_RELAXNG_ERR_VALUE = 32
|
||||
XML_RELAXNG_ERR_LIST = 33
|
||||
XML_RELAXNG_ERR_NOGRAMMAR = 34
|
||||
XML_RELAXNG_ERR_EXTRADATA = 35
|
||||
XML_RELAXNG_ERR_LACKDATA = 36
|
||||
XML_RELAXNG_ERR_INTERNAL = 37
|
||||
XML_RELAXNG_ERR_ELEMWRONG = 38
|
||||
XML_RELAXNG_ERR_TEXTWRONG = 39
|
||||
# --- END: GENERATED CONSTANTS ---
|
||||
|
||||
cdef extern from "libxml/xmlerror.h":
|
||||
# Error record struct declared from "libxml/xmlerror.h".
# Only the fields listed here are declared for Cython code.
# NOTE(review): `domain` and `code` are plain ints here; presumably they hold
# values of the error-domain and error-code enums declared earlier in this
# file -- confirm against the libxml2 documentation.
# The meaning of the str1..str3 / int1 / int2 slots is not visible in this
# declaration file.
ctypedef struct xmlError:
|
||||
int domain
|
||||
int code
|
||||
char* message
|
||||
xmlErrorLevel level
|
||||
char* file
|
||||
char* str1
|
||||
char* str2
|
||||
char* str3
|
||||
int line
|
||||
int int1
|
||||
int int2
|
||||
void* node
|
||||
|
||||
# Error callback types and the functions that install them.
# All four declarations are marked `nogil`.
# xmlGenericErrorFunc: variadic callback taking a context pointer and a
# message string followed by `...` arguments.
ctypedef void (*xmlGenericErrorFunc)(void* ctxt, char* msg, ...) nogil
|
||||
# xmlStructuredErrorFunc: callback taking a user-data pointer and a pointer
# to an xmlError record.
ctypedef void (*xmlStructuredErrorFunc)(void* userData,
|
||||
xmlError* error) nogil
|
||||
|
||||
# Setters: each takes a context pointer plus a callback of the matching type.
cdef void xmlSetGenericErrorFunc(
|
||||
void* ctxt, xmlGenericErrorFunc func) nogil
|
||||
cdef void xmlSetStructuredErrorFunc(
|
||||
void* ctxt, xmlStructuredErrorFunc func) nogil
|
||||
|
||||
# Global variables declared from "libxml/globals.h": a structured error
# callback and an opaque context pointer.
# NOTE(review): presumably these are the values installed through
# xmlSetStructuredErrorFunc() -- confirm against the libxml2 documentation.
cdef extern from "libxml/globals.h":
|
||||
cdef xmlStructuredErrorFunc xmlStructuredError
|
||||
cdef void* xmlStructuredErrorContext
|
||||
250
.venv/lib/python3.7/site-packages/lxml/includes/xmlparser.pxd
Normal file
250
.venv/lib/python3.7/site-packages/lxml/includes/xmlparser.pxd
Normal file
@@ -0,0 +1,250 @@
|
||||
from libc.string cimport const_char
|
||||
|
||||
from lxml.includes.tree cimport (
|
||||
xmlDoc, xmlNode, xmlDict, xmlDtd, xmlChar, const_xmlChar)
|
||||
from lxml.includes.tree cimport xmlInputReadCallback, xmlInputCloseCallback
|
||||
from lxml.includes.xmlerror cimport xmlError, xmlStructuredErrorFunc
|
||||
|
||||
|
||||
# SAX callback function-pointer types declared from "libxml/parser.h".
# Every callback returns void and receives an opaque `void* ctx` as its first
# argument; string arguments use the const_xmlChar type cimported from
# lxml.includes.tree.  These typedefs are the field types of the xmlSAXHandler
# struct declared in this file.
# NOTE(review): unlike the error callbacks in xmlerror.pxd, none of these
# typedefs is marked `nogil` -- confirm that this is intentional.
cdef extern from "libxml/parser.h":
|
||||
# Namespace-aware element start callback (SAX2 variant).
ctypedef void (*startElementNsSAX2Func)(void* ctx,
|
||||
const_xmlChar* localname,
|
||||
const_xmlChar* prefix,
|
||||
const_xmlChar* URI,
|
||||
int nb_namespaces,
|
||||
const_xmlChar** namespaces,
|
||||
int nb_attributes,
|
||||
int nb_defaulted,
|
||||
const_xmlChar** attributes)
|
||||
|
||||
# Namespace-aware element end callback (SAX2 variant).
ctypedef void (*endElementNsSAX2Func)(void* ctx,
|
||||
const_xmlChar* localname,
|
||||
const_xmlChar* prefix,
|
||||
const_xmlChar* URI)
|
||||
|
||||
# Element start/end callbacks without separate namespace arguments.
ctypedef void (*startElementSAXFunc)(void* ctx, const_xmlChar* name, const_xmlChar** atts)
|
||||
|
||||
ctypedef void (*endElementSAXFunc)(void* ctx, const_xmlChar* name)
|
||||
|
||||
# Text callbacks: a character pointer plus an explicit length.
ctypedef void (*charactersSAXFunc)(void* ctx, const_xmlChar* ch, int len)
|
||||
|
||||
ctypedef void (*cdataBlockSAXFunc)(void* ctx, const_xmlChar* value, int len)
|
||||
|
||||
ctypedef void (*commentSAXFunc)(void* ctx, const_xmlChar* value)
|
||||
|
||||
ctypedef void (*processingInstructionSAXFunc)(void* ctx,
|
||||
const_xmlChar* target,
|
||||
const_xmlChar* data)
|
||||
|
||||
ctypedef void (*internalSubsetSAXFunc)(void* ctx,
|
||||
const_xmlChar* name,
|
||||
const_xmlChar* externalID,
|
||||
const_xmlChar* systemID)
|
||||
|
||||
# Document boundary callbacks take only the context pointer.
ctypedef void (*endDocumentSAXFunc)(void* ctx)
|
||||
|
||||
ctypedef void (*startDocumentSAXFunc)(void* ctx)
|
||||
|
||||
ctypedef void (*referenceSAXFunc)(void * ctx, const_xmlChar* name)
|
||||
|
||||
# Integer constant from the header; its value is not given in this file.
cdef int XML_SAX2_MAGIC
|
||||
|
||||
cdef extern from "libxml/tree.h":
|
||||
# Parser input descriptor: the fields declared for Cython are a line number,
# a length, three const_xmlChar pointers (base / cur / end) and a file name.
# NOTE(review): base/cur/end presumably delimit the current input buffer and
# the read position within it -- confirm against the libxml2 documentation.
ctypedef struct xmlParserInput:
|
||||
int line
|
||||
int length
|
||||
const_xmlChar* base
|
||||
const_xmlChar* cur
|
||||
const_xmlChar* end
|
||||
const_char *filename
|
||||
|
||||
# I/O-backed input buffer: an opaque context pointer plus read and close
# callbacks.  The callback types are cimported from lxml.includes.tree at the
# top of this file.
ctypedef struct xmlParserInputBuffer:
|
||||
void* context
|
||||
xmlInputReadCallback readcallback
|
||||
xmlInputCloseCallback closecallback
|
||||
|
||||
# Declared with an empty body (`pass`): no fields of this struct are
# accessible from Cython code; only the type name is available.
ctypedef struct xmlSAXHandlerV1:
|
||||
# same as xmlSAXHandler, but without namespaces
|
||||
pass
|
||||
|
||||
# SAX handler callback table.  Each callback field uses one of the *SAXFunc /
# *SAX2Func pointer types declared from "libxml/parser.h" in this file.
# Besides the callbacks it declares an `initialized` int, a structured error
# callback (`serror`) and an opaque `_private` pointer.
# NOTE(review): `initialized` is presumably compared against XML_SAX2_MAGIC
# to select the SAX2 callbacks -- confirm against the libxml2 documentation.
ctypedef struct xmlSAXHandler:
|
||||
internalSubsetSAXFunc internalSubset
|
||||
startElementNsSAX2Func startElementNs
|
||||
endElementNsSAX2Func endElementNs
|
||||
startElementSAXFunc startElement
|
||||
endElementSAXFunc endElement
|
||||
charactersSAXFunc characters
|
||||
cdataBlockSAXFunc cdataBlock
|
||||
referenceSAXFunc reference
|
||||
commentSAXFunc comment
|
||||
processingInstructionSAXFunc processingInstruction
|
||||
startDocumentSAXFunc startDocument
|
||||
endDocumentSAXFunc endDocument
|
||||
int initialized
|
||||
xmlStructuredErrorFunc serror
|
||||
void* _private
|
||||
|
||||
|
||||
# Single function declared from "libxml/SAX2.h".  The `nogil` on the extern
# line applies to everything declared inside this block.
# The argument is an untyped context pointer, matching the
# startDocumentSAXFunc callback signature declared in this file.
cdef extern from "libxml/SAX2.h" nogil:
|
||||
cdef void xmlSAX2StartDocument(void* ctxt)
|
||||
|
||||
|
||||
# Single function declared from "libxml/xmlIO.h" (block-level `nogil`).
# Returns a pointer to an xmlParserInputBuffer; `enc` is passed as a plain int.
# NOTE(review): `enc` is presumably a character-encoding enum value -- confirm.
cdef extern from "libxml/xmlIO.h" nogil:
|
||||
cdef xmlParserInputBuffer* xmlAllocParserInputBuffer(int enc)
|
||||
|
||||
|
||||
cdef extern from "libxml/parser.h":
|
||||
|
||||
# xmlDict helpers, declared here under the "libxml/parser.h" extern block:
# create, create-from-parent, free, and reference.  All are `nogil`.
# NOTE(review): xmlDictReference presumably increments a reference count that
# xmlDictFree decrements -- confirm against the libxml2 documentation.
cdef xmlDict* xmlDictCreate() nogil
|
||||
cdef xmlDict* xmlDictCreateSub(xmlDict* subdict) nogil
|
||||
cdef void xmlDictFree(xmlDict* sub) nogil
|
||||
cdef int xmlDictReference(xmlDict* dict) nogil
|
||||
|
||||
# Integer constants whose values come from the C header, not from this file.
cdef int XML_COMPLETE_ATTRS # SAX option for adding DTD default attributes
|
||||
cdef int XML_SKIP_IDS # SAX option for not building an XML ID dict
|
||||
|
||||
# Parser state values.  XML_PARSER_EOF is -1; the remaining states are
# numbered consecutively from 0 to 16.  This is the type of the `instate`
# field of xmlParserCtxt declared in this file.
ctypedef enum xmlParserInputState:
|
||||
XML_PARSER_EOF = -1 # nothing is to be parsed
|
||||
XML_PARSER_START = 0 # nothing has been parsed
|
||||
XML_PARSER_MISC = 1 # Misc* before internal subset
|
||||
XML_PARSER_PI = 2 # Within a processing instruction
|
||||
XML_PARSER_DTD = 3 # within some DTD content
|
||||
XML_PARSER_PROLOG = 4 # Misc* after internal subset
|
||||
XML_PARSER_COMMENT = 5 # within a comment
|
||||
XML_PARSER_START_TAG = 6 # within a start tag
|
||||
XML_PARSER_CONTENT = 7 # within the content
|
||||
XML_PARSER_CDATA_SECTION = 8 # within a CDATA section
|
||||
XML_PARSER_END_TAG = 9 # within a closing tag
|
||||
XML_PARSER_ENTITY_DECL = 10 # within an entity declaration
|
||||
XML_PARSER_ENTITY_VALUE = 11 # within an entity value in a decl
|
||||
XML_PARSER_ATTRIBUTE_VALUE = 12 # within an attribute value
|
||||
XML_PARSER_SYSTEM_LITERAL = 13 # within a SYSTEM value
|
||||
XML_PARSER_EPILOG = 14 # the Misc* after the last end tag
|
||||
XML_PARSER_IGNORE = 15 # within an IGNORED section
|
||||
XML_PARSER_PUBLIC_LITERAL = 16 # within a PUBLIC value
|
||||
|
||||
|
||||
# Parser context.  Only the fields listed here are declared for Cython code.
# Several int-like flags are declared as `bint`, so Cython treats them as
# booleans (wellFormed, recovery, disableSAX, replaceEntities, validate,
# html, progressive).
# `instate` uses the xmlParserInputState enum declared above; `lastError` is
# an embedded xmlError value (not a pointer); `sax` points at the
# xmlSAXHandler callback table.
# NOTE(review): `options` presumably holds OR-ed xmlParserOption flags --
# confirm against the libxml2 documentation.
ctypedef struct xmlParserCtxt:
|
||||
xmlDoc* myDoc
|
||||
xmlDict* dict
|
||||
int dictNames
|
||||
void* _private
|
||||
bint wellFormed
|
||||
bint recovery
|
||||
int options
|
||||
bint disableSAX
|
||||
int errNo
|
||||
xmlParserInputState instate
|
||||
bint replaceEntities
|
||||
int loadsubset # != 0 if enabled, int value == why
|
||||
bint validate
|
||||
xmlError lastError
|
||||
xmlNode* node
|
||||
xmlSAXHandler* sax
|
||||
void* userData
|
||||
int* spaceTab
|
||||
int spaceMax
|
||||
int nsNr
|
||||
bint html
|
||||
bint progressive
|
||||
int inSubset
|
||||
int charset
|
||||
xmlParserInput* input
|
||||
|
||||
# Parser option flags.  Every value is a distinct power of two (1 through
# 4194304), so flags can be combined with bitwise OR into the plain
# `int options` arguments taken by the functions declared below.
# The "libxml2 X.Y+ only" markers record the minimum library version for the
# flags that follow them.
ctypedef enum xmlParserOption:
|
||||
XML_PARSE_RECOVER = 1 # recover on errors
|
||||
XML_PARSE_NOENT = 2 # substitute entities
|
||||
XML_PARSE_DTDLOAD = 4 # load the external subset
|
||||
XML_PARSE_DTDATTR = 8 # default DTD attributes
|
||||
XML_PARSE_DTDVALID = 16 # validate with the DTD
|
||||
XML_PARSE_NOERROR = 32 # suppress error reports
|
||||
XML_PARSE_NOWARNING = 64 # suppress warning reports
|
||||
XML_PARSE_PEDANTIC = 128 # pedantic error reporting
|
||||
XML_PARSE_NOBLANKS = 256 # remove blank nodes
|
||||
XML_PARSE_SAX1 = 512 # use the SAX1 interface internally
|
||||
XML_PARSE_XINCLUDE = 1024 # Implement XInclude substitution
|
||||
XML_PARSE_NONET = 2048 # Forbid network access
|
||||
XML_PARSE_NODICT = 4096 # Do not reuse the context dictionary
|
||||
XML_PARSE_NSCLEAN = 8192 # remove redundant namespace declarations
|
||||
XML_PARSE_NOCDATA = 16384 # merge CDATA as text nodes
|
||||
XML_PARSE_NOXINCNODE = 32768 # do not generate XINCLUDE START/END nodes
|
||||
# libxml2 2.6.21+ only:
|
||||
XML_PARSE_COMPACT = 65536 # compact small text nodes
|
||||
# libxml2 2.7.0+ only:
|
||||
XML_PARSE_OLD10 = 131072 # parse using XML-1.0 before update 5
|
||||
XML_PARSE_NOBASEFIX = 262144 # do not fixup XINCLUDE xml:base uris
|
||||
XML_PARSE_HUGE = 524288 # relax any hardcoded limit from the parser
|
||||
# libxml2 2.7.3+ only:
|
||||
XML_PARSE_OLDSAX = 1048576 # parse using SAX2 interface before 2.7.0
|
||||
# libxml2 2.8.0+ only:
|
||||
XML_PARSE_IGNORE_ENC = 2097152 # ignore internal document encoding hint
|
||||
# libxml2 2.9.0+ only:
|
||||
XML_PARSE_BIG_LINES = 4194304 # Store big line numbers in text PSVI field
|
||||
|
||||
# Parser lifecycle and document-reading functions; all are declared `nogil`.
# Library-wide init / cleanup (no arguments).
cdef void xmlInitParser() nogil
|
||||
cdef void xmlCleanupParser() nogil
|
||||
|
||||
cdef int xmlLineNumbersDefault(int onoff) nogil
|
||||
# Context creation, configuration, reset and release.
cdef xmlParserCtxt* xmlNewParserCtxt() nogil
|
||||
cdef xmlParserInput* xmlNewIOInputStream(xmlParserCtxt* ctxt,
|
||||
xmlParserInputBuffer* input,
|
||||
int enc) nogil
|
||||
cdef int xmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) nogil
|
||||
cdef void xmlFreeParserCtxt(xmlParserCtxt* ctxt) nogil
|
||||
cdef void xmlCtxtReset(xmlParserCtxt* ctxt) nogil
|
||||
cdef void xmlClearParserCtxt(xmlParserCtxt* ctxt) nogil
|
||||
# Incremental parsing: feed `size` bytes of `chunk`; `terminate` is an int flag.
cdef int xmlParseChunk(xmlParserCtxt* ctxt,
|
||||
char* chunk, int size, int terminate) nogil
|
||||
# xmlCtxtRead* family: each takes an existing context, a source (string,
# file name, I/O callbacks or memory buffer), an encoding and an int
# `options` value, and returns an xmlDoc pointer.
cdef xmlDoc* xmlCtxtReadDoc(xmlParserCtxt* ctxt,
|
||||
char* cur, char* URL, char* encoding,
|
||||
int options) nogil
|
||||
cdef xmlDoc* xmlCtxtReadFile(xmlParserCtxt* ctxt,
|
||||
char* filename, char* encoding,
|
||||
int options) nogil
|
||||
cdef xmlDoc* xmlCtxtReadIO(xmlParserCtxt* ctxt,
|
||||
xmlInputReadCallback ioread,
|
||||
xmlInputCloseCallback ioclose,
|
||||
void* ioctx,
|
||||
char* URL, char* encoding,
|
||||
int options) nogil
|
||||
# Note: this is the only xmlCtxtRead* declaration here whose `encoding`
# parameter is const_char* rather than char*.
cdef xmlDoc* xmlCtxtReadMemory(xmlParserCtxt* ctxt,
|
||||
char* buffer, int size,
|
||||
char* filename, const_char* encoding,
|
||||
int options) nogil
|
||||
|
||||
# iterparse:
# Push-parser entry points (both `nogil`): create a push parser context from
# a SAX handler, user data and an initial chunk, and reset an existing context
# with a new initial chunk, file name and encoding.
|
||||
|
||||
cdef xmlParserCtxt* xmlCreatePushParserCtxt(xmlSAXHandler* sax,
|
||||
void* user_data,
|
||||
char* chunk,
|
||||
int size,
|
||||
char* filename) nogil
|
||||
|
||||
cdef int xmlCtxtResetPush(xmlParserCtxt* ctxt,
|
||||
char* chunk,
|
||||
int size,
|
||||
char* filename,
|
||||
char* encoding) nogil
|
||||
|
||||
# entity loaders:
# Callback type for loading an external entity, plus the getter and setter for
# the currently installed loader.  The callback receives a URL, an ID and the
# parser context, and returns an xmlParserInput pointer.  All are `nogil`.
|
||||
|
||||
ctypedef xmlParserInput* (*xmlExternalEntityLoader)(
|
||||
const_char * URL, const_char * ID, xmlParserCtxt* context) nogil
|
||||
cdef xmlExternalEntityLoader xmlGetExternalEntityLoader() nogil
|
||||
cdef void xmlSetExternalEntityLoader(xmlExternalEntityLoader f) nogil
|
||||
|
||||
# DTDs:
# Two ways to obtain an xmlDtd pointer: by external/system identifier, or
# from an xmlParserInputBuffer with an optional SAX handler.  Both are `nogil`.
|
||||
|
||||
cdef xmlDtd* xmlParseDTD(const_xmlChar* ExternalID, const_xmlChar* SystemID) nogil
|
||||
cdef xmlDtd* xmlIOParseDTD(xmlSAXHandler* sax,
|
||||
xmlParserInputBuffer* input,
|
||||
int enc) nogil
|
||||
|
||||
# Input-stream helpers declared from "libxml/parserInternals.h": three
# constructors returning an xmlParserInput pointer (empty, from a string
# buffer, from a file name), a matching free function, and an encoding switch
# on the parser context.
cdef extern from "libxml/parserInternals.h":
|
||||
# NOTE(review): this is the only declaration in the block without `nogil`
# -- confirm whether that is intentional.
cdef xmlParserInput* xmlNewInputStream(xmlParserCtxt* ctxt)
|
||||
cdef xmlParserInput* xmlNewStringInputStream(xmlParserCtxt* ctxt,
|
||||
char* buffer) nogil
|
||||
cdef xmlParserInput* xmlNewInputFromFile(xmlParserCtxt* ctxt,
|
||||
char* filename) nogil
|
||||
cdef void xmlFreeInputStream(xmlParserInput* input) nogil
|
||||
cdef int xmlSwitchEncoding(xmlParserCtxt* ctxt, int enc) nogil
|
||||
@@ -0,0 +1,35 @@
|
||||
from lxml.includes.tree cimport xmlDoc
|
||||
from lxml.includes.xmlparser cimport xmlSAXHandler
|
||||
from lxml.includes.xmlerror cimport xmlStructuredErrorFunc
|
||||
|
||||
# XML Schema parsing and validation API declared from "libxml/xmlschemas.h".
# The four struct types are declared without bodies, so they are opaque to
# Cython code and are only handled through pointers.
# NOTE(review): the two *SetStructuredErrors functions, xmlSchemaSAXUnplug and
# xmlSchemaIsValid are declared without `nogil`, unlike the rest of the block
# -- confirm whether that is intentional.
cdef extern from "libxml/xmlschemas.h":
|
||||
ctypedef struct xmlSchema
|
||||
ctypedef struct xmlSchemaParserCtxt
|
||||
|
||||
ctypedef struct xmlSchemaSAXPlugStruct
|
||||
ctypedef struct xmlSchemaValidCtxt
|
||||
|
||||
# Only one validation option is declared here.
ctypedef enum xmlSchemaValidOption:
|
||||
XML_SCHEMA_VAL_VC_I_CREATE = 1
|
||||
|
||||
cdef xmlSchemaValidCtxt* xmlSchemaNewValidCtxt(xmlSchema* schema) nogil
|
||||
# Install a structured error callback (and its context pointer) on a schema
# parser context or on a validation context.
cdef void xmlSchemaSetParserStructuredErrors(xmlSchemaParserCtxt* ctxt,
|
||||
xmlStructuredErrorFunc serror, void *ctx)
|
||||
cdef void xmlSchemaSetValidStructuredErrors(xmlSchemaValidCtxt* ctxt,
|
||||
xmlStructuredErrorFunc serror, void *ctx)
|
||||
|
||||
cdef int xmlSchemaValidateDoc(xmlSchemaValidCtxt* ctxt, xmlDoc* doc) nogil
|
||||
cdef xmlSchema* xmlSchemaParse(xmlSchemaParserCtxt* ctxt) nogil
|
||||
# Schema parser contexts can be created from a URL or from a parsed document.
cdef xmlSchemaParserCtxt* xmlSchemaNewParserCtxt(char* URL) nogil
|
||||
cdef xmlSchemaParserCtxt* xmlSchemaNewDocParserCtxt(xmlDoc* doc) nogil
|
||||
# One free function per object kind declared above.
cdef void xmlSchemaFree(xmlSchema* schema) nogil
|
||||
cdef void xmlSchemaFreeParserCtxt(xmlSchemaParserCtxt* ctxt) nogil
|
||||
cdef void xmlSchemaFreeValidCtxt(xmlSchemaValidCtxt* ctxt) nogil
|
||||
cdef int xmlSchemaSetValidOptions(xmlSchemaValidCtxt* ctxt,
|
||||
int options) nogil
|
||||
|
||||
# SAX plug/unplug: xmlSchemaSAXPlug takes pointers to the caller's SAX handler
# pointer and user-data pointer and returns a plug struct, which
# xmlSchemaSAXUnplug takes back.
cdef xmlSchemaSAXPlugStruct* xmlSchemaSAXPlug(xmlSchemaValidCtxt* ctxt,
|
||||
xmlSAXHandler** sax,
|
||||
void** data) nogil
|
||||
cdef int xmlSchemaSAXUnplug(xmlSchemaSAXPlugStruct* sax_plug)
|
||||
cdef int xmlSchemaIsValid(xmlSchemaValidCtxt* ctxt)
|
||||
135
.venv/lib/python3.7/site-packages/lxml/includes/xpath.pxd
Normal file
135
.venv/lib/python3.7/site-packages/lxml/includes/xpath.pxd
Normal file
@@ -0,0 +1,135 @@
|
||||
from lxml.includes cimport tree
|
||||
from lxml.includes cimport xmlerror
|
||||
|
||||
from libc.string cimport const_char
|
||||
from lxml.includes.tree cimport xmlChar, const_xmlChar
|
||||
|
||||
# Cython declarations for the XPath types and functions in libxml/xpath.h.
# NOTE(review): the structs below presumably list only the fields this
# package needs rather than the full C layout - confirm against the header.
cdef extern from "libxml/xpath.h":
    # Type tag stored in xmlXPathObject.type.
    ctypedef enum xmlXPathObjectType:
        XPATH_UNDEFINED = 0
        XPATH_NODESET = 1
        XPATH_BOOLEAN = 2
        XPATH_NUMBER = 3
        XPATH_STRING = 4
        XPATH_POINT = 5
        XPATH_RANGE = 6
        XPATH_LOCATIONSET = 7
        XPATH_USERS = 8
        XPATH_XSLT_TREE = 9

    # XPath/XPointer error codes with their numeric values spelled out.
    ctypedef enum xmlXPathError:
        XPATH_EXPRESSION_OK = 0
        XPATH_NUMBER_ERROR = 1
        XPATH_UNFINISHED_LITERAL_ERROR = 2
        XPATH_START_LITERAL_ERROR = 3
        XPATH_VARIABLE_REF_ERROR = 4
        XPATH_UNDEF_VARIABLE_ERROR = 5
        XPATH_INVALID_PREDICATE_ERROR = 6
        XPATH_EXPR_ERROR = 7
        XPATH_UNCLOSED_ERROR = 8
        XPATH_UNKNOWN_FUNC_ERROR = 9
        XPATH_INVALID_OPERAND = 10
        XPATH_INVALID_TYPE = 11
        XPATH_INVALID_ARITY = 12
        XPATH_INVALID_CTXT_SIZE = 13
        XPATH_INVALID_CTXT_POSITION = 14
        XPATH_MEMORY_ERROR = 15
        XPTR_SYNTAX_ERROR = 16
        XPTR_RESOURCE_ERROR = 17
        XPTR_SUB_RESOURCE_ERROR = 18
        XPATH_UNDEF_PREFIX_ERROR = 19
        XPATH_ENCODING_ERROR = 20
        XPATH_INVALID_CHAR_ERROR = 21
        XPATH_INVALID_CTXT = 22

    # Node set: a node-pointer table plus two counters.
    ctypedef struct xmlNodeSet:
        int nodeNr
        int nodeMax
        tree.xmlNode** nodeTab

    # XPath value; ``type`` is an xmlXPathObjectType tag.
    ctypedef struct xmlXPathObject:
        xmlXPathObjectType type
        xmlNodeSet* nodesetval
        bint boolval
        double floatval
        xmlChar* stringval

    # Evaluation context.
    ctypedef struct xmlXPathContext:
        tree.xmlDoc* doc
        tree.xmlNode* node
        tree.xmlDict* dict
        tree.xmlHashTable* nsHash
        const_xmlChar* function
        const_xmlChar* functionURI
        xmlerror.xmlStructuredErrorFunc error
        xmlerror.xmlError lastError
        void* userData

    # Parser context handed to extension functions (see xmlXPathFunction).
    ctypedef struct xmlXPathParserContext:
        xmlXPathContext* context
        xmlXPathObject* value
        tree.xmlNode* ancestor
        int error

    # Opaque compiled expression handle.
    ctypedef struct xmlXPathCompExpr

    # Callback signatures: an XPath extension function, and a lookup hook
    # that returns one for a given name / namespace URI.
    ctypedef void (*xmlXPathFunction)(xmlXPathParserContext* ctxt, int nargs) nogil
    ctypedef xmlXPathFunction (*xmlXPathFuncLookupFunc)(void* ctxt,
                                                        const_xmlChar* name,
                                                        const_xmlChar* ns_uri) nogil

    # Context / expression lifecycle and evaluation; all declared ``nogil``.
    cdef xmlXPathContext* xmlXPathNewContext(tree.xmlDoc* doc) nogil
    cdef xmlXPathObject* xmlXPathEvalExpression(const_xmlChar* str,
                                                xmlXPathContext* ctxt) nogil
    cdef xmlXPathObject* xmlXPathCompiledEval(xmlXPathCompExpr* comp,
                                              xmlXPathContext* ctxt) nogil
    cdef xmlXPathCompExpr* xmlXPathCompile(const_xmlChar* str) nogil
    cdef xmlXPathCompExpr* xmlXPathCtxtCompile(xmlXPathContext* ctxt,
                                               const_xmlChar* str) nogil
    cdef void xmlXPathFreeContext(xmlXPathContext* ctxt) nogil
    cdef void xmlXPathFreeCompExpr(xmlXPathCompExpr* comp) nogil
    cdef void xmlXPathFreeObject(xmlXPathObject* obj) nogil
    cdef int xmlXPathRegisterNs(xmlXPathContext* ctxt,
                                const_xmlChar* prefix, const_xmlChar* ns_uri) nogil

    # Node set allocation / release.
    cdef xmlNodeSet* xmlXPathNodeSetCreate(tree.xmlNode* val) nogil
    cdef void xmlXPathFreeNodeSet(xmlNodeSet* val) nogil
|
||||
|
||||
|
||||
# Cython declarations for libxml/xpathInternals.h: registration of functions
# and variables on an XPath context, value stack access, and XPath object
# constructors.  Every function here is declared ``nogil``.
cdef extern from "libxml/xpathInternals.h":
    # Function / variable registration on an xmlXPathContext.
    cdef int xmlXPathRegisterFunc(xmlXPathContext* ctxt,
                                  const_xmlChar* name,
                                  xmlXPathFunction f) nogil
    cdef int xmlXPathRegisterFuncNS(xmlXPathContext* ctxt,
                                    const_xmlChar* name,
                                    const_xmlChar* ns_uri,
                                    xmlXPathFunction f) nogil
    cdef void xmlXPathRegisterFuncLookup(xmlXPathContext *ctxt,
                                         xmlXPathFuncLookupFunc f,
                                         void *funcCtxt) nogil
    cdef int xmlXPathRegisterVariable(xmlXPathContext *ctxt,
                                      const_xmlChar* name,
                                      xmlXPathObject* value) nogil
    cdef int xmlXPathRegisterVariableNS(xmlXPathContext *ctxt,
                                        const_xmlChar* name,
                                        const_xmlChar* ns_uri,
                                        xmlXPathObject* value) nogil
    cdef void xmlXPathRegisteredVariablesCleanup(xmlXPathContext *ctxt) nogil
    cdef void xmlXPathRegisteredNsCleanup(xmlXPathContext *ctxt) nogil
    # Value stack of the parser context.
    cdef xmlXPathObject* valuePop (xmlXPathParserContext *ctxt) nogil
    cdef int valuePush(xmlXPathParserContext* ctxt, xmlXPathObject *value) nogil

    # Constructors for xmlXPathObject values.
    # NOTE(review): the New*/Wrap* pairs presumably differ in whether the
    # argument is copied or adopted - confirm in the libxml2 documentation.
    cdef xmlXPathObject* xmlXPathNewCString(const_char *val) nogil
    cdef xmlXPathObject* xmlXPathWrapCString(const_char * val) nogil
    cdef xmlXPathObject* xmlXPathNewString(const_xmlChar *val) nogil
    cdef xmlXPathObject* xmlXPathWrapString(const_xmlChar * val) nogil
    cdef xmlXPathObject* xmlXPathNewFloat(double val) nogil
    cdef xmlXPathObject* xmlXPathNewBoolean(int val) nogil
    cdef xmlXPathObject* xmlXPathNewNodeSet(tree.xmlNode* val) nogil
    cdef xmlXPathObject* xmlXPathNewValueTree(tree.xmlNode* val) nogil
    cdef void xmlXPathNodeSetAdd(xmlNodeSet* cur,
                                 tree.xmlNode* val) nogil
    cdef void xmlXPathNodeSetAddUnique(xmlNodeSet* cur,
                                       tree.xmlNode* val) nogil
    cdef xmlXPathObject* xmlXPathWrapNodeSet(xmlNodeSet* val) nogil
    # Error reporting; ``error`` is an int (see the xmlXPathError codes).
    cdef void xmlXPathErr(xmlXPathParserContext* ctxt, int error) nogil
|
||||
191
.venv/lib/python3.7/site-packages/lxml/includes/xslt.pxd
Normal file
191
.venv/lib/python3.7/site-packages/lxml/includes/xslt.pxd
Normal file
@@ -0,0 +1,191 @@
|
||||
from lxml.includes.tree cimport xmlDoc, xmlNode, xmlDict, xmlChar, const_xmlChar, xmlOutputBuffer
|
||||
from lxml.includes.xmlerror cimport xmlGenericErrorFunc
|
||||
from lxml.includes.xpath cimport xmlXPathContext, xmlXPathFunction
|
||||
|
||||
from libc.string cimport const_char
|
||||
|
||||
# Global integer variables declared from libxslt/xslt.h.
cdef extern from "libxslt/xslt.h":
    cdef int xsltLibxsltVersion
    cdef int xsltMaxDepth
|
||||
|
||||
# Version constant taken from libxslt/xsltconfig.h, declared to Cython as
# an int.
cdef extern from "libxslt/xsltconfig.h":
    cdef int LIBXSLT_VERSION
|
||||
|
||||
# Cython declarations for the stylesheet / transform-context types in
# libxslt/xsltInternals.h.
# NOTE(review): the structs presumably declare only the fields this package
# uses - confirm against the header.
cdef extern from "libxslt/xsltInternals.h":
    # Enum members are declared without explicit values; the trailing
    # comments record the numeric value of each.
    ctypedef enum xsltTransformState:
        XSLT_STATE_OK       # 0
        XSLT_STATE_ERROR    # 1
        XSLT_STATE_STOPPED  # 2

    ctypedef struct xsltDocument:
        xmlDoc* doc

    ctypedef struct xsltStylesheet:
        xmlChar* encoding
        xmlDoc* doc
        int errors

    ctypedef struct xsltTransformContext:
        xsltStylesheet* style
        xmlXPathContext* xpathCtxt
        xsltDocument* document
        void* _private
        xmlDict* dict
        int profile
        xmlNode* node
        xmlDoc* output
        xmlNode* insert
        xmlNode* inst
        xsltTransformState state

    # Opaque types, only used through pointers.
    ctypedef struct xsltStackElem

    ctypedef struct xsltTemplate

    cdef xsltStylesheet* xsltParseStylesheetDoc(xmlDoc* doc) nogil
    cdef void xsltFreeStylesheet(xsltStylesheet* sheet) nogil
|
||||
|
||||
# Helper for reading a stylesheet's encoding into ``result_var``.  It is
# declared under libxslt/imports.h, but per the original note the
# definition actually lives in "etree_defs.h".
cdef extern from "libxslt/imports.h":
    # actually defined in "etree_defs.h"
    cdef void LXML_GET_XSLT_ENCODING(const_xmlChar* result_var, xsltStylesheet* style)
|
||||
|
||||
# Cython declarations for registering XSLT extension functions and elements
# (libxslt/extensions.h).
cdef extern from "libxslt/extensions.h":
    # Callback signature for an extension element implementation.
    ctypedef void (*xsltTransformFunction)(xsltTransformContext* ctxt,
                                           xmlNode* context_node,
                                           xmlNode* inst,
                                           void* precomp_unused) nogil

    cdef int xsltRegisterExtFunction(xsltTransformContext* ctxt,
                                     const_xmlChar* name,
                                     const_xmlChar* URI,
                                     xmlXPathFunction function) nogil
    cdef int xsltRegisterExtModuleFunction(const_xmlChar* name, const_xmlChar* URI,
                                           xmlXPathFunction function) nogil
    # Declared without ``nogil``, unlike its neighbours.
    cdef int xsltUnregisterExtModuleFunction(const_xmlChar* name, const_xmlChar* URI)
    cdef xmlXPathFunction xsltExtModuleFunctionLookup(
        const_xmlChar* name, const_xmlChar* URI) nogil
    cdef int xsltRegisterExtPrefix(xsltStylesheet* style,
                                   const_xmlChar* prefix, const_xmlChar* URI) nogil
    cdef int xsltRegisterExtElement(xsltTransformContext* ctxt,
                                    const_xmlChar* name, const_xmlChar* URI,
                                    xsltTransformFunction function) nogil
|
||||
|
||||
# Cython declarations for the document loader hook in libxslt/documents.h.
cdef extern from "libxslt/documents.h":
    # Passed as the ``type`` argument of an xsltDocLoaderFunc.
    ctypedef enum xsltLoadType:
        XSLT_LOAD_START
        XSLT_LOAD_STYLESHEET
        XSLT_LOAD_DOCUMENT

    # Signature of a document loader callback.
    ctypedef xmlDoc* (*xsltDocLoaderFunc)(const_xmlChar* URI, xmlDict* dict,
                                          int options,
                                          void* ctxt,
                                          xsltLoadType type) nogil
    # The library's default loader, and the setter that installs another one.
    cdef xsltDocLoaderFunc xsltDocDefaultLoader
    cdef void xsltSetLoaderFunc(xsltDocLoaderFunc f) nogil
|
||||
|
||||
# Cython declarations for applying stylesheets and managing transform
# contexts (libxslt/transform.h).  Every function here is declared ``nogil``.
cdef extern from "libxslt/transform.h":
    cdef xmlDoc* xsltApplyStylesheet(xsltStylesheet* style, xmlDoc* doc,
                                     const_char** params) nogil
    cdef xmlDoc* xsltApplyStylesheetUser(xsltStylesheet* style, xmlDoc* doc,
                                         const_char** params, const_char* output,
                                         void* profile,
                                         xsltTransformContext* context) nogil
    cdef void xsltProcessOneNode(xsltTransformContext* ctxt,
                                 xmlNode* contextNode,
                                 xsltStackElem* params) nogil
    cdef xsltTransformContext* xsltNewTransformContext(xsltStylesheet* style,
                                                       xmlDoc* doc) nogil
    cdef void xsltFreeTransformContext(xsltTransformContext* context) nogil
    cdef void xsltApplyOneTemplate(xsltTransformContext* ctxt,
                                   xmlNode* contextNode, xmlNode* list,
                                   xsltTemplate* templ,
                                   xsltStackElem* params) nogil
|
||||
|
||||
|
||||
# Cython declarations for result serialisation and error-handler hooks
# (libxslt/xsltutils.h).
cdef extern from "libxslt/xsltutils.h":
    # Result serialisation to a string, a file name or an output buffer.
    cdef int xsltSaveResultToString(xmlChar** doc_txt_ptr,
                                    int* doc_txt_len,
                                    xmlDoc* result,
                                    xsltStylesheet* style) nogil
    cdef int xsltSaveResultToFilename(const_char *URL,
                                      xmlDoc* result,
                                      xsltStylesheet* style,
                                      int compression) nogil
    cdef int xsltSaveResultTo(xmlOutputBuffer* buf,
                              xmlDoc* result,
                              xsltStylesheet* style) nogil
    # Generic error handler and its context, declared as global variables.
    cdef xmlGenericErrorFunc xsltGenericError
    cdef void *xsltGenericErrorContext
    # Handler setters; the handlers are varargs callbacks.
    cdef void xsltSetGenericErrorFunc(
        void* ctxt, void (*handler)(void* ctxt, char* msg, ...)) nogil
    cdef void xsltSetTransformErrorFunc(
        xsltTransformContext*, void* ctxt,
        void (*handler)(void* ctxt, char* msg, ...) nogil) nogil
    # The remaining two are declared without ``nogil``.
    cdef void xsltTransformError(xsltTransformContext* ctxt,
                                 xsltStylesheet* style,
                                 xmlNode* node, char* msg, ...)
    cdef void xsltSetCtxtParseOptions(
        xsltTransformContext* ctxt, int options)
|
||||
|
||||
|
||||
# Cython declarations for the XSLT security preferences API
# (libxslt/security.h).  Every function here is declared ``nogil``.
cdef extern from "libxslt/security.h":
    # Opaque preferences handle.
    ctypedef struct xsltSecurityPrefs
    # Selects which operation a security check applies to.
    ctypedef enum xsltSecurityOption:
        XSLT_SECPREF_READ_FILE = 1
        XSLT_SECPREF_WRITE_FILE = 2
        XSLT_SECPREF_CREATE_DIRECTORY = 3
        XSLT_SECPREF_READ_NETWORK = 4
        XSLT_SECPREF_WRITE_NETWORK = 5

    # Signature of a security check callback; xsltSecurityForbid and
    # xsltSecurityAllow below have this same signature.
    ctypedef int (*xsltSecurityCheck)(xsltSecurityPrefs* sec,
                                      xsltTransformContext* ctxt,
                                      char* value) nogil

    cdef xsltSecurityPrefs* xsltNewSecurityPrefs() nogil
    cdef void xsltFreeSecurityPrefs(xsltSecurityPrefs* sec) nogil
    cdef int xsltSecurityForbid(xsltSecurityPrefs* sec,
                                xsltTransformContext* ctxt,
                                char* value) nogil
    cdef int xsltSecurityAllow(xsltSecurityPrefs* sec,
                               xsltTransformContext* ctxt,
                               char* value) nogil
    cdef int xsltSetSecurityPrefs(xsltSecurityPrefs* sec,
                                  xsltSecurityOption option,
                                  xsltSecurityCheck func) nogil
    cdef xsltSecurityCheck xsltGetSecurityPrefs(
        xsltSecurityPrefs* sec,
        xsltSecurityOption option) nogil
    cdef int xsltSetCtxtSecurityPrefs(xsltSecurityPrefs* sec,
                                      xsltTransformContext* ctxt) nogil
    # NOTE(review): a profiling accessor declared under the security header
    # here - confirm which libxslt header actually provides it.
    cdef xmlDoc* xsltGetProfileInformation(xsltTransformContext* ctxt) nogil
|
||||
|
||||
# Cython declarations for passing user parameters to a transform context
# (libxslt/variables.h).  Both functions are declared without ``nogil``.
cdef extern from "libxslt/variables.h":
    cdef int xsltQuoteUserParams(xsltTransformContext* ctxt,
                                 const_char** params)
    cdef int xsltQuoteOneUserParam(xsltTransformContext* ctxt,
                                   const_xmlChar* name,
                                   const_xmlChar* value)
|
||||
|
||||
# Namespace constants and the extras registration entry point from
# libxslt/extra.h.
cdef extern from "libxslt/extra.h":
    # Namespace URI constants of the supported extension vocabularies.
    const_xmlChar* XSLT_LIBXSLT_NAMESPACE
    const_xmlChar* XSLT_XALAN_NAMESPACE
    const_xmlChar* XSLT_SAXON_NAMESPACE
    const_xmlChar* XSLT_XT_NAMESPACE

    # An XPath extension function, declared as a global variable.
    cdef xmlXPathFunction xsltFunctionNodeSet
    cdef void xsltRegisterAllExtras() nogil
|
||||
|
||||
# Cython declarations for the EXSLT extension library (libexslt/exslt.h).
cdef extern from "libexslt/exslt.h":
    cdef void exsltRegisterAll() nogil

    # libexslt 1.1.25+
    # Namespace URI constants for the individual EXSLT modules.
    const_xmlChar* EXSLT_DATE_NAMESPACE
    const_xmlChar* EXSLT_SETS_NAMESPACE
    const_xmlChar* EXSLT_MATH_NAMESPACE
    const_xmlChar* EXSLT_STRINGS_NAMESPACE

    # Per-module registration on an XPath context under the given prefix;
    # declared without ``nogil``.
    cdef int exsltDateXpathCtxtRegister(xmlXPathContext* ctxt, const_xmlChar* prefix)
    cdef int exsltSetsXpathCtxtRegister(xmlXPathContext* ctxt, const_xmlChar* prefix)
    cdef int exsltMathXpathCtxtRegister(xmlXPathContext* ctxt, const_xmlChar* prefix)
    cdef int exsltStrXpathCtxtRegister(xmlXPathContext* ctxt, const_xmlChar* prefix)
|
||||
|
||||
334
.venv/lib/python3.7/site-packages/lxml/isoschematron/__init__.py
Normal file
334
.venv/lib/python3.7/site-packages/lxml/isoschematron/__init__.py
Normal file
@@ -0,0 +1,334 @@
|
||||
"""The ``lxml.isoschematron`` package implements ISO Schematron support on top
|
||||
of the pure-xslt 'skeleton' implementation.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os.path
|
||||
from lxml import etree as _etree # due to validator __init__ signature
|
||||
|
||||
|
||||
# some compat stuff, borrowed from lxml.html
# On Python 3 the names ``unicode`` and ``basestring`` do not exist, so the
# bare name lookups raise NameError and both are aliased to ``str``.
try:
    unicode
except NameError:
    # Python 3
    unicode = str
try:
    basestring
except NameError:
    # Python 3
    basestring = str


# Public API of this package.
__all__ = ['extract_xsd', 'extract_rng', 'iso_dsdl_include',
           'iso_abstract_expand', 'iso_svrl_for_xslt1',
           'svrl_validation_errors', 'schematron_schema_valid',
           'stylesheet_params', 'Schematron']


# some namespaces
#FIXME: Maybe lxml should provide a dedicated place for common namespace
#FIXME: definitions?
XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema"
RELAXNG_NS = "http://relaxng.org/ns/structure/1.0"
SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron"
SVRL_NS = "http://purl.oclc.org/dsdl/svrl"


# some helpers
# Qualified ('{namespace}localname') root tags of Schematron and XML Schema
# documents, and the directory holding the bundled XSL/RNG resources.
_schematron_root = '{%s}schema' % SCHEMATRON_NS
_xml_schema_root = '{%s}schema' % XML_SCHEMA_NS
_resources_dir = os.path.join(os.path.dirname(__file__), 'resources')


# the iso-schematron skeleton implementation steps aka xsl transformations
# NOTE: each stylesheet is parsed and compiled when this module is imported.
extract_xsd = _etree.XSLT(_etree.parse(
    os.path.join(_resources_dir, 'xsl', 'XSD2Schtrn.xsl')))
extract_rng = _etree.XSLT(_etree.parse(
    os.path.join(_resources_dir, 'xsl', 'RNG2Schtrn.xsl')))
iso_dsdl_include = _etree.XSLT(_etree.parse(
    os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1',
                 'iso_dsdl_include.xsl')))
iso_abstract_expand = _etree.XSLT(_etree.parse(
    os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1',
                 'iso_abstract_expand.xsl')))
iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse(
    os.path.join(_resources_dir,
                 'xsl', 'iso-schematron-xslt1', 'iso_svrl_for_xslt1.xsl')))


# svrl result accessors
# Selects every ``svrl:failed-assert`` element of a validation report.
svrl_validation_errors = _etree.XPath(
    '//svrl:failed-assert', namespaces={'svrl': SVRL_NS})


# RelaxNG validator for schematron schemas
schematron_schema_valid = _etree.RelaxNG(
    file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))
|
||||
|
||||
|
||||
def stylesheet_params(**kwargs):
    """Build a dictionary of XSL stylesheet parameters from keyword args.

    Stylesheet parameters have to be XPath expressions, for example:

    * string expressions, like "'5'"
    * simple (number) expressions, like "5"
    * valid XPath expressions, like "/a/b/text()"

    Each keyword argument is converted as follows:

    * ``None`` is rejected with a TypeError.
    * A string is wrapped with ``XSLT.strparam()``.
    * An ``XPath`` object is passed through unchanged.
    * Anything else is converted to its text representation.
    """
    params = {}
    for name in kwargs:
        value = kwargs[name]
        if value is None:
            raise TypeError('None not allowed as a stylesheet parameter')
        if isinstance(value, basestring):
            # quote plain strings so they are not evaluated as XPath
            params[name] = _etree.XSLT.strparam(value)
        elif isinstance(value, _etree.XPath):
            params[name] = value
        else:
            params[name] = unicode(value)
    return params
|
||||
|
||||
|
||||
# helper function for use in Schematron __init__
|
||||
def _stylesheet_param_dict(paramsDict, kwargsDict):
    """Merge two parameter mappings and wrap them as stylesheet arguments.

    The result starts as a copy of ``paramsDict`` (the caller's mapping is
    never modified), is overridden by every ``kwargsDict`` entry whose value
    is not None, and is finally passed through ``stylesheet_params()``.
    """
    # work on a copy: paramsDict may be a shared mutable default argument
    merged = dict(paramsDict)
    merged.update(
        (name, value) for name, value in kwargsDict.items()
        if value is not None)  # None values do not override
    return stylesheet_params(**merged)
|
||||
|
||||
|
||||
class Schematron(_etree._Validator):
    """An ISO Schematron validator.

    Pass a root Element or an ElementTree to turn it into a validator.
    Alternatively, pass a filename as keyword argument 'file' to parse from
    the file system.

    Schematron is a less well known, but very powerful schema language.
    The main idea is to use the capabilities of XPath to put restrictions on
    the structure and the content of XML documents.

    The standard behaviour is to fail on ``failed-assert`` findings only
    (``ASSERTS_ONLY``).  To change this, you can either pass a report filter
    function to the ``error_finder`` parameter (e.g. ``ASSERTS_AND_REPORTS``
    or a custom ``XPath`` object), or subclass isoschematron.Schematron for
    complete control of the validation process.

    Built on the Schematron language 'reference' skeleton pure-xslt
    implementation, the validator is created as an XSLT 1.0 stylesheet using
    these steps:

     0) (Extract from XML Schema or RelaxNG schema)
     1) Process inclusions
     2) Process abstract patterns
     3) Compile the schematron schema to XSLT

    The ``include`` and ``expand`` keyword arguments can be used to switch off
    steps 1) and 2).
    To set parameters for steps 1), 2) and 3) hand parameter dictionaries to the
    keyword arguments ``include_params``, ``expand_params`` or
    ``compile_params``.
    For convenience, the compile-step parameter ``phase`` is also exposed as a
    keyword argument ``phase``. This takes precedence if the parameter is also
    given in the parameter dictionary.

    If ``store_schematron`` is set to True, the (included-and-expanded)
    schematron document tree is stored and available through the ``schematron``
    property.
    If ``store_xslt`` is set to True, the validation XSLT document tree will be
    stored and can be retrieved through the ``validator_xslt`` property.
    With ``store_report`` set to True (default: False), the resulting validation
    report document gets stored and can be accessed as the ``validation_report``
    property.

    Raises ``SchematronParseError`` if the input cannot be read, is neither a
    schematron schema nor schematron-extractable, or fails schematron schema
    validation.  Raises ``ValueError`` if neither a tree nor a file yields a
    root element.

    Here is a usage example::

      >>> from lxml import etree
      >>> from lxml.isoschematron import Schematron

      >>> schematron = Schematron(etree.XML('''
      ... <schema xmlns="http://purl.oclc.org/dsdl/schematron" >
      ...   <pattern id="id_only_attribute">
      ...     <title>id is the only permitted attribute name</title>
      ...     <rule context="*">
      ...       <report test="@*[not(name()='id')]">Attribute
      ...         <name path="@*[not(name()='id')]"/> is forbidden<name/>
      ...       </report>
      ...     </rule>
      ...   </pattern>
      ... </schema>'''),
      ... error_finder=Schematron.ASSERTS_AND_REPORTS)

      >>> xml = etree.XML('''
      ... <AAA name="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC color="ccc"/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      False

      >>> xml = etree.XML('''
      ... <AAA id="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      True
    """

    # libxml2 error categorization for validation errors
    _domain = _etree.ErrorDomains.SCHEMATRONV
    _level = _etree.ErrorLevels.ERROR
    _error_type = _etree.ErrorTypes.SCHEMATRONV_ASSERT

    # convenience definitions for common behaviours
    ASSERTS_ONLY = svrl_validation_errors  # Default
    ASSERTS_AND_REPORTS = _etree.XPath(
        '//svrl:failed-assert | //svrl:successful-report',
        namespaces={'svrl': SVRL_NS})

    def _extract(self, element):
        """Extract embedded schematron schema from non-schematron host schema.

        This method will only be called by __init__ if the given schema document
        is not a schematron schema by itself.
        Must return a schematron schema document tree or None.
        """
        schematron = None
        if element.tag == _xml_schema_root:
            schematron = self._extract_xsd(element)
        # Use .get(): a root element without any namespace has prefix None
        # and no matching nsmap entry.  Indexing would raise a bare KeyError
        # instead of letting __init__ report SchematronParseError.
        elif element.nsmap.get(element.prefix) == RELAXNG_NS:
            # RelaxNG does not have a single unique root element
            schematron = self._extract_rng(element)
        return schematron

    # customization points
    # etree.XSLT objects that provide the extract, include, expand, compile
    # steps
    _extract_xsd = extract_xsd
    _extract_rng = extract_rng
    _include = iso_dsdl_include
    _expand = iso_abstract_expand
    _compile = iso_svrl_for_xslt1

    # etree.xpath object that determines input document validity when applied to
    # the svrl result report; must return a list of result elements (empty if
    # valid)
    _validation_errors = ASSERTS_ONLY

    # NOTE: the ``{}`` defaults are shared between calls but never mutated
    # here: they are only unpacked with ``**`` or copied by
    # _stylesheet_param_dict().
    def __init__(self, etree=None, file=None, include=True, expand=True,
                 include_params={}, expand_params={}, compile_params={},
                 store_schematron=False, store_xslt=False, store_report=False,
                 phase=None, error_finder=ASSERTS_ONLY):
        super(Schematron, self).__init__()

        self._store_report = store_report
        self._schematron = None
        self._validator_xslt = None
        self._validation_report = None
        # only shadow the class-level default when a custom finder is given
        if error_finder is not self.ASSERTS_ONLY:
            self._validation_errors = error_finder

        # parse schema document, may be a schematron schema or an XML Schema or
        # a RelaxNG schema with embedded schematron rules
        root = None
        try:
            if etree is not None:
                if _etree.iselement(etree):
                    root = etree
                else:
                    root = etree.getroot()
            elif file is not None:
                root = _etree.parse(file).getroot()
        except Exception:
            raise _etree.SchematronParseError(
                "No tree or file given: %s" % sys.exc_info()[1])
        if root is None:
            raise ValueError("Empty tree")
        if root.tag == _schematron_root:
            schematron = root
        else:
            schematron = self._extract(root)
        if schematron is None:
            raise _etree.SchematronParseError(
                "Document is not a schematron schema or schematron-extractable")
        # perform the iso-schematron skeleton implementation steps to get a
        # validating xslt
        if include:
            schematron = self._include(schematron, **include_params)
        if expand:
            schematron = self._expand(schematron, **expand_params)
        if not schematron_schema_valid(schematron):
            raise _etree.SchematronParseError(
                "invalid schematron schema: %s" %
                schematron_schema_valid.error_log)
        if store_schematron:
            self._schematron = schematron
        # add new compile keyword args here if exposing them
        compile_kwargs = {'phase': phase}
        compile_params = _stylesheet_param_dict(compile_params, compile_kwargs)
        validator_xslt = self._compile(schematron, **compile_params)
        if store_xslt:
            self._validator_xslt = validator_xslt
        self._validator = _etree.XSLT(validator_xslt)

    def __call__(self, etree):
        """Validate doc using Schematron.

        Returns true if document is valid, false if not.
        """
        self._clear_error_log()
        result = self._validator(etree)
        if self._store_report:
            self._validation_report = result
        errors = self._validation_errors(result)
        if errors:
            # the input may be an Element or an ElementTree
            if _etree.iselement(etree):
                fname = etree.getroottree().docinfo.URL or '<file>'
            else:
                fname = etree.docinfo.URL or '<file>'
            for error in errors:
                # Does svrl report the line number, anywhere? Don't think so.
                self._append_log_message(
                    domain=self._domain, type=self._error_type,
                    level=self._level, line=0,
                    message=_etree.tostring(error, encoding='unicode'),
                    filename=fname)
            return False
        return True

    @property
    def schematron(self):
        """ISO-schematron schema document (None if object has been initialized
        with store_schematron=False).
        """
        return self._schematron

    @property
    def validator_xslt(self):
        """ISO-schematron skeleton implementation XSLT validator document (None
        if object has been initialized with store_xslt=False).
        """
        return self._validator_xslt

    @property
    def validation_report(self):
        """ISO-schematron validation result report (None if result-storing has
        been turned off).
        """
        return self._validation_report
|
||||
Binary file not shown.
@@ -0,0 +1,709 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- Copyright © ISO/IEC 2015 -->
|
||||
<!--
|
||||
The following permission notice and disclaimer shall be included in all
|
||||
copies of this XML schema ("the Schema"), and derivations of the Schema:
|
||||
|
||||
Permission is hereby granted, free of charge in perpetuity, to any
|
||||
person obtaining a copy of the Schema, to use, copy, modify, merge and
|
||||
distribute free of charge, copies of the Schema for the purposes of
|
||||
developing, implementing, installing and using software based on the
|
||||
Schema, and to permit persons to whom the Schema is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
THE SCHEMA IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SCHEMA OR THE USE OR
|
||||
OTHER DEALINGS IN THE SCHEMA.
|
||||
|
||||
In addition, any modified copy of the Schema shall include the following
|
||||
notice:
|
||||
|
||||
"THIS SCHEMA HAS BEEN MODIFIED FROM THE SCHEMA DEFINED IN ISO/IEC 19757-3,
|
||||
AND SHOULD NOT BE INTERPRETED AS COMPLYING WITH THAT STANDARD".
|
||||
-->
|
||||
<grammar ns="http://purl.oclc.org/dsdl/schematron" xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
|
||||
<start>
|
||||
<ref name="schema"/>
|
||||
</start>
|
||||
<!-- Element declarations -->
|
||||
<define name="schema">
|
||||
<element name="schema">
|
||||
<optional>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<ref name="rich"/>
|
||||
<optional>
|
||||
<attribute name="schemaVersion">
|
||||
<ref name="non-empty-string"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="defaultPhase">
|
||||
<data type="IDREF"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="queryBinding">
|
||||
<ref name="non-empty-string"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<ref name="inclusion"/>
|
||||
</zeroOrMore>
|
||||
<group>
|
||||
<optional>
|
||||
<ref name="title"/>
|
||||
</optional>
|
||||
<zeroOrMore>
|
||||
<ref name="ns"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="p"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="let"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="phase"/>
|
||||
</zeroOrMore>
|
||||
<oneOrMore>
|
||||
<ref name="pattern"/>
|
||||
</oneOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="p"/>
|
||||
</zeroOrMore>
|
||||
<optional>
|
||||
<ref name="diagnostics"/>
|
||||
</optional>
|
||||
<optional>
|
||||
<!-- edited (lxml): required in standard, optional here (since it can be empty anyway) -->
|
||||
<ref name="properties"/>
|
||||
</optional>
|
||||
</group>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="active">
|
||||
<element name="active">
|
||||
<attribute name="pattern">
|
||||
<data type="IDREF"/>
|
||||
</attribute>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<choice>
|
||||
<text/>
|
||||
<ref name="dir"/>
|
||||
<ref name="emph"/>
|
||||
<ref name="span"/>
|
||||
</choice>
|
||||
</zeroOrMore>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="assert">
|
||||
<element name="assert">
|
||||
<attribute name="test">
|
||||
<ref name="exprValue"/>
|
||||
</attribute>
|
||||
<optional>
|
||||
<attribute name="flag">
|
||||
<ref name="flagValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="diagnostics">
|
||||
<data type="IDREFS"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="properties">
|
||||
<data type="IDREFS"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<ref name="rich"/>
|
||||
<ref name="linkable"/>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<choice>
|
||||
<text/>
|
||||
<ref name="name"/>
|
||||
<ref name="value-of"/>
|
||||
<ref name="emph"/>
|
||||
<ref name="dir"/>
|
||||
<ref name="span"/>
|
||||
</choice>
|
||||
</zeroOrMore>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="diagnostic">
|
||||
<element name="diagnostic">
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
<ref name="rich"/>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<choice>
|
||||
<text/>
|
||||
<ref name="value-of"/>
|
||||
<ref name="emph"/>
|
||||
<ref name="dir"/>
|
||||
<ref name="span"/>
|
||||
</choice>
|
||||
</zeroOrMore>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="diagnostics">
|
||||
<element name="diagnostics">
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<ref name="inclusion"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="diagnostic"/>
|
||||
</zeroOrMore>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="dir">
|
||||
<element name="dir">
|
||||
<optional>
|
||||
<attribute name="value">
|
||||
<choice>
|
||||
<value>ltr</value>
|
||||
<value>rtl</value>
|
||||
</choice>
|
||||
</attribute>
|
||||
</optional>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<text/>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="emph">
|
||||
<element name="emph">
|
||||
<text/>
|
||||
</element>
|
||||
</define>
|
||||
<define name="extends">
|
||||
<element name="extends">
|
||||
<choice>
|
||||
<attribute name="rule">
|
||||
<data type="IDREF"/>
|
||||
</attribute>
|
||||
<attribute name="href">
|
||||
<ref name="uriValue"/>
|
||||
</attribute>
|
||||
</choice>
|
||||
<ref name="foreign-empty"/>
|
||||
</element>
|
||||
</define>
|
||||
<define name="let">
|
||||
<element name="let">
|
||||
<attribute name="name">
|
||||
<ref name="nameValue"/>
|
||||
</attribute>
|
||||
<choice>
|
||||
<attribute name="value">
|
||||
<data type="string" datatypeLibrary=""/>
|
||||
</attribute>
|
||||
<oneOrMore>
|
||||
<ref name="foreign-element"/>
|
||||
</oneOrMore>
|
||||
</choice>
|
||||
</element>
|
||||
</define>
|
||||
<define name="name">
|
||||
<element name="name">
|
||||
<optional>
|
||||
<attribute name="path">
|
||||
<ref name="pathValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<ref name="foreign-empty"/>
|
||||
</element>
|
||||
</define>
|
||||
<define name="ns">
|
||||
<element name="ns">
|
||||
<attribute name="uri">
|
||||
<ref name="uriValue"/>
|
||||
</attribute>
|
||||
<attribute name="prefix">
|
||||
<ref name="nameValue"/>
|
||||
</attribute>
|
||||
<ref name="foreign-empty"/>
|
||||
</element>
|
||||
</define>
|
||||
<define name="p">
|
||||
<element name="p">
|
||||
<optional>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="class">
|
||||
<ref name="classValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="icon">
|
||||
<ref name="uriValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<choice>
|
||||
<text/>
|
||||
<ref name="dir"/>
|
||||
<ref name="emph"/>
|
||||
<ref name="span"/>
|
||||
</choice>
|
||||
</zeroOrMore>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="param">
|
||||
<element name="param">
|
||||
<attribute name="name">
|
||||
<ref name="nameValue"/>
|
||||
</attribute>
|
||||
<attribute name="value">
|
||||
<ref name="non-empty-string"/>
|
||||
</attribute>
|
||||
</element>
|
||||
</define>
|
||||
<define name="pattern">
|
||||
<element name="pattern">
|
||||
<optional>
|
||||
<attribute name="documents">
|
||||
<ref name="pathValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<ref name="rich"/>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<ref name="inclusion"/>
|
||||
</zeroOrMore>
|
||||
<choice>
|
||||
<group>
|
||||
<attribute name="abstract">
|
||||
<value>true</value>
|
||||
</attribute>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
<optional>
|
||||
<ref name="title"/>
|
||||
</optional>
|
||||
<group>
|
||||
<zeroOrMore>
|
||||
<ref name="p"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="let"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="rule"/>
|
||||
</zeroOrMore>
|
||||
</group>
|
||||
</group>
|
||||
<group>
|
||||
<optional>
|
||||
<attribute name="abstract">
|
||||
<value>false</value>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<ref name="title"/>
|
||||
</optional>
|
||||
<group>
|
||||
<zeroOrMore>
|
||||
<ref name="p"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="let"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="rule"/>
|
||||
</zeroOrMore>
|
||||
</group>
|
||||
</group>
|
||||
<group>
|
||||
<optional>
|
||||
<attribute name="abstract">
|
||||
<value>false</value>
|
||||
</attribute>
|
||||
</optional>
|
||||
<attribute name="is-a">
|
||||
<data type="IDREF"/>
|
||||
</attribute>
|
||||
<optional>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<ref name="title"/>
|
||||
</optional>
|
||||
<group>
|
||||
<zeroOrMore>
|
||||
<ref name="p"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="param"/>
|
||||
</zeroOrMore>
|
||||
</group>
|
||||
</group>
|
||||
</choice>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="phase">
|
||||
<element name="phase">
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
<ref name="rich"/>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<ref name="inclusion"/>
|
||||
</zeroOrMore>
|
||||
<group>
|
||||
<zeroOrMore>
|
||||
<ref name="p"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="let"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="active"/>
|
||||
</zeroOrMore>
|
||||
</group>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="properties">
|
||||
<element name="properties">
|
||||
<zeroOrMore>
|
||||
<ref name="property"/>
|
||||
</zeroOrMore>
|
||||
</element>
|
||||
</define>
|
||||
<define name="property">
|
||||
<element name="property">
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
<optional>
|
||||
<attribute name="role">
|
||||
<ref name="roleValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="scheme"/>
|
||||
</optional>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<choice>
|
||||
<text/>
|
||||
<ref name="name"/>
|
||||
<ref name="value-of"/>
|
||||
<ref name="emph"/>
|
||||
<ref name="dir"/>
|
||||
<ref name="span"/>
|
||||
</choice>
|
||||
</zeroOrMore>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="report">
|
||||
<element name="report">
|
||||
<attribute name="test">
|
||||
<ref name="exprValue"/>
|
||||
</attribute>
|
||||
<optional>
|
||||
<attribute name="flag">
|
||||
<ref name="flagValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="diagnostics">
|
||||
<data type="IDREFS"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="properties">
|
||||
<data type="IDREFS"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<ref name="rich"/>
|
||||
<ref name="linkable"/>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<choice>
|
||||
<text/>
|
||||
<ref name="name"/>
|
||||
<ref name="value-of"/>
|
||||
<ref name="emph"/>
|
||||
<ref name="dir"/>
|
||||
<ref name="span"/>
|
||||
</choice>
|
||||
</zeroOrMore>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="rule">
|
||||
<element name="rule">
|
||||
<optional>
|
||||
<attribute name="flag">
|
||||
<ref name="flagValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<ref name="rich"/>
|
||||
<ref name="linkable"/>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<ref name="inclusion"/>
|
||||
</zeroOrMore>
|
||||
<choice>
|
||||
<group>
|
||||
<attribute name="abstract">
|
||||
<value>true</value>
|
||||
</attribute>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
<zeroOrMore>
|
||||
<ref name="let"/>
|
||||
</zeroOrMore>
|
||||
<oneOrMore>
|
||||
<choice>
|
||||
<ref name="assert"/>
|
||||
<ref name="report"/>
|
||||
<ref name="extends"/>
|
||||
<ref name="p"/>
|
||||
</choice>
|
||||
</oneOrMore>
|
||||
</group>
|
||||
<group>
|
||||
<attribute name="context">
|
||||
<ref name="pathValue"/>
|
||||
</attribute>
|
||||
<optional>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="abstract">
|
||||
<value>false</value>
|
||||
</attribute>
|
||||
</optional>
|
||||
<zeroOrMore>
|
||||
<ref name="let"/>
|
||||
</zeroOrMore>
|
||||
<oneOrMore>
|
||||
<choice>
|
||||
<ref name="assert"/>
|
||||
<ref name="report"/>
|
||||
<ref name="extends"/>
|
||||
<ref name="p"/>
|
||||
</choice>
|
||||
</oneOrMore>
|
||||
</group>
|
||||
</choice>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="span">
|
||||
<element name="span">
|
||||
<attribute name="class">
|
||||
<ref name="classValue"/>
|
||||
</attribute>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<text/>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="title">
|
||||
<element name="title">
|
||||
<zeroOrMore>
|
||||
<choice>
|
||||
<text/>
|
||||
<ref name="dir"/>
|
||||
</choice>
|
||||
</zeroOrMore>
|
||||
</element>
|
||||
</define>
|
||||
<define name="value-of">
|
||||
<element name="value-of">
|
||||
<attribute name="select">
|
||||
<ref name="pathValue"/>
|
||||
</attribute>
|
||||
<ref name="foreign-empty"/>
|
||||
</element>
|
||||
</define>
|
||||
<!-- common declarations -->
|
||||
<define name="inclusion">
|
||||
<element name="include">
|
||||
<attribute name="href">
|
||||
<ref name="uriValue"/>
|
||||
</attribute>
|
||||
<ref name="foreign-empty"/>
|
||||
</element>
|
||||
</define>
|
||||
<define name="rich">
|
||||
<optional>
|
||||
<attribute name="icon">
|
||||
<ref name="uriValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="see">
|
||||
<ref name="uriValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="fpi">
|
||||
<ref name="fpiValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="xml:lang">
|
||||
<ref name="langValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="xml:space">
|
||||
<choice>
|
||||
<value>preserve</value>
|
||||
<value>default</value>
|
||||
</choice>
|
||||
</attribute>
|
||||
</optional>
|
||||
</define>
|
||||
<define name="linkable">
|
||||
<optional>
|
||||
<attribute name="role">
|
||||
<ref name="roleValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="subject">
|
||||
<ref name="pathValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
</define>
|
||||
<define name="foreign">
|
||||
<ref name="foreign-attributes"/>
|
||||
<zeroOrMore>
|
||||
<ref name="foreign-element"/>
|
||||
</zeroOrMore>
|
||||
</define>
|
||||
<define name="foreign-empty">
|
||||
<ref name="foreign-attributes"/>
|
||||
</define>
|
||||
<define name="foreign-attributes">
|
||||
<zeroOrMore>
|
||||
<attribute>
|
||||
<anyName>
|
||||
<except>
|
||||
<nsName ns=""/>
|
||||
<nsName ns="http://www.w3.org/XML/1998/namespace"/>
|
||||
</except>
|
||||
</anyName>
|
||||
</attribute>
|
||||
</zeroOrMore>
|
||||
</define>
|
||||
<define name="foreign-element">
|
||||
<element>
|
||||
<anyName>
|
||||
<except>
|
||||
<nsName/>
|
||||
</except>
|
||||
</anyName>
|
||||
<zeroOrMore>
|
||||
<choice>
|
||||
<attribute>
|
||||
<anyName/>
|
||||
</attribute>
|
||||
<ref name="foreign-element"/>
|
||||
<ref name="schema"/>
|
||||
<text/>
|
||||
</choice>
|
||||
</zeroOrMore>
|
||||
</element>
|
||||
</define>
|
||||
<!-- Data types -->
|
||||
<define name="uriValue">
|
||||
<data type="anyURI"/>
|
||||
</define>
|
||||
<define name="pathValue">
|
||||
<data type="string" datatypeLibrary=""/>
|
||||
</define>
|
||||
<define name="exprValue">
|
||||
<data type="string" datatypeLibrary=""/>
|
||||
</define>
|
||||
<define name="fpiValue">
|
||||
<data type="string" datatypeLibrary=""/>
|
||||
</define>
|
||||
<define name="langValue">
|
||||
<data type="language"/>
|
||||
</define>
|
||||
<define name="roleValue">
|
||||
<data type="string" datatypeLibrary=""/>
|
||||
</define>
|
||||
<define name="flagValue">
|
||||
<data type="string" datatypeLibrary=""/>
|
||||
</define>
|
||||
<define name="nameValue">
|
||||
<data type="string" datatypeLibrary=""/>
|
||||
</define>
|
||||
<!-- In the default query language binding, xsd:NCName -->
|
||||
<define name="classValue">
|
||||
<data type="string" datatypeLibrary=""/>
|
||||
</define>
|
||||
<define name="non-empty-string">
|
||||
<data type="token">
|
||||
<param name="minLength">1</param>
|
||||
</data>
|
||||
</define>
|
||||
</grammar>
|
||||
@@ -0,0 +1,75 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Stylesheet for extracting Schematron information from a RELAX-NG schema.
|
||||
Based on the stylesheet for extracting Schematron information from W3C XML Schema.
|
||||
Created by Eddie Robertsson 2002/06/01
|
||||
2009/12/10 hj: changed Schematron namespace to ISO URI (Holger Joukl)
|
||||
-->
|
||||
<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:sch="http://purl.oclc.org/dsdl/schematron" xmlns:rng="http://relaxng.org/ns/structure/1.0">
|
||||
<!-- Set the output to be XML with an XML declaration and use indentation -->
|
||||
<xsl:output method="xml" omit-xml-declaration="no" indent="yes" standalone="yes"/>
|
||||
<!-- -->
|
||||
<!-- match schema and call recursive template to extract included schemas -->
|
||||
<!-- -->
|
||||
<xsl:template match="/rng:grammar | /rng:element">
  <!-- Entry point: fires on the document element of a RELAX NG schema,
       whether it is a full grammar or a bare element pattern. -->
  <!-- First level of referenced documents: the hrefs of rng:include children
       of the top-level grammar plus the hrefs of every rng:externalRef found
       anywhere in the document. -->
  <xsl:variable name="referenced"
                select="document(/rng:grammar/rng:include/@href
                                 | //rng:externalRef/@href)"/>
  <!-- Hand over to the recursive collector: the current node seeds $schemas,
       the referenced documents seed $includes. -->
  <xsl:call-template name="gatherSchema">
    <xsl:with-param name="includes" select="$referenced"/>
    <xsl:with-param name="schemas" select="."/>
  </xsl:call-template>
</xsl:template>
|
||||
<!-- -->
|
||||
<!-- gather all included schemas into a single parameter variable -->
|
||||
<!-- -->
|
||||
<xsl:template name="gatherSchema">
  <!-- Recursively collects the transitive closure of included schemas.
       $schemas  : node-set of schema nodes gathered so far.
       $includes : node-set of documents referenced by the previous round.
       Both default to the empty node-set so the union below is always
       well-typed, even if a caller omits a parameter. -->
  <xsl:param name="schemas" select="/.."/>
  <xsl:param name="includes" select="/.."/>
  <xsl:choose>
    <!-- The union is larger than $schemas only if $includes contributes a
         node not seen before. The less-than sign has to be written as an
         entity reference: a literal one is not allowed in an attribute value. -->
    <xsl:when test="count($schemas) &lt; count($schemas | $includes)">
      <!-- when $includes includes something new, recurse ... -->
      <xsl:call-template name="gatherSchema">
        <!-- ... with current $includes added to the $schemas parameter ... -->
        <xsl:with-param name="schemas" select="$schemas | $includes"/>
        <!-- ... and any *new* includes in the $includes parameter -->
        <xsl:with-param name="includes" select="document($includes/rng:grammar/rng:include/@href
                                                         | $includes//rng:externalRef/@href)"/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <!-- we have the complete set of included schemas, so now let's output the embedded schematron -->
      <xsl:call-template name="output">
        <xsl:with-param name="schemas" select="$schemas"/>
      </xsl:call-template>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
|
||||
<!-- -->
|
||||
<!-- output the schematron information -->
|
||||
<!-- -->
|
||||
<xsl:template name="output">
  <!-- Emits one sch:schema assembled from the Schematron elements embedded
       anywhere in the gathered schema nodes.
       $schemas : node-set of all gathered schemas (defaults to empty). -->
  <xsl:param name="schemas" select="/.."/>
  <sch:schema>
    <!-- get header-type elements - eg title and especially ns -->
    <!-- title (just one). The parentheses matter: without them the [1]
         predicate applies per parent element, so every title that is the
         first among its own siblings would be copied. -->
    <xsl:copy-of select="($schemas//sch:title)[1]"/>
    <!-- get remaining schematron schema children -->
    <!-- namespace elements, dropping duplicates: an sch:ns is kept only if it
         is the first one (in document order) carrying its @prefix -->
    <xsl:for-each select="$schemas//sch:ns">
      <xsl:if test="generate-id(.) = generate-id($schemas//sch:ns[@prefix = current()/@prefix][1])">
        <xsl:copy-of select="."/>
      </xsl:if>
    </xsl:for-each>
    <xsl:copy-of select="$schemas//sch:phase"/>
    <xsl:copy-of select="$schemas//sch:pattern"/>
    <!-- merge the children of every embedded sch:diagnostics into one wrapper -->
    <sch:diagnostics>
      <xsl:copy-of select="$schemas//sch:diagnostics/*"/>
    </sch:diagnostics>
  </sch:schema>
</xsl:template>
|
||||
<!-- -->
|
||||
</xsl:transform>
|
||||
@@ -0,0 +1,77 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
based on an original transform by Eddie Robertsson
|
||||
2001/04/21 fn: added support for included schemas
|
||||
2001/06/27 er: changed XML Schema prefix from xsd: to xs: and changed to the Rec namespace
|
||||
2009/12/10 hj: changed Schematron namespace to ISO URI (Holger Joukl)
|
||||
-->
|
||||
<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:sch="http://purl.oclc.org/dsdl/schematron" xmlns:xs="http://www.w3.org/2001/XMLSchema">
|
||||
<!-- Set the output to be XML with an XML declaration and use indentation -->
|
||||
<xsl:output method="xml" omit-xml-declaration="no" indent="yes" standalone="yes"/>
|
||||
<!-- -->
|
||||
<!-- match schema and call recursive template to extract included schemas -->
|
||||
<!-- -->
|
||||
<xsl:template match="xs:schema">
  <!-- Entry point for a W3C XML Schema document. -->
  <!-- First level of referenced documents: the schemaLocation of every
       xs:include, xs:import and xs:redefine child of the top-level schema. -->
  <xsl:variable name="referenced"
                select="document(/xs:schema/xs:*[self::xs:include or self::xs:import or self::xs:redefine]/@schemaLocation)"/>
  <!-- Hand over to the recursive collector: the current document root seeds
       $schemas, the referenced documents seed $includes. -->
  <xsl:call-template name="gatherSchema">
    <xsl:with-param name="includes" select="$referenced"/>
    <xsl:with-param name="schemas" select="/"/>
  </xsl:call-template>
</xsl:template>
|
||||
<!-- -->
|
||||
<!-- gather all included schemas into a single parameter variable -->
|
||||
<!-- -->
|
||||
<xsl:template name="gatherSchema">
  <!-- Recursively collects the transitive closure of included, imported and
       redefined schemas.
       $schemas  : node-set of schema nodes gathered so far.
       $includes : node-set of documents referenced by the previous round.
       Both default to the empty node-set so the union below is always
       well-typed, even if a caller omits a parameter. -->
  <xsl:param name="schemas" select="/.."/>
  <xsl:param name="includes" select="/.."/>
  <xsl:choose>
    <!-- The union is larger than $schemas only if $includes contributes a
         node not seen before. The less-than sign has to be written as an
         entity reference: a literal one is not allowed in an attribute value. -->
    <xsl:when test="count($schemas) &lt; count($schemas | $includes)">
      <!-- when $includes includes something new, recurse ... -->
      <xsl:call-template name="gatherSchema">
        <!-- ... with current $includes added to the $schemas parameter ... -->
        <xsl:with-param name="schemas" select="$schemas | $includes"/>
        <!-- ... and any *new* includes in the $includes parameter -->
        <xsl:with-param name="includes"
                        select="document($includes/xs:schema/xs:*[self::xs:include or self::xs:import or self::xs:redefine]/@schemaLocation)"/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <!-- we have the complete set of included schemas,
           so now let's output the embedded schematron -->
      <xsl:call-template name="output">
        <xsl:with-param name="schemas" select="$schemas"/>
      </xsl:call-template>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
|
||||
<!-- -->
|
||||
<!-- output the schematron information -->
|
||||
<!-- -->
|
||||
<xsl:template name="output">
  <!-- Emits one sch:schema assembled from the Schematron elements embedded in
       xs:appinfo anywhere in the gathered schema nodes.
       $schemas : node-set of all gathered schemas (defaults to empty). -->
  <xsl:param name="schemas" select="/.."/>
  <sch:schema>
    <!-- get header-type elements - eg title and especially ns -->
    <!-- title (just one). The parentheses matter: without them the [1]
         predicate applies per xs:appinfo parent, so the first title of every
         appinfo would be copied. -->
    <xsl:copy-of select="($schemas//xs:appinfo/sch:title)[1]"/>
    <!-- get remaining schematron schema children -->
    <!-- namespace elements, dropping duplicates: an sch:ns is kept only if it
         is the first one (in document order) carrying its @prefix -->
    <xsl:for-each select="$schemas//xs:appinfo/sch:ns">
      <xsl:if test="generate-id(.) =
                    generate-id($schemas//xs:appinfo/sch:ns[@prefix = current()/@prefix][1])">
        <xsl:copy-of select="."/>
      </xsl:if>
    </xsl:for-each>
    <xsl:copy-of select="$schemas//xs:appinfo/sch:phase"/>
    <xsl:copy-of select="$schemas//xs:appinfo/sch:pattern"/>
    <!-- merge the children of every embedded sch:diagnostics into one wrapper -->
    <sch:diagnostics>
      <xsl:copy-of select="$schemas//xs:appinfo/sch:diagnostics/*"/>
    </sch:diagnostics>
  </sch:schema>
</xsl:template>
|
||||
<!-- -->
|
||||
</xsl:transform>
|
||||
@@ -0,0 +1,313 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?><?xar XSLT?>
|
||||
|
||||
<!--
|
||||
OVERVIEW - iso_abstract_expand.xsl
|
||||
|
||||
This is a preprocessor for ISO Schematron, which implements abstract patterns.
|
||||
It also
|
||||
* extracts a particular schema using an ID, where there are multiple
|
||||
schemas, such as when they are embedded in the same NVDL script
|
||||
* allows parameter substitution inside @context, @test, @select, @path
|
||||
* experimentally, allows parameter recognition and substitution inside
|
||||
text (NOTE: to be removed, for compatibility with other implementations,
|
||||
please do not use this)
|
||||
|
||||
This should be used after iso-dsdl-include.xsl and before the skeleton or
|
||||
meta-stylesheet (e.g. iso-svrl.xsl) . It only requires XSLT 1.
|
||||
|
||||
Each kind of inclusion can be turned off (or on) on the command line.
|
||||
|
||||
-->
|
||||
|
||||
<!--
|
||||
Open Source Initiative OSI - The MIT License:Licensing
|
||||
[OSI Approved License]
|
||||
|
||||
This source code was previously available under the zlib/libpng license.
|
||||
Attribution is polite.
|
||||
|
||||
The MIT License
|
||||
|
||||
Copyright (c) 2004-2010 Rick Jellife and Academia Sinica Computing Centre, Taiwan
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
-->
|
||||
|
||||
<!--
|
||||
VERSION INFORMATION
|
||||
2013-09-19 RJ
|
||||
* Allow macro expansion in @path attributes, eg. for sch:name/@path
|
||||
|
||||
2010-07-10 RJ
|
||||
* Move to MIT license
|
||||
|
||||
2008-09-18 RJ
|
||||
* move out param test from iso:schema template to work with XSLT 1. (Noah Fontes)
|
||||
|
||||
2008-07-29 RJ
|
||||
* Create. Pull out as distinct XSL in its own namespace from old iso_pre_pro.xsl
|
||||
* Put everything in private namespace
|
||||
* Rewrite replace_substring named template so that copyright is clear
|
||||
|
||||
2008-07-24 RJ
|
||||
* correct abstract patterns so for correct names: param/@name and
|
||||
param/@value
|
||||
|
||||
2007-01-12 RJ
|
||||
* Use ISO namespace
|
||||
* Use pattern/@id not pattern/@name
|
||||
* Add Oliver Becker's suggestions from old Schematron-love-in list for <copy>
|
||||
* Add XT -ism?
|
||||
2003 RJ
|
||||
* Original written for old namespace
|
||||
* http://www.topologi.com/resources/iso-pre-pro.xsl
|
||||
-->
|
||||
<xslt:stylesheet version="1.0" xmlns:xslt="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:iso="http://purl.oclc.org/dsdl/schematron"
|
||||
xmlns:nvdl="http://purl.oclc.org/dsdl/nvdl"
|
||||
|
||||
xmlns:iae="http://www.schematron.com/namespace/iae"
|
||||
|
||||
>
|
||||
|
||||
<xslt:param name="schema-id"></xslt:param>
|
||||
|
||||
|
||||
<!-- Driver for the mode -->
|
||||
<xslt:template match="/">
  <!-- Driver: re-dispatch the document root in the private iae:go mode, in
       which all rules of this stylesheet that declare a mode are defined. -->
  <xslt:apply-templates select="self::node()" mode="iae:go"/>
</xslt:template>
|
||||
|
||||
|
||||
<!-- ================================================================================== -->
|
||||
<!-- Normal processing rules -->
|
||||
<!-- ================================================================================== -->
|
||||
<!-- Output only the selected schema -->
|
||||
<!-- Must be declared in mode iae:go: the driver template switches to that mode
     at the document root, so a rule in the default mode would never be applied
     and the $schema-id selection would silently have no effect. -->
<xslt:template match="iso:schema" mode="iae:go">
  <!-- Copy the schema when no $schema-id was requested, or when this schema's
       @id is the requested one; otherwise the schema produces no output. -->
  <xslt:if test="string-length($schema-id) = 0 or @id = $schema-id">
    <xslt:copy>
      <xslt:copy-of select="@*"/>
      <xslt:apply-templates mode="iae:go"/>
    </xslt:copy>
  </xslt:if>
</xslt:template>
|
||||
|
||||
|
||||
<!-- Strip out any foreign elements above the Schematron schema.
|
||||
-->
|
||||
<xslt:template match="*[not(ancestor-or-self::iso:*)]" mode="iae:go">
  <!-- An element that is neither in the Schematron namespace nor nested inside
       one is not copied; only its children are processed further. -->
  <xslt:apply-templates select="node()" mode="iae:go"/>
</xslt:template>
|
||||
|
||||
|
||||
<!-- ================================================================================== -->
|
||||
<!-- Handle Schematron abstract pattern preprocessing -->
|
||||
<!-- abstract-to-real calls
|
||||
do-pattern calls
|
||||
macro-expand calls
|
||||
multi-macro-expand
|
||||
replace-substring -->
|
||||
<!-- ================================================================================== -->
|
||||
|
||||
<!--
|
||||
Abstract patterns allow you to say, for example
|
||||
|
||||
<pattern name="htmlTable" is-a="table">
|
||||
<param name="row" value="html:tr"/>
|
||||
<param name="cell" value="html:td" />
|
||||
<param name="table" value="html:table" />
|
||||
</pattern>
|
||||
|
||||
For a good introduction, see Uche Ogbuji's article for IBM DeveloperWorks
|
||||
"Discover the flexibility of Schematron abstract patterns"
|
||||
http://www-128.ibm.com/developerworks/xml/library/x-stron.html
|
||||
However, note that ISO Schematron uses @name and @value attributes on
|
||||
the iso:param element, and @id not @name on the pattern element.
|
||||
|
||||
-->
|
||||
|
||||
<!-- Suppress declarations of abstract patterns -->
|
||||
<xslt:template match="iso:pattern[@abstract='true']" mode="iae:go">
  <!-- The abstract declaration itself is dropped from the output; an XML
       comment naming its id is left in its place. -->
  <xslt:comment>
    <xslt:text>Suppressed abstract pattern </xslt:text>
    <xslt:value-of select="@id"/>
    <xslt:text> was here</xslt:text>
  </xslt:comment>
</xslt:template>
|
||||
|
||||
|
||||
<!-- Replace each use of an abstract pattern (a pattern with @is-a) by the concrete pattern generated from it -->
|
||||
<xslt:template match="iso:pattern[@is-a]" mode="iae:go">
  <!-- Leave a marker comment naming the abstract pattern being instantiated -->
  <xslt:comment>
    <xslt:text>Start pattern based on abstract </xslt:text>
    <xslt:value-of select="@is-a"/>
  </xslt:comment>
  <!-- Generate the concrete pattern: @is-a names the abstract pattern to copy,
       @id identifies this pattern, whose iso:param children supply the values -->
  <xslt:call-template name="iae:abstract-to-real">
    <xslt:with-param name="is-a" select="@is-a"/>
    <xslt:with-param name="caller" select="@id"/>
  </xslt:call-template>
</xslt:template>
|
||||
|
||||
|
||||
|
||||
<!-- output everything else unchanged -->
|
||||
<xslt:template match="*" priority="-1" mode="iae:go">
  <!-- Fallback rule (lowest priority): reproduce the element with all of its
       attributes and keep processing its children in the same mode. -->
  <xslt:copy>
    <xslt:for-each select="@*">
      <xslt:copy/>
    </xslt:for-each>
    <xslt:apply-templates select="node()" mode="iae:go"/>
  </xslt:copy>
</xslt:template>
|
||||
|
||||
<!-- Templates for macro expansion of abstract patterns -->
|
||||
<!-- Sets up the initial conditions for the recursive call -->
|
||||
<xslt:template name="iae:macro-expand">
  <!-- Expands the parameters of pattern $caller inside $text.
       $caller : id of the pattern whose iso:param children are used.
       $text   : the text to expand.
       Starts the recursive expansion at the first parameter. -->
  <xslt:param name="caller"/>
  <xslt:param name="text"/>
  <xslt:call-template name="iae:multi-macro-expand">
    <xslt:with-param name="paramNumber" select="1"/>
    <xslt:with-param name="text" select="$text"/>
    <xslt:with-param name="caller" select="$caller"/>
  </xslt:call-template>
</xslt:template>
|
||||
|
||||
<!-- Template to replace the current parameter and then
|
||||
recurse to replace subsequent parameters. -->
|
||||
|
||||
<xslt:template name="iae:multi-macro-expand">
  <!-- Substitutes parameter number $paramNumber of pattern $caller in $text,
       then recurses for the following parameter. Once no parameter exists at
       that position, the text accumulated so far is written out. -->
  <xslt:param name="caller"/>
  <xslt:param name="text"/>
  <xslt:param name="paramNumber"/>

  <!-- The iso:param at the current position of the calling pattern, if any -->
  <xslt:variable name="currentParam"
                 select="//iso:pattern[@id=$caller]/iso:param[ $paramNumber ]"/>

  <xslt:choose>
    <xslt:when test="not($currentParam)">
      <!-- parameters exhausted: emit the result -->
      <xslt:value-of select="$text"/>
    </xslt:when>
    <xslt:otherwise>
      <!-- replace every occurrence of "$" + parameter name by the parameter value -->
      <xslt:variable name="expanded">
        <xslt:call-template name="iae:replace-substring">
          <xslt:with-param name="original" select="$text"/>
          <xslt:with-param name="substring" select="concat('$', $currentParam/@name)"/>
          <xslt:with-param name="replacement" select="$currentParam/@value"/>
        </xslt:call-template>
      </xslt:variable>
      <!-- continue with the next parameter on the partially expanded text -->
      <xslt:call-template name="iae:multi-macro-expand">
        <xslt:with-param name="caller" select="$caller"/>
        <xslt:with-param name="text" select="$expanded"/>
        <xslt:with-param name="paramNumber" select="$paramNumber + 1"/>
      </xslt:call-template>
    </xslt:otherwise>
  </xslt:choose>
</xslt:template>
|
||||
|
||||
|
||||
<!-- generate the real pattern from an abstract pattern + parameters-->
|
||||
<xslt:template name="iae:abstract-to-real">
  <!-- Builds a concrete pattern from an abstract one.
       $caller : id of the pattern that uses the abstract pattern (may be empty).
       $is-a   : id of the abstract pattern to copy. -->
  <xslt:param name="caller"/>
  <xslt:param name="is-a"/>
  <xslt:for-each select="//iso:pattern[@id= $is-a]">
    <xslt:copy>
      <!-- The generated pattern takes the caller's id; when the caller has
           none, an id is built from generate-id() followed by the abstract
           pattern's id. -->
      <xslt:attribute name="id">
        <xslt:choose>
          <xslt:when test="string-length( $caller ) != 0">
            <xslt:value-of select="$caller"/>
          </xslt:when>
          <xslt:otherwise>
            <xslt:value-of select="concat( generate-id(.) , $is-a)"/>
          </xslt:otherwise>
        </xslt:choose>
      </xslt:attribute>
      <!-- Process the abstract pattern's element and text children in the
           do-pattern mode, passing the caller's id along -->
      <xslt:apply-templates select="*|text()" mode="iae:do-pattern">
        <xslt:with-param name="caller"><xslt:value-of select="$caller"/></xslt:with-param>
      </xslt:apply-templates>
    </xslt:copy>
  </xslt:for-each>
</xslt:template>
|
||||
|
||||
|
||||
<!-- Generate a non-abstract pattern -->
|
||||
<xslt:template mode="iae:do-pattern" match="*">
  <!-- Copies one element of an abstract pattern while expanding the
       parameters of pattern $caller.
       $caller : id of the pattern whose iso:param children supply the values. -->
  <xslt:param name="caller"/>
  <xslt:copy>
    <!-- test, context, select and path attributes are rebuilt with their
         values macro-expanded -->
    <xslt:for-each select="@test | @context | @select | @path">
      <xslt:attribute name="{name()}">
        <xslt:call-template name="iae:macro-expand">
          <xslt:with-param name="caller"><xslt:value-of select="$caller"/></xslt:with-param>
          <xslt:with-param name="text"><xslt:value-of select="."/></xslt:with-param>
        </xslt:call-template>
      </xslt:attribute>
    </xslt:for-each>
    <!-- every other attribute is copied as it is -->
    <xslt:copy-of select="@*[not(name()='test' or name()='context' or name()='select' or name()='path')]"/>
    <!-- children are handled in document order -->
    <xslt:for-each select="node()">
      <xslt:choose>
        <xslt:when test="not(self::text())">
          <!-- non-text children are processed in this same mode -->
          <xslt:apply-templates select="." mode="iae:do-pattern">
            <xslt:with-param name="caller"><xslt:value-of select="$caller"/></xslt:with-param>
          </xslt:apply-templates>
        </xslt:when>
        <xslt:otherwise>
          <!-- Experiment: macros are replaced in text as well, to allow
               parameterized assertions and so on, without having to have
               spurious <iso:value-of> calls and multiple delimiting.
               NOTE: THIS FUNCTIONALITY WILL BE REMOVED IN THE FUTURE -->
          <xslt:call-template name="iae:macro-expand">
            <xslt:with-param name="caller"><xslt:value-of select="$caller"/></xslt:with-param>
            <xslt:with-param name="text"><xslt:value-of select="."/></xslt:with-param>
          </xslt:call-template>
        </xslt:otherwise>
      </xslt:choose>
    </xslt:for-each>
  </xslt:copy>
</xslt:template>
|
||||
|
||||
<!-- UTILITIES -->
|
||||
<!-- Simple version of replace-substring function -->
|
||||
<xslt:template name="iae:replace-substring">
  <!-- Writes $original with every occurrence of $substring replaced by
       $replacement.
       $original    : text to search.
       $substring   : text to look for; when empty, $original is written unchanged.
       $replacement : text to insert instead (defaults to the empty string). -->
  <xslt:param name="original"/>
  <xslt:param name="substring"/>
  <xslt:param name="replacement" select="''"/>

  <xslt:choose>
    <!-- nothing left to process: write nothing -->
    <xslt:when test="not($original)"/>
    <!-- a non-empty search string occurs in the text -->
    <xslt:when test="string($substring) and contains($original, $substring)">
      <!-- text before the first match, then the replacement ... -->
      <xslt:value-of select="substring-before($original, $substring)"/>
      <xslt:value-of select="$replacement"/>
      <!-- ... then the same treatment for everything after the match -->
      <xslt:call-template name="iae:replace-substring">
        <xslt:with-param name="original" select="substring-after($original, $substring)"/>
        <xslt:with-param name="substring" select="$substring"/>
        <xslt:with-param name="replacement" select="$replacement"/>
      </xslt:call-template>
    </xslt:when>
    <!-- empty search string or no match: no substitution -->
    <xslt:otherwise>
      <xslt:value-of select="$original"/>
    </xslt:otherwise>
  </xslt:choose>
</xslt:template>
|
||||
|
||||
|
||||
|
||||
</xslt:stylesheet>
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,55 @@
|
||||
<?xml version="1.0" ?><?xar XSLT?>
|
||||
<!-- Implementation for the Schematron XML Schema Language.
|
||||
http://www.ascc.net/xml/resource/schematron/schematron.html
|
||||
|
||||
Copyright (c) 2000,2001 Rick Jelliffe and Academia Sinica Computing Center, Taiwan
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from
|
||||
the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim
|
||||
that you wrote the original software. If you use this software in a product,
|
||||
an acknowledgment in the product documentation would be appreciated but is
|
||||
not required.
|
||||
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
-->
|
||||
|
||||
<!-- Schematron message -->
|
||||
|
||||
<xsl:stylesheet
|
||||
version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:axsl="http://www.w3.org/1999/XSL/TransformAlias">
|
||||
|
||||
<xsl:import href="iso_schematron_skeleton_for_xslt1.xsl"/>
|
||||
|
||||
<!-- process-prolog: writes an axsl:output declaration with method="text"
     into the generated stylesheet. -->
<xsl:template name="process-prolog">
|
||||
<axsl:output method="text" />
|
||||
</xsl:template>
|
||||
|
||||
<!-- use default rule for process-root: copy contents / ignore title -->
|
||||
<!-- use default rule for process-pattern: ignore name and see -->
|
||||
<!-- use default rule for process-name: output name -->
|
||||
<!-- use default rule for process-assert and process-report:
|
||||
call process-message -->
|
||||
|
||||
<!-- process-message: writes an axsl:message into the generated stylesheet.
     Its content is the text-mode rendering of the current node's children,
     followed by the pattern in parentheses and, when $role is non-empty,
     " / " plus the role.
     Parameters:
       pattern  value printed inside the parentheses
       role     optional; appended after the pattern when non-empty -->
<xsl:template name="process-message">
|
||||
<xsl:param name="pattern" />
|
||||
<xsl:param name="role" />
|
||||
<axsl:message>
|
||||
<xsl:apply-templates mode="text"
|
||||
/> (<xsl:value-of select="$pattern" />
|
||||
<xsl:if test="$role"> / <xsl:value-of select="$role" />
|
||||
</xsl:if>)</axsl:message>
|
||||
</xsl:template>
|
||||
|
||||
</xsl:stylesheet>
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,588 @@
|
||||
<?xml version="1.0" ?>
|
||||
<!--
|
||||
ISO_SVRL.xsl
|
||||
|
||||
Implementation of Schematron Validation Report Language from ISO Schematron
|
||||
ISO/IEC 19757 Document Schema Definition Languages (DSDL)
|
||||
Part 3: Rule-based validation Schematron
|
||||
Annex D: Schematron Validation Report Language
|
||||
|
||||
This ISO Standard is available free as a Publicly Available Specification in PDF from ISO.
|
||||
Also see www.schematron.com for drafts and other information.
|
||||
|
||||
This implementation of SVRL is designed to run with the "Skeleton" implementation
|
||||
of Schematron which Oliver Becker devised. The skeleton code provides a
|
||||
Schematron implementation but with named templates for handling all output;
|
||||
the skeleton provides basic templates for output using this API, but client
|
||||
validators can be written to import the skeleton and override the default output
|
||||
templates as required. (In order to understand this, you must understand that
|
||||
a named template such as "process-assert" in this XSLT stylesheet overrides and
|
||||
replaces any template with the same name in the imported skeleton XSLT file.)
|
||||
|
||||
The other important thing to understand in this code is that there are different
|
||||
versions of the Schematron skeleton. These track the development of Schematron through
|
||||
Schematron 1.5, Schematron 1.6 and now ISO Schematron. Only one skeleton must be
|
||||
imported. The code has templates for the different skeletons commented out for
|
||||
convenience. ISO Schematron has a different namespace than Schematron 1.5 and 1.6;
|
||||
so the ISO Schematron skeleton has been written itself with an optional import
|
||||
statement to in turn import the Schematron 1.6 skeleton. This will allow you to
|
||||
validate with schemas from either namespace.
|
||||
|
||||
|
||||
History:
|
||||
2009-03-18
|
||||
* Fix attribute with space "see " which generates wrong name in some processors
|
||||
2008-08-11
|
||||
* RJ Fix attribute/@select which saxon allows in XSLT 1
|
||||
2008-08-07
|
||||
* RJ Add output-encoding attribute to specify final encoding to use
|
||||
* Alter allow-foreign functionality so that Schematron span, emph and dir elements make
|
||||
it to the output, for better formatting and because span can be used to mark up
|
||||
semantically interesting information embedded in diagnostics, which reduces the
|
||||
need to extend SVRL itself
|
||||
* Diagnostic-reference had an invalid attribute @id that duplicated @diagnostic: removed
|
||||
2008-08-06
|
||||
* RJ Fix invalid output: svrl:diagnostic-reference is not contained in an svrl:text
|
||||
* Output comment to SVRL file giving filename if available (from command-line parameter)
|
||||
2008-08-04
|
||||
* RJ move sch: prefix to schold: prefix to prevent confusion (we want people to
|
||||
be able to switch from old namespace to new namespace without changing the
|
||||
sch: prefix, so it is better to keep that prefix completely out of the XSLT)
|
||||
* Extra signature fixes (PH)
|
||||
2008-08-03
|
||||
* Repair missing class parameter on process-p
|
||||
2008-07-31
|
||||
* Update skeleton names
|
||||
2007-04-03
|
||||
* Add option generate-fired-rule (RG)
|
||||
2007-02-07
|
||||
* Prefer true|false for parameters. But allow yes|no on some old ones for compatibility
|
||||
* DP Diagnostics output to svrl:text. Diagnosis put out after assertion text.
|
||||
* Removed non-SVRL elements and attributes: better handled as an extra layer that invokes this one
|
||||
* Add more formal parameters
|
||||
* Correct confusion between $schemaVersion and $queryBinding
|
||||
* Indent
|
||||
* Validate against RNC schemas for XSLT 1 and 2 (with regex tests removed)
|
||||
* Validate output with UniversalTest.sch against RNC schema for ISO SVRL
|
||||
|
||||
2007-02-01
|
||||
* DP. Update formal parameters of overriding named templates to handle more attributes.
|
||||
* DP. Refactor handling of rich and linkable parameters to a named template.
|
||||
|
||||
2007-01-22
|
||||
* DP change svrl:ns to svrl:ns-in-attribute-value
|
||||
* Change default when no queryBinding from "unknown" to "xslt"
|
||||
|
||||
2007-01-18:
|
||||
* Improve documentation
|
||||
* KH Add command-line options to generate paths or not
|
||||
* Use axsl:attribute rather than xsl:attribute to shut XSLT2 up
|
||||
* Add extra command-line options to pass to the iso_schematron_skeleton
|
||||
|
||||
2006-12-01: iso_svrl.xsl Rick Jelliffe,
|
||||
* update namespace,
|
||||
* update phase handling,
|
||||
* add flag param to process-assert and process-report & @ flag on output
|
||||
|
||||
2001: Conformance1-5.xsl Rick Jelliffe,
|
||||
* Created, using the skeleton code contributed by Oliver Becker
|
||||
-->
|
||||
<!--
|
||||
Derived from Conformance1-5.xsl.
|
||||
|
||||
Copyright (c) 2001, 2006 Rick Jelliffe and Academia Sinica Computing Center, Taiwan
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from
|
||||
the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim
|
||||
that you wrote the original software. If you use this software in a product,
|
||||
an acknowledgment in the product documentation would be appreciated but is
|
||||
not required.
|
||||
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
-->
|
||||
|
||||
<!-- Ideas nabbed from schematrons by Francis N., Miloslav N. and David C. -->
|
||||
|
||||
<!-- The command-line parameters are:
|
||||
phase NMTOKEN | "#ALL" (default) Select the phase for validation
|
||||
allow-foreign "true" | "false" (default) Pass non-Schematron elements and rich markup to the generated stylesheet
|
||||
diagnose= true | false|yes|no Add the diagnostics to the assertion test in reports (yes|no are obsolete)
|
||||
generate-paths=true|false|yes|no generate the @location attribute with XPaths (yes|no are obsolete)
|
||||
sch.exslt.imports semi-colon delimited string of filenames for some EXSLT implementations
|
||||
optimize "visit-no-attributes" Use only when the schema has no attributes as the context nodes
|
||||
generate-fired-rule "true"(default) | "false" Generate fired-rule elements
|
||||
|
||||
-->
|
||||
|
||||
<xsl:stylesheet
|
||||
version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
||||
xmlns:axsl="http://www.w3.org/1999/XSL/TransformAlias"
|
||||
xmlns:schold="http://www.ascc.net/xml/schematron"
|
||||
xmlns:iso="http://purl.oclc.org/dsdl/schematron"
|
||||
xmlns:svrl="http://purl.oclc.org/dsdl/svrl"
|
||||
>
|
||||
|
||||
<!-- Select the import statement and adjust the path as
|
||||
necessary for your system.
|
||||
If not XSLT2 then also remove svrl:active-pattern/@document="{document-uri()}" from process-pattern()
|
||||
-->
|
||||
<!--
|
||||
<xsl:import href="iso_schematron_skeleton_for_saxon.xsl"/>
|
||||
-->
|
||||
|
||||
<xsl:import href="iso_schematron_skeleton_for_xslt1.xsl"/>
|
||||
<!--
|
||||
<xsl:import href="iso_schematron_skeleton.xsl"/>
|
||||
<xsl:import href="skeleton1-5.xsl"/>
|
||||
<xsl:import href="skeleton1-6.xsl"/>
|
||||
-->
|
||||
|
||||
<xsl:param name="diagnose" >true</xsl:param>
|
||||
<!-- phase: the phase to validate. When not supplied by the caller, the
     default is taken from the schema's @defaultPhase (old-namespace schema
     checked first, then the ISO namespace); if neither declares one the
     default is #ALL. -->
<xsl:param name="phase" >
|
||||
<xsl:choose>
|
||||
<!-- Handle Schematron 1.5 and 1.6 phases -->
|
||||
<xsl:when test="//schold:schema/@defaultPhase">
|
||||
<xsl:value-of select="//schold:schema/@defaultPhase"/>
|
||||
</xsl:when>
|
||||
<!-- Handle ISO Schematron phases -->
|
||||
<xsl:when test="//iso:schema/@defaultPhase">
|
||||
<xsl:value-of select="//iso:schema/@defaultPhase"/>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>#ALL</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:param>
|
||||
<xsl:param name="allow-foreign" >false</xsl:param>
|
||||
<xsl:param name="generate-paths" >true</xsl:param>
|
||||
<xsl:param name="generate-fired-rule" >true</xsl:param>
|
||||
<xsl:param name="optimize"/>
|
||||
|
||||
<xsl:param name="output-encoding" ></xsl:param>
|
||||
|
||||
<!-- e.g. saxon file.xml file.xsl "sch.exslt.imports=.../string.xsl;.../math.xsl" -->
|
||||
<xsl:param name="sch.exslt.imports" />
|
||||
|
||||
|
||||
|
||||
<!-- Experimental: If this file called, then must be generating svrl -->
|
||||
<xsl:variable name="svrlTest" select="true()" />
|
||||
|
||||
|
||||
|
||||
<!-- ================================================================ -->
|
||||
|
||||
<!-- process-prolog: writes the axsl:output declaration of the generated
     stylesheet (XML output, with XML declaration, standalone, indented).
     An encoding attribute is added only when the $output-encoding
     parameter is non-empty. -->
<xsl:template name="process-prolog">
|
||||
<axsl:output method="xml" omit-xml-declaration="no" standalone="yes"
|
||||
indent="yes">
|
||||
<xsl:if test=" string-length($output-encoding) > 0">
|
||||
<xsl:attribute name="encoding"><xsl:value-of select=" $output-encoding" /></xsl:attribute>
|
||||
</xsl:if>
|
||||
</axsl:output>
|
||||
|
||||
</xsl:template>
|
||||
|
||||
<!-- Overrides skeleton.xsl -->
|
||||
<xsl:template name="process-root">
|
||||
<xsl:param name="title"/>
|
||||
<xsl:param name="contents" />
|
||||
<xsl:param name="queryBinding" >xslt1</xsl:param>
|
||||
<xsl:param name="schemaVersion" />
|
||||
<xsl:param name="id" />
|
||||
<xsl:param name="version"/>
|
||||
<!-- "Rich" parameters -->
|
||||
<xsl:param name="fpi" />
|
||||
<xsl:param name="icon" />
|
||||
<xsl:param name="lang" />
|
||||
<xsl:param name="see" />
|
||||
<xsl:param name="space" />
|
||||
|
||||
<svrl:schematron-output title="{$title}" schemaVersion="{$schemaVersion}" >
|
||||
<xsl:if test=" string-length( normalize-space( $phase )) > 0 and
|
||||
not( normalize-space( $phase ) = '#ALL') ">
|
||||
<axsl:attribute name="phase">
|
||||
<xsl:value-of select=" $phase " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:if test=" $allow-foreign = 'true'">
|
||||
</xsl:if>
|
||||
<xsl:if test=" $allow-foreign = 'true'">
|
||||
|
||||
<xsl:call-template name='richParms'>
|
||||
<xsl:with-param name="fpi" select="$fpi" />
|
||||
<xsl:with-param name="icon" select="$icon"/>
|
||||
<xsl:with-param name="lang" select="$lang"/>
|
||||
<xsl:with-param name="see" select="$see" />
|
||||
<xsl:with-param name="space" select="$space" />
|
||||
</xsl:call-template>
|
||||
</xsl:if>
|
||||
|
||||
<axsl:comment><axsl:value-of select="$archiveDirParameter"/>  
|
||||
<axsl:value-of select="$archiveNameParameter"/>  
|
||||
<axsl:value-of select="$fileNameParameter"/>  
|
||||
<axsl:value-of select="$fileDirParameter"/></axsl:comment>
|
||||
|
||||
|
||||
<xsl:apply-templates mode="do-schema-p" />
|
||||
<xsl:copy-of select="$contents" />
|
||||
</svrl:schematron-output>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- process-assert: writes an svrl:failed-assert element into the generated
     stylesheet.
     Parameters:
       test         copied into the @test attribute
       diagnostics  passed as $str to the diagnosticsSplit template when
                    $diagnose is 'true' or 'yes'
       id, flag     each emitted as an attribute when non-empty
       role, subject                 forwarded to linkableParms
       fpi, icon, lang, see, space   forwarded to richParms
     A location attribute (computed by the schematron-get-full-path mode) is
     added when $generate-paths is 'true' or 'yes'. The message text is the
     text-mode rendering of the current node's children, wrapped in
     svrl:text. -->
<xsl:template name="process-assert">
|
||||
<xsl:param name="test"/>
|
||||
<xsl:param name="diagnostics" />
|
||||
<xsl:param name="id" />
|
||||
<xsl:param name="flag" />
|
||||
<!-- "Linkable" parameters -->
|
||||
<xsl:param name="role"/>
|
||||
<xsl:param name="subject"/>
|
||||
<!-- "Rich" parameters -->
|
||||
<xsl:param name="fpi" />
|
||||
<xsl:param name="icon" />
|
||||
<xsl:param name="lang" />
|
||||
<xsl:param name="see" />
|
||||
<xsl:param name="space" />
|
||||
<svrl:failed-assert test="{$test}" >
|
||||
<xsl:if test="string-length( $id ) > 0">
|
||||
<axsl:attribute name="id">
|
||||
<xsl:value-of select=" $id " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:if test=" string-length( $flag ) > 0">
|
||||
<axsl:attribute name="flag">
|
||||
<xsl:value-of select=" $flag " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<!-- Process rich attributes. -->
|
||||
<xsl:call-template name="richParms">
|
||||
<xsl:with-param name="fpi" select="$fpi"/>
|
||||
<xsl:with-param name="icon" select="$icon"/>
|
||||
<xsl:with-param name="lang" select="$lang"/>
|
||||
<xsl:with-param name="see" select="$see" />
|
||||
<xsl:with-param name="space" select="$space" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name='linkableParms'>
|
||||
<xsl:with-param name="role" select="$role" />
|
||||
<xsl:with-param name="subject" select="$subject"/>
|
||||
</xsl:call-template>
|
||||
<xsl:if test=" $generate-paths = 'true' or $generate-paths= 'yes' ">
|
||||
<!-- true/false is the new way -->
|
||||
<axsl:attribute name="location">
|
||||
<axsl:apply-templates select="." mode="schematron-get-full-path"/>
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
|
||||
<svrl:text>
|
||||
<xsl:apply-templates mode="text" />
|
||||
|
||||
</svrl:text>
|
||||
<xsl:if test="$diagnose = 'yes' or $diagnose= 'true' ">
|
||||
<!-- true/false is the new way -->
|
||||
<xsl:call-template name="diagnosticsSplit">
|
||||
<xsl:with-param name="str" select="$diagnostics"/>
|
||||
</xsl:call-template>
|
||||
</xsl:if>
|
||||
</svrl:failed-assert>
|
||||
</xsl:template>
|
||||
|
||||
<!-- process-report: writes an svrl:successful-report element into the
     generated stylesheet. It mirrors process-assert.
     Parameters:
       test         copied into the @test attribute
       diagnostics  passed as $str to the diagnosticsSplit template when
                    $diagnose is 'true' or 'yes'
       id, flag     each emitted as an attribute when non-empty
       role, subject                 forwarded to linkableParms
       fpi, icon, lang, see, space   forwarded to richParms
     A location attribute (computed by the schematron-get-full-path mode) is
     added when $generate-paths is 'true' or 'yes'. The message text is the
     text-mode rendering of the current node's children, wrapped in
     svrl:text. -->
<xsl:template name="process-report">
|
||||
<xsl:param name="id"/>
|
||||
<xsl:param name="test"/>
|
||||
<xsl:param name="diagnostics"/>
|
||||
<xsl:param name="flag" />
|
||||
<!-- "Linkable" parameters -->
|
||||
<xsl:param name="role"/>
|
||||
<xsl:param name="subject"/>
|
||||
<!-- "Rich" parameters -->
|
||||
<xsl:param name="fpi" />
|
||||
<xsl:param name="icon" />
|
||||
<xsl:param name="lang" />
|
||||
<xsl:param name="see" />
|
||||
<xsl:param name="space" />
|
||||
<svrl:successful-report test="{$test}" >
|
||||
<xsl:if test=" string-length( $id ) > 0">
|
||||
<axsl:attribute name="id">
|
||||
<xsl:value-of select=" $id " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:if test=" string-length( $flag ) > 0">
|
||||
<axsl:attribute name="flag">
|
||||
<xsl:value-of select=" $flag " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
|
||||
<!-- Process rich attributes. -->
|
||||
<xsl:call-template name="richParms">
|
||||
<xsl:with-param name="fpi" select="$fpi"/>
|
||||
<xsl:with-param name="icon" select="$icon"/>
|
||||
<xsl:with-param name="lang" select="$lang"/>
|
||||
<xsl:with-param name="see" select="$see" />
|
||||
<xsl:with-param name="space" select="$space" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name='linkableParms'>
|
||||
<xsl:with-param name="role" select="$role" />
|
||||
<xsl:with-param name="subject" select="$subject"/>
|
||||
</xsl:call-template>
|
||||
<xsl:if test=" $generate-paths = 'yes' or $generate-paths = 'true' ">
|
||||
<!-- true/false is the new way -->
|
||||
<axsl:attribute name="location">
|
||||
<axsl:apply-templates select="." mode="schematron-get-full-path"/>
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
|
||||
<svrl:text>
|
||||
<xsl:apply-templates mode="text" />
|
||||
|
||||
</svrl:text>
|
||||
<xsl:if test="$diagnose = 'yes' or $diagnose='true' ">
|
||||
<!-- true/false is the new way -->
|
||||
<xsl:call-template name="diagnosticsSplit">
|
||||
<xsl:with-param name="str" select="$diagnostics"/>
|
||||
</xsl:call-template>
|
||||
</xsl:if>
|
||||
</svrl:successful-report>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Overrides skeleton -->
|
||||
<!-- process-dir: when $allow-foreign is 'true' the current node is copied
     verbatim; otherwise its content is rendered through the inline-text
     mode, padded with one space on each side. The $value parameter is
     declared but not used in this override. -->
<xsl:template name="process-dir" >
|
||||
<xsl:param name="value" />
|
||||
<xsl:choose>
|
||||
<xsl:when test=" $allow-foreign = 'true'">
|
||||
<xsl:copy-of select="."/>
|
||||
</xsl:when>
|
||||
|
||||
<xsl:otherwise>
|
||||
<!-- We generate too much whitespace rather than risking concatenation -->
|
||||
<axsl:text> </axsl:text>
|
||||
<xsl:apply-templates mode="inline-text"/>
|
||||
<axsl:text> </axsl:text>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:template>
|
||||
|
||||
<!-- process-diagnostic: writes an svrl:diagnostic-reference element whose
     @diagnostic attribute is $id. The rich parameters are forwarded to
     richParms, then a line break is emitted, followed by the text-mode
     rendering of the current node's children. -->
<xsl:template name="process-diagnostic">
|
||||
<xsl:param name="id"/>
|
||||
<!-- Rich parameters -->
|
||||
<xsl:param name="fpi" />
|
||||
<xsl:param name="icon" />
|
||||
<xsl:param name="lang" />
|
||||
<xsl:param name="see" />
|
||||
<xsl:param name="space" />
|
||||
<svrl:diagnostic-reference diagnostic="{$id}" >
|
||||
|
||||
<xsl:call-template name="richParms">
|
||||
<xsl:with-param name="fpi" select="$fpi"/>
|
||||
<xsl:with-param name="icon" select="$icon"/>
|
||||
<xsl:with-param name="lang" select="$lang"/>
|
||||
<xsl:with-param name="see" select="$see" />
|
||||
<xsl:with-param name="space" select="$space" />
|
||||
</xsl:call-template>
|
||||
<xsl:text>
|
||||
</xsl:text>
|
||||
|
||||
<xsl:apply-templates mode="text"/>
|
||||
|
||||
</svrl:diagnostic-reference>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Overrides skeleton -->
|
||||
<!-- process-emph: when $allow-foreign is 'true' the current node is copied
     verbatim; otherwise its content is rendered through the inline-text
     mode, padded with one space on each side. The $class parameter is
     declared but not used in this override. -->
<xsl:template name="process-emph" >
|
||||
<xsl:param name="class" />
|
||||
<xsl:choose>
|
||||
<xsl:when test=" $allow-foreign = 'true'">
|
||||
<xsl:copy-of select="."/>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<!-- We generate too much whitespace rather than risking concatenation -->
|
||||
<axsl:text> </axsl:text>
|
||||
<xsl:apply-templates mode="inline-text"/>
|
||||
<axsl:text> </axsl:text>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:template>
|
||||
|
||||
<!-- process-rule: writes an svrl:fired-rule element into the generated
     stylesheet, but only when $generate-fired-rule is 'true'.
     Parameters:
       context   copied into the @context attribute
       id, role  each emitted as an attribute when non-empty
       fpi, icon, lang, see, space   forwarded to richParms
       flag, subject                 declared but not used here -->
<xsl:template name="process-rule">
|
||||
<xsl:param name="id"/>
|
||||
<xsl:param name="context"/>
|
||||
<xsl:param name="flag"/>
|
||||
<!-- "Linkable" parameters -->
|
||||
<xsl:param name="role"/>
|
||||
<xsl:param name="subject"/>
|
||||
<!-- "Rich" parameters -->
|
||||
<xsl:param name="fpi" />
|
||||
<xsl:param name="icon" />
|
||||
<xsl:param name="lang" />
|
||||
<xsl:param name="see" />
|
||||
<xsl:param name="space" />
|
||||
<xsl:if test=" $generate-fired-rule = 'true'">
|
||||
<svrl:fired-rule context="{$context}" >
|
||||
<!-- id and role are set with xsl:attribute, which must run before any
     child node is added to svrl:fired-rule. richParms can add
     axsl:attribute child elements, so these two come first; otherwise
     the processor may report an error or silently drop the attributes. -->
<xsl:if test=" string( $id )">
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select=" $id " />
|
||||
</xsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:if test=" string-length( $role ) > 0">
|
||||
<xsl:attribute name="role">
|
||||
<xsl:value-of select=" $role " />
|
||||
</xsl:attribute>
|
||||
</xsl:if>
|
||||
<!-- Process rich attributes. -->
|
||||
<xsl:call-template name="richParms">
|
||||
<xsl:with-param name="fpi" select="$fpi"/>
|
||||
<xsl:with-param name="icon" select="$icon"/>
|
||||
<xsl:with-param name="lang" select="$lang"/>
|
||||
<xsl:with-param name="see" select="$see" />
|
||||
<xsl:with-param name="space" select="$space" />
|
||||
</xsl:call-template>
|
||||
</svrl:fired-rule>
|
||||
</xsl:if>
|
||||
</xsl:template>
|
||||
|
||||
<!-- process-ns: writes an svrl:ns-prefix-in-attribute-values element carrying
     the given namespace $uri and $prefix. -->
<xsl:template name="process-ns">
|
||||
<xsl:param name="prefix"/>
|
||||
<xsl:param name="uri"/>
|
||||
<svrl:ns-prefix-in-attribute-values uri="{$uri}" prefix="{$prefix}" />
|
||||
</xsl:template>
|
||||
|
||||
<!-- process-p: writes an svrl:text element containing the text-mode rendering
     of the current node's children. The icon, class, id and lang parameters
     are declared but not used in the output. -->
<xsl:template name="process-p">
|
||||
<xsl:param name="icon"/>
|
||||
<xsl:param name="class"/>
|
||||
<xsl:param name="id"/>
|
||||
<xsl:param name="lang"/>
|
||||
|
||||
<svrl:text>
|
||||
<xsl:apply-templates mode="text"/>
|
||||
</svrl:text>
|
||||
</xsl:template>
|
||||
|
||||
<!-- process-pattern: writes an svrl:active-pattern element into the generated
     stylesheet.
     Parameters:
       id, name   each emitted as an attribute when non-empty
       is-a       declared but not used here
       fpi, icon, lang, see, space   forwarded to richParms
     After the attributes, the current node's children are processed in the
     do-pattern-p mode and an axsl:apply-templates instruction is written. -->
<xsl:template name="process-pattern">
|
||||
<xsl:param name="name"/>
|
||||
<xsl:param name="id"/>
|
||||
<xsl:param name="is-a"/>
|
||||
|
||||
<!-- "Rich" parameters -->
|
||||
<xsl:param name="fpi" />
|
||||
<xsl:param name="icon" />
|
||||
<xsl:param name="lang" />
|
||||
<xsl:param name="see" />
|
||||
<xsl:param name="space" />
|
||||
<svrl:active-pattern >
|
||||
<xsl:if test=" string( $id )">
|
||||
<axsl:attribute name="id">
|
||||
<xsl:value-of select=" $id " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:if test=" string( $name )">
|
||||
<axsl:attribute name="name">
|
||||
<xsl:value-of select=" $name " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
|
||||
<xsl:call-template name='richParms'>
|
||||
<xsl:with-param name="fpi" select="$fpi"/>
|
||||
<xsl:with-param name="icon" select="$icon"/>
|
||||
<xsl:with-param name="lang" select="$lang"/>
|
||||
<xsl:with-param name="see" select="$see" />
|
||||
<xsl:with-param name="space" select="$space" />
|
||||
</xsl:call-template>
|
||||
|
||||
<!-- ?? report that this screws up iso:title processing -->
|
||||
<xsl:apply-templates mode="do-pattern-p"/>
|
||||
<!-- ?? Seems that this apply-templates is never triggered DP -->
|
||||
<axsl:apply-templates />
|
||||
</svrl:active-pattern>
|
||||
</xsl:template>
|
||||
|
||||
<!-- Overrides skeleton -->
|
||||
<!-- process-message: intentionally empty override; it accepts the pattern
     and role parameters and produces no output. -->
<xsl:template name="process-message" >
|
||||
<xsl:param name="pattern"/>
|
||||
<xsl:param name="role"/>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Overrides skeleton -->
|
||||
<!-- process-span: when $allow-foreign is 'true' the current node is copied
     verbatim; otherwise its content is rendered through the inline-text
     mode, padded with one space on each side. The $class parameter is
     declared but not used in this override. -->
<xsl:template name="process-span" >
|
||||
<xsl:param name="class" />
|
||||
<xsl:choose>
|
||||
<xsl:when test=" $allow-foreign = 'true'">
|
||||
<xsl:copy-of select="."/>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<!-- We generate too much whitespace rather than risking concatenation -->
|
||||
<axsl:text> </axsl:text>
|
||||
<xsl:apply-templates mode="inline-text"/>
|
||||
<axsl:text> </axsl:text>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:template>
|
||||
|
||||
<!-- =========================================================================== -->
|
||||
<!-- processing rich parameters. -->
|
||||
<!-- richParms: writes axsl:attribute instructions for the "rich" Schematron
     attributes. fpi, icon and see are written only when $allow-foreign is
     'true'; xml:space and xml:lang are written whenever the corresponding
     parameter is non-empty. Empty parameters produce no output. -->
<xsl:template name='richParms'>
|
||||
<!-- "Rich" parameters -->
|
||||
<xsl:param name="fpi" />
|
||||
<xsl:param name="icon" />
|
||||
<xsl:param name="lang" />
|
||||
<xsl:param name="see" />
|
||||
<xsl:param name="space" />
|
||||
<!-- Process rich attributes. -->
|
||||
<xsl:if test=" $allow-foreign = 'true'">
|
||||
<xsl:if test="string($fpi)">
|
||||
<axsl:attribute name="fpi">
|
||||
<xsl:value-of select="$fpi"/>
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:if test="string($icon)">
|
||||
<axsl:attribute name="icon">
|
||||
<xsl:value-of select="$icon"/>
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:if test="string($see)">
|
||||
<axsl:attribute name="see">
|
||||
<xsl:value-of select="$see"/>
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
</xsl:if>
|
||||
<xsl:if test="string($space)">
|
||||
<axsl:attribute name="xml:space">
|
||||
<xsl:value-of select="$space"/>
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:if test="string($lang)">
|
||||
<axsl:attribute name="xml:lang">
|
||||
<xsl:value-of select="$lang"/>
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
</xsl:template>
|
||||
|
||||
<!-- processing linkable parameters. -->
|
||||
<!-- linkableParms: writes an axsl:attribute instruction for role when $role
     is non-empty. $subject is accepted but produces no output here. -->
<xsl:template name='linkableParms'>
|
||||
<xsl:param name="role"/>
|
||||
<xsl:param name="subject"/>
|
||||
|
||||
<!-- ISO SVRL has a role attribute to match the Schematron role attribute -->
|
||||
<xsl:if test=" string($role )">
|
||||
<axsl:attribute name="role">
|
||||
<xsl:value-of select=" $role " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<!-- ISO SVRL does not have a subject attribute to match the Schematron subject attribute.
|
||||
Instead, the Schematron subject attribute is folded into the location attribute -->
|
||||
</xsl:template>
|
||||
|
||||
|
||||
</xsl:stylesheet>
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
ISO SCHEMATRON 2010
|
||||
|
||||
XSLT implementation by Rick Jelliffe with assistance from members of the Schematron-love-in mailing list.
|
||||
|
||||
2010-04-21
|
||||
|
||||
Two distributions are available. One is for XSLT1 engines.
|
||||
The other is for XSLT2 engines, such as SAXON 9.
|
||||
|
||||
|
||||
This version of Schematron splits the process into a pipeline of several different XSLT stages.
|
||||
|
||||
1) First, preprocess your Schematron schema with iso_dsdl_include.xsl.
|
||||
This is a macro processor to assemble the schema from various parts.
|
||||
If your schema is not in separate parts, you can skip this stage.
|
||||
This stage also generates error messages for some common XPath syntax problems.
|
||||
|
||||
2) Second, preprocess the output from stage 1 with iso_abstract_expand.xsl.
|
||||
This is a macro processor to convert abstract patterns to real patterns.
|
||||
If your schema does not use abstract patterns, you can skip this
|
||||
stage.
|
||||
|
||||
3) Third, compile the Schematron schema into an XSLT script.
|
||||
This will typically use iso_svrl_for_xslt1.xsl or iso_svrl_for_xslt2.xsl
|
||||
(which in turn invoke iso_schematron_skeleton_for_xslt1.xsl or iso_schematron_skeleton_for_saxon.xsl)
|
||||
However, other "meta-stylesheets" are also in common use; the principle of operation is the same.
|
||||
If your schema uses Schematron phases, supply these as command line/invocation parameters
|
||||
to this process.
|
||||
|
||||
4) Fourth, run the script generated by stage 3 against the document being validated.
|
||||
If you are using the SVRL script, then the output of validation will be an XML document.
|
||||
If your schema uses Schematron parameters, supply these as command line/invocation parameters
|
||||
to this process.
|
||||
|
||||
|
||||
The XSLT2 distribution also features several next generation features,
|
||||
such as validating multiple documents. See the source code for details.
|
||||
|
||||
Schematron assertions can be written in any language, of course; the file
|
||||
sch-messages-en.xhtml contains the diagnostics messages from the XSLT2 skeleton
|
||||
in English, and this can be used as a template to localize the skeleton's
|
||||
error messages. Note that typically programming errors in Schematron are XPath
|
||||
errors, which requires localized messages from the XSLT engine.
|
||||
|
||||
ANT
|
||||
---
|
||||
To give an example of how to process a document, here is a sample ANT task.
|
||||
|
||||
<target name="schematron-compile-test" >
|
||||
|
||||
<!-- expand inclusions -->
|
||||
<xslt basedir="test/schematron"
|
||||
style="iso_dsdl_include.xsl" in="test.sch" out="test1.sch">
|
||||
<classpath>
|
||||
<pathelement location="${lib.dir}/saxon9.jar"/>
|
||||
</classpath>
|
||||
</xslt>
|
||||
|
||||
<!-- expand abstract patterns -->
|
||||
<xslt basedir="test/schematron"
|
||||
style="iso_abstract_expand.xsl" in="test1.sch" out="test2.sch">
|
||||
<classpath>
|
||||
<pathelement location="${lib.dir}/saxon9.jar"/>
|
||||
</classpath>
|
||||
</xslt>
|
||||
|
||||
|
||||
|
||||
<!-- compile it -->
|
||||
<xslt basedir="test/schematron"
|
||||
style="iso_svrl_for_xslt2.xsl" in="test2.sch" out="test.xsl">
|
||||
<classpath>
|
||||
<pathelement location="${lib.dir}/saxon9.jar"/>
|
||||
</classpath>
|
||||
</xslt>
|
||||
|
||||
<!-- validate -->
|
||||
<xslt basedir="test/schematron"
|
||||
style="test.xsl" in="instance.xml" out="instance.svrlt">
|
||||
<classpath>
|
||||
<pathelement location="${lib.dir}/saxon9.jar"/>
|
||||
</classpath>
|
||||
</xslt>
|
||||
</target>
|
||||
440
.venv/lib/python3.7/site-packages/lxml/iterparse.pxi
Normal file
440
.venv/lib/python3.7/site-packages/lxml/iterparse.pxi
Normal file
@@ -0,0 +1,440 @@
|
||||
# iterparse -- event-driven parsing
|
||||
|
||||
DEF __ITERPARSE_CHUNK_SIZE = 32768
|
||||
|
||||
cdef class iterparse:
|
||||
u"""iterparse(self, source, events=("end",), tag=None, \
|
||||
attribute_defaults=False, dtd_validation=False, \
|
||||
load_dtd=False, no_network=True, remove_blank_text=False, \
|
||||
remove_comments=False, remove_pis=False, encoding=None, \
|
||||
html=False, recover=None, huge_tree=False, schema=None)
|
||||
|
||||
Incremental parser.
|
||||
|
||||
Parses XML into a tree and generates tuples (event, element) in a
|
||||
SAX-like fashion. ``event`` is any of 'start', 'end', 'start-ns',
|
||||
'end-ns'.
|
||||
|
||||
For 'start' and 'end', ``element`` is the Element that the parser just
|
||||
found opening or closing. For 'start-ns', it is a tuple (prefix, URI) of
|
||||
a new namespace declaration. For 'end-ns', it is simply None. Note that
|
||||
all start and end events are guaranteed to be properly nested.
|
||||
|
||||
The keyword argument ``events`` specifies a sequence of event type names
|
||||
that should be generated. By default, only 'end' events will be
|
||||
generated.
|
||||
|
||||
The additional ``tag`` argument restricts the 'start' and 'end' events to
|
||||
those elements that match the given tag. The ``tag`` argument can also be
|
||||
a sequence of tags to allow matching more than one tag. By default,
|
||||
events are generated for all elements. Note that the 'start-ns' and
|
||||
'end-ns' events are not impacted by this restriction.
|
||||
|
||||
The other keyword arguments in the constructor are mainly based on the
|
||||
libxml2 parser configuration. A DTD will also be loaded if validation or
|
||||
attribute default values are requested.
|
||||
|
||||
Available boolean keyword arguments:
|
||||
- attribute_defaults: read default attributes from DTD
|
||||
- dtd_validation: validate (if DTD is available)
|
||||
- load_dtd: use DTD for parsing
|
||||
- no_network: prevent network access for related files
|
||||
- remove_blank_text: discard blank text nodes
|
||||
- remove_comments: discard comments
|
||||
- remove_pis: discard processing instructions
|
||||
- strip_cdata: replace CDATA sections by normal text content (default: True)
|
||||
- compact: safe memory for short text content (default: True)
|
||||
- resolve_entities: replace entities by their text value (default: True)
|
||||
- huge_tree: disable security restrictions and support very deep trees
|
||||
and very long text content (only affects libxml2 2.7+)
|
||||
- html: parse input as HTML (default: XML)
|
||||
- recover: try hard to parse through broken input (default: True for HTML,
|
||||
False otherwise)
|
||||
|
||||
Other keyword arguments:
|
||||
- encoding: override the document encoding
|
||||
- schema: an XMLSchema to validate against
|
||||
"""
|
||||
cdef _FeedParser _parser
|
||||
cdef object _tag
|
||||
cdef object _events
|
||||
cdef readonly object root
|
||||
cdef object _source
|
||||
cdef object _filename
|
||||
cdef object _error
|
||||
cdef bint _close_source_after_read
|
||||
|
||||
def __init__(self, source, events=(u"end",), *, tag=None,
             attribute_defaults=False, dtd_validation=False,
             load_dtd=False, no_network=True, remove_blank_text=False,
             compact=True, resolve_entities=True, remove_comments=False,
             remove_pis=False, strip_cdata=True, encoding=None,
             html=False, recover=None, huge_tree=False, collect_ids=True,
             XMLSchema schema=None):
    # Prepare the input: anything without a ``read`` attribute is treated as
    # a path that we open (and later close) ourselves; file-like objects are
    # used as they are and stay open after reading.
    if not hasattr(source, 'read'):
        source = _getFSPathOrObject(source)
        self._filename = source
        if python.IS_PYTHON2:
            source = _encodeFilename(source)
        source = open(source, 'rb')
        self._close_source_after_read = True
    else:
        self._filename = _getFilenameForFile(source)
        self._close_source_after_read = False

    # Unless requested explicitly, recovery follows the HTML flag.
    if recover is None:
        recover = html

    if html:
        # make sure we're not looking for namespaces
        events = [event for event in events
                  if event not in ('start-ns', 'end-ns')]
        parser = HTMLPullParser(
            events,
            tag=tag,
            recover=recover,
            base_url=self._filename,
            encoding=encoding,
            remove_blank_text=remove_blank_text,
            remove_comments=remove_comments,
            remove_pis=remove_pis,
            strip_cdata=strip_cdata,
            no_network=no_network,
            target=None,  # TODO
            schema=schema,
            compact=compact)
    else:
        parser = XMLPullParser(
            events,
            tag=tag,
            recover=recover,
            base_url=self._filename,
            encoding=encoding,
            attribute_defaults=attribute_defaults,
            dtd_validation=dtd_validation,
            load_dtd=load_dtd,
            no_network=no_network,
            schema=schema,
            huge_tree=huge_tree,
            remove_blank_text=remove_blank_text,
            resolve_entities=resolve_entities,
            remove_comments=remove_comments,
            remove_pis=remove_pis,
            strip_cdata=strip_cdata,
            # Forward the caller's choice; this used to be hard-coded to
            # True, which silently ignored the ``collect_ids`` argument.
            collect_ids=collect_ids,
            target=None,  # TODO
            compact=compact)

    # The event iterator is fetched once and then drained by __next__().
    self._events = parser.read_events()
    self._parser = parser
    self._source = source
|
||||
|
||||
@property
def error_log(self):
    """Error log collected by the wrapped parser during the last (or
    currently running) parse.
    """
    parser_log = self._parser.feed_error_log
    return parser_log
|
||||
|
||||
@property
def resolvers(self):
    """Custom resolver registry used by the wrapped parser in the last
    (or currently running) parse.
    """
    registry = self._parser.resolvers
    return registry
|
||||
|
||||
@property
def version(self):
    """Version of the XML parser that does the actual parsing."""
    parser_version = self._parser.version
    return parser_version
|
||||
|
||||
def set_element_class_lookup(self, ElementClassLookup lookup = None):
    u"""set_element_class_lookup(self, lookup = None)

    Choose the lookup scheme that decides which element classes this
    parser generates.

    Calling it with None, or without an argument, resets the scheme.
    """
    # Pure delegation: the wrapped parser keeps the lookup.
    parser = self._parser
    parser.set_element_class_lookup(lookup)
|
||||
|
||||
def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
    u"""makeelement(self, _tag, attrib=None, nsmap=None, **_extra)

    Creates a new element associated with this parser.

    The call is delegated to the wrapped parser.  ``attrib``, ``nsmap``
    and any extra keyword arguments are passed through unchanged, and
    the element created by the parser is returned.
    """
    # Previously the arguments were replaced by None and the result was
    # dropped, so callers always got None back.
    return self._parser.makeelement(
        _tag, attrib=attrib, nsmap=nsmap, **_extra)
|
||||
|
||||
@cython.final
cdef _close_source(self):
    # Forget the input object and, if we opened it ourselves, close it.
    source = self._source
    if source is None:
        return
    # The reference is dropped in every case, before any close() call.
    self._source = None
    if not self._close_source_after_read:
        return
    # Objects without a close() method are simply released.
    close_func = getattr(source, 'close', None)
    if close_func is not None:
        close_func()
|
||||
|
||||
def __iter__(self):
    # The object is its own iterator; events come from __next__().
    return self
|
||||
|
||||
def __next__(self):
    # Hand out events that are already waiting before reading more input.
    try:
        return next(self._events)
    except StopIteration:
        pass
    context = <_SaxParserContext>self._parser._getPushParserContext()
    if self._source is not None:
        finished = False
        while not finished:
            try:
                finished = self._read_more_events(context)
                return next(self._events)
            except StopIteration:
                pass  # this chunk produced no events, keep reading
            except Exception as exc:
                # Remember the failure, but deliver the events collected
                # so far before it gets raised.
                self._error = exc
                self._close_source()
                try:
                    return next(self._events)
                except StopIteration:
                    break
    # Input and event queue are both exhausted at this point.
    if self._error is not None:
        pending = self._error
        self._error = None
        raise pending
    if (context._validator is not None
            and not context._validator.isvalid()):
        _raiseParseError(context._c_ctxt, self._filename,
                         context._error_log)
    # neither a stored error nor a validation failure => regular end
    raise StopIteration
|
||||
|
||||
@cython.final
cdef bint _read_more_events(self, _SaxParserContext context) except -123:
    # Feed one more chunk of input to the parser.
    # Returns True once the input is exhausted and the parser was closed,
    # False if a chunk was fed and more input may follow.
    chunk = self._source.read(__ITERPARSE_CHUNK_SIZE)
    if not isinstance(chunk, bytes):
        self._close_source()
        raise TypeError("reading file objects must return bytes objects")
    if chunk:
        self._parser.feed(chunk)
        return False
    # An empty read marks the end of the input: finish parsing and make
    # sure the source is released even if closing the parser fails.
    try:
        self.root = self._parser.close()
    finally:
        self._close_source()
    return True
|
||||
|
||||
|
||||
# States of the iterwalk.skip_subtree() handling, stored in iterwalk._skip_state.
cdef enum _IterwalkSkipStates:
|
||||
IWSKIP_NEXT_IS_START  # set by _start_node() after it queued 'start-ns'/'start' events
|
||||
IWSKIP_SKIP_NEXT  # set by skip_subtree(); __next__() then does not look for children
|
||||
IWSKIP_CAN_SKIP  # set by _next_event() when it hands out a 'start'/'start-ns' event
|
||||
IWSKIP_CANNOT_SKIP  # initial state, restored in __next__(); skip_subtree() is a no-op
|
||||
|
||||
|
||||
cdef class iterwalk:
    u"""iterwalk(self, element_or_tree, events=("end",), tag=None)

    Walks an already built tree and reports the same kind of events that
    ``iterparse()`` produces while parsing XML data.

    As with ``iterparse()``, ``tag`` accepts either one tag or a sequence
    of tags.

    Right after a 'start' or 'start-ns' event was received, calling
    ``skip_subtree()`` excludes the children and descendants of the
    current element from the iteration.
    """
    cdef _MultiTagMatcher _matcher
    cdef list _node_stack
    cdef list _events
    cdef object _pop_event
    cdef object _include_siblings
    cdef int _index
    cdef int _event_filter
    cdef _IterwalkSkipStates _skip_state

    def __init__(self, element_or_tree, events=(u"end",), tag=None):
        cdef _Element root
        cdef int ns_count
        root = _rootNodeOrRaise(element_or_tree)
        self._event_filter = _buildParseEventFilter(events)
        if tag is None or tag == '*':
            # no tag restriction => no matcher needed
            self._matcher = None
        else:
            self._matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag)
        self._node_stack = []
        self._events = []
        self._pop_event = self._events.pop
        # skip requests are ignored until a start event was handed out
        self._skip_state = IWSKIP_CANNOT_SKIP

        if not self._event_filter:
            # no events requested => nothing to walk
            self._index = -1
            return

        self._index = 0
        if self._matcher is not None and self._event_filter & PARSE_EVENT_FILTER_START:
            self._matcher.cacheTags(root._doc)

        # For an ElementTree, the comments/PIs in front of the root element
        # are reported as well (those behind it follow at the very end).
        if (self._event_filter & (PARSE_EVENT_FILTER_COMMENT | PARSE_EVENT_FILTER_PI)
                and isinstance(element_or_tree, _ElementTree)):
            self._include_siblings = root
            for sibling in reversed(list(root.itersiblings(preceding=True))):
                if self._event_filter & PARSE_EVENT_FILTER_COMMENT and sibling.tag is Comment:
                    self._events.append((u'comment', sibling))
                elif self._event_filter & PARSE_EVENT_FILTER_PI and sibling.tag is PI:
                    self._events.append((u'pi', sibling))

        ns_count = self._start_node(root)
        self._node_stack.append( (root, ns_count) )

    def __iter__(self):
        return self

    def __next__(self):
        cdef xmlNode* c_next
        cdef _Element node
        cdef _Element child
        cdef int ns_count = 0
        # queued events always go out first
        if self._events:
            return self._next_event()
        if self._matcher is not None and self._index >= 0:
            node = self._node_stack[self._index][0]
            self._matcher.cacheTags(node._doc)

        # advance through the tree until at least one event is queued
        while self._index >= 0:
            node = self._node_stack[self._index][0]

            if self._skip_state == IWSKIP_SKIP_NEXT:
                # skip_subtree() was called: do not look at the children
                c_next = NULL
            else:
                c_next = self._process_non_elements(
                    node._doc, _findChildForwards(node._c_node, 0))
            self._skip_state = IWSKIP_CANNOT_SKIP

            while c_next is NULL:
                # no child left here: close this node and try its sibling
                self._index -= 1
                node = self._end_node()
                if self._index < 0:
                    break
                c_next = self._process_non_elements(
                    node._doc, _nextElement(node._c_node))

            if c_next is not NULL:
                child = _elementFactory(node._doc, c_next)
                if self._event_filter & (PARSE_EVENT_FILTER_START |
                                         PARSE_EVENT_FILTER_START_NS):
                    ns_count = self._start_node(child)
                elif self._event_filter & PARSE_EVENT_FILTER_END_NS:
                    ns_count = _countNsDefs(child._c_node)
                self._node_stack.append( (child, ns_count) )
                self._index += 1
            if self._events:
                return self._next_event()

        # tree is done; report what follows the root of an ElementTree (once)
        if self._include_siblings is not None:
            node, self._include_siblings = self._include_siblings, None
            self._process_non_elements(node._doc, _nextElement(node._c_node))
            if self._events:
                return self._next_event()

        raise StopIteration

    @cython.final
    cdef xmlNode* _process_non_elements(self, _Document doc, xmlNode* c_node):
        # Step over comment/PI nodes starting at c_node, queuing the
        # requested events for them; returns the node where this stopped.
        while c_node is not NULL and c_node.type != tree.XML_ELEMENT_NODE:
            if c_node.type == tree.XML_COMMENT_NODE:
                if self._event_filter & PARSE_EVENT_FILTER_COMMENT:
                    self._events.append(
                        (u"comment", _elementFactory(doc, c_node)))
            elif c_node.type == tree.XML_PI_NODE:
                if self._event_filter & PARSE_EVENT_FILTER_PI:
                    self._events.append(
                        (u"pi", _elementFactory(doc, c_node)))
            else:
                break
            c_node = _nextElement(c_node)
        return c_node

    @cython.final
    cdef _next_event(self):
        # Pop the oldest queued event; handing out a start event is what
        # makes skip_subtree() effective.
        if self._skip_state == IWSKIP_NEXT_IS_START:
            if self._events[0][0] in (u'start', u'start-ns'):
                self._skip_state = IWSKIP_CAN_SKIP
        return self._pop_event(0)

    def skip_subtree(self):
        """Do not descend into the subtree of the current element.

        The next event returned is then the 'end' event of the current
        element (if included), without visiting any children or descendants.

        Calling this right after an 'end' or 'end-ns' event does nothing.
        """
        if self._skip_state == IWSKIP_CAN_SKIP:
            self._skip_state = IWSKIP_SKIP_NEXT

    @cython.final
    cdef int _start_node(self, _Element node) except -1:
        # Queue the requested 'start-ns'/'start' events for node and return
        # the namespace count to remember for its 'end-ns' events.
        cdef int ns_count = 0
        if self._event_filter & PARSE_EVENT_FILTER_START_NS:
            ns_count = _appendStartNsEvents(node._c_node, self._events)
            if self._events:
                self._skip_state = IWSKIP_NEXT_IS_START
        elif self._event_filter & PARSE_EVENT_FILTER_END_NS:
            ns_count = _countNsDefs(node._c_node)
        if self._event_filter & PARSE_EVENT_FILTER_START:
            if self._matcher is None or self._matcher.matches(node._c_node):
                self._events.append( (u"start", node) )
                self._skip_state = IWSKIP_NEXT_IS_START
        return ns_count

    @cython.final
    cdef _Element _end_node(self):
        # Take the innermost node off the stack, queue its 'end'/'end-ns'
        # events as requested, and return it.
        cdef _Element node
        cdef int ns_count
        node, ns_count = self._node_stack.pop()
        if self._event_filter & PARSE_EVENT_FILTER_END:
            if self._matcher is None or self._matcher.matches(node._c_node):
                self._events.append( (u"end", node) )
        if self._event_filter & PARSE_EVENT_FILTER_END_NS and ns_count:
            # one shared 'end-ns' tuple, queued once per declaration
            self._events.extend([(u"end-ns", None)] * ns_count)
        return node
|
||||
|
||||
|
||||
cdef int _countNsDefs(xmlNode* c_node):
    # Count the namespace declarations of c_node that have an href set.
    cdef xmlNs* c_decl = c_node.nsDef
    cdef int n_defs = 0
    while c_decl is not NULL:
        if c_decl.href is not NULL:
            n_defs += 1
        c_decl = c_decl.next
    return n_defs
|
||||
|
||||
|
||||
cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1:
    # Append one ("start-ns", (prefix, href)) event to event_list for each
    # namespace declaration of c_node that has an href; returns how many
    # events were appended.
    cdef xmlNs* c_decl = c_node.nsDef
    cdef int n_added = 0
    while c_decl is not NULL:
        if c_decl.href:
            prefix = funicodeOrEmpty(c_decl.prefix)
            uri = funicode(c_decl.href)
            event_list.append( (u"start-ns", (prefix, uri)) )
            n_added += 1
        c_decl = c_decl.next
    return n_added
|
||||
224
.venv/lib/python3.7/site-packages/lxml/lxml.etree.h
Normal file
224
.venv/lib/python3.7/site-packages/lxml/lxml.etree.h
Normal file
@@ -0,0 +1,224 @@
|
||||
/* Generated by Cython 0.29.36 */
|
||||
|
||||
#ifndef __PYX_HAVE__lxml__etree
|
||||
#define __PYX_HAVE__lxml__etree
|
||||
|
||||
#include "Python.h"
|
||||
struct LxmlDocument;
|
||||
struct LxmlElement;
|
||||
struct LxmlElementTree;
|
||||
struct LxmlElementTagMatcher;
|
||||
struct LxmlElementIterator;
|
||||
struct LxmlElementBase;
|
||||
struct LxmlElementClassLookup;
|
||||
struct LxmlFallbackElementClassLookup;
|
||||
|
||||
/* "lxml/etree.pyx":338
|
||||
*
|
||||
* # type of a function that steps from node to node
|
||||
* ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*) # <<<<<<<<<<<<<<
|
||||
*
|
||||
*
|
||||
*/
|
||||
typedef xmlNode *(*_node_to_node_function)(xmlNode *);
|
||||
|
||||
/* "lxml/etree.pyx":354
|
||||
* @cython.final
|
||||
* @cython.freelist(8)
|
||||
* cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: # <<<<<<<<<<<<<<
|
||||
* u"""Internal base class to reference a libxml document.
|
||||
*
|
||||
*/
|
||||
/* Object struct generated for the Cython class _Document (quoted above). */
struct LxmlDocument {
|
||||
PyObject_HEAD
|
||||
struct __pyx_vtabstruct_4lxml_5etree__Document *__pyx_vtab; /* vtable struct generated for _Document */
|
||||
int _ns_counter;
|
||||
PyObject *_prefix_tail;
|
||||
xmlDoc *_c_doc; /* the referenced libxml document */
|
||||
struct __pyx_obj_4lxml_5etree__BaseParser *_parser; /* a _BaseParser object; presumably the parser that built the document - confirm in etree.pyx */
|
||||
};
|
||||
|
||||
/* "lxml/etree.pyx":703
|
||||
*
|
||||
* @cython.no_gc_clear
|
||||
* cdef public class _Element [ type LxmlElementType, object LxmlElement ]: # <<<<<<<<<<<<<<
|
||||
* u"""Element class.
|
||||
*
|
||||
*/
|
||||
/* Object struct generated for the Cython class _Element (quoted above). */
struct LxmlElement {
|
||||
PyObject_HEAD
|
||||
struct LxmlDocument *_doc; /* a _Document object (struct LxmlDocument) */
|
||||
xmlNode *_c_node; /* the C-level node of this element */
|
||||
PyObject *_tag;
|
||||
};
|
||||
|
||||
/* "lxml/etree.pyx":1872
|
||||
*
|
||||
*
|
||||
* cdef public class _ElementTree [ type LxmlElementTreeType, # <<<<<<<<<<<<<<
|
||||
* object LxmlElementTree ]:
|
||||
* cdef _Document _doc
|
||||
*/
|
||||
/* Object struct generated for the Cython class _ElementTree (quoted above). */
struct LxmlElementTree {
|
||||
PyObject_HEAD
|
||||
struct __pyx_vtabstruct_4lxml_5etree__ElementTree *__pyx_vtab; /* vtable struct generated for _ElementTree */
|
||||
struct LxmlDocument *_doc; /* the "cdef _Document _doc" attribute quoted above */
|
||||
struct LxmlElement *_context_node; /* an _Element object; presumably the root the tree wraps - confirm in etree.pyx */
|
||||
};
|
||||
|
||||
/* "lxml/etree.pyx":2618
|
||||
*
|
||||
*
|
||||
* cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, # <<<<<<<<<<<<<<
|
||||
* type LxmlElementTagMatcherType ]:
|
||||
* """
|
||||
*/
|
||||
/* Object struct generated for the Cython class _ElementTagMatcher (quoted above). */
struct LxmlElementTagMatcher {
|
||||
PyObject_HEAD
|
||||
struct __pyx_vtabstruct_4lxml_5etree__ElementTagMatcher *__pyx_vtab; /* vtable struct generated for _ElementTagMatcher */
|
||||
PyObject *_pystrings; /* NOTE(review): presumably keeps the Python strings behind _href/_name alive - confirm */
|
||||
int _node_type;
|
||||
char *_href;
|
||||
char *_name;
|
||||
};
|
||||
|
||||
/* "lxml/etree.pyx":2649
|
||||
* self._name = NULL
|
||||
*
|
||||
* cdef public class _ElementIterator(_ElementTagMatcher) [ # <<<<<<<<<<<<<<
|
||||
* object LxmlElementIterator, type LxmlElementIteratorType ]:
|
||||
* """
|
||||
*/
|
||||
/* Object struct generated for the Cython class _ElementIterator (quoted above). */
struct LxmlElementIterator {
|
||||
struct LxmlElementTagMatcher __pyx_base; /* embedded base class part (_ElementTagMatcher) */
|
||||
struct LxmlElement *_node;
|
||||
_node_to_node_function _next_element; /* function pointer of the node-stepping type declared above */
|
||||
};
|
||||
|
||||
/* "src/lxml/classlookup.pxi":6
|
||||
* # Custom Element classes
|
||||
*
|
||||
* cdef public class ElementBase(_Element) [ type LxmlElementBaseType, # <<<<<<<<<<<<<<
|
||||
* object LxmlElementBase ]:
|
||||
* u"""ElementBase(*children, attrib=None, nsmap=None, **_extra)
|
||||
*/
|
||||
/* Object struct generated for the Cython class ElementBase; it adds no fields of its own. */
struct LxmlElementBase {
|
||||
struct LxmlElement __pyx_base; /* embedded base class part (_Element) */
|
||||
};
|
||||
|
||||
/* "src/lxml/classlookup.pxi":210
|
||||
* # Element class lookup
|
||||
*
|
||||
* ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) # <<<<<<<<<<<<<<
|
||||
*
|
||||
* # class to store element class lookup functions
|
||||
*/
|
||||
typedef PyObject *(*_element_class_lookup_function)(PyObject *, struct LxmlDocument *, xmlNode *);
|
||||
|
||||
/* "src/lxml/classlookup.pxi":213
|
||||
*
|
||||
* # class to store element class lookup functions
|
||||
* cdef public class ElementClassLookup [ type LxmlElementClassLookupType, # <<<<<<<<<<<<<<
|
||||
* object LxmlElementClassLookup ]:
|
||||
* u"""ElementClassLookup(self)
|
||||
*/
|
||||
/* Object struct generated for the Cython class ElementClassLookup (quoted above). */
struct LxmlElementClassLookup {
|
||||
PyObject_HEAD
|
||||
_element_class_lookup_function _lookup_function; /* function pointer of the lookup type declared above */
|
||||
};
|
||||
|
||||
/* "src/lxml/classlookup.pxi":221
|
||||
*
|
||||
*
|
||||
* cdef public class FallbackElementClassLookup(ElementClassLookup) \ # <<<<<<<<<<<<<<
|
||||
* [ type LxmlFallbackElementClassLookupType,
|
||||
* object LxmlFallbackElementClassLookup ]:
|
||||
*/
|
||||
/* Object struct generated for the Cython class FallbackElementClassLookup (quoted above). */
struct LxmlFallbackElementClassLookup {
|
||||
struct LxmlElementClassLookup __pyx_base; /* embedded base class part (ElementClassLookup) */
|
||||
struct __pyx_vtabstruct_4lxml_5etree_FallbackElementClassLookup *__pyx_vtab; /* vtable struct generated for FallbackElementClassLookup */
|
||||
struct LxmlElementClassLookup *fallback; /* an ElementClassLookup object */
|
||||
_element_class_lookup_function _fallback_function; /* NOTE(review): presumably the lookup function of `fallback` - confirm in classlookup.pxi */
|
||||
};
|
||||
|
||||
#ifndef __PYX_HAVE_API__lxml__etree
|
||||
|
||||
#ifndef __PYX_EXTERN_C
|
||||
#ifdef __cplusplus
|
||||
#define __PYX_EXTERN_C extern "C"
|
||||
#else
|
||||
#define __PYX_EXTERN_C extern
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef DL_IMPORT
|
||||
#define DL_IMPORT(_T) _T
|
||||
#endif
|
||||
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlDocumentType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTreeType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTagMatcherType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementIteratorType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementBaseType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementClassLookupType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlFallbackElementClassLookupType;
|
||||
|
||||
__PYX_EXTERN_C struct LxmlElement *deepcopyNodeToDocument(struct LxmlDocument *, xmlNode *);
|
||||
__PYX_EXTERN_C struct LxmlElementTree *elementTreeFactory(struct LxmlElement *);
|
||||
__PYX_EXTERN_C struct LxmlElementTree *newElementTree(struct LxmlElement *, PyObject *);
|
||||
__PYX_EXTERN_C struct LxmlElementTree *adoptExternalDocument(xmlDoc *, PyObject *, int);
|
||||
__PYX_EXTERN_C struct LxmlElement *elementFactory(struct LxmlDocument *, xmlNode *);
|
||||
__PYX_EXTERN_C struct LxmlElement *makeElement(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *);
|
||||
__PYX_EXTERN_C struct LxmlElement *makeSubElement(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *);
|
||||
__PYX_EXTERN_C void setElementClassLookupFunction(_element_class_lookup_function, PyObject *);
|
||||
__PYX_EXTERN_C PyObject *lookupDefaultElementClass(PyObject *, PyObject *, xmlNode *);
|
||||
__PYX_EXTERN_C PyObject *lookupNamespaceElementClass(PyObject *, PyObject *, xmlNode *);
|
||||
__PYX_EXTERN_C PyObject *callLookupFallback(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *);
|
||||
__PYX_EXTERN_C int tagMatches(xmlNode *, const xmlChar *, const xmlChar *);
|
||||
__PYX_EXTERN_C struct LxmlDocument *documentOrRaise(PyObject *);
|
||||
__PYX_EXTERN_C struct LxmlElement *rootNodeOrRaise(PyObject *);
|
||||
__PYX_EXTERN_C int hasText(xmlNode *);
|
||||
__PYX_EXTERN_C int hasTail(xmlNode *);
|
||||
__PYX_EXTERN_C PyObject *textOf(xmlNode *);
|
||||
__PYX_EXTERN_C PyObject *tailOf(xmlNode *);
|
||||
__PYX_EXTERN_C int setNodeText(xmlNode *, PyObject *);
|
||||
__PYX_EXTERN_C int setTailText(xmlNode *, PyObject *);
|
||||
__PYX_EXTERN_C PyObject *attributeValue(xmlNode *, xmlAttr *);
|
||||
__PYX_EXTERN_C PyObject *attributeValueFromNsName(xmlNode *, const xmlChar *, const xmlChar *);
|
||||
__PYX_EXTERN_C PyObject *getAttributeValue(struct LxmlElement *, PyObject *, PyObject *);
|
||||
__PYX_EXTERN_C PyObject *iterattributes(struct LxmlElement *, int);
|
||||
__PYX_EXTERN_C PyObject *collectAttributes(xmlNode *, int);
|
||||
__PYX_EXTERN_C int setAttributeValue(struct LxmlElement *, PyObject *, PyObject *);
|
||||
__PYX_EXTERN_C int delAttribute(struct LxmlElement *, PyObject *);
|
||||
__PYX_EXTERN_C int delAttributeFromNsName(xmlNode *, const xmlChar *, const xmlChar *);
|
||||
__PYX_EXTERN_C int hasChild(xmlNode *);
|
||||
__PYX_EXTERN_C xmlNode *findChild(xmlNode *, Py_ssize_t);
|
||||
__PYX_EXTERN_C xmlNode *findChildForwards(xmlNode *, Py_ssize_t);
|
||||
__PYX_EXTERN_C xmlNode *findChildBackwards(xmlNode *, Py_ssize_t);
|
||||
__PYX_EXTERN_C xmlNode *nextElement(xmlNode *);
|
||||
__PYX_EXTERN_C xmlNode *previousElement(xmlNode *);
|
||||
__PYX_EXTERN_C void appendChild(struct LxmlElement *, struct LxmlElement *);
|
||||
__PYX_EXTERN_C int appendChildToElement(struct LxmlElement *, struct LxmlElement *);
|
||||
__PYX_EXTERN_C PyObject *pyunicode(const xmlChar *);
|
||||
__PYX_EXTERN_C PyObject *utf8(PyObject *);
|
||||
__PYX_EXTERN_C PyObject *getNsTag(PyObject *);
|
||||
__PYX_EXTERN_C PyObject *getNsTagWithEmptyNs(PyObject *);
|
||||
__PYX_EXTERN_C PyObject *namespacedName(xmlNode *);
|
||||
__PYX_EXTERN_C PyObject *namespacedNameFromNsName(const xmlChar *, const xmlChar *);
|
||||
__PYX_EXTERN_C void iteratorStoreNext(struct LxmlElementIterator *, struct LxmlElement *);
|
||||
__PYX_EXTERN_C void initTagMatch(struct LxmlElementTagMatcher *, PyObject *);
|
||||
__PYX_EXTERN_C xmlNs *findOrBuildNodeNsPrefix(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *);
|
||||
|
||||
#endif /* !__PYX_HAVE_API__lxml__etree */
|
||||
|
||||
/* WARNING: the interface of the module init function changed in CPython 3.5. */
|
||||
/* It now returns a PyModuleDef instance instead of a PyModule instance. */
|
||||
|
||||
#if PY_MAJOR_VERSION < 3
|
||||
PyMODINIT_FUNC initetree(void);
|
||||
#else
|
||||
PyMODINIT_FUNC PyInit_etree(void);
|
||||
#endif
|
||||
|
||||
#endif /* !__PYX_HAVE__lxml__etree */
|
||||
219
.venv/lib/python3.7/site-packages/lxml/lxml.etree_api.h
Normal file
219
.venv/lib/python3.7/site-packages/lxml/lxml.etree_api.h
Normal file
@@ -0,0 +1,219 @@
|
||||
/* Generated by Cython 0.29.36 */
|
||||
|
||||
#ifndef __PYX_HAVE_API__lxml__etree
|
||||
#define __PYX_HAVE_API__lxml__etree
|
||||
#ifdef __MINGW64__
|
||||
#define MS_WIN64
|
||||
#endif
|
||||
#include "Python.h"
|
||||
#include "lxml.etree.h"
|
||||
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument)(struct LxmlDocument *, xmlNode *) = 0;
|
||||
#define deepcopyNodeToDocument __pyx_api_f_4lxml_5etree_deepcopyNodeToDocument
|
||||
static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_elementTreeFactory)(struct LxmlElement *) = 0;
|
||||
#define elementTreeFactory __pyx_api_f_4lxml_5etree_elementTreeFactory
|
||||
static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_newElementTree)(struct LxmlElement *, PyObject *) = 0;
|
||||
#define newElementTree __pyx_api_f_4lxml_5etree_newElementTree
|
||||
static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_adoptExternalDocument)(xmlDoc *, PyObject *, int) = 0;
|
||||
#define adoptExternalDocument __pyx_api_f_4lxml_5etree_adoptExternalDocument
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_elementFactory)(struct LxmlDocument *, xmlNode *) = 0;
|
||||
#define elementFactory __pyx_api_f_4lxml_5etree_elementFactory
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeElement)(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0;
|
||||
#define makeElement __pyx_api_f_4lxml_5etree_makeElement
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeSubElement)(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0;
|
||||
#define makeSubElement __pyx_api_f_4lxml_5etree_makeSubElement
|
||||
static void (*__pyx_api_f_4lxml_5etree_setElementClassLookupFunction)(_element_class_lookup_function, PyObject *) = 0;
|
||||
#define setElementClassLookupFunction __pyx_api_f_4lxml_5etree_setElementClassLookupFunction
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_lookupDefaultElementClass)(PyObject *, PyObject *, xmlNode *) = 0;
|
||||
#define lookupDefaultElementClass __pyx_api_f_4lxml_5etree_lookupDefaultElementClass
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass)(PyObject *, PyObject *, xmlNode *) = 0;
|
||||
#define lookupNamespaceElementClass __pyx_api_f_4lxml_5etree_lookupNamespaceElementClass
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_callLookupFallback)(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *) = 0;
|
||||
#define callLookupFallback __pyx_api_f_4lxml_5etree_callLookupFallback
|
||||
static int (*__pyx_api_f_4lxml_5etree_tagMatches)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
||||
#define tagMatches __pyx_api_f_4lxml_5etree_tagMatches
|
||||
static struct LxmlDocument *(*__pyx_api_f_4lxml_5etree_documentOrRaise)(PyObject *) = 0;
|
||||
#define documentOrRaise __pyx_api_f_4lxml_5etree_documentOrRaise
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_rootNodeOrRaise)(PyObject *) = 0;
|
||||
#define rootNodeOrRaise __pyx_api_f_4lxml_5etree_rootNodeOrRaise
|
||||
static int (*__pyx_api_f_4lxml_5etree_hasText)(xmlNode *) = 0;
|
||||
#define hasText __pyx_api_f_4lxml_5etree_hasText
|
||||
static int (*__pyx_api_f_4lxml_5etree_hasTail)(xmlNode *) = 0;
|
||||
#define hasTail __pyx_api_f_4lxml_5etree_hasTail
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_textOf)(xmlNode *) = 0;
|
||||
#define textOf __pyx_api_f_4lxml_5etree_textOf
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_tailOf)(xmlNode *) = 0;
|
||||
#define tailOf __pyx_api_f_4lxml_5etree_tailOf
|
||||
static int (*__pyx_api_f_4lxml_5etree_setNodeText)(xmlNode *, PyObject *) = 0;
|
||||
#define setNodeText __pyx_api_f_4lxml_5etree_setNodeText
|
||||
static int (*__pyx_api_f_4lxml_5etree_setTailText)(xmlNode *, PyObject *) = 0;
|
||||
#define setTailText __pyx_api_f_4lxml_5etree_setTailText
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValue)(xmlNode *, xmlAttr *) = 0;
|
||||
#define attributeValue __pyx_api_f_4lxml_5etree_attributeValue
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValueFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
||||
#define attributeValueFromNsName __pyx_api_f_4lxml_5etree_attributeValueFromNsName
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_getAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0;
|
||||
#define getAttributeValue __pyx_api_f_4lxml_5etree_getAttributeValue
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_iterattributes)(struct LxmlElement *, int) = 0;
|
||||
#define iterattributes __pyx_api_f_4lxml_5etree_iterattributes
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_collectAttributes)(xmlNode *, int) = 0;
|
||||
#define collectAttributes __pyx_api_f_4lxml_5etree_collectAttributes
|
||||
static int (*__pyx_api_f_4lxml_5etree_setAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0;
|
||||
#define setAttributeValue __pyx_api_f_4lxml_5etree_setAttributeValue
|
||||
static int (*__pyx_api_f_4lxml_5etree_delAttribute)(struct LxmlElement *, PyObject *) = 0;
|
||||
#define delAttribute __pyx_api_f_4lxml_5etree_delAttribute
|
||||
static int (*__pyx_api_f_4lxml_5etree_delAttributeFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
||||
#define delAttributeFromNsName __pyx_api_f_4lxml_5etree_delAttributeFromNsName
|
||||
static int (*__pyx_api_f_4lxml_5etree_hasChild)(xmlNode *) = 0;
|
||||
#define hasChild __pyx_api_f_4lxml_5etree_hasChild
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_findChild)(xmlNode *, Py_ssize_t) = 0;
|
||||
#define findChild __pyx_api_f_4lxml_5etree_findChild
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildForwards)(xmlNode *, Py_ssize_t) = 0;
|
||||
#define findChildForwards __pyx_api_f_4lxml_5etree_findChildForwards
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildBackwards)(xmlNode *, Py_ssize_t) = 0;
|
||||
#define findChildBackwards __pyx_api_f_4lxml_5etree_findChildBackwards
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_nextElement)(xmlNode *) = 0;
|
||||
#define nextElement __pyx_api_f_4lxml_5etree_nextElement
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_previousElement)(xmlNode *) = 0;
|
||||
#define previousElement __pyx_api_f_4lxml_5etree_previousElement
|
||||
static void (*__pyx_api_f_4lxml_5etree_appendChild)(struct LxmlElement *, struct LxmlElement *) = 0;
|
||||
#define appendChild __pyx_api_f_4lxml_5etree_appendChild
|
||||
static int (*__pyx_api_f_4lxml_5etree_appendChildToElement)(struct LxmlElement *, struct LxmlElement *) = 0;
|
||||
#define appendChildToElement __pyx_api_f_4lxml_5etree_appendChildToElement
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_pyunicode)(const xmlChar *) = 0;
|
||||
#define pyunicode __pyx_api_f_4lxml_5etree_pyunicode
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_utf8)(PyObject *) = 0;
|
||||
#define utf8 __pyx_api_f_4lxml_5etree_utf8
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTag)(PyObject *) = 0;
|
||||
#define getNsTag __pyx_api_f_4lxml_5etree_getNsTag
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs)(PyObject *) = 0;
|
||||
#define getNsTagWithEmptyNs __pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedName)(xmlNode *) = 0;
|
||||
#define namespacedName __pyx_api_f_4lxml_5etree_namespacedName
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedNameFromNsName)(const xmlChar *, const xmlChar *) = 0;
|
||||
#define namespacedNameFromNsName __pyx_api_f_4lxml_5etree_namespacedNameFromNsName
|
||||
static void (*__pyx_api_f_4lxml_5etree_iteratorStoreNext)(struct LxmlElementIterator *, struct LxmlElement *) = 0;
|
||||
#define iteratorStoreNext __pyx_api_f_4lxml_5etree_iteratorStoreNext
|
||||
static void (*__pyx_api_f_4lxml_5etree_initTagMatch)(struct LxmlElementTagMatcher *, PyObject *) = 0;
|
||||
#define initTagMatch __pyx_api_f_4lxml_5etree_initTagMatch
|
||||
static xmlNs *(*__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix)(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
||||
#define findOrBuildNodeNsPrefix __pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix
|
||||
#if !defined(__Pyx_PyIdentifier_FromString)
|
||||
#if PY_MAJOR_VERSION < 3
|
||||
#define __Pyx_PyIdentifier_FromString(s) PyString_FromString(s)
|
||||
#else
|
||||
#define __Pyx_PyIdentifier_FromString(s) PyUnicode_FromString(s)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef __PYX_HAVE_RT_ImportFunction_0_29_36
|
||||
#define __PYX_HAVE_RT_ImportFunction_0_29_36
|
||||
/* Look up `funcname` in the module's `__pyx_capi__` dict, check that the
 * stored signature equals `sig`, and store the C function pointer in `*f`.
 * Returns 0 on success, -1 on failure. */
static int __Pyx_ImportFunction_0_29_36(PyObject *module, const char *funcname, void (**f)(void), const char *sig) {
    int status = -1;
    PyObject *capi_dict = 0;
    PyObject *entry = 0;  /* borrowed from capi_dict */
    /* converts the stored data pointer into a function pointer */
    union {
        void (*fp)(void);
        void *p;
    } conv;
    capi_dict = PyObject_GetAttrString(module, (char *)"__pyx_capi__");
    if (!capi_dict)
        goto done;
    entry = PyDict_GetItemString(capi_dict, funcname);
    if (!entry) {
        PyErr_Format(PyExc_ImportError,
            "%.200s does not export expected C function %.200s",
                PyModule_GetName(module), funcname);
        goto done;
    }
#if PY_VERSION_HEX >= 0x02070000
    /* capsule API: the capsule name carries the signature */
    if (!PyCapsule_IsValid(entry, sig)) {
        PyErr_Format(PyExc_TypeError,
            "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
             PyModule_GetName(module), funcname, sig, PyCapsule_GetName(entry));
        goto done;
    }
    conv.p = PyCapsule_GetPointer(entry, sig);
#else
    /* CObject API: the description carries the signature, compared by hand */
    {
        const char *desc, *lhs, *rhs;
        desc = (const char *)PyCObject_GetDesc(entry);
        if (!desc)
            goto done;
        lhs = desc;
        rhs = sig;
        while (*lhs != '\0' && *lhs == *rhs) {
            lhs++;
            rhs++;
        }
        if (*lhs != *rhs) {
            PyErr_Format(PyExc_TypeError,
                "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
                 PyModule_GetName(module), funcname, sig, desc);
            goto done;
        }
        conv.p = PyCObject_AsVoidPtr(entry);
    }
#endif
    *f = conv.fp;
    if (!(*f))
        goto done;
    status = 0;
done:
    /* single exit: drop the dict reference on success and failure alike */
    Py_XDECREF(capi_dict);
    return status;
}
|
||||
#endif
|
||||
|
||||
|
||||
static int import_lxml__etree(void) {
|
||||
PyObject *module = 0;
|
||||
module = PyImport_ImportModule("lxml.etree");
|
||||
if (!module) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "deepcopyNodeToDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "elementTreeFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementTreeFactory, "struct LxmlElementTree *(struct LxmlElement *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "newElementTree", (void (**)(void))&__pyx_api_f_4lxml_5etree_newElementTree, "struct LxmlElementTree *(struct LxmlElement *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "adoptExternalDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_adoptExternalDocument, "struct LxmlElementTree *(xmlDoc *, PyObject *, int)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "elementFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementFactory, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "makeElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeElement, "struct LxmlElement *(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "makeSubElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeSubElement, "struct LxmlElement *(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "setElementClassLookupFunction", (void (**)(void))&__pyx_api_f_4lxml_5etree_setElementClassLookupFunction, "void (_element_class_lookup_function, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "lookupDefaultElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupDefaultElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "lookupNamespaceElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "callLookupFallback", (void (**)(void))&__pyx_api_f_4lxml_5etree_callLookupFallback, "PyObject *(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "tagMatches", (void (**)(void))&__pyx_api_f_4lxml_5etree_tagMatches, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "documentOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_documentOrRaise, "struct LxmlDocument *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "rootNodeOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_rootNodeOrRaise, "struct LxmlElement *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "hasText", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasText, "int (xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "hasTail", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasTail, "int (xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "textOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_textOf, "PyObject *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "tailOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_tailOf, "PyObject *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "setNodeText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setNodeText, "int (xmlNode *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "setTailText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setTailText, "int (xmlNode *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "attributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValue, "PyObject *(xmlNode *, xmlAttr *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "attributeValueFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValueFromNsName, "PyObject *(xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "getAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_getAttributeValue, "PyObject *(struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "iterattributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_iterattributes, "PyObject *(struct LxmlElement *, int)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "collectAttributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_collectAttributes, "PyObject *(xmlNode *, int)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "setAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_setAttributeValue, "int (struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "delAttribute", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttribute, "int (struct LxmlElement *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "delAttributeFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttributeFromNsName, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "hasChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasChild, "int (xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "findChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChild, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "findChildForwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildForwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "findChildBackwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildBackwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "nextElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_nextElement, "xmlNode *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "previousElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_previousElement, "xmlNode *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "appendChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChild, "void (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "appendChildToElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChildToElement, "int (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "pyunicode", (void (**)(void))&__pyx_api_f_4lxml_5etree_pyunicode, "PyObject *(const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "utf8", (void (**)(void))&__pyx_api_f_4lxml_5etree_utf8, "PyObject *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "getNsTag", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTag, "PyObject *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "getNsTagWithEmptyNs", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs, "PyObject *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "namespacedName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedName, "PyObject *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "namespacedNameFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedNameFromNsName, "PyObject *(const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "iteratorStoreNext", (void (**)(void))&__pyx_api_f_4lxml_5etree_iteratorStoreNext, "void (struct LxmlElementIterator *, struct LxmlElement *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "initTagMatch", (void (**)(void))&__pyx_api_f_4lxml_5etree_initTagMatch, "void (struct LxmlElementTagMatcher *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_0_29_36(module, "findOrBuildNodeNsPrefix", (void (**)(void))&__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix, "xmlNs *(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
Py_DECREF(module); module = 0;
|
||||
return 0;
|
||||
bad:
|
||||
Py_XDECREF(module);
|
||||
return -1;
|
||||
}
|
||||
|
||||
#endif /* !__PYX_HAVE_API__lxml__etree */
|
||||
281
.venv/lib/python3.7/site-packages/lxml/nsclasses.pxi
Normal file
281
.venv/lib/python3.7/site-packages/lxml/nsclasses.pxi
Normal file
@@ -0,0 +1,281 @@
|
||||
# module-level API for namespace implementations
|
||||
|
||||
cdef class LxmlRegistryError(LxmlError):
    """Common base class for errors raised by lxml registries.
    """
|
||||
|
||||
cdef class NamespaceRegistryError(LxmlRegistryError):
    """Raised when registering a namespace extension fails.
    """
|
||||
|
||||
|
||||
@cython.internal
cdef class _NamespaceRegistry:
    u"Dictionary-like namespace registry"
    # namespace URI exactly as passed to the constructor (may be None)
    cdef object _ns_uri
    # result of _utf8(ns_uri), or None when ns_uri is None
    cdef bytes _ns_uri_utf
    # registered items, keyed by _utf8(name) or by None
    cdef dict _entries
    # C string obtained from _ns_uri_utf via _cstr(), or NULL
    cdef char* _c_ns_uri_utf

    def __cinit__(self, ns_uri):
        self._ns_uri = ns_uri
        if ns_uri is not None:
            self._ns_uri_utf = _utf8(ns_uri)
            self._c_ns_uri_utf = _cstr(self._ns_uri_utf)
        else:
            self._ns_uri_utf = None
            self._c_ns_uri_utf = NULL
        self._entries = {}

    def update(self, class_dict_iterable):
        u"""update(self, class_dict_iterable)

        Forgivingly update the registry.

        ``class_dict_iterable`` is either an object with an ``items()``
        method (e.g. a dict) or any iterable of (name, value) pairs.

        Pairs whose value is not callable, or whose name starts with '_',
        are skipped without an error.  That makes it possible to register
        straight from vars(), globals() etc. at module or class level."""
        if hasattr(class_dict_iterable, u'items'):
            pairs = class_dict_iterable.items()
        else:
            pairs = class_dict_iterable
        for name, item in pairs:
            is_public = name is None or name[:1] != '_'
            if is_public and callable(item):
                self[name] = item

    def __getitem__(self, name):
        # None is a valid key and is looked up unencoded
        key = None if name is None else _utf8(name)
        return self._get(key)

    def __delitem__(self, name):
        key = None if name is None else _utf8(name)
        del self._entries[key]

    cdef object _get(self, object name):
        # Look up an already encoded key; KeyError if it is not registered.
        cdef python.PyObject* c_entry = python.PyDict_GetItem(self._entries, name)
        if c_entry is not NULL:
            return <object>c_entry
        raise KeyError(u"Name not registered.")

    cdef object _getForString(self, char* name):
        # Same as _get(), but takes the key as a C string.
        cdef python.PyObject* c_entry = python.PyDict_GetItem(self._entries, name)
        if c_entry is not NULL:
            return <object>c_entry
        raise KeyError(u"Name not registered.")

    def __iter__(self):
        return iter(self._entries)

    def items(self):
        return list(self._entries.items())

    def iteritems(self):
        return iter(self._entries.items())

    def clear(self):
        self._entries.clear()

    def __call__(self, obj):
        # Decorator support.  Two spellings are accepted:
        #
        #     ns = lookup.get_namespace("...")
        #
        #     @ns('abc')                         # explicit name (or None)
        #     class element(ElementBase): pass
        #
        #     @ns                                # name taken from __name__
        #     class elementname(ElementBase): pass
        if obj is not None and not python._isString(obj):
            # plain @ns decorator: register under the object's own name
            self[obj.__name__] = obj
            return obj
        # @ns(None) or @ns('tag'): return the actual decorator
        return partial(self.__deco, obj)

    def __deco(self, name, obj):
        self[name] = obj
        return obj
|
||||
|
||||
|
||||
@cython.final
@cython.internal
cdef class _ClassNamespaceRegistry(_NamespaceRegistry):
    u"Dictionary-like registry for namespace implementation classes"

    def __setitem__(self, name, item):
        # Only classes derived from ElementBase may be registered.
        if not (isinstance(item, type) and issubclass(item, ElementBase)):
            raise NamespaceRegistryError(
                u"Registered element classes must be subtypes of ElementBase")
        # None is stored as-is, any other name in its _utf8() form
        key = None if name is None else _utf8(name)
        self._entries[key] = item

    def __repr__(self):
        return u"Namespace(%r)" % self._ns_uri
|
||||
|
||||
|
||||
cdef class ElementNamespaceClassLookup(FallbackElementClassLookup):
    u"""ElementNamespaceClassLookup(self, fallback=None)

    Element class lookup scheme that picks the Element class from
    per-namespace registries.

    Usage:

    >>> lookup = ElementNamespaceClassLookup()
    >>> ns_elements = lookup.get_namespace("http://schema.org/Movie")

    >>> @ns_elements
    ... class movie(ElementBase):
    ...     "Element implementation for 'movie' tag (using class name) in schema namespace."

    >>> @ns_elements("movie")
    ... class MovieElement(ElementBase):
    ...     "Element implementation for 'movie' tag (explicit tag name) in schema namespace."
    """
    # maps _utf8(namespace URI), or None for the empty namespace, to its registry
    cdef dict _namespace_registries

    def __cinit__(self):
        self._namespace_registries = {}

    def __init__(self, ElementClassLookup fallback=None):
        FallbackElementClassLookup.__init__(self, fallback)
        self._lookup_function = _find_nselement_class

    def get_namespace(self, ns_uri):
        u"""get_namespace(self, ns_uri)

        Return the namespace registry that belongs to the given URI.
        Use None to address the empty namespace.

        A registry is created on first request for a URI."""
        ns_utf = _utf8(ns_uri) if ns_uri else None
        registry = self._namespace_registries.get(ns_utf)
        if registry is None:
            registry = _ClassNamespaceRegistry(ns_uri)
            self._namespace_registries[ns_utf] = registry
        return registry
|
||||
|
||||
cdef object _find_nselement_class(state, _Document doc, xmlNode* c_node):
    # Lookup function installed by ElementNamespaceClassLookup.
    #
    # Resolution order for element nodes:
    #   1. registry of the node's namespace (key None if it has none)
    #   2. inside that registry: entry for the tag name, else entry for None
    #   3. the fallback lookup
    # Non-element nodes go straight to the fallback; a None state uses the
    # default element class lookup.
    cdef python.PyObject* c_registry
    cdef python.PyObject* c_class
    cdef ElementNamespaceClassLookup lookup
    cdef _NamespaceRegistry registry
    if state is None:
        return _lookupDefaultElementClass(None, doc, c_node)

    lookup = <ElementNamespaceClassLookup>state
    if c_node.type != tree.XML_ELEMENT_NODE:
        return _callLookupFallback(lookup, doc, c_node)

    c_namespace_utf = _getNs(c_node)
    if c_namespace_utf is NULL:
        c_registry = python.PyDict_GetItem(
            lookup._namespace_registries, None)
    else:
        c_registry = python.PyDict_GetItem(
            lookup._namespace_registries, <unsigned char*>c_namespace_utf)
    if c_registry is NULL:
        return _callLookupFallback(lookup, doc, c_node)

    registry = <_NamespaceRegistry>c_registry
    classes = registry._entries

    c_class = NULL
    if c_node.name is not NULL:
        c_class = python.PyDict_GetItem(
            classes, <unsigned char*>c_node.name)
    if c_class is NULL:
        # no class for this tag name: try the registry's None entry
        c_class = python.PyDict_GetItem(classes, None)
    if c_class is NULL:
        return _callLookupFallback(lookup, doc, c_node)
    return <object>c_class
|
||||
|
||||
|
||||
################################################################################
|
||||
# XPath extension functions
|
||||
|
||||
cdef dict __FUNCTION_NAMESPACE_REGISTRIES
|
||||
__FUNCTION_NAMESPACE_REGISTRIES = {}
|
||||
|
||||
def FunctionNamespace(ns_uri):
    u"""FunctionNamespace(ns_uri)

    Return the function namespace object registered for the given URI,
    creating it on first use.  A function namespace only accepts
    extension functions.

    Usage:

    >>> ns_functions = FunctionNamespace("http://schema.org/Movie")

    >>> @ns_functions  # uses function name
    ... def add2(x):
    ...     return x + 2

    >>> @ns_functions("add3")  # uses explicit name
    ... def add_three(x):
    ...     return x + 3
    """
    # falsy URIs (None, empty string) share the None key
    if ns_uri:
        ns_utf = _utf8(ns_uri)
    else:
        ns_utf = None
    registry = __FUNCTION_NAMESPACE_REGISTRIES.get(ns_utf)
    if registry is None:
        registry = _XPathFunctionNamespaceRegistry(ns_uri)
        __FUNCTION_NAMESPACE_REGISTRIES[ns_utf] = registry
    return registry
|
||||
|
||||
@cython.internal
cdef class _FunctionNamespaceRegistry(_NamespaceRegistry):
    # Registry variant that stores callables under non-empty names.

    def __setitem__(self, name, item):
        if not callable(item):
            raise NamespaceRegistryError(
                u"Registered functions must be callable.")
        if not name:
            raise ValueError(
                u"extensions must have non empty names")
        key = _utf8(name)
        self._entries[key] = item

    def __repr__(self):
        return u"FunctionNamespace(%r)" % self._ns_uri
|
||||
|
||||
@cython.final
@cython.internal
cdef class _XPathFunctionNamespaceRegistry(_FunctionNamespaceRegistry):
    # prefix as set by the user, None when no prefix is configured
    cdef object _prefix
    # _utf8() form of _prefix, None when no prefix is configured
    cdef bytes _prefix_utf

    property prefix:
        u"Namespace prefix for extension functions."
        def __del__(self):
            # deleting the prefix means "no prefix configured"
            self._prefix_utf = None
            self._prefix = None

        def __get__(self):
            # an unset prefix reads back as the empty string
            return '' if self._prefix is None else self._prefix

        def __set__(self, prefix):
            if prefix == '':
                # the empty prefix is stored as None
                prefix = None
            if prefix is None:
                self._prefix_utf = None
            else:
                self._prefix_utf = _utf8(prefix)
            self._prefix = prefix
|
||||
|
||||
cdef list _find_all_extension_prefixes():
    u"Internal lookup function to find all function prefixes for XSLT/XPath."
    cdef _XPathFunctionNamespaceRegistry registry
    cdef list ns_prefixes = []
    for registry in __FUNCTION_NAMESPACE_REGISTRIES.itervalues():
        # only registries that have both a prefix and a namespace URI count
        if registry._prefix_utf is None or registry._ns_uri_utf is None:
            continue
        ns_prefixes.append(
            (registry._prefix_utf, registry._ns_uri_utf))
    return ns_prefixes
|
||||
Binary file not shown.
2183
.venv/lib/python3.7/site-packages/lxml/objectify.pyx
Normal file
2183
.venv/lib/python3.7/site-packages/lxml/objectify.pyx
Normal file
File diff suppressed because it is too large
Load Diff
332
.venv/lib/python3.7/site-packages/lxml/objectpath.pxi
Normal file
332
.venv/lib/python3.7/site-packages/lxml/objectpath.pxi
Normal file
@@ -0,0 +1,332 @@
|
||||
################################################################################
|
||||
# ObjectPath
|
||||
|
||||
# One segment of a compiled object path.
ctypedef struct _ObjectPath:
    # namespace URI of the segment; NULL means "keep the parent namespace"
    const_xmlChar* href
    # tag name of the segment; NULL for the relative root segment
    const_xmlChar* name
    # sibling index of the segment (negative values search from the end)
    Py_ssize_t index
|
||||
|
||||
|
||||
cdef object _NO_DEFAULT = object()
|
||||
|
||||
|
||||
cdef class ObjectPath:
    u"""ObjectPath(path)
    Immutable object that represents a compiled object path.

    Example for a path: 'root.child[1].{other}child[25]'
    """
    # bound __call__ of this object, exposed under the name 'find'
    cdef readonly object find
    # parsed (ns, name, index) tuples; keeps the objects alive that the
    # _c_path array was built from
    cdef list _path
    # textual form of the path, returned by __str__
    cdef object _path_str
    # C array of path segments built from _path, NULL until __init__ ran
    cdef _ObjectPath* _c_path
    # number of segments in _c_path
    cdef Py_ssize_t _path_len

    def __init__(self, path):
        cdef list parsed_path
        cdef _ObjectPath* c_new_path
        # Parse and build everything into locals first, so that a failure
        # leaves the object exactly as it was before the call.
        if python._isString(path):
            parsed_path = _parse_object_path_string(path)
            path_str = path
        else:
            parsed_path = _parse_object_path_list(path)
            path_str = u'.'.join(path)
        c_new_path = _build_object_path_segments(parsed_path)

        # __init__() may be called more than once on an extension type:
        # release the segment array of a previous call instead of leaking it.
        if self._c_path is not NULL:
            python.lxml_free(self._c_path)
        self._c_path = c_new_path
        self._path = parsed_path
        self._path_str = path_str
        self._path_len = len(parsed_path)
        self.find = self.__call__

    def __dealloc__(self):
        if self._c_path is not NULL:
            python.lxml_free(self._c_path)

    def __str__(self):
        return self._path_str

    def __call__(self, _Element root not None, *_default):
        u"""Follow the attribute path in the object structure and return the
        target attribute value.

        If it is not found, either returns a default value (if one was passed
        as second argument) or raises AttributeError.
        """
        if _default:
            if len(_default) > 1:
                raise TypeError, u"invalid number of arguments: needs one or two"
            default = _default[0]
        else:
            default = _NO_DEFAULT
        # __init__() may not have run (e.g. ObjectPath.__new__(ObjectPath));
        # the path helpers dereference the segment array unconditionally.
        if self._c_path is NULL:
            raise ValueError, u"ObjectPath is not initialised"
        return _find_object_path(root, self._c_path, self._path_len, default)

    def hasattr(self, _Element root not None):
        u"""hasattr(self, root)

        Return True if the path can be resolved below 'root', False if the
        lookup raises AttributeError.
        """
        if self._c_path is NULL:
            raise ValueError, u"ObjectPath is not initialised"
        try:
            _find_object_path(root, self._c_path, self._path_len, _NO_DEFAULT)
        except AttributeError:
            return False
        return True

    def setattr(self, _Element root not None, value):
        u"""setattr(self, root, value)

        Set the value of the target element in a subtree.

        If any of the children on the path does not exist, it is created.
        """
        if self._c_path is NULL:
            raise ValueError, u"ObjectPath is not initialised"
        _create_object_path(root, self._c_path, self._path_len, 1, value)

    def addattr(self, _Element root not None, value):
        u"""addattr(self, root, value)

        Append a value to the target element in a subtree.

        If any of the children on the path does not exist, it is created.
        """
        if self._c_path is NULL:
            raise ValueError, u"ObjectPath is not initialised"
        _create_object_path(root, self._c_path, self._path_len, 0, value)
|
||||
|
||||
|
||||
cdef object __MATCH_PATH_SEGMENT = re.compile(
|
||||
ur"(\.?)\s*(?:\{([^}]*)\})?\s*([^.{}\[\]\s]+)\s*(?:\[\s*([-0-9]+)\s*\])?",
|
||||
re.U).match
|
||||
|
||||
cdef tuple _RELATIVE_PATH_SEGMENT = (None, None, 0)
|
||||
|
||||
|
||||
cdef list _parse_object_path_string(_path):
    u"""Split an object path string into a list of (ns, name, index) tuples.

    Byte strings are decoded as ASCII and other non-unicode input is
    converted with unicode().  ``ns`` and ``name`` are stored UTF-8 encoded
    (``ns`` may be None), ``index`` defaults to 0.  A leading '.' adds the
    relative root segment.  Raises ValueError for paths that cannot be
    parsed completely.
    """
    cdef bint has_dot
    cdef unicode path
    if isinstance(_path, bytes):
        path = (<bytes>_path).decode('ascii')
    elif type(_path) is unicode:
        path = _path
    else:
        path = unicode(_path)
    path = path.strip()
    if path == u'.':
        return [_RELATIVE_PATH_SEGMENT]

    segments = []
    path_pos = 0
    # an empty path never enters the loop and fails the final check
    match = __MATCH_PATH_SEGMENT(path, path_pos) if path else None
    while match is not None:
        dot, ns, name, index = match.groups()
        index = int(index) if index else 0
        has_dot = dot == u'.'
        if segments:
            # every segment after the first must be introduced by a dot
            if not has_dot:
                raise ValueError(u"invalid path")
        elif has_dot:
            # path '.child' => ignore root
            segments.append(_RELATIVE_PATH_SEGMENT)
        elif index:
            raise ValueError(u"index not allowed on root node")
        if ns is not None:
            ns = python.PyUnicode_AsUTF8String(ns)
        name = python.PyUnicode_AsUTF8String(name)
        segments.append((ns, name, index))

        path_pos = match.end()
        match = __MATCH_PATH_SEGMENT(path, path_pos)

    # nothing parsed, or unparsable text left over after the last segment
    if not segments or len(path) > path_pos:
        raise ValueError(u"invalid path")
    return segments
|
||||
|
||||
|
||||
cdef list _parse_object_path_list(path):
    u"""Convert a sequence of path items into a list of (ns, name, index)
    tuples.

    Each item is stripped and split into namespace and tag name; an
    optional '[index]' suffix on the name is cut off and parsed.  An empty
    first item stands for the relative root.  Raises ValueError for an
    empty sequence, an unterminated index, or an index on the root item.
    """
    segments = []
    for item in path:
        item = item.strip()
        if not segments and item == u'':
            # path '.child' => ignore root
            segments.append((None, None, 0))
            continue

        ns, name = cetree.getNsTag(item)
        c_name = _xcstr(name)
        index_pos = tree.xmlStrchr(c_name, c'[')
        if index_pos is NULL:
            index = 0
        else:
            index_end = tree.xmlStrchr(index_pos + 1, c']')
            if index_end is NULL:
                raise ValueError(u"index must be enclosed in []")
            # the characters between '[' and ']'
            index = int(index_pos[1:index_end - index_pos])
            if not segments and index != 0:
                raise ValueError(u"index not allowed on root node")
            # keep only the part of the name in front of '['
            name = <bytes>c_name[:index_pos - c_name]
        segments.append((ns, name, index))
    if not segments:
        raise ValueError(u"invalid path")
    return segments
|
||||
|
||||
|
||||
cdef _ObjectPath* _build_object_path_segments(list path_list) except NULL:
    u"""Build a C array of _ObjectPath segments from a parsed path list.

    ``path_list`` holds (href, name, index) tuples.  A None href or name is
    stored as NULL.  The string pointers come from _xcstr() and presumably
    point into the href/name objects themselves, so the caller has to keep
    ``path_list`` alive as long as the array is in use (TODO confirm against
    _xcstr).

    Returns the newly allocated array; the caller releases it with
    python.lxml_free().  Raises MemoryError if the allocation fails.  If
    filling the array raises (e.g. an index that does not fit into
    Py_ssize_t), the array is freed before the exception propagates.
    """
    cdef _ObjectPath* c_path
    cdef _ObjectPath* c_path_segments
    c_path_segments = <_ObjectPath*>python.lxml_malloc(len(path_list), sizeof(_ObjectPath))
    if c_path_segments is NULL:
        raise MemoryError()
    c_path = c_path_segments
    try:
        for href, name, index in path_list:
            c_path[0].href = _xcstr(href) if href is not None else NULL
            c_path[0].name = _xcstr(name) if name is not None else NULL
            c_path[0].index = index
            c_path += 1
    except:
        # do not leak the half-filled array on conversion errors
        python.lxml_free(c_path_segments)
        raise
    return c_path_segments
|
||||
|
||||
|
||||
cdef _find_object_path(_Element root, _ObjectPath* c_path, Py_ssize_t c_path_len, default_value):
    u"""Resolve the compiled path below 'root' and return the target element.

    The first segment must match the root element itself.  If the path
    cannot be resolved, 'default_value' is returned unless it is the
    _NO_DEFAULT marker; in that case a root mismatch raises ValueError and
    a missing child raises AttributeError.
    """
    cdef tree.xmlNode* c_node = root._c_node
    cdef Py_ssize_t c_index
    c_name = c_path.name
    c_href = c_path.href
    if c_href is NULL or c_href[0] == c'\0':
        # no namespace given for the root segment: use the root's own
        c_href = tree._getNs(c_node)
    if not cetree.tagMatches(c_node, c_href, c_name):
        if default_value is _NO_DEFAULT:
            raise ValueError(
                f"root element does not match: need {cetree.namespacedNameFromNsName(c_href, c_name)}, got {root.tag}")
        return default_value

    # one iteration per remaining path segment
    while c_node is not NULL and c_path_len > 1:
        c_path_len -= 1
        c_path += 1
        if c_path.href is not NULL:
            c_href = c_path.href  # otherwise: keep parent namespace
        c_name = tree.xmlDictExists(c_node.doc.dict, c_path.name, -1)
        if c_name is NULL:
            # the name is not in the document's dict => stop searching;
            # keep the plain name for the error message
            c_name = c_path.name
            c_node = NULL
            break
        c_index = c_path.index
        # negative indexes start at the last child
        if c_index < 0:
            c_node = c_node.last
        else:
            c_node = c_node.children
        c_node = _findFollowingSibling(c_node, c_href, c_name, c_index)

    if c_node is not NULL:
        return cetree.elementFactory(root._doc, c_node)
    if default_value is not _NO_DEFAULT:
        return default_value
    tag = cetree.namespacedNameFromNsName(c_href, c_name)
    raise AttributeError(f"no such child: {tag}")
|
||||
|
||||
|
||||
cdef _create_object_path(_Element root, _ObjectPath* c_path,
                         Py_ssize_t c_path_len, int replace, value):
    u"""Walk the compiled path below 'root', create the children that are
    missing and store 'value' at the target.

    With a true 'replace' an existing target element is replaced, otherwise
    the value is appended next to it.  Raises TypeError for a path that only
    names the root or that would need an indexed child to be created, and
    ValueError if the first segment does not match the root element.
    """
    cdef _Element child
    cdef tree.xmlNode* c_node
    cdef tree.xmlNode* c_child
    cdef Py_ssize_t c_index
    if c_path_len == 1:
        raise TypeError(u"cannot update root node")

    c_node = root._c_node
    c_name = c_path.name
    c_href = c_path.href
    if c_href is NULL or c_href[0] == c'\0':
        # no namespace given for the root segment: use the root's own
        c_href = tree._getNs(c_node)
    if not cetree.tagMatches(c_node, c_href, c_name):
        raise ValueError(
            f"root element does not match: need {cetree.namespacedNameFromNsName(c_href, c_name)}, got {root.tag}")

    while c_path_len > 1:
        c_path_len -= 1
        c_path += 1
        if c_path.href is not NULL:
            c_href = c_path.href  # otherwise: keep parent namespace
        c_index = c_path.index
        c_name = tree.xmlDictExists(c_node.doc.dict, c_path.name, -1)
        if c_name is NULL:
            # name not in the document's dict => no child lookup
            c_name = c_path.name
            c_child = NULL
        else:
            # negative indexes start at the last child
            if c_index < 0:
                c_child = c_node.last
            else:
                c_child = c_node.children
            c_child = _findFollowingSibling(c_child, c_href, c_name, c_index)

        if c_child is not NULL:
            # segment exists: descend
            c_node = c_child
            continue
        if c_index != 0:
            raise TypeError(u"creating indexed path attributes is not supported")

        parent = cetree.elementFactory(root._doc, c_node)
        tag = cetree.namespacedNameFromNsName(c_href, c_name)
        if c_path_len == 1:
            # last segment is missing: create it directly from the value
            _appendValue(parent, tag, value)
            return
        # intermediate segment is missing: create an empty child
        child = cetree.makeSubElement(parent, tag, None, None, None, None)
        c_node = child._c_node

    # if we get here, the entire path was already there
    if replace:
        element = cetree.elementFactory(root._doc, c_node)
        _replaceElement(element, value)
    else:
        _appendValue(cetree.elementFactory(root._doc, c_node.parent),
                     cetree.namespacedName(c_node), value)
|
||||
|
||||
|
||||
cdef list _build_descendant_paths(tree.xmlNode* c_node, prefix_string):
    u"""Return the paths of 'c_node' and all of its descendants as a list
    of strings.

    A non-empty 'prefix_string' is put in front of the node's own tag,
    separated by a single dot.
    """
    cdef list path, path_list
    tag = cetree.namespacedName(c_node)
    if not prefix_string:
        root_path = tag
    elif prefix_string[-1] != u'.':
        root_path = prefix_string + u'.' + tag
    else:
        root_path = prefix_string + tag
    path_list = []
    path = [root_path]
    _recursive_build_descendant_paths(c_node, path, path_list)
    return path_list
|
||||
|
||||
|
||||
cdef int _recursive_build_descendant_paths(tree.xmlNode* c_node,
                                           list path, list path_list) except -1:
    u"""Append the path of 'c_node' and of each descendant element to
    'path_list'.

    'path' holds the path segments leading to 'c_node'; it is extended
    while a child is processed and shortened again afterwards.
    """
    cdef tree.xmlNode* c_child
    seen_tags = {}  # tag -> number of children seen with that tag so far
    path_list.append(u'.'.join(path))
    c_href = tree._getNs(c_node)
    c_child = c_node.children
    while c_child is not NULL:
        if c_child.type != tree.XML_ELEMENT_NODE:
            # only element nodes take part in paths
            c_child = c_child.next
            continue
        c_child_href = tree._getNs(c_child)
        if c_href is c_child_href:
            # same namespace as the parent: the plain name is enough
            tag = pyunicode(c_child.name)
        elif c_href is not NULL and c_child_href is NULL:
            # special case: parent has namespace, child does not
            tag = u'{}' + pyunicode(c_child.name)
        else:
            tag = cetree.namespacedName(c_child)
        count = seen_tags.get(tag)
        if count is None:
            seen_tags[tag] = 1
        else:
            # repeated tag: address it by its position among equal tags
            seen_tags[tag] = count + 1
            tag += f'[{count}]'
        path.append(tag)
        _recursive_build_descendant_paths(c_child, path, path_list)
        del path[-1]
        c_child = c_child.next
    return 0
|
||||
1921
.venv/lib/python3.7/site-packages/lxml/parser.pxi
Normal file
1921
.venv/lib/python3.7/site-packages/lxml/parser.pxi
Normal file
File diff suppressed because it is too large
Load Diff
194
.venv/lib/python3.7/site-packages/lxml/parsertarget.pxi
Normal file
194
.venv/lib/python3.7/site-packages/lxml/parsertarget.pxi
Normal file
@@ -0,0 +1,194 @@
|
||||
# Parser target context (ET target interface)
|
||||
|
||||
cdef object inspect_getargspec
|
||||
try:
|
||||
from inspect import getfullargspec as inspect_getargspec
|
||||
except ImportError:
|
||||
from inspect import getargspec as inspect_getargspec
|
||||
|
||||
|
||||
class _TargetParserResult(Exception):
    # Carrier exception for the result of a Python level parser target.
    # Not pretty, but raising it is the simplest way to hand that result
    # through the parser machinery up to the API level functions.
    def __init__(self, result):
        # the wrapped parser result, read back by whoever catches this
        self.result = result
|
||||
|
||||
|
||||
@cython.final
@cython.internal
cdef class _PythonSaxParserTarget(_SaxParserTarget):
    u"""SAX parser target that forwards events to a Python target object.

    Each optional callback of the wrapped target (start, end, data,
    start_ns, end_ns, doctype, pi, comment) is looked up once at
    construction time.  A callback that is missing or None is left
    disabled, i.e. its bit is not set in the SAX event filter.
    """
    cdef object _target_start
    cdef object _target_end
    cdef object _target_data
    cdef object _target_start_ns
    cdef object _target_end_ns
    cdef object _target_doctype
    cdef object _target_pi
    cdef object _target_comment
    cdef bint _start_takes_nsmap

    def __cinit__(self, target):
        cdef int enabled_events = 0
        self._start_takes_nsmap = 0

        # start(tag, attrib[, nsmap])
        self._target_start = getattr(target, 'start', None)
        if self._target_start is not None:
            enabled_events |= SAX_EVENT_START
            try:
                spec = inspect_getargspec(self._target_start)
                # more than (self, tag, attrib) or a *args catch-all
                # means that the callback also accepts the nsmap
                if len(spec[0]) > 3 or spec[1] is not None:
                    self._start_takes_nsmap = 1
            except TypeError:
                # signature cannot be inspected => assume no nsmap
                pass

        self._target_end = getattr(target, 'end', None)
        if self._target_end is not None:
            enabled_events |= SAX_EVENT_END

        self._target_start_ns = getattr(target, 'start_ns', None)
        if self._target_start_ns is not None:
            enabled_events |= SAX_EVENT_START_NS

        self._target_end_ns = getattr(target, 'end_ns', None)
        if self._target_end_ns is not None:
            enabled_events |= SAX_EVENT_END_NS

        self._target_data = getattr(target, 'data', None)
        if self._target_data is not None:
            enabled_events |= SAX_EVENT_DATA

        self._target_doctype = getattr(target, 'doctype', None)
        if self._target_doctype is not None:
            enabled_events |= SAX_EVENT_DOCTYPE

        self._target_pi = getattr(target, 'pi', None)
        if self._target_pi is not None:
            enabled_events |= SAX_EVENT_PI

        self._target_comment = getattr(target, 'comment', None)
        if self._target_comment is not None:
            enabled_events |= SAX_EVENT_COMMENT

        self._sax_event_filter = enabled_events

    cdef _handleSaxStart(self, tag, attrib, nsmap):
        # only pass the nsmap on if the callback signature allows for it
        if not self._start_takes_nsmap:
            return self._target_start(tag, attrib)
        return self._target_start(tag, attrib, nsmap)

    cdef _handleSaxEnd(self, tag):
        return self._target_end(tag)

    cdef _handleSaxStartNs(self, prefix, uri):
        return self._target_start_ns(prefix, uri)

    cdef _handleSaxEndNs(self, prefix):
        return self._target_end_ns(prefix)

    cdef int _handleSaxData(self, data) except -1:
        self._target_data(data)
        return 0

    cdef int _handleSaxDoctype(self, root_tag, public_id, system_id) except -1:
        self._target_doctype(root_tag, public_id, system_id)
        return 0

    cdef _handleSaxPi(self, target, data):
        return self._target_pi(target, data)

    cdef _handleSaxComment(self, comment):
        return self._target_comment(comment)
|
||||
|
||||
|
||||
@cython.final
@cython.internal
@cython.no_gc_clear  # Required because parent class uses it - Cython bug.
cdef class _TargetParserContext(_SaxParserContext):
    u"""This class maps SAX2 events to the ET parser target interface.

    The user provided target object is kept in '_python_target'; its
    close() method is called at the end of every parser run, whether
    the run succeeded or failed.
    """
    cdef object _python_target
    cdef int _setTarget(self, target) except -1:
        # Remember the original target and wrap it in a
        # _PythonSaxParserTarget unless it is a _SaxParserTarget
        # instance without an instance __dict__.
        self._python_target = target
        if not isinstance(target, _SaxParserTarget) or \
                hasattr(target, u'__dict__'):
            target = _PythonSaxParserTarget(target)
        self._setSaxParserTarget(target)
        return 0

    cdef _ParserContext _copy(self):
        # Copy the context and attach the same Python target to the copy.
        cdef _TargetParserContext context
        context = _ParserContext._copy(self)
        context._setTarget(self._python_target)
        return context

    cdef void _cleanupTargetParserContext(self, xmlDoc* result):
        # Drop the document that the libxml2 parser context still refers to.
        # It is freed here only if it is not 'result' and has no proxy.
        if self._c_ctxt.myDoc is not NULL:
            if self._c_ctxt.myDoc is not result and \
                    self._c_ctxt.myDoc._private is NULL:
                # no _Document proxy => orphan
                tree.xmlFreeDoc(self._c_ctxt.myDoc)
            self._c_ctxt.myDoc = NULL

    cdef object _handleParseResult(self, _BaseParser parser, xmlDoc* result,
                                   filename):
        # Return the value of target.close() after a parser run.
        # Stored exceptions and well-formedness errors (unless the parser
        # runs in recover mode) are raised after calling target.close().
        cdef bint recover
        recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER
        try:
            if self._has_raised():
                self._cleanupTargetParserContext(result)
                self._raise_if_stored()
            if not self._c_ctxt.wellFormed and not recover:
                _raiseParseError(self._c_ctxt, filename, self._error_log)
        except:
            if python.IS_PYTHON2:
                exc = sys.exc_info()
                # Python 2 can't chain exceptions
                # => ignore errors in close() and re-raise the original one
                try: self._python_target.close()
                except: pass
                raise exc[0], exc[1], exc[2]
            else:
                self._python_target.close()
                raise
        return self._python_target.close()

    cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser,
                                       xmlDoc* result, filename) except NULL:
        # This method never returns normally: on success, the value of
        # target.close() is passed out by raising _TargetParserResult.
        cdef bint recover
        recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER
        if result is not NULL and result._private is NULL:
            # no _Document proxy => orphan
            tree.xmlFreeDoc(result)
        try:
            self._cleanupTargetParserContext(result)
            self._raise_if_stored()
            if not self._c_ctxt.wellFormed and not recover:
                _raiseParseError(self._c_ctxt, filename, self._error_log)
        except:
            if python.IS_PYTHON2:
                exc = sys.exc_info()
                # Python 2 can't chain exceptions
                # => ignore errors in close() and re-raise the original one
                try: self._python_target.close()
                except: pass
                raise exc[0], exc[1], exc[2]
            else:
                self._python_target.close()
                raise
        parse_result = self._python_target.close()
        raise _TargetParserResult(parse_result)
|
||||
619
.venv/lib/python3.7/site-packages/lxml/proxy.pxi
Normal file
619
.venv/lib/python3.7/site-packages/lxml/proxy.pxi
Normal file
@@ -0,0 +1,619 @@
|
||||
# Proxy functions and low level node allocation stuff
|
||||
|
||||
# Proxies represent elements, their reference is stored in the C
|
||||
# structure of the respective node to avoid multiple instantiation of
|
||||
# the Python class.
|
||||
|
||||
@cython.linetrace(False)
@cython.profile(False)
cdef inline _Element getProxy(xmlNode* c_node):
    u"""Return the proxy registered for the given node, or None.

    None is returned for a NULL node as well as for a node that has
    no proxy stored in its '_private' field.
    """
    if c_node is NULL or c_node._private is NULL:
        return None
    return <_Element>c_node._private
|
||||
|
||||
|
||||
@cython.linetrace(False)
@cython.profile(False)
cdef inline bint hasProxy(xmlNode* c_node):
    # True iff a proxy is stored in the '_private' field of the node.
    # The node pointer itself is not checked for NULL.
    return c_node._private is not NULL
|
||||
|
||||
|
||||
@cython.linetrace(False)
@cython.profile(False)
cdef inline int _registerProxy(_Element proxy, _Document doc,
                               xmlNode* c_node) except -1:
    u"""Register a proxy and type for the node it's proxying for.

    Links proxy and C node in both directions and stores the owning
    document on the proxy.  Asserts that the node has no proxy yet.
    """
    #print "registering for:", <int>proxy._c_node
    assert not hasProxy(c_node), u"double registering proxy!"
    proxy._doc = doc
    proxy._c_node = c_node
    # back-reference from the C node to its Python proxy
    c_node._private = <void*>proxy
    return 0
|
||||
|
||||
|
||||
@cython.linetrace(False)
@cython.profile(False)
cdef inline int _unregisterProxy(_Element proxy) except -1:
    u"""Unregister a proxy for the node it's proxying for.

    Only the back-reference on the C node is cleared; the fields of
    the proxy object itself are left untouched.
    """
    cdef xmlNode* c_node = proxy._c_node
    assert c_node._private is <void*>proxy, u"Tried to unregister unknown proxy"
    c_node._private = NULL
    return 0
|
||||
|
||||
|
||||
################################################################################
|
||||
# temporarily make a node the root node of its document
|
||||
|
||||
cdef xmlDoc* _fakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node) except NULL:
    # shortcut for _plainFakeRootDoc() with 'with_siblings' enabled
    return _plainFakeRootDoc(c_base_doc, c_node, 1)
|
||||
|
||||
cdef xmlDoc* _plainFakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node,
                               bint with_siblings) except NULL:
    # build a temporary document that has the given node as root node
    # note that copy and original must not be modified during its lifetime!!
    # always call _destroyFakeDoc() after use!
    #
    # Returns 'c_base_doc' itself if no fake document is needed,
    # otherwise a new document that borrows the children of 'c_node'.
    cdef xmlNode* c_child
    cdef xmlNode* c_root
    cdef xmlNode* c_new_root
    cdef xmlDoc*  c_doc
    if with_siblings or (c_node.prev is NULL and c_node.next is NULL):
        c_root = tree.xmlDocGetRootElement(c_base_doc)
        if c_root is c_node:
            # already the root node, no siblings
            return c_base_doc

    c_doc  = _copyDoc(c_base_doc, 0)                   # non recursive!
    c_new_root = tree.xmlDocCopyNode(c_node, c_doc, 2) # non recursive!
    tree.xmlDocSetRootElement(c_doc, c_new_root)
    _copyParentNamespaces(c_node, c_new_root)

    # share (not copy) the subtree of the original node
    c_new_root.children = c_node.children
    c_new_root.last = c_node.last
    c_new_root.next = c_new_root.prev = NULL

    # store original node
    # (read back by _destroyFakeDoc() and _fakeDocElementFactory())
    c_doc._private = c_node

    # divert parent pointers of children
    c_child = c_new_root.children
    while c_child is not NULL:
        c_child.parent = c_new_root
        c_child = c_child.next

    c_doc.children = c_new_root
    return c_doc
|
||||
|
||||
cdef void _destroyFakeDoc(xmlDoc* c_base_doc, xmlDoc* c_doc):
    # delete a temporary document
    #
    # Does nothing if 'c_doc' is the base document itself, i.e. if no
    # fake document had been created for it.
    cdef xmlNode* c_child
    cdef xmlNode* c_parent
    cdef xmlNode* c_root
    if c_doc is c_base_doc:
        return
    c_root = tree.xmlDocGetRootElement(c_doc)

    # restore parent pointers of children
    # (the original node was stored in the '_private' field of the doc)
    c_parent = <xmlNode*>c_doc._private
    c_child = c_root.children
    while c_child is not NULL:
        c_child.parent = c_parent
        c_child = c_child.next

    # prevent recursive removal of children
    c_root.children = c_root.last = NULL
    tree.xmlFreeDoc(c_doc)
|
||||
|
||||
cdef _Element _fakeDocElementFactory(_Document doc, xmlNode* c_element):
    u"""Element factory that is safe to use with fake root documents.

    Instantiating the root node of a fake document would turn bad once
    that document gets destroyed.  If the given node is the root of a
    fake document, the original node that the document was built for
    is instantiated in its place.
    """
    if (c_element.doc is not doc._c_doc
            and c_element.doc._private is not NULL
            and c_element is c_element.doc.children):
        # fake root node => switch to the original node stored on the doc
        c_element = <xmlNode*>c_element.doc._private
    return _elementFactory(doc, c_element)
|
||||
|
||||
################################################################################
|
||||
# support for freeing tree elements when proxy objects are destroyed
|
||||
|
||||
cdef int attemptDeallocation(xmlNode* c_node):
    u"""Try to free the tree that contains c_node.

    Returns 1 if a subtree was freed and 0 otherwise.
    """
    cdef xmlNode* c_free_root
    if c_node is NULL:
        # we may not be referring to a tree at all
        return 0
    c_free_root = getDeallocationTop(c_node)
    if c_free_root is NULL:
        return 0
    _removeText(c_free_root.next)  # tail
    tree.xmlFreeNode(c_free_root)
    return 1
|
||||
|
||||
cdef xmlNode* getDeallocationTop(xmlNode* c_node):
    u"""Return the top of the tree that can be deallocated, or NULL.

    NULL is returned if the node is part of a document or if any
    proxy is still registered for the node, one of its ancestors, or
    an element in the subtree of the topmost ancestor or its siblings.
    """
    cdef xmlNode* c_next
    #print "trying to do deallocating:", c_node.type
    if hasProxy(c_node):
        #print "Not freeing: proxies still exist"
        return NULL
    # walk up to the topmost ancestor
    while c_node.parent is not NULL:
        c_node = c_node.parent
        #print "checking:", c_current.type
        # if we're still attached to the document, don't deallocate
        if c_node.type == tree.XML_DOCUMENT_NODE or \
               c_node.type == tree.XML_HTML_DOCUMENT_NODE:
            #print "not freeing: still in doc"
            return NULL
        # an ancestor with a live proxy keeps the tree alive
        if hasProxy(c_node):
            #print "Not freeing: proxies still exist"
            return NULL
    # see whether we have children to deallocate
    if not canDeallocateChildNodes(c_node):
        return NULL
    # see whether we have siblings to deallocate
    # (first the preceding ones, then the following ones)
    c_next = c_node.prev
    while c_next:
        if _isElement(c_next):
            if hasProxy(c_next) or not canDeallocateChildNodes(c_next):
                return NULL
        c_next = c_next.prev
    c_next = c_node.next
    while c_next:
        if _isElement(c_next):
            if hasProxy(c_next) or not canDeallocateChildNodes(c_next):
                return NULL
        c_next = c_next.next
    return c_node
|
||||
|
||||
cdef int canDeallocateChildNodes(xmlNode* c_parent):
    # Returns 0 as soon as a node with a registered proxy is found
    # during the traversal that starts at the first child of 'c_parent',
    # and 1 if there is none.
    cdef xmlNode* c_node
    c_node = c_parent.children
    # the BEGIN/END macro pair forms a loop around the statements between them
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_parent, c_node, 1)
    if hasProxy(c_node):
        return 0
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
    return 1
|
||||
|
||||
################################################################################
|
||||
# fix _Document references and namespaces when a node changes documents
|
||||
|
||||
cdef void _copyParentNamespaces(xmlNode* c_from_node, xmlNode* c_to_node) nogil:
    u"""Copy the namespaces of all ancestors of c_from_node to c_to_node.

    Walks up the parent chain for as long as the ancestors are elements,
    XInclude nodes or XML document nodes, and re-declares each of their
    namespace definitions on c_to_node.
    """
    cdef xmlNode* c_parent
    cdef xmlNs* c_new_ns
    c_parent = c_from_node.parent
    while c_parent and (tree._isElementOrXInclude(c_parent) or
                        c_parent.type == tree.XML_DOCUMENT_NODE):
        c_new_ns = c_parent.nsDef
        while c_new_ns:
            # libxml2 will check if the prefix is already defined
            tree.xmlNewNs(c_to_node, c_new_ns.href, c_new_ns.prefix)
            c_new_ns = c_new_ns.next
        c_parent = c_parent.parent
|
||||
|
||||
|
||||
# One cache entry that maps a namespace pointer found in a moved subtree
# ('old') to the namespace pointer that replaces it ('new').
ctypedef struct _ns_update_map:
    xmlNs* old
    xmlNs* new
|
||||
|
||||
|
||||
# Growable array of namespace mappings.
ctypedef struct _nscache:
    _ns_update_map* ns_map  # the allocated entries (or NULL)
    size_t size             # number of allocated entries
    size_t last             # index of the next free entry
|
||||
|
||||
|
||||
cdef int _growNsCache(_nscache* c_ns_cache) except -1:
    # Enlarge the cache array: 20 entries initially, doubled afterwards.
    # On allocation failure, the old array is freed, the pointer is
    # reset to NULL and MemoryError is raised.
    cdef _ns_update_map* ns_map_ptr
    if c_ns_cache.size == 0:
        c_ns_cache.size = 20
    else:
        c_ns_cache.size *= 2
    ns_map_ptr = <_ns_update_map*> python.lxml_realloc(
        c_ns_cache.ns_map, c_ns_cache.size, sizeof(_ns_update_map))
    if not ns_map_ptr:
        python.lxml_free(c_ns_cache.ns_map)
        c_ns_cache.ns_map = NULL
        raise MemoryError()
    c_ns_cache.ns_map = ns_map_ptr
    return 0
|
||||
|
||||
|
||||
cdef inline int _appendToNsCache(_nscache* c_ns_cache,
                                 xmlNs* c_old_ns, xmlNs* c_new_ns) except -1:
    # Append the mapping (c_old_ns -> c_new_ns) to the cache and grow
    # the cache first if it is full.  Errors from growing are propagated.
    cdef _ns_update_map* c_slot
    if c_ns_cache.last >= c_ns_cache.size:
        _growNsCache(c_ns_cache)
    c_slot = &c_ns_cache.ns_map[c_ns_cache.last]
    c_slot.old = c_old_ns
    c_slot.new = c_new_ns
    c_ns_cache.last += 1
    return 0
|
||||
|
||||
|
||||
cdef int _stripRedundantNamespaceDeclarations(xmlNode* c_element, _nscache* c_ns_cache,
                                              xmlNs** c_del_ns_list) except -1:
    u"""Removes namespace declarations from an element that are already
    defined in its parents.  Does not free the xmlNs's, just prepends
    them to the c_del_ns_list.

    Every declaration found on the element is recorded in the cache,
    mapped either to itself (if kept) or to the declaration found in
    the parents that replaces it (if stripped).
    """
    cdef xmlNs* c_ns
    cdef xmlNs* c_ns_next
    cdef xmlNs** c_nsdef
    # use a xmlNs** to handle assignments to "c_element.nsDef" correctly
    c_nsdef = &c_element.nsDef
    while c_nsdef[0] is not NULL:
        # look for the same href starting at the parent of the element
        c_ns = tree.xmlSearchNsByHref(
            c_element.doc, c_element.parent, c_nsdef[0].href)
        if c_ns is NULL:
            # new namespace href => keep and cache the ns declaration
            _appendToNsCache(c_ns_cache, c_nsdef[0], c_nsdef[0])
            c_nsdef = &c_nsdef[0].next
        else:
            # known namespace href => cache mapping and strip old ns
            _appendToNsCache(c_ns_cache, c_nsdef[0], c_ns)
            # cut the current declaration out of the nsDef chain and
            # prepend it to the garbage chain
            c_ns_next = c_nsdef[0].next
            c_nsdef[0].next = c_del_ns_list[0]
            c_del_ns_list[0] = c_nsdef[0]
            c_nsdef[0] = c_ns_next
    return 0
|
||||
|
||||
|
||||
cdef void _cleanUpFromNamespaceAdaptation(xmlNode* c_start_node,
                                          _nscache* c_ns_cache, xmlNs* c_del_ns_list):
    # Try to recover from exceptions with really bad timing.  We were in the middle
    # of ripping out xmlNS-es and likely ran out of memory.  Try to fix up the tree
    # by re-adding the original xmlNs declarations (which might still be used in some
    # places).
    cdef xmlNs* c_ns
    if c_ns_cache.ns_map:
        python.lxml_free(c_ns_cache.ns_map)
        # reset the cache so that the caller's struct does not keep a
        # dangling pointer to the freed array
        c_ns_cache.ns_map = NULL
        c_ns_cache.size = 0
        c_ns_cache.last = 0
    if c_del_ns_list:
        if not c_start_node.nsDef:
            c_start_node.nsDef = c_del_ns_list
        else:
            # append the stripped declarations to the end of the nsDef chain
            c_ns = c_start_node.nsDef
            while c_ns.next:
                c_ns = c_ns.next
            c_ns.next = c_del_ns_list
|
||||
|
||||
|
||||
cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
                            xmlNode* c_element) except -1:
    u"""Fix the xmlNs pointers of a node and its subtree that were moved.

    Originally copied from libxml2's xmlReconciliateNs().  Expects
    libxml2 doc pointers of node to be correct already, but fixes
    _Document references.

    For each node in the subtree, we do this:

    1) Remove redundant declarations of namespace that are already
       defined in its parents.

    2) Replace namespaces that are *not* defined on the node or its
       parents by the equivalent namespace declarations that *are*
       defined on the node or its parents (possibly using a different
       prefix).  If a namespace is unknown, declare a new one on the
       node.

    3) Reassign the names of tags and attribute from the dict of the
       target document *iff* it is different from the dict used in the
       source subtree.

    4) Set the Document reference to the new Document (if different).
       This is done on backtracking to keep the original Document
       alive as long as possible, until all its elements are updated.

    Note that the namespace declarations are removed from the tree in
    step 1), but freed only after the complete subtree was traversed
    and all occurrences were replaced by tree-internal pointers.

    Returns 0 on success.  Nodes that are neither elements nor XInclude
    nodes are left untouched.
    """
    cdef xmlNode* c_start_node
    cdef xmlNode* c_node
    cdef xmlDoc* c_doc = doc._c_doc
    cdef _nscache c_ns_cache = [NULL, 0, 0]
    cdef xmlNs* c_del_ns_list = NULL
    # C level counter (passed on as size_t to fixElementDocument() below)
    cdef size_t proxy_count = 0

    if not tree._isElementOrXInclude(c_element):
        return 0

    c_start_node = c_element

    tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1)
    if tree._isElementOrXInclude(c_element):
        if hasProxy(c_element):
            proxy_count += 1

        # 1) cut out namespaces defined here that are already known by
        #    the ancestors
        if c_element.nsDef is not NULL:
            try:
                _stripRedundantNamespaceDeclarations(c_element, &c_ns_cache, &c_del_ns_list)
            except:
                _cleanUpFromNamespaceAdaptation(c_start_node, &c_ns_cache, c_del_ns_list)
                raise

        # 2) make sure the namespaces of an element and its attributes
        #    are declared in this document (i.e. on the node or its parents)
        #    (_fixCNs() runs the cleanup itself before raising)
        if c_element.ns is not NULL:
            _fixCNs(doc, c_start_node, c_element, &c_ns_cache, c_del_ns_list)

        c_node = <xmlNode*>c_element.properties
        while c_node is not NULL:
            if c_node.ns is not NULL:
                _fixCNs(doc, c_start_node, c_node, &c_ns_cache, c_del_ns_list)
            c_node = c_node.next

    tree.END_FOR_EACH_FROM(c_element)

    # free now unused namespace declarations
    if c_del_ns_list is not NULL:
        tree.xmlFreeNsList(c_del_ns_list)

    # cleanup
    if c_ns_cache.ns_map is not NULL:
        python.lxml_free(c_ns_cache.ns_map)

    # 3) fix the names in the tree if we moved it from a different thread
    if c_doc.dict is not c_source_doc.dict:
        fixThreadDictNames(c_start_node, c_source_doc.dict, c_doc.dict)

    # 4) fix _Document references
    #    (and potentially deallocate the source document)
    if proxy_count > 0:
        if proxy_count == 1 and c_start_node._private is not NULL:
            # fast path: the only proxy in the subtree is on the start node
            proxy = getProxy(c_start_node)
            if proxy is not None:
                if proxy._doc is not doc:
                    proxy._doc = doc
            else:
                fixElementDocument(c_start_node, doc, proxy_count)
        else:
            fixElementDocument(c_start_node, doc, proxy_count)

    return 0
|
||||
|
||||
|
||||
cdef void _setTreeDoc(xmlNode* c_node, xmlDoc* c_doc):
    """Adaptation of 'xmlSetTreeDoc()' that deep-fixes the document links iteratively.
    It avoids https://gitlab.gnome.org/GNOME/libxml2/issues/42

    Sets the 'doc' pointer of every visited node, of the attributes of
    elements and of the children of those attributes to 'c_doc'.
    """
    tree.BEGIN_FOR_EACH_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        c_attr = <tree.xmlAttr*>c_node.properties
        while c_attr:
            if c_attr.atype == tree.XML_ATTRIBUTE_ID:
                # unregister the ID from the document that the node still
                # points to, before its 'doc' link gets replaced below
                tree.xmlRemoveID(c_node.doc, c_attr)
            c_attr.doc = c_doc
            _fixDocChildren(c_attr.children, c_doc)
            c_attr = c_attr.next
    # Set doc link for all nodes, not only elements.
    c_node.doc = c_doc
    tree.END_FOR_EACH_FROM(c_node)
|
||||
|
||||
|
||||
cdef inline void _fixDocChildren(xmlNode* c_child, xmlDoc* c_doc):
    # Set the 'doc' pointer of 'c_child', of all its following siblings
    # and (recursively) of all their descendants to 'c_doc'.
    while c_child:
        c_child.doc = c_doc
        if c_child.children:
            _fixDocChildren(c_child.children, c_doc)
        c_child = c_child.next
|
||||
|
||||
|
||||
cdef int _fixCNs(_Document doc, xmlNode* c_start_node, xmlNode* c_node,
                 _nscache* c_ns_cache, xmlNs* c_del_ns_list) except -1:
    # Replace the namespace pointer of 'c_node' (an element or attribute
    # with a non-NULL 'ns') by one that is declared in the target document.
    #
    # The replacement is taken from the cache if possible.  Otherwise, it
    # is looked up or created starting at 'c_start_node' and then added
    # to the cache.  On errors, the namespace adaptation state is cleaned
    # up before the exception is re-raised.
    cdef xmlNs* c_ns = NULL
    # remember the original namespace, c_node.ns gets overwritten below
    cdef xmlNs* c_old_ns = c_node.ns
    cdef bint is_prefixed_attr = (c_node.type == tree.XML_ATTRIBUTE_NODE and c_node.ns.prefix)

    for ns_map in c_ns_cache.ns_map[:c_ns_cache.last]:
        if c_old_ns is ns_map.old:
            if is_prefixed_attr and not ns_map.new.prefix:
                # avoid dropping prefix from attributes
                continue
            c_ns = ns_map.new
            break

    if c_ns:
        c_node.ns = c_ns
    else:
        # not in cache or not acceptable
        # => find a replacement from this document
        try:
            c_ns = doc._findOrBuildNodeNs(
                c_start_node, c_old_ns.href, c_old_ns.prefix,
                c_node.type == tree.XML_ATTRIBUTE_NODE)
            c_node.ns = c_ns
            # cache the mapping from the *original* namespace, so that
            # other nodes that use it can be resolved from the cache
            _appendToNsCache(c_ns_cache, c_old_ns, c_ns)
        except:
            _cleanUpFromNamespaceAdaptation(c_start_node, c_ns_cache, c_del_ns_list)
            raise
    return 0
|
||||
|
||||
|
||||
cdef void fixElementDocument(xmlNode* c_element, _Document doc,
                             size_t proxy_count):
    # Set the _Document reference of the proxies found in the traversal
    # that starts at 'c_element' to 'doc'.  'proxy_count' is the number of
    # proxies to expect; the traversal stops once that many were seen.
    cdef xmlNode* c_node = c_element
    cdef _Element proxy = None # init-to-None required due to fake-loop below
    tree.BEGIN_FOR_EACH_FROM(c_element, c_node, 1)
    if c_node._private is not NULL:
        proxy = getProxy(c_node)
        if proxy is not None:
            if proxy._doc is not doc:
                proxy._doc = doc
            proxy_count -= 1
            if proxy_count == 0:
                # all proxies found => done
                return
    tree.END_FOR_EACH_FROM(c_node)
|
||||
|
||||
|
||||
cdef void fixThreadDictNames(xmlNode* c_element,
                             tree.xmlDict* c_src_dict,
                             tree.xmlDict* c_dict) nogil:
    # re-assign the names of tags and attributes
    #
    # this should only be called when the element is based on a
    # different libxml2 tag name dictionary
    #
    # For document nodes, the namespaces on the document node, both DTD
    # subsets and all top-level children are processed.  For element and
    # XInclude nodes, only their own subtree is processed.  Other node
    # types are ignored.
    if c_element.type == tree.XML_DOCUMENT_NODE or \
            c_element.type == tree.XML_HTML_DOCUMENT_NODE:
        # may define "xml" namespace
        fixThreadDictNsForNode(c_element, c_src_dict, c_dict)
        if c_element.doc.extSubset:
            fixThreadDictNamesForDtd(c_element.doc.extSubset, c_src_dict, c_dict)
        if c_element.doc.intSubset:
            fixThreadDictNamesForDtd(c_element.doc.intSubset, c_src_dict, c_dict)
        c_element = c_element.children
        while c_element is not NULL:
            fixThreadDictNamesForNode(c_element, c_src_dict, c_dict)
            c_element = c_element.next
    elif tree._isElementOrXInclude(c_element):
        fixThreadDictNamesForNode(c_element, c_src_dict, c_dict)
|
||||
|
||||
|
||||
cdef inline void _fixThreadDictPtr(const_xmlChar** c_ptr,
                                   tree.xmlDict* c_src_dict,
                                   tree.xmlDict* c_dict) nogil:
    # If the string that 'c_ptr' points to is owned by the source dict,
    # replace it by the result of looking it up in the target dict.
    # The pointer is left unchanged if that lookup returns NULL.
    c_str = c_ptr[0]
    if c_str and c_src_dict and tree.xmlDictOwns(c_src_dict, c_str):
        # return value can be NULL on memory error, but we don't handle that here
        c_str = tree.xmlDictLookup(c_dict, c_str, -1)
        if c_str:
            c_ptr[0] = c_str
|
||||
|
||||
|
||||
cdef void fixThreadDictNamesForNode(xmlNode* c_element,
                                    tree.xmlDict* c_src_dict,
                                    tree.xmlDict* c_dict) nogil:
    # Move the dict-owned strings of the nodes visited in the traversal
    # that starts at 'c_element' from the source dict to the target dict.
    cdef xmlNode* c_node = c_element
    tree.BEGIN_FOR_EACH_FROM(c_element, c_node, 1)
    if c_node.type in (tree.XML_ELEMENT_NODE, tree.XML_XINCLUDE_START):
        # attributes, namespace declarations and the tag name
        fixThreadDictNamesForAttributes(
            c_node.properties, c_src_dict, c_dict)
        fixThreadDictNsForNode(c_node, c_src_dict, c_dict)
        _fixThreadDictPtr(&c_node.name, c_src_dict, c_dict)
    elif c_node.type == tree.XML_TEXT_NODE:
        # libxml2's SAX2 parser interns some indentation space
        fixThreadDictContentForNode(c_node, c_src_dict, c_dict)
    elif c_node.type == tree.XML_COMMENT_NODE:
        pass  # don't touch c_node.name
    else:
        _fixThreadDictPtr(&c_node.name, c_src_dict, c_dict)
    tree.END_FOR_EACH_FROM(c_node)
|
||||
|
||||
|
||||
cdef inline void fixThreadDictNamesForAttributes(tree.xmlAttr* c_attr,
                                                 tree.xmlDict* c_src_dict,
                                                 tree.xmlDict* c_dict) nogil:
    # Fix the names and the child node contents of 'c_attr' and all
    # attributes that follow it in the sibling chain.
    cdef xmlNode* c_child
    cdef xmlNode* c_node = <xmlNode*>c_attr
    while c_node is not NULL:
        if c_node.type not in (tree.XML_TEXT_NODE, tree.XML_COMMENT_NODE):
            _fixThreadDictPtr(&c_node.name, c_src_dict, c_dict)
        # libxml2 keeps some (!) attribute values in the dict
        c_child = c_node.children
        while c_child is not NULL:
            fixThreadDictContentForNode(c_child, c_src_dict, c_dict)
            c_child = c_child.next
        c_node = c_node.next
|
||||
|
||||
|
||||
cdef inline void fixThreadDictContentForNode(xmlNode* c_node,
                                             tree.xmlDict* c_src_dict,
                                             tree.xmlDict* c_dict) nogil:
    # Replace the content string of the node by its counterpart from the
    # target dict if it is owned by the source dict.  Content that points
    # at the node's own 'properties' field is skipped.
    # NOTE(review): unlike _fixThreadDictPtr(), a NULL lookup result is
    # assigned as is - confirm that this is intended.
    if c_node.content is not NULL and \
            c_node.content is not <xmlChar*>&c_node.properties:
        if tree.xmlDictOwns(c_src_dict, c_node.content):
            # result can be NULL on memory error, but we don't handle that here
            c_node.content = <xmlChar*>tree.xmlDictLookup(c_dict, c_node.content, -1)
|
||||
|
||||
|
||||
cdef inline void fixThreadDictNsForNode(xmlNode* c_node,
                                        tree.xmlDict* c_src_dict,
                                        tree.xmlDict* c_dict) nogil:
    # Fix the href and prefix strings of all namespace declarations
    # in the 'nsDef' chain of the node.
    cdef xmlNs* c_ns = c_node.nsDef
    while c_ns is not NULL:
        _fixThreadDictPtr(&c_ns.href, c_src_dict, c_dict)
        _fixThreadDictPtr(&c_ns.prefix, c_src_dict, c_dict)
        c_ns = c_ns.next
|
||||
|
||||
|
||||
cdef void fixThreadDictNamesForDtd(tree.xmlDtd* c_dtd,
                                   tree.xmlDict* c_src_dict,
                                   tree.xmlDict* c_dict) nogil:
    # Fix the dict-owned strings of the element declarations (including
    # their attribute declarations) and the entity declarations among the
    # children of the DTD.  Other child node types are skipped.
    cdef xmlNode* c_node
    cdef tree.xmlElement* c_element
    cdef tree.xmlAttribute* c_attribute
    cdef tree.xmlEntity* c_entity

    c_node = c_dtd.children
    while c_node:
        if c_node.type == tree.XML_ELEMENT_DECL:
            c_element = <tree.xmlElement*>c_node
            if c_element.content:
                _fixThreadDictPtr(&c_element.content.name, c_src_dict, c_dict)
                _fixThreadDictPtr(&c_element.content.prefix, c_src_dict, c_dict)
            # attribute declarations are chained through 'nexth'
            c_attribute = c_element.attributes
            while c_attribute:
                _fixThreadDictPtr(&c_attribute.defaultValue, c_src_dict, c_dict)
                _fixThreadDictPtr(&c_attribute.name, c_src_dict, c_dict)
                _fixThreadDictPtr(&c_attribute.prefix, c_src_dict, c_dict)
                _fixThreadDictPtr(&c_attribute.elem, c_src_dict, c_dict)
                c_attribute = c_attribute.nexth
        elif c_node.type == tree.XML_ENTITY_DECL:
            c_entity = <tree.xmlEntity*>c_node
            _fixThreadDictPtr(&c_entity.name, c_src_dict, c_dict)
            _fixThreadDictPtr(&c_entity.ExternalID, c_src_dict, c_dict)
            _fixThreadDictPtr(&c_entity.SystemID, c_src_dict, c_dict)
            _fixThreadDictPtr(<const_xmlChar**>&c_entity.content, c_src_dict, c_dict)
        c_node = c_node.next
|
||||
|
||||
|
||||
################################################################################
|
||||
# adopt an xmlDoc from an external libxml2 document source
|
||||
|
||||
cdef _Document _adoptForeignDoc(xmlDoc* c_doc, _BaseParser parser=None, bint is_owned=True):
    """Convert and wrap an externally produced xmlDoc for use in lxml.
    Assures that all '_private' pointers are NULL to prevent accidental
    dereference into lxml proxy objects.

    If 'is_owned' is true, the document is wrapped in place, and it is
    freed here if it is rejected because of its node type.  Otherwise,
    a copy of the document is wrapped.

    Raises ValueError for a NULL document or a document node of the
    wrong type, and MemoryError if copying the document fails.
    """
    if c_doc is NULL:
        raise ValueError("Illegal document provided: NULL")
    if c_doc.type not in (tree.XML_DOCUMENT_NODE, tree.XML_HTML_DOCUMENT_NODE):
        # read the type before the document is potentially freed
        doc_type = c_doc.type
        if is_owned:
            tree.xmlFreeDoc(c_doc)
        raise ValueError(f"Illegal document provided: expected XML or HTML, found {doc_type}")

    cdef xmlNode* c_node = <xmlNode*>c_doc

    if is_owned:
        # reset the '_private' field of all visited nodes
        tree.BEGIN_FOR_EACH_FROM(<xmlNode*>c_doc, c_node, 1)
        c_node._private = NULL
        tree.END_FOR_EACH_FROM(c_node)
    else:
        # create a fresh copy that lxml owns
        c_doc = tree.xmlCopyDoc(c_doc, 1)
        if c_doc is NULL:
            raise MemoryError()

    return _documentFactory(c_doc, parser)
|
||||
178
.venv/lib/python3.7/site-packages/lxml/public-api.pxi
Normal file
178
.venv/lib/python3.7/site-packages/lxml/public-api.pxi
Normal file
@@ -0,0 +1,178 @@
|
||||
# Public C API for lxml.etree
|
||||
|
||||
cdef public api _Element deepcopyNodeToDocument(_Document doc, xmlNode* c_root):
    """Recursively copy the element into the document. doc is not modified.

    The copy is created by ``_copyNodeToDoc`` against ``doc._c_doc`` and
    returned wrapped by ``_elementFactory``.
    """
    cdef xmlNode* c_copy = _copyNodeToDoc(c_root, doc._c_doc)
    return _elementFactory(doc, c_copy)
|
||||
|
||||
cdef public api _ElementTree elementTreeFactory(_Element context_node):
    """Build a plain ``_ElementTree`` for ``context_node``.

    Validates the node, then delegates to ``newElementTree`` with
    ``_ElementTree`` as the class to instantiate.
    """
    _assertValidNode(context_node)
    return newElementTree(context_node, _ElementTree)
|
||||
|
||||
cdef public api _ElementTree newElementTree(_Element context_node,
                                            object subclass):
    """Build an ElementTree of type ``subclass`` for ``context_node``.

    Raises TypeError if ``context_node`` is a NULL pointer or None.
    """
    # Reject both a C-level NULL and Python's None before touching the node.
    if <void*>context_node is NULL or context_node is None:
        raise TypeError
    _assertValidNode(context_node)
    return _newElementTree(context_node._doc, context_node, subclass)
|
||||
|
||||
cdef public api _ElementTree adoptExternalDocument(xmlDoc* c_doc, parser, bint is_owned):
    """Wrap an externally created xmlDoc in an ElementTree.

    Raises TypeError if ``c_doc`` is NULL; otherwise the document is
    passed through ``_adoptForeignDoc`` and ``_elementTreeFactory``.
    """
    if c_doc is NULL:
        raise TypeError
    return _elementTreeFactory(_adoptForeignDoc(c_doc, parser, is_owned), None)
|
||||
|
||||
cdef public api _Element elementFactory(_Document doc, xmlNode* c_node):
    """Return the element wrapper for ``c_node`` within ``doc``.

    Raises TypeError if ``c_node`` is NULL or ``doc`` is None.
    """
    if c_node is not NULL and doc is not None:
        return _elementFactory(doc, c_node)
    raise TypeError
|
||||
|
||||
cdef public api _Element makeElement(tag, _Document doc, parser,
                                     text, tail, attrib, nsmap):
    """Public C-API entry point for ``_makeElement``.

    Passes NULL and None for the two ``_makeElement`` arguments that this
    API does not expose.
    """
    return _makeElement(
        tag, NULL, doc, parser, text, tail, attrib, nsmap, None)
|
||||
|
||||
cdef public api _Element makeSubElement(_Element parent, tag, text, tail,
                                        attrib, nsmap):
    """Public C-API entry point for ``_makeSubElement``.

    Validates ``parent`` first; the trailing ``_makeSubElement`` argument
    is always None.
    """
    _assertValidNode(parent)
    return _makeSubElement(
        parent, tag, text, tail, attrib, nsmap, None)
|
||||
|
||||
cdef public api void setElementClassLookupFunction(
        _element_class_lookup_function function, state):
    """Public C-API entry point for ``_setElementClassLookupFunction``."""
    _setElementClassLookupFunction(function, state)
|
||||
|
||||
cdef public api object lookupDefaultElementClass(state, doc, xmlNode* c_node):
    """Public C-API entry point for ``_lookupDefaultElementClass``."""
    return _lookupDefaultElementClass(state, doc, c_node)
|
||||
|
||||
cdef public api object lookupNamespaceElementClass(state, doc, xmlNode* c_node):
    """Public C-API entry point for ``_find_nselement_class``."""
    return _find_nselement_class(state, doc, c_node)
|
||||
|
||||
cdef public api object callLookupFallback(FallbackElementClassLookup lookup,
                                          _Document doc, xmlNode* c_node):
    """Public C-API entry point for ``_callLookupFallback``."""
    return _callLookupFallback(lookup, doc, c_node)
|
||||
|
||||
cdef public api int tagMatches(xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name):
    """Return the result of ``_tagMatches``, or -1 if ``c_node`` is NULL."""
    if c_node is not NULL:
        return _tagMatches(c_node, c_href, c_name)
    return -1
|
||||
|
||||
cdef public api _Document documentOrRaise(object input):
    """Public C-API entry point for ``_documentOrRaise``."""
    return _documentOrRaise(input)
|
||||
|
||||
cdef public api _Element rootNodeOrRaise(object input):
    """Public C-API entry point for ``_rootNodeOrRaise``."""
    return _rootNodeOrRaise(input)
|
||||
|
||||
cdef public api bint hasText(xmlNode* c_node):
    """Public C-API entry point for ``_hasText``."""
    return _hasText(c_node)
|
||||
|
||||
cdef public api bint hasTail(xmlNode* c_node):
    """Public C-API entry point for ``_hasTail``."""
    return _hasTail(c_node)
|
||||
|
||||
cdef public api object textOf(xmlNode* c_node):
    """Collect the text starting at the node's first child.

    Returns None if ``c_node`` is NULL.
    """
    if c_node is not NULL:
        return _collectText(c_node.children)
    return None
|
||||
|
||||
cdef public api object tailOf(xmlNode* c_node):
    """Collect the text starting at the node's next sibling.

    Returns None if ``c_node`` is NULL.
    """
    if c_node is not NULL:
        return _collectText(c_node.next)
    return None
|
||||
|
||||
cdef public api int setNodeText(xmlNode* c_node, text) except -1:
    """Set the node's text via ``_setNodeText``.

    Raises ValueError if ``c_node`` is NULL.
    """
    if c_node is not NULL:
        return _setNodeText(c_node, text)
    raise ValueError
|
||||
|
||||
cdef public api int setTailText(xmlNode* c_node, text) except -1:
    """Set the node's tail text via ``_setTailText``.

    Raises ValueError if ``c_node`` is NULL.
    """
    if c_node is not NULL:
        return _setTailText(c_node, text)
    raise ValueError
|
||||
|
||||
cdef public api object attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node):
    """Public C-API entry point for ``_attributeValue``."""
    return _attributeValue(c_element, c_attrib_node)
|
||||
|
||||
cdef public api object attributeValueFromNsName(xmlNode* c_element,
                                                const_xmlChar* ns, const_xmlChar* name):
    """Public C-API entry point for ``_attributeValueFromNsName``."""
    return _attributeValueFromNsName(c_element, ns, name)
|
||||
|
||||
cdef public api object getAttributeValue(_Element element, key, default):
    """Validate ``element``, then delegate to ``_getAttributeValue``."""
    _assertValidNode(element)
    return _getAttributeValue(element, key, default)
|
||||
|
||||
cdef public api object iterattributes(_Element element, int keysvalues):
    """Validate ``element``, then delegate to ``_attributeIteratorFactory``."""
    _assertValidNode(element)
    return _attributeIteratorFactory(element, keysvalues)
|
||||
|
||||
cdef public api list collectAttributes(xmlNode* c_element, int keysvalues):
    """Public C-API entry point for ``_collectAttributes``."""
    return _collectAttributes(c_element, keysvalues)
|
||||
|
||||
cdef public api int setAttributeValue(_Element element, key, value) except -1:
    """Validate ``element``, then delegate to ``_setAttributeValue``."""
    _assertValidNode(element)
    return _setAttributeValue(element, key, value)
|
||||
|
||||
cdef public api int delAttribute(_Element element, key) except -1:
    """Validate ``element``, then delegate to ``_delAttribute``."""
    _assertValidNode(element)
    return _delAttribute(element, key)
|
||||
|
||||
cdef public api int delAttributeFromNsName(tree.xmlNode* c_element,
                                           const_xmlChar* c_href, const_xmlChar* c_name):
    """Public C-API entry point for ``_delAttributeFromNsName``."""
    return _delAttributeFromNsName(c_element, c_href, c_name)
|
||||
|
||||
cdef public api bint hasChild(xmlNode* c_node):
    """Public C-API entry point for ``_hasChild``."""
    return _hasChild(c_node)
|
||||
|
||||
cdef public api xmlNode* findChild(xmlNode* c_node, Py_ssize_t index):
    """Public C-API entry point for ``_findChild``."""
    return _findChild(c_node, index)
|
||||
|
||||
cdef public api xmlNode* findChildForwards(xmlNode* c_node, Py_ssize_t index):
    """Public C-API entry point for ``_findChildForwards``."""
    return _findChildForwards(c_node, index)
|
||||
|
||||
cdef public api xmlNode* findChildBackwards(xmlNode* c_node, Py_ssize_t index):
    """Public C-API entry point for ``_findChildBackwards``."""
    return _findChildBackwards(c_node, index)
|
||||
|
||||
cdef public api xmlNode* nextElement(xmlNode* c_node):
    """Public C-API entry point for ``_nextElement``."""
    return _nextElement(c_node)
|
||||
|
||||
cdef public api xmlNode* previousElement(xmlNode* c_node):
    """Public C-API entry point for ``_previousElement``."""
    return _previousElement(c_node)
|
||||
|
||||
cdef public api void appendChild(_Element parent, _Element child):
    """Append ``child`` to ``parent`` via ``_appendChild``.

    Deprecated, use appendChildToElement() instead!  This variant returns
    nothing, so the result of ``_appendChild`` is discarded.
    """
    _appendChild(parent, child)
|
||||
|
||||
cdef public api int appendChildToElement(_Element parent, _Element child) except -1:
    """Append ``child`` to ``parent`` and return the result of ``_appendChild``."""
    return _appendChild(parent, child)
|
||||
|
||||
cdef public api object pyunicode(const_xmlChar* s):
    """Convert a C string to a Python string via ``funicode``.

    Raises TypeError if ``s`` is NULL.
    """
    if s is not NULL:
        return funicode(s)
    raise TypeError
|
||||
|
||||
cdef public api bytes utf8(object s):
    """Public C-API entry point for ``_utf8``."""
    return _utf8(s)
|
||||
|
||||
cdef public api tuple getNsTag(object tag):
    """Public C-API entry point for ``_getNsTag``."""
    return _getNsTag(tag)
|
||||
|
||||
cdef public api tuple getNsTagWithEmptyNs(object tag):
    """Public C-API entry point for ``_getNsTagWithEmptyNs``."""
    return _getNsTagWithEmptyNs(tag)
|
||||
|
||||
cdef public api object namespacedName(xmlNode* c_node):
    """Public C-API entry point for ``_namespacedName``."""
    return _namespacedName(c_node)
|
||||
|
||||
cdef public api object namespacedNameFromNsName(const_xmlChar* href, const_xmlChar* name):
    """Public C-API entry point for ``_namespacedNameFromNsName``."""
    return _namespacedNameFromNsName(href, name)
|
||||
|
||||
cdef public api void iteratorStoreNext(_ElementIterator iterator, _Element node):
    """Call ``iterator._storeNext(node)``.  Deprecated!"""
    iterator._storeNext(node)
|
||||
|
||||
cdef public api void initTagMatch(_ElementTagMatcher matcher, tag):
    """Call ``matcher._initTagMatch(tag)``.  Deprecated!"""
    matcher._initTagMatch(tag)
|
||||
|
||||
cdef public api tree.xmlNs* findOrBuildNodeNsPrefix(
        _Document doc, xmlNode* c_node, const_xmlChar* href, const_xmlChar* prefix) except NULL:
    """Delegate to ``doc._findOrBuildNodeNs`` with a final argument of 0.

    Raises TypeError if ``doc`` is None.
    """
    if doc is not None:
        return doc._findOrBuildNodeNs(c_node, href, prefix, 0)
    raise TypeError
|
||||
3
.venv/lib/python3.7/site-packages/lxml/pyclasslookup.py
Normal file
3
.venv/lib/python3.7/site-packages/lxml/pyclasslookup.py
Normal file
@@ -0,0 +1,3 @@
|
||||
# dummy module for backwards compatibility
|
||||
|
||||
from lxml.etree import PythonElementClassLookup
|
||||
565
.venv/lib/python3.7/site-packages/lxml/readonlytree.pxi
Normal file
565
.venv/lib/python3.7/site-packages/lxml/readonlytree.pxi
Normal file
@@ -0,0 +1,565 @@
|
||||
# read-only tree implementation
|
||||
|
||||
@cython.internal
cdef class _ReadOnlyProxy:
    u"A read-only proxy class suitable for PIs/Comments (for internal use only!)."
    # Whether the wrapped xmlNode* is freed when the proxies are released
    # (see _freeReadOnlyProxies).
    cdef bint _free_after_use
    # The wrapped libxml2 node; NULL once the proxy has been invalidated.
    cdef xmlNode* _c_node
    # The proxy that owns the list of dependent proxies (may be self).
    cdef _ReadOnlyProxy _source_proxy
    # Only populated on the source proxy: every proxy created from it.
    cdef list _dependent_proxies

    def __cinit__(self):
        self._c_node = NULL
        self._free_after_use = 0

    cdef int _assertNode(self) except -1:
        u"""This is our way of saying: this proxy is invalid!
        """
        if not self._c_node:
            raise ReferenceError("Proxy invalidated!")
        return 0

    cdef int _raise_unsupported_type(self) except -1:
        raise TypeError(f"Unsupported node type: {self._c_node.type}")

    cdef void free_after_use(self):
        u"""Should the xmlNode* be freed when releasing the proxy?
        """
        self._free_after_use = 1

    @property
    def tag(self):
        """Element tag
        """
        self._assertNode()
        if self._c_node.type == tree.XML_ELEMENT_NODE:
            return _namespacedName(self._c_node)
        elif self._c_node.type == tree.XML_PI_NODE:
            return ProcessingInstruction
        elif self._c_node.type == tree.XML_COMMENT_NODE:
            return Comment
        elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
            return Entity
        else:
            self._raise_unsupported_type()

    @property
    def text(self):
        """Text before the first subelement. This is either a string or
        the value None, if there was no text.
        """
        self._assertNode()
        if self._c_node.type == tree.XML_ELEMENT_NODE:
            return _collectText(self._c_node.children)
        elif self._c_node.type in (tree.XML_PI_NODE,
                                   tree.XML_COMMENT_NODE):
            if self._c_node.content is NULL:
                return ''
            else:
                return funicode(self._c_node.content)
        elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
            return f'&{funicode(self._c_node.name)};'
        else:
            self._raise_unsupported_type()

    @property
    def tail(self):
        """Text after this element's end tag, but before the next sibling
        element's start tag. This is either a string or the value None, if
        there was no text.
        """
        self._assertNode()
        return _collectText(self._c_node.next)

    @property
    def sourceline(self):
        """Original line number as found by the parser or None if unknown.
        """
        cdef long line
        self._assertNode()
        line = tree.xmlGetLineNo(self._c_node)
        if line > 0:
            return line
        else:
            return None

    def __repr__(self):
        self._assertNode()
        if self._c_node.type == tree.XML_ELEMENT_NODE:
            return "<Element %s at 0x%x>" % (strrepr(self.tag), id(self))
        elif self._c_node.type == tree.XML_COMMENT_NODE:
            return "<!--%s-->" % strrepr(self.text)
        elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
            # Proxies are only ever created for entity *reference* nodes
            # (see 'tag' and 'text' above), so that is the type to test.
            return "&%s;" % strrepr(funicode(self._c_node.name))
        elif self._c_node.type == tree.XML_PI_NODE:
            # NOTE(review): 'target' is defined by the PI proxy subclasses,
            # not by this base class.
            text = self.text
            if text:
                return "<?%s %s?>" % (strrepr(self.target), text)
            else:
                return "<?%s?>" % strrepr(self.target)
        else:
            self._raise_unsupported_type()

    def __getitem__(self, x):
        u"""Returns the subelement at the given position or the requested
        slice.

        Children are always returned as read-only proxies registered with
        this proxy's source proxy.  Raises IndexError if an integer index
        is out of range.
        """
        cdef xmlNode* c_node = NULL
        cdef Py_ssize_t step = 0, slicelength = 0
        cdef Py_ssize_t c, i
        cdef _node_to_node_function next_element
        cdef list result
        self._assertNode()
        if isinstance(x, slice):
            # slicing
            if _isFullSlice(<slice>x):
                # A full slice is the complete child list; build it from
                # read-only proxies exactly like __iter__ does.
                return self.getchildren()
            _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength)
            if c_node is NULL:
                return []
            if step > 0:
                next_element = _nextElement
            else:
                step = -step
                next_element = _previousElement
            result = []
            c = 0
            while c_node is not NULL and c < slicelength:
                # Exactly one read-only proxy per selected child.  This
                # class has no '_doc' attribute, so no regular element
                # wrapper can (or should) be created here.
                result.append(_newReadOnlyProxy(self._source_proxy, c_node))
                c = c + 1
                for i from 0 <= i < step:
                    c_node = next_element(c_node)
            return result
        else:
            # indexing
            c_node = _findChild(self._c_node, x)
            if c_node is NULL:
                raise IndexError, u"list index out of range"
            return _newReadOnlyProxy(self._source_proxy, c_node)

    def __len__(self):
        u"""Returns the number of subelements.
        """
        cdef Py_ssize_t c
        cdef xmlNode* c_node
        self._assertNode()
        c = 0
        c_node = self._c_node.children
        while c_node is not NULL:
            if tree._isElement(c_node):
                c = c + 1
            c_node = c_node.next
        return c

    def __nonzero__(self):
        cdef xmlNode* c_node
        self._assertNode()
        c_node = _findChildBackwards(self._c_node, 0)
        return c_node != NULL

    def __deepcopy__(self, memo):
        u"__deepcopy__(self, memo)"
        return self.__copy__()

    cpdef __copy__(self):
        u"__copy__(self)"
        cdef xmlDoc* c_doc
        cdef xmlNode* c_node
        cdef _Document new_doc
        if self._c_node is NULL:
            return self
        c_doc = _copyDocRoot(self._c_node.doc, self._c_node) # recursive
        new_doc = _documentFactory(c_doc, None)
        root = new_doc.getroot()
        if root is not None:
            return root
        # Comment/PI: no root element, so look for the first top-level
        # node of the same type as the wrapped node.
        c_node = c_doc.children
        while c_node is not NULL and c_node.type != self._c_node.type:
            c_node = c_node.next
        if c_node is NULL:
            return None
        return _elementFactory(new_doc, c_node)

    def __iter__(self):
        return iter(self.getchildren())

    def iterchildren(self, tag=None, *, reversed=False):
        u"""iterchildren(self, tag=None, reversed=False)

        Iterate over the children of this element.
        """
        children = self.getchildren()
        if tag is not None and tag != '*':
            children = [ el for el in children if el.tag == tag ]
        if reversed:
            children = children[::-1]
        return iter(children)

    cpdef getchildren(self):
        u"""Returns all subelements. The elements are returned in document
        order.
        """
        cdef xmlNode* c_node
        cdef list result
        self._assertNode()
        result = []
        c_node = self._c_node.children
        while c_node is not NULL:
            if tree._isElement(c_node):
                result.append(_newReadOnlyProxy(self._source_proxy, c_node))
            c_node = c_node.next
        return result

    def getparent(self):
        u"""Returns the parent of this element or None for the root element.
        """
        cdef xmlNode* c_parent
        self._assertNode()
        c_parent = self._c_node.parent
        if c_parent is NULL or not tree._isElement(c_parent):
            return None
        else:
            return _newReadOnlyProxy(self._source_proxy, c_parent)

    def getnext(self):
        u"""Returns the following sibling of this element or None.
        """
        cdef xmlNode* c_node
        self._assertNode()
        c_node = _nextElement(self._c_node)
        if c_node is not NULL:
            return _newReadOnlyProxy(self._source_proxy, c_node)
        return None

    def getprevious(self):
        u"""Returns the preceding sibling of this element or None.
        """
        cdef xmlNode* c_node
        self._assertNode()
        c_node = _previousElement(self._c_node)
        if c_node is not NULL:
            return _newReadOnlyProxy(self._source_proxy, c_node)
        return None
|
||||
|
||||
|
||||
@cython.final
@cython.internal
cdef class _ReadOnlyPIProxy(_ReadOnlyProxy):
    """A read-only proxy for processing instructions (for internal use only!)"""

    @property
    def target(self):
        """The name of the wrapped node, converted with ``funicode``.

        Raises ReferenceError (via ``_assertNode``) on an invalidated proxy.
        """
        self._assertNode()
        return funicode(self._c_node.name)
|
||||
|
||||
@cython.final
@cython.internal
cdef class _ReadOnlyEntityProxy(_ReadOnlyProxy):
    """A read-only proxy for entity references (for internal use only!)"""

    property name:
        # Every accessor checks the proxy first: an invalidated proxy has a
        # NULL node pointer that must never be dereferenced.
        def __get__(self):
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            self._assertNode()
            value_utf = _utf8(value)
            # '&' and ';' delimit the reference itself and cannot be part
            # of the entity name.
            if u'&' in value or u';' in value:
                raise ValueError(f"Invalid entity name '{value}'")
            tree.xmlNodeSetName(self._c_node, _xcstr(value_utf))

    @property
    def text(self):
        """The entity reference as text, i.e. ``&name;``."""
        self._assertNode()
        return f'&{funicode(self._c_node.name)};'
|
||||
|
||||
|
||||
@cython.internal
cdef class _ReadOnlyElementProxy(_ReadOnlyProxy):
    """The main read-only Element proxy class (for internal use only!)."""

    @property
    def attrib(self):
        """A new dict built from the element's attribute (name, value) pairs."""
        self._assertNode()
        attribute_items = _collectAttributes(self._c_node, 3)
        return dict(attribute_items)

    @property
    def prefix(self):
        """Namespace prefix or None.
        """
        self._assertNode()
        if self._c_node.ns is not NULL and self._c_node.ns.prefix is not NULL:
            return funicode(self._c_node.ns.prefix)
        return None

    @property
    def nsmap(self):
        """Namespace prefix->URI mapping known in the context of this
        Element.  This includes all namespace declarations of the
        parents.

        Note that changing the returned dict has no effect on the Element.
        """
        self._assertNode()
        return _build_nsmap(self._c_node)

    def get(self, key, default=None):
        u"""Gets an element attribute.
        """
        self._assertNode()
        return _getNodeAttributeValue(self._c_node, key, default)

    def keys(self):
        u"""Gets a list of attribute names.  The names are returned in an
        arbitrary order (just like for an ordinary Python dictionary).
        """
        self._assertNode()
        # mode 1: names only
        return _collectAttributes(self._c_node, 1)

    def values(self):
        u"""Gets element attributes, as a sequence.  The attributes are returned
        in an arbitrary order.
        """
        self._assertNode()
        # mode 2: values only
        return _collectAttributes(self._c_node, 2)

    def items(self):
        u"""Gets element attributes, as a sequence.  The attributes are returned
        in an arbitrary order.
        """
        self._assertNode()
        # mode 3: the same mode that 'attrib' feeds into dict()
        return _collectAttributes(self._c_node, 3)
|
||||
|
||||
cdef _ReadOnlyProxy _newReadOnlyProxy(
        _ReadOnlyProxy source_proxy, xmlNode* c_node):
    """Create the read-only proxy matching the type of ``c_node``.

    Elements and processing instructions get their dedicated proxy
    classes, comments and entity references the plain base proxy.  The
    new proxy is registered through ``_initReadOnlyProxy``.

    Raises TypeError for any other node type.
    """
    cdef _ReadOnlyProxy proxy
    if c_node.type == tree.XML_ELEMENT_NODE:
        proxy = _ReadOnlyElementProxy.__new__(_ReadOnlyElementProxy)
    elif c_node.type == tree.XML_PI_NODE:
        proxy = _ReadOnlyPIProxy.__new__(_ReadOnlyPIProxy)
    elif (c_node.type == tree.XML_COMMENT_NODE or
            c_node.type == tree.XML_ENTITY_REF_NODE):
        proxy = _ReadOnlyProxy.__new__(_ReadOnlyProxy)
    else:
        raise TypeError(f"Unsupported element type: {c_node.type}")
    proxy._c_node = c_node
    _initReadOnlyProxy(proxy, source_proxy)
    return proxy
|
||||
|
||||
cdef inline _initReadOnlyProxy(_ReadOnlyProxy el,
                               _ReadOnlyProxy source_proxy):
    """Link ``el`` to its source proxy.

    With a source proxy, ``el`` is appended to that proxy's list of
    dependent proxies.  Without one, ``el`` becomes its own source proxy
    and starts a new list containing only itself.
    """
    if source_proxy is not None:
        el._source_proxy = source_proxy
        source_proxy._dependent_proxies.append(el)
    else:
        el._source_proxy = el
        el._dependent_proxies = [el]
|
||||
|
||||
cdef _freeReadOnlyProxies(_ReadOnlyProxy sourceProxy):
    """Invalidate every proxy that depends on ``sourceProxy``.

    Each dependent proxy has its node pointer reset to NULL; nodes of
    proxies flagged with ``_free_after_use`` are freed.  The list of
    dependent proxies is emptied afterwards.  Does nothing if
    ``sourceProxy`` or its list of dependent proxies is None.
    """
    cdef xmlNode* c_released
    cdef _ReadOnlyProxy proxy
    if sourceProxy is None or sourceProxy._dependent_proxies is None:
        return
    for proxy in sourceProxy._dependent_proxies:
        # Detach the pointer from the proxy before (possibly) freeing it.
        c_released = proxy._c_node
        proxy._c_node = NULL
        if proxy._free_after_use:
            tree.xmlFreeNode(c_released)
    del sourceProxy._dependent_proxies[:]
|
||||
|
||||
# opaque wrapper around non-element nodes, e.g. the document node
|
||||
#
|
||||
# This class does not imply any restrictions on modifiability or
|
||||
# read-only status of the node, so use with caution.
|
||||
|
||||
@cython.internal
cdef class _OpaqueNodeWrapper:
    # The wrapped libxml2 node.
    cdef tree.xmlNode* _c_node

    def __init__(self):
        # Always fails: calling the type from Python is not supported.
        raise TypeError(u"This type cannot be instantiated from Python")
|
||||
|
||||
@cython.final
@cython.internal
cdef class _OpaqueDocumentWrapper(_OpaqueNodeWrapper):
    cdef int _assertNode(self) except -1:
        u"""This is our way of saying: this proxy is invalid!
        """
        assert self._c_node is not NULL, u"Proxy invalidated!"
        return 0

    cpdef append(self, other_element):
        u"""Append a copy of an Element to the list of children.

        Raises ValueError if an element is appended while the document
        already has a root element, and TypeError for nodes that are
        neither elements, processing instructions nor comments.
        """
        cdef xmlNode* c_tail
        cdef xmlNode* c_source
        cdef xmlNode* c_copy
        self._assertNode()
        c_source = _roNodeOf(other_element)
        if c_source.type == tree.XML_ELEMENT_NODE:
            if tree.xmlDocGetRootElement(<tree.xmlDoc*>self._c_node) is not NULL:
                raise ValueError(u"cannot append, document already has a root element")
        elif c_source.type not in (tree.XML_PI_NODE, tree.XML_COMMENT_NODE):
            raise TypeError(f"unsupported element type for top-level node: {c_source.type}")
        c_copy = _copyNodeToDoc(c_source, <tree.xmlDoc*>self._c_node)
        # Read the copy's sibling pointer before xmlAddChild() relinks it.
        c_tail = c_copy.next
        tree.xmlAddChild(self._c_node, c_copy)
        _moveTail(c_tail, c_copy)

    def extend(self, elements):
        u"""Append a copy of all Elements from a sequence to the list of
        children.
        """
        self._assertNode()
        for item in elements:
            self.append(item)
|
||||
|
||||
cdef _OpaqueNodeWrapper _newOpaqueAppendOnlyNodeWrapper(xmlNode* c_node):
    """Wrap ``c_node`` in an opaque wrapper object.

    XML and HTML document nodes get an ``_OpaqueDocumentWrapper``, all
    other nodes a plain ``_OpaqueNodeWrapper``.
    """
    cdef _OpaqueNodeWrapper wrapper
    if (c_node.type == tree.XML_DOCUMENT_NODE or
            c_node.type == tree.XML_HTML_DOCUMENT_NODE):
        wrapper = _OpaqueDocumentWrapper.__new__(_OpaqueDocumentWrapper)
    else:
        wrapper = _OpaqueNodeWrapper.__new__(_OpaqueNodeWrapper)
    wrapper._c_node = c_node
    return wrapper
|
||||
|
||||
# element proxies that allow restricted modification
|
||||
|
||||
@cython.internal
cdef class _ModifyContentOnlyProxy(_ReadOnlyProxy):
    u"""A read-only proxy that allows changing the text content.
    """
    property text:
        def __get__(self):
            # Returns '' (never None) when the node has no content.
            self._assertNode()
            if self._c_node.content is NULL:
                return ''
            else:
                return funicode(self._c_node.content)

        def __set__(self, value):
            self._assertNode()
            if value is None:
                c_text = <const_xmlChar*>NULL
            else:
                # Rebinding 'value' keeps the encoded object referenced
                # while its C string is in use below.
                value = _utf8(value)
                c_text = _xcstr(value)
            tree.xmlNodeSetContent(self._c_node, c_text)
|
||||
|
||||
@cython.final
@cython.internal
cdef class _ModifyContentOnlyPIProxy(_ModifyContentOnlyProxy):
    """A read-only proxy that allows changing the text/target content of a
    processing instruction.
    """
    property target:
        def __get__(self):
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            self._assertNode()
            # Rebinding 'value' keeps the encoded object referenced while
            # its C string is handed to libxml2.
            value = _utf8(value)
            tree.xmlNodeSetName(self._c_node, _xcstr(value))
|
||||
|
||||
@cython.final
@cython.internal
cdef class _ModifyContentOnlyEntityProxy(_ModifyContentOnlyProxy):
    "A read-only proxy for entity references (for internal use only!)"
    property name:
        def __get__(self):
            # An invalidated proxy has a NULL node pointer; check first.
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            self._assertNode()
            value_utf = _utf8(value)
            # Validate the name as passed in, not its encoded form: a text
            # '&' cannot be searched for in an encoded bytes object.
            # Raises ValueError like _ReadOnlyEntityProxy.name does, so the
            # check is also kept when assertions are disabled.
            if u'&' in value or u';' in value:
                raise ValueError(f"Invalid entity name '{value}'")
            tree.xmlNodeSetName(self._c_node, _xcstr(value_utf))
|
||||
|
||||
|
||||
@cython.final
@cython.internal
cdef class _AppendOnlyElementProxy(_ReadOnlyElementProxy):
    u"""A read-only element that allows adding children and changing the
    text content (i.e. everything that adds to the subtree).
    """
    cpdef append(self, other_element):
        u"""Append a copy of an Element to the list of children.
        """
        cdef xmlNode* c_tail
        cdef xmlNode* c_source
        cdef xmlNode* c_copy
        self._assertNode()
        c_source = _roNodeOf(other_element)
        c_copy = _copyNodeToDoc(c_source, self._c_node.doc)
        # Read the copy's sibling pointer before xmlAddChild() relinks it.
        c_tail = c_copy.next
        tree.xmlAddChild(self._c_node, c_copy)
        _moveTail(c_tail, c_copy)

    def extend(self, elements):
        u"""Append a copy of all Elements from a sequence to the list of
        children.
        """
        self._assertNode()
        for item in elements:
            self.append(item)

    property text:
        """Text before the first subelement. This is either a string or the
        value None, if there was no text.
        """
        def __get__(self):
            self._assertNode()
            return _collectText(self._c_node.children)

        def __set__(self, value):
            self._assertNode()
            if isinstance(value, QName):
                # QName values are resolved against this element and then
                # decoded to text before being stored.
                value = _resolveQNameText(self, value).decode('utf8')
            _setNodeText(self._c_node, value)
|
||||
|
||||
|
||||
cdef _ReadOnlyProxy _newAppendOnlyProxy(
        _ReadOnlyProxy source_proxy, xmlNode* c_node):
    """Create the restricted-modification proxy matching ``c_node``.

    Elements get an ``_AppendOnlyElementProxy``, processing instructions
    a ``_ModifyContentOnlyPIProxy`` and comments a
    ``_ModifyContentOnlyProxy``.  The new proxy is registered through
    ``_initReadOnlyProxy``.

    Raises TypeError for any other node type.
    """
    cdef _ReadOnlyProxy proxy
    if c_node.type == tree.XML_ELEMENT_NODE:
        proxy = _AppendOnlyElementProxy.__new__(_AppendOnlyElementProxy)
    elif c_node.type == tree.XML_PI_NODE:
        proxy = _ModifyContentOnlyPIProxy.__new__(_ModifyContentOnlyPIProxy)
    elif c_node.type == tree.XML_COMMENT_NODE:
        proxy = _ModifyContentOnlyProxy.__new__(_ModifyContentOnlyProxy)
    else:
        raise TypeError(f"Unsupported element type: {c_node.type}")
    proxy._c_node = c_node
    _initReadOnlyProxy(proxy, source_proxy)
    return proxy
|
||||
|
||||
cdef xmlNode* _roNodeOf(element) except NULL:
    """Return the libxml2 node wrapped by ``element``.

    Accepts ``_Element``, ``_ReadOnlyProxy`` and ``_OpaqueNodeWrapper``
    instances.  Raises TypeError for any other argument type and for
    wrappers whose node pointer is NULL.
    """
    cdef xmlNode* c_found
    if isinstance(element, _Element):
        c_found = (<_Element>element)._c_node
    elif isinstance(element, _ReadOnlyProxy):
        c_found = (<_ReadOnlyProxy>element)._c_node
    elif isinstance(element, _OpaqueNodeWrapper):
        c_found = (<_OpaqueNodeWrapper>element)._c_node
    else:
        raise TypeError(f"invalid argument type {type(element)}")

    if c_found is not NULL:
        return c_found
    raise TypeError(u"invalid element")
|
||||
|
||||
cdef xmlNode* _nonRoNodeOf(element) except NULL:
    """Return the libxml2 node wrapped by a non-read-only ``element``.

    Accepts ``_Element``, ``_AppendOnlyElementProxy`` and
    ``_OpaqueNodeWrapper`` instances.  Raises TypeError for any other
    argument type and for wrappers whose node pointer is NULL.
    """
    cdef xmlNode* c_found
    if isinstance(element, _Element):
        c_found = (<_Element>element)._c_node
    elif isinstance(element, _AppendOnlyElementProxy):
        c_found = (<_AppendOnlyElementProxy>element)._c_node
    elif isinstance(element, _OpaqueNodeWrapper):
        c_found = (<_OpaqueNodeWrapper>element)._c_node
    else:
        raise TypeError(f"invalid argument type {type(element)}")

    if c_found is not NULL:
        return c_found
    raise TypeError(u"invalid element")
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user