Files
lichterei-web/kirby/src/Toolkit/Xml.php
Bastian Allgeier 70b8439c49 Upgrade to 3.6.1
2021-12-07 12:39:43 +01:00

436 lines
19 KiB
PHP
Executable File

<?php
namespace Kirby\Toolkit;
use SimpleXMLElement;
/**
* XML parser and creator class
*
* @package Kirby Toolkit
* @author Bastian Allgeier <bastian@getkirby.com>
* @link https://getkirby.com
* @copyright Bastian Allgeier GmbH
* @license https://opensource.org/licenses/MIT
*/
class Xml
{
/**
* HTML to XML conversion table for entities
*
* @var array
*/
public static $entities = [
'&nbsp;' => '&#160;', '&iexcl;' => '&#161;', '&cent;' => '&#162;', '&pound;' => '&#163;', '&curren;' => '&#164;', '&yen;' => '&#165;', '&brvbar;' => '&#166;', '&sect;' => '&#167;',
'&uml;' => '&#168;', '&copy;' => '&#169;', '&ordf;' => '&#170;', '&laquo;' => '&#171;', '&not;' => '&#172;', '&shy;' => '&#173;', '&reg;' => '&#174;', '&macr;' => '&#175;',
'&deg;' => '&#176;', '&plusmn;' => '&#177;', '&sup2;' => '&#178;', '&sup3;' => '&#179;', '&acute;' => '&#180;', '&micro;' => '&#181;', '&para;' => '&#182;', '&middot;' => '&#183;',
'&cedil;' => '&#184;', '&sup1;' => '&#185;', '&ordm;' => '&#186;', '&raquo;' => '&#187;', '&frac14;' => '&#188;', '&frac12;' => '&#189;', '&frac34;' => '&#190;', '&iquest;' => '&#191;',
'&Agrave;' => '&#192;', '&Aacute;' => '&#193;', '&Acirc;' => '&#194;', '&Atilde;' => '&#195;', '&Auml;' => '&#196;', '&Aring;' => '&#197;', '&AElig;' => '&#198;', '&Ccedil;' => '&#199;',
'&Egrave;' => '&#200;', '&Eacute;' => '&#201;', '&Ecirc;' => '&#202;', '&Euml;' => '&#203;', '&Igrave;' => '&#204;', '&Iacute;' => '&#205;', '&Icirc;' => '&#206;', '&Iuml;' => '&#207;',
'&ETH;' => '&#208;', '&Ntilde;' => '&#209;', '&Ograve;' => '&#210;', '&Oacute;' => '&#211;', '&Ocirc;' => '&#212;', '&Otilde;' => '&#213;', '&Ouml;' => '&#214;', '&times;' => '&#215;',
'&Oslash;' => '&#216;', '&Ugrave;' => '&#217;', '&Uacute;' => '&#218;', '&Ucirc;' => '&#219;', '&Uuml;' => '&#220;', '&Yacute;' => '&#221;', '&THORN;' => '&#222;', '&szlig;' => '&#223;',
'&agrave;' => '&#224;', '&aacute;' => '&#225;', '&acirc;' => '&#226;', '&atilde;' => '&#227;', '&auml;' => '&#228;', '&aring;' => '&#229;', '&aelig;' => '&#230;', '&ccedil;' => '&#231;',
'&egrave;' => '&#232;', '&eacute;' => '&#233;', '&ecirc;' => '&#234;', '&euml;' => '&#235;', '&igrave;' => '&#236;', '&iacute;' => '&#237;', '&icirc;' => '&#238;', '&iuml;' => '&#239;',
'&eth;' => '&#240;', '&ntilde;' => '&#241;', '&ograve;' => '&#242;', '&oacute;' => '&#243;', '&ocirc;' => '&#244;', '&otilde;' => '&#245;', '&ouml;' => '&#246;', '&divide;' => '&#247;',
'&oslash;' => '&#248;', '&ugrave;' => '&#249;', '&uacute;' => '&#250;', '&ucirc;' => '&#251;', '&uuml;' => '&#252;', '&yacute;' => '&#253;', '&thorn;' => '&#254;', '&yuml;' => '&#255;',
'&fnof;' => '&#402;', '&Alpha;' => '&#913;', '&Beta;' => '&#914;', '&Gamma;' => '&#915;', '&Delta;' => '&#916;', '&Epsilon;' => '&#917;', '&Zeta;' => '&#918;', '&Eta;' => '&#919;',
'&Theta;' => '&#920;', '&Iota;' => '&#921;', '&Kappa;' => '&#922;', '&Lambda;' => '&#923;', '&Mu;' => '&#924;', '&Nu;' => '&#925;', '&Xi;' => '&#926;', '&Omicron;' => '&#927;',
'&Pi;' => '&#928;', '&Rho;' => '&#929;', '&Sigma;' => '&#931;', '&Tau;' => '&#932;', '&Upsilon;' => '&#933;', '&Phi;' => '&#934;', '&Chi;' => '&#935;', '&Psi;' => '&#936;',
'&Omega;' => '&#937;', '&alpha;' => '&#945;', '&beta;' => '&#946;', '&gamma;' => '&#947;', '&delta;' => '&#948;', '&epsilon;' => '&#949;', '&zeta;' => '&#950;', '&eta;' => '&#951;',
'&theta;' => '&#952;', '&iota;' => '&#953;', '&kappa;' => '&#954;', '&lambda;' => '&#955;', '&mu;' => '&#956;', '&nu;' => '&#957;', '&xi;' => '&#958;', '&omicron;' => '&#959;',
'&pi;' => '&#960;', '&rho;' => '&#961;', '&sigmaf;' => '&#962;', '&sigma;' => '&#963;', '&tau;' => '&#964;', '&upsilon;' => '&#965;', '&phi;' => '&#966;', '&chi;' => '&#967;',
'&psi;' => '&#968;', '&omega;' => '&#969;', '&thetasym;' => '&#977;', '&upsih;' => '&#978;', '&piv;' => '&#982;', '&bull;' => '&#8226;', '&hellip;' => '&#8230;', '&prime;' => '&#8242;',
'&Prime;' => '&#8243;', '&oline;' => '&#8254;', '&frasl;' => '&#8260;', '&weierp;' => '&#8472;', '&image;' => '&#8465;', '&real;' => '&#8476;', '&trade;' => '&#8482;', '&alefsym;' => '&#8501;',
'&larr;' => '&#8592;', '&uarr;' => '&#8593;', '&rarr;' => '&#8594;', '&darr;' => '&#8595;', '&harr;' => '&#8596;', '&crarr;' => '&#8629;', '&lArr;' => '&#8656;', '&uArr;' => '&#8657;',
'&rArr;' => '&#8658;', '&dArr;' => '&#8659;', '&hArr;' => '&#8660;', '&forall;' => '&#8704;', '&part;' => '&#8706;', '&exist;' => '&#8707;', '&empty;' => '&#8709;', '&nabla;' => '&#8711;',
'&isin;' => '&#8712;', '&notin;' => '&#8713;', '&ni;' => '&#8715;', '&prod;' => '&#8719;', '&sum;' => '&#8721;', '&minus;' => '&#8722;', '&lowast;' => '&#8727;', '&radic;' => '&#8730;',
'&prop;' => '&#8733;', '&infin;' => '&#8734;', '&ang;' => '&#8736;', '&and;' => '&#8743;', '&or;' => '&#8744;', '&cap;' => '&#8745;', '&cup;' => '&#8746;', '&int;' => '&#8747;',
'&there4;' => '&#8756;', '&sim;' => '&#8764;', '&cong;' => '&#8773;', '&asymp;' => '&#8776;', '&ne;' => '&#8800;', '&equiv;' => '&#8801;', '&le;' => '&#8804;', '&ge;' => '&#8805;',
'&sub;' => '&#8834;', '&sup;' => '&#8835;', '&nsub;' => '&#8836;', '&sube;' => '&#8838;', '&supe;' => '&#8839;', '&oplus;' => '&#8853;', '&otimes;' => '&#8855;', '&perp;' => '&#8869;',
'&sdot;' => '&#8901;', '&lceil;' => '&#8968;', '&rceil;' => '&#8969;', '&lfloor;' => '&#8970;', '&rfloor;' => '&#8971;', '&lang;' => '&#9001;', '&rang;' => '&#9002;', '&loz;' => '&#9674;',
'&spades;' => '&#9824;', '&clubs;' => '&#9827;', '&hearts;' => '&#9829;', '&diams;' => '&#9830;', '&quot;' => '&#34;', '&amp;' => '&#38;', '&lt;' => '&#60;', '&gt;' => '&#62;', '&OElig;' => '&#338;',
'&oelig;' => '&#339;', '&Scaron;' => '&#352;', '&scaron;' => '&#353;', '&Yuml;' => '&#376;', '&circ;' => '&#710;', '&tilde;' => '&#732;', '&ensp;' => '&#8194;', '&emsp;' => '&#8195;',
'&thinsp;' => '&#8201;', '&zwnj;' => '&#8204;', '&zwj;' => '&#8205;', '&lrm;' => '&#8206;', '&rlm;' => '&#8207;', '&ndash;' => '&#8211;', '&mdash;' => '&#8212;', '&lsquo;' => '&#8216;',
'&rsquo;' => '&#8217;', '&sbquo;' => '&#8218;', '&ldquo;' => '&#8220;', '&rdquo;' => '&#8221;', '&bdquo;' => '&#8222;', '&dagger;' => '&#8224;', '&Dagger;' => '&#8225;', '&permil;' => '&#8240;',
'&lsaquo;' => '&#8249;', '&rsaquo;' => '&#8250;', '&euro;' => '&#8364;'
];
/**
* Closing string for void tags
*
* @var string
*/
public static $void = ' />';
/**
* Generates a single attribute or a list of attributes
*
* @param string|array $name String: A single attribute with that name will be generated.
* Key-value array: A list of attributes will be generated. Don't pass a second argument in that case.
* @param mixed $value If used with a `$name` string, pass the value of the attribute here.
* If used with a `$name` array, this can be set to `false` to disable attribute sorting.
* @return string|null The generated XML attributes string
*/
public static function attr($name, $value = null): ?string
{
if (is_array($name) === true) {
if ($value !== false) {
ksort($name);
}
$attributes = [];
foreach ($name as $key => $val) {
$a = static::attr($key, $val);
if ($a) {
$attributes[] = $a;
}
}
return implode(' ', $attributes);
}
if ($value === null || $value === '' || $value === []) {
return null;
}
if ($value === ' ') {
return strtolower($name) . '=""';
}
if (is_bool($value) === true) {
return $value === true ? strtolower($name) . '="' . strtolower($name) . '"' : null;
}
if (is_array($value) === true) {
if (isset($value['value'], $value['escape'])) {
$value = $value['escape'] === true ? static::encode($value['value']) : $value['value'];
} else {
$value = implode(' ', array_filter(
$value,
fn ($value) => !empty($value) || is_numeric($value)
));
}
} else {
$value = static::encode($value);
}
return strtolower($name) . '="' . $value . '"';
}
/**
* Creates an XML string from an array
*
* Supports special array keys `@name` (element name),
* `@attributes` (XML attribute key-value array),
* `@namespaces` (array with XML namespaces) and
* `@value` (element content)
*
* @param array|string $props The source array or tag content (used internally)
* @param string $name The name of the root element
* @param bool $head Include the XML declaration head or not
* @param string $indent Indentation string, defaults to two spaces
* @param int $level The indentation level (used internally)
* @return string The XML string
*/
public static function create($props, string $name = 'root', bool $head = true, string $indent = ' ', int $level = 0): string
{
if (is_array($props) === true) {
if (A::isAssociative($props) === true) {
// a tag with attributes or named children
// extract metadata from special array keys
$name = $props['@name'] ?? $name;
$attributes = $props['@attributes'] ?? [];
$value = $props['@value'] ?? null;
if (isset($props['@namespaces'])) {
foreach ($props['@namespaces'] as $key => $namespace) {
$key = 'xmlns' . (($key)? ':' . $key : '');
$attributes[$key] = $namespace;
}
}
// continue with just the children
unset($props['@name'], $props['@attributes'], $props['@namespaces'], $props['@value']);
if (count($props) > 0) {
// there are children, use them instead of the value
$value = [];
foreach ($props as $childName => $childItem) {
// render the child, but don't include the indentation of the first line
$value[] = trim(static::create($childItem, $childName, false, $indent, $level + 1));
}
}
$result = static::tag($name, $value, $attributes, $indent, $level);
} else {
// just children
$result = [];
foreach ($props as $childItem) {
$result[] = static::create($childItem, $name, false, $indent, $level);
}
$result = implode(PHP_EOL, $result);
}
} else {
// scalar value
$result = static::tag($name, $props, null, $indent, $level);
}
if ($head === true) {
return '<?xml version="1.0" encoding="UTF-8"?>' . PHP_EOL . $result;
} else {
return $result;
}
}
/**
* Removes all HTML/XML tags and encoded chars from a string
*
* ```
* echo Xml::decode('some &uuml;ber <em>crazy</em> stuff');
* // output: some über crazy stuff
* ```
*
* @param string|null $string
* @return string
*/
public static function decode(?string $string): string
{
if ($string === null) {
$string = '';
}
$string = strip_tags($string);
return html_entity_decode($string, ENT_COMPAT, 'utf-8');
}
/**
* Converts a string to an XML-safe string
*
* Converts it to HTML-safe first and then it
* will replace HTML entities with XML entities
*
* ```php
* echo Xml::encode('some über crazy stuff');
* // output: some &#252;ber crazy stuff
* ```
*
* @param string|null $string
* @param bool $html True = Convert to HTML-safe first
* @return string
*/
public static function encode(?string $string, bool $html = true): string
{
if ($string === null) {
return '';
}
if ($html === true) {
$string = Html::encode($string, false);
}
$entities = self::entities();
$html = array_keys($entities);
$xml = array_values($entities);
return str_replace($html, $xml, $string);
}
/**
* Returns the HTML-to-XML entity translation table
*
* @return array
*/
public static function entities(): array
{
return self::$entities;
}
/**
* Parses an XML string and returns an array
*
* @param string $xml
* @return array|null Parsed array or `null` on error
*/
public static function parse(string $xml): ?array
{
$xml = @simplexml_load_string($xml, 'SimpleXMLElement', LIBXML_NOENT);
if (is_object($xml) !== true) {
return null;
}
return static::simplify($xml);
}
/**
* Breaks a SimpleXMLElement down into a simpler tree
* structure of arrays and strings
*
* @param \SimpleXMLElement $element
* @param bool $collectName Whether the element name should be collected (for the root element)
* @return array|string
*/
public static function simplify(SimpleXMLElement $element, bool $collectName = true)
{
// get all XML namespaces of the whole document to iterate over later;
// we don't need the global namespace (empty string) in the list
$usedNamespaces = $element->getNamespaces(true);
if (isset($usedNamespaces[''])) {
unset($usedNamespaces['']);
}
// now collect element metadata of the parent
$array = [];
if ($collectName === true) {
$array['@name'] = $element->getName();
}
// collect attributes with each defined document namespace;
// also check for attributes without any namespace
$attributeArray = [];
foreach (array_merge([0 => null], array_keys($usedNamespaces)) as $namespace) {
$prefix = ($namespace)? $namespace . ':' : '';
$attributes = $element->attributes($namespace, true);
foreach ($attributes as $key => $value) {
$attributeArray[$prefix . $key] = (string)$value;
}
}
if (count($attributeArray) > 0) {
$array['@attributes'] = $attributeArray;
}
// collect namespace definitions of this particular XML element
if ($namespaces = $element->getDocNamespaces(false, false)) {
$array['@namespaces'] = $namespaces;
}
// check for children with each defined document namespace;
// also check for children without any namespace
$hasChildren = false;
foreach (array_merge([0 => null], array_keys($usedNamespaces)) as $namespace) {
$prefix = ($namespace)? $namespace . ':' : '';
$children = $element->children($namespace, true);
if (count($children) > 0) {
// there are children, recursively simplify each one
$hasChildren = true;
// make a grouped collection of elements per element name
foreach ($children as $child) {
$array[$prefix . $child->getName()][] = static::simplify($child, false);
}
}
}
if ($hasChildren === true) {
// there were children of any namespace
// reduce elements where there is only one item
// of the respective type to a simple string;
// don't do anything with special `@` metadata keys
foreach ($array as $name => $item) {
if (substr($name, 0, 1) !== '@' && count($item) === 1) {
$array[$name] = $item[0];
}
}
return $array;
} else {
// we didn't find any XML children above, only use the string value
$element = (string)$element;
if (count($array) > 0) {
$array['@value'] = $element;
return $array;
} else {
return $element;
}
}
}
/**
* Builds an XML tag
*
* @param string $name Tag name
* @param array|string|null $content Scalar value or array with multiple lines of content or `null` to
* generate a self-closing tag; pass an empty string to generate empty content
* @param array $attr An associative array with additional attributes for the tag
* @param string|null $indent Indentation string, defaults to two spaces or `null` for output on one line
* @param int $level Indentation level
* @return string The generated XML
*/
public static function tag(string $name, $content = '', array $attr = null, ?string $indent = null, int $level = 0): string
{
$attr = static::attr($attr);
$start = '<' . $name . ($attr ? ' ' . $attr : '') . '>';
$startShort = '<' . $name . ($attr ? ' ' . $attr : '') . static::$void;
$end = '</' . $name . '>';
$baseIndent = $indent ? str_repeat($indent, $level) : '';
if (is_array($content) === true) {
if (is_string($indent) === true) {
$xml = $baseIndent . $start . PHP_EOL;
foreach ($content as $line) {
$xml .= $baseIndent . $indent . $line . PHP_EOL;
}
$xml .= $baseIndent . $end;
} else {
$xml = $start . implode($content) . $end;
}
} elseif ($content === null) {
$xml = $baseIndent . $startShort;
} else {
$xml = $baseIndent . $start . static::value($content) . $end;
}
return $xml;
}
/**
* Properly encodes tag contents
*
* @param mixed $value
* @return string|null
*/
public static function value($value): ?string
{
if ($value === true) {
return 'true';
}
if ($value === false) {
return 'false';
}
if (is_numeric($value) === true) {
return (string)$value;
}
if ($value === null || $value === '') {
return null;
}
if (Str::startsWith($value, '<![CDATA[') === true) {
return $value;
}
// TODO: in 3.7.0 use ENT_NOQUOTES | ENT_XML1 instead
$encoded = htmlentities($value, ENT_COMPAT);
if ($encoded === $value) {
// no CDATA block needed
return $value;
}
// wrap everything in a CDATA block
// and ensure that it is not closed in the input string
return '<![CDATA[' . str_replace(']]>', ']]]]><![CDATA[>', $value) . ']]>';
}
}