449 lines
17 KiB
PHP
449 lines
17 KiB
PHP
<?php
|
|
|
|
namespace Kirby\Toolkit;
|
|
|
|
use Kirby\Cms\Helpers;
|
|
use SimpleXMLElement;
|
|
|
|
/**
 * XML parser and creator class
 *
 * @package   Kirby Toolkit
 * @author    Bastian Allgeier <bastian@getkirby.com>
 * @link      https://getkirby.com
 * @copyright Bastian Allgeier
 * @license   https://opensource.org/licenses/MIT
 */
|
|
class Xml
|
|
{
|
|
/**
 * HTML to XML conversion table for entities
 *
 * Keys are named HTML entities, values are the numeric character
 * references of the same code points. `Xml::encode()` feeds the keys
 * and values of this table into `str_replace()`.
 */
public static array|null $entities = [
    '&nbsp;' => '&#160;', '&iexcl;' => '&#161;', '&cent;' => '&#162;', '&pound;' => '&#163;', '&curren;' => '&#164;', '&yen;' => '&#165;', '&brvbar;' => '&#166;', '&sect;' => '&#167;',
    '&uml;' => '&#168;', '&copy;' => '&#169;', '&ordf;' => '&#170;', '&laquo;' => '&#171;', '&not;' => '&#172;', '&shy;' => '&#173;', '&reg;' => '&#174;', '&macr;' => '&#175;',
    '&deg;' => '&#176;', '&plusmn;' => '&#177;', '&sup2;' => '&#178;', '&sup3;' => '&#179;', '&acute;' => '&#180;', '&micro;' => '&#181;', '&para;' => '&#182;', '&middot;' => '&#183;',
    '&cedil;' => '&#184;', '&sup1;' => '&#185;', '&ordm;' => '&#186;', '&raquo;' => '&#187;', '&frac14;' => '&#188;', '&frac12;' => '&#189;', '&frac34;' => '&#190;', '&iquest;' => '&#191;',
    '&Agrave;' => '&#192;', '&Aacute;' => '&#193;', '&Acirc;' => '&#194;', '&Atilde;' => '&#195;', '&Auml;' => '&#196;', '&Aring;' => '&#197;', '&AElig;' => '&#198;', '&Ccedil;' => '&#199;',
    '&Egrave;' => '&#200;', '&Eacute;' => '&#201;', '&Ecirc;' => '&#202;', '&Euml;' => '&#203;', '&Igrave;' => '&#204;', '&Iacute;' => '&#205;', '&Icirc;' => '&#206;', '&Iuml;' => '&#207;',
    '&ETH;' => '&#208;', '&Ntilde;' => '&#209;', '&Ograve;' => '&#210;', '&Oacute;' => '&#211;', '&Ocirc;' => '&#212;', '&Otilde;' => '&#213;', '&Ouml;' => '&#214;', '&times;' => '&#215;',
    '&Oslash;' => '&#216;', '&Ugrave;' => '&#217;', '&Uacute;' => '&#218;', '&Ucirc;' => '&#219;', '&Uuml;' => '&#220;', '&Yacute;' => '&#221;', '&THORN;' => '&#222;', '&szlig;' => '&#223;',
    '&agrave;' => '&#224;', '&aacute;' => '&#225;', '&acirc;' => '&#226;', '&atilde;' => '&#227;', '&auml;' => '&#228;', '&aring;' => '&#229;', '&aelig;' => '&#230;', '&ccedil;' => '&#231;',
    '&egrave;' => '&#232;', '&eacute;' => '&#233;', '&ecirc;' => '&#234;', '&euml;' => '&#235;', '&igrave;' => '&#236;', '&iacute;' => '&#237;', '&icirc;' => '&#238;', '&iuml;' => '&#239;',
    '&eth;' => '&#240;', '&ntilde;' => '&#241;', '&ograve;' => '&#242;', '&oacute;' => '&#243;', '&ocirc;' => '&#244;', '&otilde;' => '&#245;', '&ouml;' => '&#246;', '&divide;' => '&#247;',
    '&oslash;' => '&#248;', '&ugrave;' => '&#249;', '&uacute;' => '&#250;', '&ucirc;' => '&#251;', '&uuml;' => '&#252;', '&yacute;' => '&#253;', '&thorn;' => '&#254;', '&yuml;' => '&#255;',
    '&fnof;' => '&#402;', '&Alpha;' => '&#913;', '&Beta;' => '&#914;', '&Gamma;' => '&#915;', '&Delta;' => '&#916;', '&Epsilon;' => '&#917;', '&Zeta;' => '&#918;', '&Eta;' => '&#919;',
    '&Theta;' => '&#920;', '&Iota;' => '&#921;', '&Kappa;' => '&#922;', '&Lambda;' => '&#923;', '&Mu;' => '&#924;', '&Nu;' => '&#925;', '&Xi;' => '&#926;', '&Omicron;' => '&#927;',
    '&Pi;' => '&#928;', '&Rho;' => '&#929;', '&Sigma;' => '&#931;', '&Tau;' => '&#932;', '&Upsilon;' => '&#933;', '&Phi;' => '&#934;', '&Chi;' => '&#935;', '&Psi;' => '&#936;',
    '&Omega;' => '&#937;', '&alpha;' => '&#945;', '&beta;' => '&#946;', '&gamma;' => '&#947;', '&delta;' => '&#948;', '&epsilon;' => '&#949;', '&zeta;' => '&#950;', '&eta;' => '&#951;',
    '&theta;' => '&#952;', '&iota;' => '&#953;', '&kappa;' => '&#954;', '&lambda;' => '&#955;', '&mu;' => '&#956;', '&nu;' => '&#957;', '&xi;' => '&#958;', '&omicron;' => '&#959;',
    '&pi;' => '&#960;', '&rho;' => '&#961;', '&sigmaf;' => '&#962;', '&sigma;' => '&#963;', '&tau;' => '&#964;', '&upsilon;' => '&#965;', '&phi;' => '&#966;', '&chi;' => '&#967;',
    '&psi;' => '&#968;', '&omega;' => '&#969;', '&thetasym;' => '&#977;', '&upsih;' => '&#978;', '&piv;' => '&#982;', '&bull;' => '&#8226;', '&hellip;' => '&#8230;', '&prime;' => '&#8242;',
    '&Prime;' => '&#8243;', '&oline;' => '&#8254;', '&frasl;' => '&#8260;', '&weierp;' => '&#8472;', '&image;' => '&#8465;', '&real;' => '&#8476;', '&trade;' => '&#8482;', '&alefsym;' => '&#8501;',
    '&larr;' => '&#8592;', '&uarr;' => '&#8593;', '&rarr;' => '&#8594;', '&darr;' => '&#8595;', '&harr;' => '&#8596;', '&crarr;' => '&#8629;', '&lArr;' => '&#8656;', '&uArr;' => '&#8657;',
    '&rArr;' => '&#8658;', '&dArr;' => '&#8659;', '&hArr;' => '&#8660;', '&forall;' => '&#8704;', '&part;' => '&#8706;', '&exist;' => '&#8707;', '&empty;' => '&#8709;', '&nabla;' => '&#8711;',
    '&isin;' => '&#8712;', '&notin;' => '&#8713;', '&ni;' => '&#8715;', '&prod;' => '&#8719;', '&sum;' => '&#8721;', '&minus;' => '&#8722;', '&lowast;' => '&#8727;', '&radic;' => '&#8730;',
    '&prop;' => '&#8733;', '&infin;' => '&#8734;', '&ang;' => '&#8736;', '&and;' => '&#8743;', '&or;' => '&#8744;', '&cap;' => '&#8745;', '&cup;' => '&#8746;', '&int;' => '&#8747;',
    '&there4;' => '&#8756;', '&sim;' => '&#8764;', '&cong;' => '&#8773;', '&asymp;' => '&#8776;', '&ne;' => '&#8800;', '&equiv;' => '&#8801;', '&le;' => '&#8804;', '&ge;' => '&#8805;',
    '&sub;' => '&#8834;', '&sup;' => '&#8835;', '&nsub;' => '&#8836;', '&sube;' => '&#8838;', '&supe;' => '&#8839;', '&oplus;' => '&#8853;', '&otimes;' => '&#8855;', '&perp;' => '&#8869;',
    '&sdot;' => '&#8901;', '&lceil;' => '&#8968;', '&rceil;' => '&#8969;', '&lfloor;' => '&#8970;', '&rfloor;' => '&#8971;', '&lang;' => '&#9001;', '&rang;' => '&#9002;', '&loz;' => '&#9674;',
    '&spades;' => '&#9824;', '&clubs;' => '&#9827;', '&hearts;' => '&#9829;', '&diams;' => '&#9830;', '&quot;' => '&#34;', '&amp;' => '&#38;', '&lt;' => '&#60;', '&gt;' => '&#62;', '&OElig;' => '&#338;',
    '&oelig;' => '&#339;', '&Scaron;' => '&#352;', '&scaron;' => '&#353;', '&Yuml;' => '&#376;', '&circ;' => '&#710;', '&tilde;' => '&#732;', '&ensp;' => '&#8194;', '&emsp;' => '&#8195;',
    '&thinsp;' => '&#8201;', '&zwnj;' => '&#8204;', '&zwj;' => '&#8205;', '&lrm;' => '&#8206;', '&rlm;' => '&#8207;', '&ndash;' => '&#8211;', '&mdash;' => '&#8212;', '&lsquo;' => '&#8216;',
    '&rsquo;' => '&#8217;', '&sbquo;' => '&#8218;', '&ldquo;' => '&#8220;', '&rdquo;' => '&#8221;', '&bdquo;' => '&#8222;', '&dagger;' => '&#8224;', '&Dagger;' => '&#8225;', '&permil;' => '&#8240;',
    '&lsaquo;' => '&#8249;', '&rsaquo;' => '&#8250;', '&euro;' => '&#8364;'
];
|
|
|
|
/**
 * String that terminates a void (self-closing) tag;
 * `Xml::tag()` appends it when the tag content is `null`
 *
 * @var string
 */
public static $void = ' />';
|
|
|
|
/**
 * Renders one XML attribute or a whole list of attributes
 *
 * @param string|array $name String: name of the single attribute to render.
 *                           Key-value array: list of attributes to render; entries with
 *                           an integer key are rendered as `name="name"` flags.
 *                           Don't pass a second argument in that case.
 * @param mixed $value With a `$name` string: the attribute value. `null`, `false`, `''` and `[]`
 *                     omit the attribute, `true` renders `name="name"`, arrays are joined with spaces
 *                     (or, when they have `value` and `escape` keys, rendered with optional escaping).
 *                     With a `$name` array: pass `false` to keep the given attribute order.
 * @return string|null The generated XML attributes string
 */
public static function attr(
    string|array $name,
    $value = null
): string|null {
    // list mode: render every entry through a recursive call
    if (is_array($name) === true) {
        if ($value !== false) {
            ksort($name);
        }

        $rendered = [];

        foreach ($name as $attrName => $attrValue) {
            // entries without a string key are flags: the value is the name
            if (is_int($attrName) === true) {
                [$attrName, $attrValue] = [$attrValue, true];
            }

            $single = static::attr($attrName, $attrValue);

            // omitted attributes come back as `null`
            if ($single) {
                $rendered[] = $single;
            }
        }

        return implode(' ', $rendered);
    }

    // TODO: In 3.10, treat $value === '' to render as name=""
    if ($value === '') {
        // TODO: Remove in 3.10
        // @codeCoverageIgnoreStart
        Helpers::deprecated('Passing an empty string as value to `Xml::attr()` has been deprecated. In a future version, passing an empty string won\'t omit the attribute anymore but render it with an empty value. To omit the attribute, please pass `null`.', 'xml-attr-empty-string');
        // @codeCoverageIgnoreEnd

        return null;
    }

    // values that omit the attribute altogether
    if ($value === null || $value === [] || $value === false) {
        return null;
    }

    // TODO: In 3.10, add deprecation message for space = empty attribute
    // TODO: In 3.11, render space as space
    if ($value === ' ') {
        return $name . '=""';
    }

    // boolean attribute: the name doubles as the value
    if ($value === true) {
        return $name . '="' . $name . '"';
    }

    // scalar values are always encoded
    if (is_array($value) === false) {
        return $name . '="' . static::encode($value) . '"';
    }

    if (isset($value['value'], $value['escape'])) {
        // explicit value with caller-controlled escaping
        $content = $value['escape'] === true
            ? static::encode($value['value'])
            : $value['value'];
    } else {
        // list of values: drop empty entries but keep numeric ones like `0`
        $content = implode(' ', array_filter(
            $value,
            fn ($part) => !empty($part) || is_numeric($part)
        ));
    }

    return $name . '="' . $content . '"';
}
|
|
|
|
/**
 * Creates an XML string from an array
 *
 * Supports special array keys `@name` (element name),
 * `@attributes` (XML attribute key-value array),
 * `@namespaces` (array with XML namespaces) and
 * `@value` (element content)
 *
 * @param array|string $props The source array or tag content (used internally)
 * @param string $name The name of the root element
 * @param bool $head Include the XML declaration head or not
 * @param string $indent Indentation string, defaults to two spaces
 * @param int $level The indentation level (used internally)
 * @return string The XML string
 */
public static function create(
    array|string $props,
    string $name = 'root',
    bool $head = true,
    string $indent = '  ',
    int $level = 0
): string {
    if (is_array($props) === true) {
        if (A::isAssociative($props) === true) {
            // a tag with attributes or named children

            // extract metadata from special array keys
            $name       = $props['@name'] ?? $name;
            $attributes = $props['@attributes'] ?? [];
            $value      = $props['@value'] ?? null;

            // namespaces become `xmlns` / `xmlns:prefix` attributes
            if (isset($props['@namespaces'])) {
                foreach ($props['@namespaces'] as $key => $namespace) {
                    $key = 'xmlns' . (($key) ? ':' . $key : '');
                    $attributes[$key] = $namespace;
                }
            }

            // continue with just the children
            unset($props['@name'], $props['@attributes'], $props['@namespaces'], $props['@value']);

            if (count($props) > 0) {
                // there are children, use them instead of the value

                $value = [];
                foreach ($props as $childName => $childItem) {
                    // render the child, but don't include the indentation of the first line;
                    // `tag()` prepends the indentation to every content line itself
                    $value[] = trim(static::create($childItem, $childName, false, $indent, $level + 1));
                }
            }

            $result = static::tag($name, $value, $attributes, $indent, $level);
        } else {
            // just children: each list item is rendered as a sibling
            // element with the same name on the same level

            $result = [];
            foreach ($props as $childItem) {
                $result[] = static::create($childItem, $name, false, $indent, $level);
            }

            $result = implode(PHP_EOL, $result);
        }
    } else {
        // scalar value

        $result = static::tag($name, $props, [], $indent, $level);
    }

    if ($head === true) {
        return '<?xml version="1.0" encoding="UTF-8"?>' . PHP_EOL . $result;
    }

    return $result;
}
|
|
|
|
/**
 * Strips all HTML/XML tags from a string and
 * converts HTML entities back to their characters
 *
 * ```
 * echo Xml::decode('some &uuml;ber <em>crazy</em> stuff');
 * // output: some über crazy stuff
 * ```
 *
 * @param string|null $string Input string; `null` is treated like an empty string
 * @return string The plain-text result
 */
public static function decode(string|null $string): string
{
    return html_entity_decode(
        strip_tags($string ?? ''),
        ENT_COMPAT,
        'utf-8'
    );
}
|
|
|
|
/**
 * Makes a string safe for use in XML
 *
 * The string is optionally passed through `Html::encode()`
 * first; afterwards every key of the entity table is
 * replaced with its XML counterpart
 *
 * ```php
 * echo Xml::encode('some über crazy stuff');
 * // output: some &#252;ber crazy stuff
 * ```
 *
 * @param string|null $string Input string; `null` results in an empty string
 * @param bool $html True = Convert to HTML-safe first
 * @return string The encoded string
 */
public static function encode(
    string|null $string,
    bool $html = true
): string {
    if ($string === null) {
        return '';
    }

    if ($html === true) {
        $string = Html::encode($string, false);
    }

    // `self::` (not `static::`) on purpose: always use the table of this class
    $table = self::entities();

    return str_replace(array_keys($table), array_values($table), $string);
}
|
|
|
|
/**
 * Getter for the HTML-to-XML entity translation table
 * stored in `self::$entities`
 *
 * @return array The translation table
 */
public static function entities(): array
{
    $table = self::$entities;

    return $table;
}
|
|
|
|
/**
 * Parses an XML string and returns an array
 *
 * Parser errors are collected by libxml instead of being raised
 * as PHP warnings, so invalid input never reaches a custom
 * error handler; the libxml error mode that was active before
 * the call is restored afterwards.
 *
 * @param string $xml The XML source
 * @return array|null Parsed array or `null` on error
 */
public static function parse(string $xml): array|null
{
    // buffer libxml errors for the duration of the parsing
    $previous = libxml_use_internal_errors(true);

    try {
        $element = simplexml_load_string($xml);
    } finally {
        // only discard the buffered errors if the caller
        // didn't ask libxml to collect them in the first place
        if ($previous === false) {
            libxml_clear_errors();
        }

        libxml_use_internal_errors($previous);
    }

    if (is_object($element) !== true) {
        return null;
    }

    return static::simplify($element);
}
|
|
|
|
/**
 * Converts a SimpleXMLElement into a plain tree
 * of nested arrays and strings
 *
 * Metadata is stored under the special keys `@name`, `@attributes`,
 * `@namespaces` and `@value`; child elements are stored under their
 * (prefixed) element name.
 *
 * @param \SimpleXMLElement $element The element to convert
 * @param bool $collectName Whether the element name should be collected (for the root element)
 * @return array|string Array for elements with metadata or children,
 *                      otherwise the string content of the element
 */
public static function simplify(
    SimpleXMLElement $element,
    bool $collectName = true
): array|string {
    // prefixes of all namespaces used in this part of the document;
    // the global namespace (empty prefix) is covered by the
    // leading `null` entry of the lookup list instead
    $prefixes = $element->getNamespaces(true);
    unset($prefixes['']);
    $lookups = array_merge([0 => null], array_keys($prefixes));

    // metadata of the element itself
    $result = [];

    if ($collectName === true) {
        $result['@name'] = $element->getName();
    }

    // attributes without a namespace and of every known namespace
    $collected = [];

    foreach ($lookups as $lookup) {
        $qualifier = ($lookup) ? $lookup . ':' : '';

        foreach ($element->attributes($lookup, true) as $attrName => $attrValue) {
            $collected[$qualifier . $attrName] = (string)$attrValue;
        }
    }

    if (count($collected) > 0) {
        $result['@attributes'] = $collected;
    }

    // namespace definitions made on this particular element
    $declared = $element->getDocNamespaces(false, false);

    if ($declared) {
        $result['@namespaces'] = $declared;
    }

    // children without a namespace and of every known namespace,
    // grouped by their (prefixed) element name
    $foundChildren = false;

    foreach ($lookups as $lookup) {
        $qualifier = ($lookup) ? $lookup . ':' : '';
        $children  = $element->children($lookup, true);

        if (count($children) === 0) {
            continue;
        }

        $foundChildren = true;

        foreach ($children as $child) {
            $result[$qualifier . $child->getName()][] = static::simplify($child, false);
        }
    }

    if ($foundChildren === false) {
        // leaf element: only the string content is left to collect
        $text = (string)$element;

        // no metadata either, the plain string is enough
        if (count($result) === 0) {
            return $text;
        }

        $result['@value'] = $text;
        return $result;
    }

    // unwrap groups that consist of a single element;
    // the special `@` metadata keys are left untouched
    foreach ($result as $key => $group) {
        if (substr($key, 0, 1) !== '@' && count($group) === 1) {
            $result[$key] = $group[0];
        }
    }

    return $result;
}
|
|
|
|
/**
 * Builds an XML tag
 *
 * @param string $name Tag name
 * @param array|string|null $content Scalar value or array with multiple lines of content or `null` to
 *                                   generate a self-closing tag; pass an empty string to generate empty content
 * @param array $attr An associative array with additional attributes for the tag
 * @param string|null $indent Indentation string or `null` (default) for output on one line
 * @param int $level Indentation level
 * @return string The generated XML
 */
public static function tag(
    string $name,
    array|string|null $content = '',
    array $attr = [],
    string|null $indent = null,
    int $level = 0
): string {
    $attr       = static::attr($attr);
    $start      = '<' . $name . ($attr ? ' ' . $attr : '') . '>';
    $startShort = '<' . $name . ($attr ? ' ' . $attr : '') . static::$void;
    $end        = '</' . $name . '>';

    // indentation of the tag itself; empty without an indentation string
    $baseIndent = $indent ? str_repeat($indent, $level) : '';

    if (is_array($content) === true) {
        if (is_string($indent) === true) {
            // multi-line output: every content line is
            // indented one step deeper than the tag
            $xml = $baseIndent . $start . PHP_EOL;
            foreach ($content as $line) {
                $xml .= $baseIndent . $indent . $line . PHP_EOL;
            }
            $xml .= $baseIndent . $end;
        } else {
            // everything on one line
            $xml = $start . implode($content) . $end;
        }
    } elseif ($content === null) {
        // self-closing tag
        $xml = $baseIndent . $startShort;
    } else {
        $xml = $baseIndent . $start . static::value($content) . $end;
    }

    return $xml;
}
|
|
|
|
/**
 * Encodes a value for use as tag content
 *
 * Booleans become `true`/`false`, numeric values are cast to string,
 * `null` and empty strings result in `null`. Strings that already start
 * with a CDATA block and strings without characters that need escaping
 * are returned unchanged; everything else is wrapped in a CDATA block.
 *
 * @param mixed $value The value to encode
 * @return string|null The encoded content or `null` for empty values
 */
public static function value($value): string|null
{
    if (is_bool($value) === true) {
        return $value ? 'true' : 'false';
    }

    if (is_numeric($value) === true) {
        return (string)$value;
    }

    if ($value === null || $value === '') {
        return null;
    }

    // content that already is a CDATA block is passed through
    if (Str::startsWith($value, '<![CDATA[') === true) {
        return $value;
    }

    // a CDATA block is only needed if encoding would alter the string
    if (htmlentities($value, ENT_NOQUOTES | ENT_XML1) === $value) {
        return $value;
    }

    // split every `]]>` of the input across two CDATA
    // blocks so that it cannot close the wrapping block early
    $protected = str_replace(']]>', ']]]]><![CDATA[>', $value);

    return '<![CDATA[' . $protected . ']]>';
}
|
|
}
|