* @link https://getkirby.com * @copyright Bastian Allgeier GmbH * @license https://opensource.org/licenses/MIT */
class Xml
{
    /**
     * HTML to XML conversion table for entities
     *
     * Maps each named HTML entity to the equivalent numeric
     * character reference, which is valid in XML without a DTD.
     *
     * @var array
     */
    public static $entities = [
        '&nbsp;' => '&#160;', '&iexcl;' => '&#161;', '&cent;' => '&#162;', '&pound;' => '&#163;',
        '&curren;' => '&#164;', '&yen;' => '&#165;', '&brvbar;' => '&#166;', '&sect;' => '&#167;',
        '&uml;' => '&#168;', '&copy;' => '&#169;', '&ordf;' => '&#170;', '&laquo;' => '&#171;',
        '&not;' => '&#172;', '&shy;' => '&#173;', '&reg;' => '&#174;', '&macr;' => '&#175;',
        '&deg;' => '&#176;', '&plusmn;' => '&#177;', '&sup2;' => '&#178;', '&sup3;' => '&#179;',
        '&acute;' => '&#180;', '&micro;' => '&#181;', '&para;' => '&#182;', '&middot;' => '&#183;',
        '&cedil;' => '&#184;', '&sup1;' => '&#185;', '&ordm;' => '&#186;', '&raquo;' => '&#187;',
        '&frac14;' => '&#188;', '&frac12;' => '&#189;', '&frac34;' => '&#190;', '&iquest;' => '&#191;',
        '&Agrave;' => '&#192;', '&Aacute;' => '&#193;', '&Acirc;' => '&#194;', '&Atilde;' => '&#195;',
        '&Auml;' => '&#196;', '&Aring;' => '&#197;', '&AElig;' => '&#198;', '&Ccedil;' => '&#199;',
        '&Egrave;' => '&#200;', '&Eacute;' => '&#201;', '&Ecirc;' => '&#202;', '&Euml;' => '&#203;',
        '&Igrave;' => '&#204;', '&Iacute;' => '&#205;', '&Icirc;' => '&#206;', '&Iuml;' => '&#207;',
        '&ETH;' => '&#208;', '&Ntilde;' => '&#209;', '&Ograve;' => '&#210;', '&Oacute;' => '&#211;',
        '&Ocirc;' => '&#212;', '&Otilde;' => '&#213;', '&Ouml;' => '&#214;', '&times;' => '&#215;',
        '&Oslash;' => '&#216;', '&Ugrave;' => '&#217;', '&Uacute;' => '&#218;', '&Ucirc;' => '&#219;',
        '&Uuml;' => '&#220;', '&Yacute;' => '&#221;', '&THORN;' => '&#222;', '&szlig;' => '&#223;',
        '&agrave;' => '&#224;', '&aacute;' => '&#225;', '&acirc;' => '&#226;', '&atilde;' => '&#227;',
        '&auml;' => '&#228;', '&aring;' => '&#229;', '&aelig;' => '&#230;', '&ccedil;' => '&#231;',
        '&egrave;' => '&#232;', '&eacute;' => '&#233;', '&ecirc;' => '&#234;', '&euml;' => '&#235;',
        '&igrave;' => '&#236;', '&iacute;' => '&#237;', '&icirc;' => '&#238;', '&iuml;' => '&#239;',
        '&eth;' => '&#240;', '&ntilde;' => '&#241;', '&ograve;' => '&#242;', '&oacute;' => '&#243;',
        '&ocirc;' => '&#244;', '&otilde;' => '&#245;', '&ouml;' => '&#246;', '&divide;' => '&#247;',
        '&oslash;' => '&#248;', '&ugrave;' => '&#249;', '&uacute;' => '&#250;', '&ucirc;' => '&#251;',
        '&uuml;' => '&#252;', '&yacute;' => '&#253;', '&thorn;' => '&#254;', '&yuml;' => '&#255;',
        '&fnof;' => '&#402;',
        '&Alpha;' => '&#913;', '&Beta;' => '&#914;', '&Gamma;' => '&#915;', '&Delta;' => '&#916;',
        '&Epsilon;' => '&#917;', '&Zeta;' => '&#918;', '&Eta;' => '&#919;', '&Theta;' => '&#920;',
        '&Iota;' => '&#921;', '&Kappa;' => '&#922;', '&Lambda;' => '&#923;', '&Mu;' => '&#924;',
        '&Nu;' => '&#925;', '&Xi;' => '&#926;', '&Omicron;' => '&#927;', '&Pi;' => '&#928;',
        '&Rho;' => '&#929;', '&Sigma;' => '&#931;', '&Tau;' => '&#932;', '&Upsilon;' => '&#933;',
        '&Phi;' => '&#934;', '&Chi;' => '&#935;', '&Psi;' => '&#936;', '&Omega;' => '&#937;',
        '&alpha;' => '&#945;', '&beta;' => '&#946;', '&gamma;' => '&#947;', '&delta;' => '&#948;',
        '&epsilon;' => '&#949;', '&zeta;' => '&#950;', '&eta;' => '&#951;', '&theta;' => '&#952;',
        '&iota;' => '&#953;', '&kappa;' => '&#954;', '&lambda;' => '&#955;', '&mu;' => '&#956;',
        '&nu;' => '&#957;', '&xi;' => '&#958;', '&omicron;' => '&#959;', '&pi;' => '&#960;',
        '&rho;' => '&#961;', '&sigmaf;' => '&#962;', '&sigma;' => '&#963;', '&tau;' => '&#964;',
        '&upsilon;' => '&#965;', '&phi;' => '&#966;', '&chi;' => '&#967;', '&psi;' => '&#968;',
        '&omega;' => '&#969;', '&thetasym;' => '&#977;', '&upsih;' => '&#978;', '&piv;' => '&#982;',
        '&bull;' => '&#8226;', '&hellip;' => '&#8230;', '&prime;' => '&#8242;', '&Prime;' => '&#8243;',
        '&oline;' => '&#8254;', '&frasl;' => '&#8260;', '&weierp;' => '&#8472;', '&image;' => '&#8465;',
        '&real;' => '&#8476;', '&trade;' => '&#8482;', '&alefsym;' => '&#8501;', '&larr;' => '&#8592;',
        '&uarr;' => '&#8593;', '&rarr;' => '&#8594;', '&darr;' => '&#8595;', '&harr;' => '&#8596;',
        '&crarr;' => '&#8629;', '&lArr;' => '&#8656;', '&uArr;' => '&#8657;', '&rArr;' => '&#8658;',
        '&dArr;' => '&#8659;', '&hArr;' => '&#8660;', '&forall;' => '&#8704;', '&part;' => '&#8706;',
        '&exist;' => '&#8707;', '&empty;' => '&#8709;', '&nabla;' => '&#8711;', '&isin;' => '&#8712;',
        '&notin;' => '&#8713;', '&ni;' => '&#8715;', '&prod;' => '&#8719;', '&sum;' => '&#8721;',
        '&minus;' => '&#8722;', '&lowast;' => '&#8727;', '&radic;' => '&#8730;', '&prop;' => '&#8733;',
        '&infin;' => '&#8734;', '&ang;' => '&#8736;', '&and;' => '&#8743;', '&or;' => '&#8744;',
        '&cap;' => '&#8745;', '&cup;' => '&#8746;', '&int;' => '&#8747;', '&there4;' => '&#8756;',
        '&sim;' => '&#8764;', '&cong;' => '&#8773;', '&asymp;' => '&#8776;', '&ne;' => '&#8800;',
        '&equiv;' => '&#8801;', '&le;' => '&#8804;', '&ge;' => '&#8805;', '&sub;' => '&#8834;',
        '&sup;' => '&#8835;', '&nsub;' => '&#8836;', '&sube;' => '&#8838;', '&supe;' => '&#8839;',
        '&oplus;' => '&#8853;', '&otimes;' => '&#8855;', '&perp;' => '&#8869;', '&sdot;' => '&#8901;',
        '&lceil;' => '&#8968;', '&rceil;' => '&#8969;', '&lfloor;' => '&#8970;', '&rfloor;' => '&#8971;',
        '&lang;' => '&#9001;', '&rang;' => '&#9002;', '&loz;' => '&#9674;', '&spades;' => '&#9824;',
        '&clubs;' => '&#9827;', '&hearts;' => '&#9829;', '&diams;' => '&#9830;',
        '&quot;' => '&#34;', '&amp;' => '&#38;', '&lt;' => '&#60;', '&gt;' => '&#62;',
        '&OElig;' => '&#338;', '&oelig;' => '&#339;', '&Scaron;' => '&#352;', '&scaron;' => '&#353;',
        '&Yuml;' => '&#376;', '&circ;' => '&#710;', '&tilde;' => '&#732;',
        '&ensp;' => '&#8194;', '&emsp;' => '&#8195;', '&thinsp;' => '&#8201;', '&zwnj;' => '&#8204;',
        '&zwj;' => '&#8205;', '&lrm;' => '&#8206;', '&rlm;' => '&#8207;',
        '&ndash;' => '&#8211;', '&mdash;' => '&#8212;', '&lsquo;' => '&#8216;', '&rsquo;' => '&#8217;',
        '&sbquo;' => '&#8218;', '&ldquo;' => '&#8220;', '&rdquo;' => '&#8221;', '&bdquo;' => '&#8222;',
        '&dagger;' => '&#8224;', '&Dagger;' => '&#8225;', '&permil;' => '&#8240;', '&lsaquo;' => '&#8249;',
        '&rsaquo;' => '&#8250;', '&euro;' => '&#8364;'
    ];

    /**
     * Closing string for void tags
     *
     * @var string
     */
    public static $void = ' />';

    /**
     * Generates a single attribute or a list of attributes
     *
     * @param string|array $name String: A single attribute with that name will be generated.
     *                           Key-value array: A list of attributes will be generated. Don't pass a second argument in that case.
     * @param mixed $value If used with a `$name` string, pass the value of the attribute here.
     *                     If used with a `$name` array, this can be set to `false` to disable attribute sorting.
     * @return string|null The generated XML attributes string; `null` if a single
     *                     attribute is omitted (`null`, `''`, `[]` or `false` value)
     */
    public static function attr($name, $value = null): ?string
    {
        if (is_array($name) === true) {
            // list mode: sort by attribute name unless explicitly disabled
            if ($value !== false) {
                ksort($name);
            }

            $attributes = [];
            foreach ($name as $key => $val) {
                $a = static::attr($key, $val);

                // attributes that rendered to `null` are dropped
                if ($a) {
                    $attributes[] = $a;
                }
            }

            return implode(' ', $attributes);
        }

        if ($value === null || $value === '' || $value === []) {
            return null;
        }

        // a single space is the explicit way to request an empty attribute
        if ($value === ' ') {
            return strtolower($name) . '=""';
        }

        // boolean attributes: `true` renders name="name", `false` renders nothing
        if (is_bool($value) === true) {
            return $value === true ? strtolower($name) . '="' . strtolower($name) . '"' : null;
        }

        if (is_array($value) === true) {
            if (isset($value['value'], $value['escape'])) {
                // `['value' => ..., 'escape' => bool]` lets the caller opt out of escaping
                $value = $value['escape'] === true ? static::encode($value['value']) : $value['value'];
            } else {
                // plain list: join the non-empty items (numeric zero is kept)
                $value = implode(' ', array_filter(
                    $value,
                    fn ($value) => !empty($value) || is_numeric($value)
                ));
            }
        } else {
            $value = static::encode($value);
        }

        return strtolower($name) . '="' . $value . '"';
    }

    /**
     * Creates an XML string from an array
     *
     * Supports special array keys `@name` (element name),
     * `@attributes` (XML attribute key-value array),
     * `@namespaces` (array with XML namespaces) and
     * `@value` (element content)
     *
     * @param array|string $props The source array or tag content (used internally)
     * @param string $name The name of the root element
     * @param bool $head Include the XML declaration head or not
     * @param string $indent Indentation string, defaults to two spaces
     * @param int $level The indentation level (used internally)
     * @return string The XML string
     */
    public static function create($props, string $name = 'root', bool $head = true, string $indent = '  ', int $level = 0): string
    {
        if (is_array($props) === true) {
            if (A::isAssociative($props) === true) {
                // a tag with attributes or named children

                // extract metadata from special array keys
                $name       = $props['@name'] ?? $name;
                $attributes = $props['@attributes'] ?? [];
                $value      = $props['@value'] ?? null;

                if (isset($props['@namespaces'])) {
                    foreach ($props['@namespaces'] as $key => $namespace) {
                        // an empty prefix declares the default namespace
                        $key = 'xmlns' . (($key) ? ':' . $key : '');
                        $attributes[$key] = $namespace;
                    }
                }

                // continue with just the children
                unset($props['@name'], $props['@attributes'], $props['@namespaces'], $props['@value']);

                if (count($props) > 0) {
                    // there are children, use them instead of the value

                    $value = [];
                    foreach ($props as $childName => $childItem) {
                        // render the child, but don't include the indentation of the first line
                        $value[] = trim(static::create($childItem, $childName, false, $indent, $level + 1));
                    }
                }

                $result = static::tag($name, $value, $attributes, $indent, $level);
            } else {
                // just children: every item becomes a sibling element with the same name

                $result = [];
                foreach ($props as $childItem) {
                    $result[] = static::create($childItem, $name, false, $indent, $level);
                }

                $result = implode(PHP_EOL, $result);
            }
        } else {
            // scalar value

            $result = static::tag($name, $props, null, $indent, $level);
        }

        if ($head === true) {
            return '<?xml version="1.0" encoding="UTF-8"?>' . PHP_EOL . $result;
        }

        return $result;
    }

    /**
     * Removes all HTML/XML tags and encoded chars from a string
     *
     * ```
     * echo Xml::decode('some &uuml;ber <em>crazy</em> stuff');
     * // output: some über crazy stuff
     * ```
     *
     * @param string|null $string
     * @return string
     */
    public static function decode(?string $string): string
    {
        if ($string === null) {
            $string = '';
        }

        $string = strip_tags($string);
        return html_entity_decode($string, ENT_COMPAT, 'utf-8');
    }

    /**
     * Converts a string to an XML-safe string
     *
     * Converts it to HTML-safe first and then it
     * will replace HTML entities with XML entities
     *
     * ```php
     * echo Xml::encode('some über crazy stuff');
     * // output: some &#252;ber crazy stuff
     * ```
     *
     * @param string|null $string
     * @param bool $html True = Convert to HTML-safe first
     * @return string
     */
    public static function encode(?string $string, bool $html = true): string
    {
        if ($string === null) {
            return '';
        }

        if ($html === true) {
            $string = Html::encode($string, false);
        }

        // swap every named HTML entity for its numeric XML equivalent
        $entities = self::entities();
        $html     = array_keys($entities);
        $xml      = array_values($entities);

        return str_replace($html, $xml, $string);
    }

    /**
     * Returns the HTML-to-XML entity translation table
     *
     * @return array
     */
    public static function entities(): array
    {
        return self::$entities;
    }

    /**
     * Parses an XML string and returns an array
     *
     * Entity substitution (`LIBXML_NOENT`) is intentionally not enabled:
     * it would make the parser resolve external entities declared in the
     * document and thereby expose untrusted input to XXE attacks.
     *
     * @param string $xml
     * @return array|null Parsed array or `null` on error
     */
    public static function parse(string $xml): ?array
    {
        // parser warnings are suppressed on purpose; failure is reported as `null`
        $xml = @simplexml_load_string($xml);

        if (is_object($xml) !== true) {
            return null;
        }

        return static::simplify($xml);
    }

    /**
     * Breaks a SimpleXMLElement down into a simpler tree
     * structure of arrays and strings
     *
     * @param \SimpleXMLElement $element
     * @param bool $collectName Whether the element name should be collected (for the root element)
     * @return array|string
     */
    public static function simplify(SimpleXMLElement $element, bool $collectName = true)
    {
        // get all XML namespaces of the whole document to iterate over later;
        // we don't need the global namespace (empty string) in the list
        $usedNamespaces = $element->getNamespaces(true);
        if (isset($usedNamespaces[''])) {
            unset($usedNamespaces['']);
        }

        // `null` stands for "no namespace" and is always checked first
        $namespacePrefixes = array_merge([0 => null], array_keys($usedNamespaces));

        // now collect element metadata of the parent
        $array = [];
        if ($collectName === true) {
            $array['@name'] = $element->getName();
        }

        // collect attributes with each defined document namespace;
        // also check for attributes without any namespace
        $attributeArray = [];
        foreach ($namespacePrefixes as $namespace) {
            $prefix     = ($namespace) ? $namespace . ':' : '';
            $attributes = $element->attributes($namespace, true);

            foreach ($attributes as $key => $value) {
                $attributeArray[$prefix . $key] = (string)$value;
            }
        }
        if (count($attributeArray) > 0) {
            $array['@attributes'] = $attributeArray;
        }

        // collect namespace definitions of this particular XML element
        if ($namespaces = $element->getDocNamespaces(false, false)) {
            $array['@namespaces'] = $namespaces;
        }

        // check for children with each defined document namespace;
        // also check for children without any namespace
        $hasChildren = false;
        foreach ($namespacePrefixes as $namespace) {
            $prefix   = ($namespace) ? $namespace . ':' : '';
            $children = $element->children($namespace, true);

            if (count($children) > 0) {
                // there are children, recursively simplify each one
                $hasChildren = true;

                // make a grouped collection of elements per element name
                foreach ($children as $child) {
                    $array[$prefix . $child->getName()][] = static::simplify($child, false);
                }
            }
        }

        if ($hasChildren === true) {
            // there were children of any namespace

            // reduce elements where there is only one item
            // of the respective type to a simple string;
            // don't do anything with special `@` metadata keys
            foreach ($array as $name => $item) {
                if (substr($name, 0, 1) !== '@' && count($item) === 1) {
                    $array[$name] = $item[0];
                }
            }

            return $array;
        }

        // we didn't find any XML children above, only use the string value
        $element = (string)$element;

        if (count($array) > 0) {
            $array['@value'] = $element;

            return $array;
        }

        return $element;
    }

    /**
     * Builds an XML tag
     *
     * @param string $name Tag name
     * @param array|string|null $content Scalar value or array with multiple lines of content or `null` to
     *                                   generate a self-closing tag; pass an empty string to generate empty content
     * @param array|null $attr An associative array with additional attributes for the tag
     * @param string|null $indent Indentation string or `null` (the default) for output on one line
     * @param int $level Indentation level
     * @return string The generated XML
     */
    public static function tag(string $name, $content = '', ?array $attr = null, ?string $indent = null, int $level = 0): string
    {
        $attr       = static::attr($attr ?? []);
        $start      = '<' . $name . ($attr ? ' ' . $attr : '') . '>';
        $startShort = '<' . $name . ($attr ? ' ' . $attr : '') . static::$void;
        $end        = '</' . $name . '>';
        $baseIndent = $indent ? str_repeat($indent, $level) : '';

        if (is_array($content) === true) {
            if (is_string($indent) === true) {
                // one content line per output line, indented one level deeper than the tag
                $xml = $baseIndent . $start . PHP_EOL;
                foreach ($content as $line) {
                    $xml .= $baseIndent . $indent . $line . PHP_EOL;
                }
                $xml .= $baseIndent . $end;
            } else {
                // no indentation requested: everything on a single line
                $xml = $start . implode($content) . $end;
            }
        } elseif ($content === null) {
            $xml = $baseIndent . $startShort;
        } else {
            $xml = $baseIndent . $start . static::value($content) . $end;
        }

        return $xml;
    }

    /**
     * Properly encodes tag contents
     *
     * Booleans and numbers are converted to their string form. Strings
     * that contain XML special characters are wrapped in a CDATA block
     * unless they already start with one.
     *
     * @param mixed $value
     * @return string|null
     */
    public static function value($value): ?string
    {
        if ($value === true) {
            return 'true';
        }

        if ($value === false) {
            return 'false';
        }

        if (is_numeric($value) === true) {
            return (string)$value;
        }

        if ($value === null || $value === '') {
            return null;
        }

        // the caller already supplied a CDATA block
        if (Str::startsWith($value, '<![CDATA[') === true) {
            return $value;
        }

        $encoded = htmlentities($value, ENT_NOQUOTES | ENT_XML1);
        if ($encoded === $value) {
            // no CDATA block needed
            return $value;
        }

        // wrap everything in a CDATA block
        // and ensure that it is not closed in the input string
        return '<![CDATA[' . str_replace(']]>', ']]]]><![CDATA[>', $value) . ']]>';
    }
}