Upgrade to 3.6.0

This commit is contained in:
Bastian Allgeier
2021-11-16 14:53:37 +01:00
parent 7388fa4d24
commit 92b7a330fa
318 changed files with 20017 additions and 6878 deletions

View File

@@ -5,28 +5,103 @@ namespace Kirby\Parsley\Schema;
use Kirby\Parsley\Element;
use Kirby\Toolkit\Str;
/**
* The blocks schema definition converts
* the document into typed content blocks
* (e.g. text, quote, image, video, code)
*
* @since 3.5.0
*
* @package Kirby Parsley
* @author Bastian Allgeier <bastian@getkirby.com>
* @link https://getkirby.com
* @copyright Bastian Allgeier GmbH
* @license https://getkirby.com/license
*/
class Blocks extends Plain
{
/**
 * Converts a blockquote element to a quote block
 *
 * Direct text nodes and child elements of the blockquote
 * (except a footer element) are collected as the quote text;
 * the inner HTML of the footer, if present, becomes the citation.
 *
 * @param \Kirby\Parsley\Element $node
 * @return array
 */
public function blockquote(Element $node): array
{
    $citation = null;
    $text     = [];

    // get all the text for the quote
    foreach ($node->children() as $child) {
        if (is_a($child, 'DOMText') === true) {
            $text[] = trim($child->textContent);
        }

        if (is_a($child, 'DOMElement') === true && $child->tagName !== 'footer') {
            $text[] = (new Element($child))->innerHTML($this->marks());
        }
    }

    // drop empty fragments and join the rest without a separator;
    // an explicit callback is used so that a fragment like "0"
    // is not discarded as falsy
    $text = implode('', array_filter($text, static function ($fragment): bool {
        return (string)$fragment !== '';
    }));

    // get the citation from the footer
    if ($footer = $node->find('footer')) {
        $citation = $footer->innerHTML($this->marks());
    }

    return [
        'content' => [
            'citation' => $citation,
            'text'     => $text
        ],
        'type' => 'quote',
    ];
}
/**
 * Builds the generic text block that is used
 * whenever no dedicated block type applies
 *
 * An Element contributes its inner HTML, which is wrapped in
 * a paragraph unless it already contains a `<p>` tag. A string
 * is trimmed and always wrapped; an empty string as well as any
 * other input type yields null.
 *
 * @param \Kirby\Parsley\Element|string $element
 * @return array|null
 */
public function fallback($element): ?array
{
    $isElement = is_a($element, Element::class) === true;

    // anything that is neither an Element nor a string is ignored
    if ($isElement === false && is_string($element) === false) {
        return null;
    }

    if ($isElement === true) {
        $markup = $element->innerHtml();

        // only wrap when the inner HTML has no paragraph yet
        $needsWrapper = Str::contains($markup, '<p>') === false;
    } else {
        $markup = trim($element);

        if (Str::length($markup) === 0) {
            return null;
        }

        $needsWrapper = true;
    }

    if ($needsWrapper === true) {
        $markup = '<p>' . $markup . '</p>';
    }

    return [
        'content' => [
            'text' => $markup,
        ],
        'type' => 'text',
    ];
}
public function heading($node, $level)
/**
* Converts a heading element to a heading block
*
* @param \Kirby\Parsley\Element $node
* @return array
*/
public function heading(Element $node): array
{
$content = [
'level' => $level,
'level' => strtolower($node->tagName()),
'text' => $node->innerHTML()
];
@@ -42,7 +117,91 @@ class Blocks extends Plain
];
}
public function list($node)
/**
 * Converts an iframe element to a video block
 *
 * Vimeo and YouTube (including youtube-nocookie) embed URLs are
 * translated back to their regular watch URLs. Any other iframe
 * is kept as raw HTML in a markdown block.
 *
 * @param \Kirby\Parsley\Element $node
 * @return array
 */
public function iframe(Element $node): array
{
    $caption = null;

    // cast so that a missing src attribute never
    // hands a non-string value to preg_match()
    $src = (string)$node->attr('src');

    // NOTE(review): the figcaption is removed even when the iframe
    // ends up as a markdown block, which does not carry the caption;
    // confirm that dropping it in that case is intended
    if ($figcaption = $node->find('ancestor::figure[1]//figcaption')) {
        $caption = $figcaption->innerHTML($this->marks());

        // avoid parsing the caption twice
        $figcaption->remove();
    }

    // reverse engineer video URLs; the dots in the host names are
    // escaped so that they only match literal dots
    if (preg_match('!player\.vimeo\.com/video/([0-9]+)!i', $src, $array) === 1) {
        $src = 'https://vimeo.com/' . $array[1];
    } elseif (preg_match('!youtube(?:-nocookie)?\.com/embed/([a-zA-Z0-9_-]+)!', $src, $array) === 1) {
        $src = 'https://youtube.com/watch?v=' . $array[1];
    } else {
        $src = false;
    }

    // correct video URL
    if ($src) {
        return [
            'content' => [
                'caption' => $caption,
                'url'     => $src
            ],
            'type' => 'video',
        ];
    }

    // unknown embed: keep the original markup
    return [
        'content' => [
            'text' => $node->outerHTML()
        ],
        'type' => 'markdown',
    ];
}
/**
 * Converts an img element to an image block
 *
 * The caption is read from a figcaption found via the closest
 * figure ancestor, the link from an `a` ancestor; both stay
 * null when no such element is found.
 *
 * @param \Kirby\Parsley\Element $node
 * @return array
 */
public function img(Element $node): array
{
    $caption    = null;
    $figcaption = $node->find('ancestor::figure[1]//figcaption');

    if ($figcaption) {
        $caption = $figcaption->innerHTML($this->marks());

        // take the caption out of the document
        // so that it is not parsed a second time
        $figcaption->remove();
    }

    $anchor = $node->find('ancestor::a');
    $link   = $anchor ? $anchor->attr('href') : null;

    return [
        'content' => [
            'alt'      => $node->attr('alt'),
            'caption'  => $caption,
            'link'     => $link,
            'location' => 'web',
            'src'      => $node->attr('src'),
        ],
        'type' => 'image',
    ];
}
/**
* Converts a list element to HTML
*
* @param \Kirby\Parsley\Element $node
* @return string
*/
public function list(Element $node): string
{
$html = [];
@@ -69,12 +228,21 @@ class Blocks extends Plain
return '<' . $node->tagName() . '>' . implode($html) . '</' . $node->tagName() . '>';
}
/**
* Returns a list of allowed inline marks
* and their parsing rules
*
* @return array
*/
public function marks(): array
{
return [
[
'tag' => 'a',
'attrs' => ['href', 'target', 'title'],
'attrs' => ['href', 'rel', 'target', 'title'],
'defaults' => [
'rel' => 'noopener noreferrer'
]
],
[
'tag' => 'abbr',
@@ -82,6 +250,9 @@ class Blocks extends Plain
[
'tag' => 'b'
],
[
'tag' => 'br',
],
[
'tag' => 'code'
],
@@ -94,6 +265,9 @@ class Blocks extends Plain
[
'tag' => 'i',
],
[
'tag' => 'p',
],
[
'tag' => 'strike',
],
@@ -112,153 +286,81 @@ class Blocks extends Plain
];
}
/**
* Returns a list of allowed nodes and
* their parsing rules
*
* @codeCoverageIgnore
* @return array
*/
public function nodes(): array
{
return [
[
'tag' => 'blockquote',
'parse' => function ($node) {
$citation = null;
$text = [];
// get all the text for the quote
foreach ($node->element()->childNodes as $child) {
if (is_a($child, 'DOMText') === true) {
$text[] = trim($child->textContent);
}
if (is_a($child, 'DOMElement') === true && $child->tagName !== 'footer') {
$text[] = (new Element($child))->innerHTML($this->marks());
}
}
// filter empty blocks and separate text blocks with breaks
$text = implode('<br></br>', array_filter($text));
// get the citation from the footer
if ($footer = $node->find('footer')) {
$citation = $footer->innerHTML($this->marks());
}
return [
'content' => [
'citation' => $citation,
'text' => $text
],
'type' => 'quote',
];
'parse' => function (Element $node) {
return $this->blockquote($node);
}
],
[
'tag' => 'h1',
'parse' => function ($node) {
return $this->heading($node, 'h1');
'parse' => function (Element $node) {
return $this->heading($node);
}
],
[
'tag' => 'h2',
'parse' => function ($node) {
return $this->heading($node, 'h2');
'parse' => function (Element $node) {
return $this->heading($node);
}
],
[
'tag' => 'h3',
'parse' => function ($node) {
return $this->heading($node, 'h3');
'parse' => function (Element $node) {
return $this->heading($node);
}
],
[
'tag' => 'h4',
'parse' => function ($node) {
return $this->heading($node, 'h4');
'parse' => function (Element $node) {
return $this->heading($node);
}
],
[
'tag' => 'h5',
'parse' => function ($node) {
return $this->heading($node, 'h5');
'parse' => function (Element $node) {
return $this->heading($node);
}
],
[
'tag' => 'h6',
'parse' => function ($node) {
return $this->heading($node, 'h6');
'parse' => function (Element $node) {
return $this->heading($node);
}
],
[
'tag' => 'hr',
'parse' => function (Element $node) {
return [
'type' => 'line'
];
}
],
[
'tag' => 'iframe',
'parse' => function ($node) {
$caption = null;
$src = $node->attr('src');
if ($figcaption = $node->find('ancestor::figure[1]//figcaption')) {
$caption = $figcaption->innerHTML($this->marks());
// avoid parsing the caption twice
$figcaption->remove();
}
// reverse engineer video URLs
if (preg_match('!player.vimeo.com\/video\/([0-9]+)!i', $src, $array) === 1) {
$src = 'https://vimeo.com/' . $array[1];
} elseif (preg_match('!youtube.com\/embed\/([a-zA-Z0-9_-]+)!', $src, $array) === 1) {
$src = 'https://youtube.com/watch?v=' . $array[1];
} elseif (preg_match('!youtube-nocookie.com\/embed\/([a-zA-Z0-9_-]+)!', $src, $array) === 1) {
$src = 'https://youtube.com/watch?v=' . $array[1];
} else {
$src = false;
}
// correct video URL
if ($src) {
return [
'content' => [
'caption' => $caption,
'url' => $src
],
'type' => 'video',
];
}
return [
'content' => [
'text' => $node->outerHTML()
],
'type' => 'markdown',
];
'parse' => function (Element $node) {
return $this->iframe($node);
}
],
[
'tag' => 'img',
'parse' => function ($node) {
$caption = null;
$link = null;
if ($figcaption = $node->find('ancestor::figure[1]//figcaption')) {
$caption = $figcaption->innerHTML($this->marks());
// avoid parsing the caption twice
$figcaption->remove();
}
if ($a = $node->find('ancestor::a')) {
$link = $a->attr('href');
}
return [
'content' => [
'alt' => $node->attr('alt'),
'caption' => $caption,
'link' => $link,
'location' => 'web',
'src' => $node->attr('src'),
],
'type' => 'image',
];
'parse' => function (Element $node) {
return $this->img($node);
}
],
[
'tag' => 'ol',
'parse' => function ($node) {
'parse' => function (Element $node) {
return [
'content' => [
'text' => $this->list($node)
@@ -269,41 +371,19 @@ class Blocks extends Plain
],
[
'tag' => 'pre',
'parse' => function ($node) {
$language = 'text';
if ($code = $node->find('//code')) {
foreach ($code->classList() as $className) {
if (preg_match('!language-(.*?)!', $className)) {
$language = str_replace('language-', '', $className);
break;
}
}
}
return [
'content' => [
'code' => $node->innerText(),
'language' => $language
],
'type' => 'code',
];
'parse' => function (Element $node) {
return $this->pre($node);
}
],
[
'tag' => 'table',
'parse' => function ($node) {
return [
'content' => [
'text' => $node->outerHTML(),
],
'type' => 'markdown',
];
'parse' => function (Element $node) {
return $this->table($node);
}
],
[
'tag' => 'ul',
'parse' => function ($node) {
'parse' => function (Element $node) {
return [
'content' => [
'text' => $this->list($node)
@@ -314,4 +394,44 @@ class Blocks extends Plain
],
];
}
/**
 * Converts a pre element to a code block
 *
 * The language defaults to `text` and is taken from the first
 * class name of the form `language-<name>` on a code element.
 *
 * @param \Kirby\Parsley\Element $node
 * @return array
 */
public function pre(Element $node): array
{
    $language = 'text';

    // NOTE(review): in XPath a leading `//` selects from the document
    // root, not from the context node; if Element::find() passes the
    // query through unchanged, `.//code` may be what is intended - confirm
    if ($code = $node->find('//code')) {
        foreach ($code->classList() as $className) {
            // the class name must consist of the prefix followed by a
            // non-empty language name; the name is taken from the capture
            // group instead of stripping the prefix with str_replace()
            if (preg_match('!^language-(.+)$!', $className, $matches) === 1) {
                $language = $matches[1];
                break;
            }
        }
    }

    return [
        'content' => [
            'code'     => $node->innerText(),
            'language' => $language
        ],
        'type' => 'code',
    ];
}
/**
 * Converts a table element to a markdown block
 * that carries the table's outer HTML unchanged
 *
 * @param \Kirby\Parsley\Element $node
 * @return array
 */
public function table(Element $node): array
{
    $content = [
        'text' => $node->outerHTML(),
    ];

    return [
        'content' => $content,
        'type'    => 'markdown',
    ];
}
}

View File

@@ -2,39 +2,68 @@
namespace Kirby\Parsley\Schema;
use Kirby\Parsley\Element;
use Kirby\Parsley\Schema;
use Kirby\Toolkit\Str;
/**
* The plain schema definition converts
* the entire document into simple text blocks
*
* @since 3.5.0
*
* @package Kirby Parsley
* @author Bastian Allgeier <bastian@getkirby.com>
* @link https://getkirby.com
* @copyright Bastian Allgeier GmbH
* @license https://getkirby.com/license
*/
class Plain extends Schema
{
    /**
     * Creates the fallback block type
     * if no other block can be found
     *
     * An Element contributes its inner text; a string is trimmed.
     * An empty string and any other input type yield null.
     *
     * @param \Kirby\Parsley\Element|string $element
     * @return array|null
     */
    public function fallback($element): ?array
    {
        if (is_a($element, Element::class) === true) {
            $text = $element->innerText();
        } elseif (is_string($element) === true) {
            $text = trim($element);

            if (Str::length($text) === 0) {
                return null;
            }
        } else {
            return null;
        }

        return [
            'content' => [
                'text' => $text
            ],
            'type' => 'text',
        ];
    }

    /**
     * Returns a list of allowed inline marks;
     * the plain schema does not allow any
     *
     * @return array
     */
    public function marks(): array
    {
        return [];
    }

    /**
     * Returns a list of allowed nodes;
     * the plain schema does not define any
     *
     * @return array
     */
    public function nodes(): array
    {
        return [];
    }

    /**
     * Returns a list of all elements that
     * should be skipped during parsing
     *
     * @return array
     */
    public function skip(): array
    {
        return [
            'base',
            'link',
            'meta',
            'script',
            'style',
            'title'
        ];
    }
}