From 6a147796fd6be32e29f90d9db24f2d9debe88d90 Mon Sep 17 00:00:00 2001 From: Alexey Skobkin Date: Fri, 18 Aug 2023 15:58:58 +0300 Subject: [PATCH] Markdown parser draft based on league/commonmark. --- composer.json | 1 + composer.lock | 414 +++++++++++++++++- .../Extension/PointMarkdownExtension.php | 85 ++++ .../Parser/Block/BlockQuoteStartParser.php | 29 ++ .../Parser/Inline/ImageLinkParser.php | 11 + src/Markdown/Parser/Inline/NewLineParser.php | 41 ++ src/Markdown/PointMarkdownConverter.php | 21 + 7 files changed, 601 insertions(+), 1 deletion(-) create mode 100644 src/Markdown/Extension/PointMarkdownExtension.php create mode 100644 src/Markdown/Parser/Block/BlockQuoteStartParser.php create mode 100644 src/Markdown/Parser/Inline/ImageLinkParser.php create mode 100644 src/Markdown/Parser/Inline/NewLineParser.php create mode 100644 src/Markdown/PointMarkdownConverter.php diff --git a/composer.json b/composer.json index fa93d48..4d3e882 100644 --- a/composer.json +++ b/composer.json @@ -15,6 +15,7 @@ "doctrine/orm": "^2.14", "jms/serializer-bundle": "^5.2", "knplabs/knp-paginator-bundle": "^6.2", + "league/commonmark": "^2.4", "phpdocumentor/reflection-docblock": "^5.3", "phpstan/phpdoc-parser": "^1.16", "sensio/framework-extra-bundle": "^6.1", diff --git a/composer.lock b/composer.lock index 3c23b8a..57ecaf4 100644 --- a/composer.lock +++ b/composer.lock @@ -4,8 +4,83 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "fa19d360fa749ba51772278ee535bbb3", + "content-hash": "fa9dc98a86c71cbd1b41e19b3203a4f7", "packages": [ + { + "name": "dflydev/dot-access-data", + "version": "v3.0.2", + "source": { + "type": "git", + "url": "https://github.com/dflydev/dflydev-dot-access-data.git", + "reference": "f41715465d65213d644d3141a6a93081be5d3549" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/dflydev/dflydev-dot-access-data/zipball/f41715465d65213d644d3141a6a93081be5d3549", + "reference": "f41715465d65213d644d3141a6a93081be5d3549", + "shasum": "" + }, + "require": { + "php": "^7.1 || ^8.0" + }, + "require-dev": { + "phpstan/phpstan": "^0.12.42", + "phpunit/phpunit": "^7.5 || ^8.5 || ^9.3", + "scrutinizer/ocular": "1.6.0", + "squizlabs/php_codesniffer": "^3.5", + "vimeo/psalm": "^4.0.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-main": "3.x-dev" + } + }, + "autoload": { + "psr-4": { + "Dflydev\\DotAccessData\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Dragonfly Development Inc.", + "email": "info@dflydev.com", + "homepage": "http://dflydev.com" + }, + { + "name": "Beau Simensen", + "email": "beau@dflydev.com", + "homepage": "http://beausimensen.com" + }, + { + "name": "Carlos Frutos", + "email": "carlos@kiwing.it", + "homepage": "https://github.com/cfrutos" + }, + { + "name": "Colin O'Dell", + "email": "colinodell@gmail.com", + "homepage": "https://www.colinodell.com" + } + ], + "description": "Given a deep data structure, access data by dot notation.", + "homepage": "https://github.com/dflydev/dflydev-dot-access-data", + "keywords": [ + "access", + "data", + "dot", + "notation" + ], + "support": { + "issues": "https://github.com/dflydev/dflydev-dot-access-data/issues", + "source": "https://github.com/dflydev/dflydev-dot-access-data/tree/v3.0.2" + }, + "time": "2022-10-27T11:44:00+00:00" + }, { "name": "doctrine/annotations", "version": "2.0.1", @@ -1962,6 +2037,194 @@ }, "time": "2023-03-25T06:51:40+00:00" }, + { + "name": "league/commonmark", + "version": "2.4.0", + "source": { + "type": "git", + "url": "https://github.com/thephpleague/commonmark.git", + "reference": "d44a24690f16b8c1808bf13b1bd54ae4c63ea048" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/thephpleague/commonmark/zipball/d44a24690f16b8c1808bf13b1bd54ae4c63ea048", + "reference": "d44a24690f16b8c1808bf13b1bd54ae4c63ea048", + "shasum": "" + }, + "require": { + "ext-mbstring": "*", + "league/config": "^1.1.1", + "php": "^7.4 || ^8.0", + "psr/event-dispatcher": "^1.0", + "symfony/deprecation-contracts": "^2.1 || ^3.0", + "symfony/polyfill-php80": "^1.16" + }, + "require-dev": { + "cebe/markdown": "^1.0", + "commonmark/cmark": "0.30.0", + "commonmark/commonmark.js": "0.30.0", + "composer/package-versions-deprecated": "^1.8", + "embed/embed": "^4.4", + "erusev/parsedown": "^1.0", + "ext-json": "*", + "github/gfm": "0.29.0", + "michelf/php-markdown": "^1.4 || ^2.0", + "nyholm/psr7": "^1.5", + "phpstan/phpstan": "^1.8.2", + "phpunit/phpunit": "^9.5.21", + "scrutinizer/ocular": "^1.8.1", + "symfony/finder": "^5.3 | ^6.0", + "symfony/yaml": "^2.3 | ^3.0 | ^4.0 | ^5.0 | ^6.0", + "unleashedtech/php-coding-standard": "^3.1.1", + "vimeo/psalm": "^4.24.0 || ^5.0.0" + }, + "suggest": { + "symfony/yaml": "v2.3+ required if using the Front Matter extension" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-main": "2.5-dev" + } + }, + "autoload": { + "psr-4": { + "League\\CommonMark\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Colin O'Dell", + "email": "colinodell@gmail.com", + "homepage": "https://www.colinodell.com", + "role": "Lead Developer" + } + ], + "description": "Highly-extensible PHP Markdown parser which fully supports the CommonMark spec and GitHub-Flavored Markdown (GFM)", + "homepage": "https://commonmark.thephpleague.com", + "keywords": [ + "commonmark", + "flavored", + "gfm", + "github", + "github-flavored", + "markdown", + "md", + "parser" + ], + "support": { + "docs": "https://commonmark.thephpleague.com/", + "forum": "https://github.com/thephpleague/commonmark/discussions", + "issues": "https://github.com/thephpleague/commonmark/issues", + "rss": "https://github.com/thephpleague/commonmark/releases.atom", + "source": "https://github.com/thephpleague/commonmark" + }, + "funding": [ + { + "url": "https://www.colinodell.com/sponsor", + "type": "custom" + }, + { + "url": "https://www.paypal.me/colinpodell/10.00", + "type": "custom" + }, + { + "url": "https://github.com/colinodell", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/league/commonmark", + "type": "tidelift" + } + ], + "time": "2023-03-24T15:16:10+00:00" + }, + { + "name": "league/config", + "version": "v1.2.0", + "source": { + "type": "git", + "url": "https://github.com/thephpleague/config.git", + "reference": "754b3604fb2984c71f4af4a9cbe7b57f346ec1f3" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/thephpleague/config/zipball/754b3604fb2984c71f4af4a9cbe7b57f346ec1f3", + "reference": "754b3604fb2984c71f4af4a9cbe7b57f346ec1f3", + "shasum": "" + }, + "require": { + "dflydev/dot-access-data": "^3.0.1", + "nette/schema": "^1.2", + "php": "^7.4 || ^8.0" + }, + "require-dev": { + "phpstan/phpstan": "^1.8.2", + "phpunit/phpunit": "^9.5.5", + "scrutinizer/ocular": "^1.8.1", + "unleashedtech/php-coding-standard": "^3.1", + "vimeo/psalm": "^4.7.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-main": "1.2-dev" + } + }, + "autoload": { + "psr-4": { + "League\\Config\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Colin O'Dell", + "email": "colinodell@gmail.com", + "homepage": "https://www.colinodell.com", + "role": "Lead Developer" + } + ], + "description": "Define configuration arrays with strict schemas and access values with dot notation", + "homepage": "https://config.thephpleague.com", + "keywords": [ + "array", + "config", + "configuration", + "dot", + "dot-access", + "nested", + "schema" + ], + "support": { + "docs": "https://config.thephpleague.com/", + "issues": "https://github.com/thephpleague/config/issues", + "rss": "https://github.com/thephpleague/config/releases.atom", + "source": "https://github.com/thephpleague/config" + }, + "funding": [ + { + "url": "https://www.colinodell.com/sponsor", + "type": "custom" + }, + { + "url": "https://www.paypal.me/colinpodell/10.00", + "type": "custom" + }, + { + "url": "https://github.com/colinodell", + "type": "github" + } + ], + "time": "2022-12-11T20:36:23+00:00" + }, { "name": "monolog/monolog", "version": "3.3.1", @@ -2063,6 +2326,155 @@ ], "time": "2023-02-06T13:46:10+00:00" }, + { + "name": "nette/schema", + "version": "v1.2.3", + "source": { + "type": "git", + "url": "https://github.com/nette/schema.git", + "reference": "abbdbb70e0245d5f3bf77874cea1dfb0c930d06f" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/nette/schema/zipball/abbdbb70e0245d5f3bf77874cea1dfb0c930d06f", + "reference": "abbdbb70e0245d5f3bf77874cea1dfb0c930d06f", + "shasum": "" + }, + "require": { + "nette/utils": "^2.5.7 || ^3.1.5 || ^4.0", + "php": ">=7.1 <8.3" + }, + "require-dev": { + "nette/tester": "^2.3 || ^2.4", + "phpstan/phpstan-nette": "^1.0", + "tracy/tracy": "^2.7" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.2-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause", + "GPL-2.0-only", + "GPL-3.0-only" + ], + "authors": [ + { + "name": "David Grudl", + "homepage": "https://davidgrudl.com" + }, + { + "name": "Nette Community", + "homepage": "https://nette.org/contributors" + } + ], + "description": "📐 Nette Schema: validating data structures against a given Schema.", + "homepage": "https://nette.org", + "keywords": [ + "config", + "nette" + ], + "support": { + "issues": "https://github.com/nette/schema/issues", + "source": "https://github.com/nette/schema/tree/v1.2.3" + }, + "time": "2022-10-13T01:24:26+00:00" + }, + { + "name": "nette/utils", + "version": "v4.0.0", + "source": { + "type": "git", + "url": "https://github.com/nette/utils.git", + "reference": "cacdbf5a91a657ede665c541eda28941d4b09c1e" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/nette/utils/zipball/cacdbf5a91a657ede665c541eda28941d4b09c1e", + "reference": "cacdbf5a91a657ede665c541eda28941d4b09c1e", + "shasum": "" + }, + "require": { + "php": ">=8.0 <8.3" + }, + "conflict": { + "nette/finder": "<3", + "nette/schema": "<1.2.2" + }, + "require-dev": { + "jetbrains/phpstorm-attributes": "dev-master", + "nette/tester": "^2.4", + "phpstan/phpstan": "^1.0", + "tracy/tracy": "^2.9" + }, + "suggest": { + "ext-gd": "to use Image", + "ext-iconv": "to use Strings::webalize(), toAscii(), chr() and reverse()", + "ext-intl": "to use Strings::webalize(), toAscii(), normalize() and compare()", + "ext-json": "to use Nette\\Utils\\Json", + "ext-mbstring": "to use Strings::lower() etc...", + "ext-tokenizer": "to use Nette\\Utils\\Reflection::getUseStatements()", + "ext-xml": "to use Strings::length() etc. when mbstring is not available" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.0-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause", + "GPL-2.0-only", + "GPL-3.0-only" + ], + "authors": [ + { + "name": "David Grudl", + "homepage": "https://davidgrudl.com" + }, + { + "name": "Nette Community", + "homepage": "https://nette.org/contributors" + } + ], + "description": "🛠 Nette Utils: lightweight utilities for string & array manipulation, image handling, safe JSON encoding/decoding, validation, slug or strong password generating etc.", + "homepage": "https://nette.org", + "keywords": [ + "array", + "core", + "datetime", + "images", + "json", + "nette", + "paginator", + "password", + "slugify", + "string", + "unicode", + "utf-8", + "utility", + "validation" + ], + "support": { + "issues": "https://github.com/nette/utils/issues", + "source": "https://github.com/nette/utils/tree/v4.0.0" + }, + "time": "2023-02-02T10:41:53+00:00" + }, { "name": "phpdocumentor/reflection-common", "version": "2.2.0", diff --git a/src/Markdown/Extension/PointMarkdownExtension.php b/src/Markdown/Extension/PointMarkdownExtension.php new file mode 100644 index 0000000..f9f0bac --- /dev/null +++ b/src/Markdown/Extension/PointMarkdownExtension.php @@ -0,0 +1,85 @@ +addSchema('commonmark', Expect::structure([ + 'use_asterisk' => Expect::bool(true), + 'use_underscore' => Expect::bool(true), + 'enable_strong' => Expect::bool(true), + 'enable_em' => Expect::bool(true), + 'unordered_list_markers' => Expect::listOf('string') + ->min(1) + ->default(['*', '+', '-']) + ->mergeDefaults(false), + ])); + } + + public function register(EnvironmentBuilderInterface $environment): void + { + $environment + ->addBlockStartParser(new PointParser\Block\BlockQuoteStartParser(), 70) + ->addBlockStartParser(new LeagueParser\Block\HeadingStartParser(), 60) + ->addBlockStartParser(new LeagueParser\Block\FencedCodeStartParser(), 50) + ->addBlockStartParser(new LeagueParser\Block\HtmlBlockStartParser(), 40) + ->addBlockStartParser(new LeagueParser\Block\ThematicBreakStartParser(), 20) + ->addBlockStartParser(new LeagueParser\Block\ListBlockStartParser(), 10) + ->addBlockStartParser(new LeagueParser\Block\IndentedCodeStartParser(), -100) + + ->addInlineParser(new PointParser\Inline\NewLineParser(), 200) + ->addInlineParser(new LeagueParser\Inline\BacktickParser(), 150) + ->addInlineParser(new LeagueParser\Inline\EscapableParser(), 80) + ->addInlineParser(new LeagueParser\Inline\EntityParser(), 70) + ->addInlineParser(new LeagueParser\Inline\AutolinkParser(), 50) + ->addInlineParser(new PointParser\Inline\ImageLinkParser(), 60) + ->addInlineParser(new LeagueParser\Inline\HtmlInlineParser(), 40) + ->addInlineParser(new LeagueParser\Inline\CloseBracketParser(), 30) + ->addInlineParser(new LeagueParser\Inline\OpenBracketParser(), 20) + ->addInlineParser(new LeagueParser\Inline\BangParser(), 10) + + ->addRenderer(LeagueNode\Block\BlockQuote::class, new LeagueRenderer\Block\BlockQuoteRenderer(), 0) + ->addRenderer(LeagueCoreNode\Block\Document::class, new LeagueCoreRenderer\Block\DocumentRenderer(), 0) + ->addRenderer(LeagueNode\Block\FencedCode::class, new LeagueRenderer\Block\FencedCodeRenderer(), 0) + ->addRenderer(LeagueNode\Block\Heading::class, new LeagueRenderer\Block\HeadingRenderer(), 0) + ->addRenderer(LeagueNode\Block\HtmlBlock::class, new LeagueRenderer\Block\HtmlBlockRenderer(), 0) + ->addRenderer(LeagueNode\Block\IndentedCode::class, new LeagueRenderer\Block\IndentedCodeRenderer(), 0) + ->addRenderer(LeagueNode\Block\ListBlock::class, new LeagueRenderer\Block\ListBlockRenderer(), 0) + ->addRenderer(LeagueNode\Block\ListItem::class, new LeagueRenderer\Block\ListItemRenderer(), 0) + ->addRenderer(LeagueCoreNode\Block\Paragraph::class, new LeagueCoreRenderer\Block\ParagraphRenderer(), 0) + ->addRenderer(LeagueNode\Block\ThematicBreak::class, new LeagueRenderer\Block\ThematicBreakRenderer(), 0) + + ->addRenderer(LeagueNode\Inline\Code::class, new LeagueRenderer\Inline\CodeRenderer(), 0) + ->addRenderer(LeagueNode\Inline\Emphasis::class, new LeagueRenderer\Inline\EmphasisRenderer(), 0) + ->addRenderer(LeagueNode\Inline\HtmlInline::class, new LeagueRenderer\Inline\HtmlInlineRenderer(), 0) + ->addRenderer(LeagueNode\Inline\Image::class, new LeagueRenderer\Inline\ImageRenderer(), 0) + ->addRenderer(LeagueNode\Inline\Link::class, new LeagueRenderer\Inline\LinkRenderer(), 0) + ->addRenderer(LeagueCoreNode\Inline\Newline::class, new LeagueCoreRenderer\Inline\NewlineRenderer(), 0) + ->addRenderer(LeagueNode\Inline\Strong::class, new LeagueRenderer\Inline\StrongRenderer(), 0) + ->addRenderer(LeagueCoreNode\Inline\Text::class, new LeagueCoreRenderer\Inline\TextRenderer(), 0) + ; + + if ($environment->getConfiguration()->get('commonmark/use_asterisk')) { + $environment->addDelimiterProcessor(new EmphasisDelimiterProcessor('*')); + } + + if ($environment->getConfiguration()->get('commonmark/use_underscore')) { + $environment->addDelimiterProcessor(new EmphasisDelimiterProcessor('_')); + } + } +} diff --git a/src/Markdown/Parser/Block/BlockQuoteStartParser.php b/src/Markdown/Parser/Block/BlockQuoteStartParser.php new file mode 100644 index 0000000..ba8a16d --- /dev/null +++ b/src/Markdown/Parser/Block/BlockQuoteStartParser.php @@ -0,0 +1,29 @@ +isIndented()) { + return BlockStart::none(); + } + + if ($cursor->getNextNonSpaceCharacter() !== '>') { + return BlockStart::none(); + } + + $cursor->advanceToNextNonSpaceOrTab(); + $cursor->advanceBy(1); + $cursor->advanceBySpaceOrTab(); + + return BlockStart::of(new BlockQuoteParser())->at($cursor); + } +} diff --git a/src/Markdown/Parser/Inline/ImageLinkParser.php b/src/Markdown/Parser/Inline/ImageLinkParser.php new file mode 100644 index 0000000..39c8517 --- /dev/null +++ b/src/Markdown/Parser/Inline/ImageLinkParser.php @@ -0,0 +1,11 @@ +getCursor()->advanceBy(1); + + // Check previous inline for trailing spaces + $spaces = 0; + $lastInline = $inlineContext->getContainer()->lastChild(); + if ($lastInline instanceof Text) { + $trimmed = \rtrim($lastInline->getLiteral(), ' '); + $spaces = \strlen($lastInline->getLiteral()) - \strlen($trimmed); + if ($spaces) { + $lastInline->setLiteral($trimmed); + } + } + + if ($spaces >= 2) { + $inlineContext->getContainer()->appendChild(new Newline(Newline::HARDBREAK)); + } else { + $inlineContext->getContainer()->appendChild(new Newline(Newline::SOFTBREAK)); + } + + return true; + } +} diff --git a/src/Markdown/PointMarkdownConverter.php b/src/Markdown/PointMarkdownConverter.php new file mode 100644 index 0000000..074a32c --- /dev/null +++ b/src/Markdown/PointMarkdownConverter.php @@ -0,0 +1,21 @@ + 'strip', + ]); + $env->addExtension(new PointMarkdownExtension()); + + parent::__construct($env); + } +}