Files
DomFramework/src/Markdown.php
2022-11-25 21:21:30 +01:00

496 lines
18 KiB
PHP

<?php
/** DomFramework
* @package domframework
* @author Dominique Fournier <dominique@fournier38.fr>
* @license BSD
*/
namespace Domframework;
/** Convert the Markdown text to html format
 * html() escapes the input and handles the constructs spanning several lines,
 * detectBlock() dispatches each line to the matching type*() block renderer
 * and searchReplace() converts the inline markup (emphasis, links, titles).
 */
class Markdown
{
    /** To debug the markdown analyzer, activate the option */
    public $debug = false;

    /** The list of the HTML elements used by block */
    private $blockid = array("<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>",
        "<hr/>");

    /** Convert the markdown text to html
     * @param string $markdown The markdown to convert
     * @return string The generated HTML
     */
    public function html($markdown)
    {
        $markdown = rtrim($markdown);
        // The flags are explicit : the generated attributes are delimited by
        // single quotes, so the single quotes of the text MUST be encoded, and
        // the default flags only do it since PHP 8.1
        $markdown = htmlentities(
            $markdown,
            ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401
        );
        // Here are the regexp on multilines
        $search = array();
        $replace = array();
        // Titles with underline (SeText)
        // Title1
        // ======
        $search[] = "/(.+)\\n==+$/Um";
        $replace[] = "</p><h1>\\1</h1>\n<p>";
        // Title2
        // ------
        $search[] = "/(.+)\\n--+$/Um";
        $replace[] = "</p><h2>\\1</h2>\n<p>";
        // SEPARATORS : *** --- ___ * * * - - - _ _ _
        // Must be placed before EMPHASIS
        $search[] = "/^[*_-] ?[*_-] ?[*_-]$/Um";
        $replace[] = "</p><hr/>\n<p>";
        $markdown = preg_replace($search, $replace, $markdown);
        $textArray = explode("\n", $markdown);
        $pos = 0;
        $html = $this->detectBlock($textArray, 0, $pos);
        // The titles and separators close and reopen the paragraph : remove
        // the empty paragraphs it creates
        $html = str_replace("<p></p>", "", $html);
        $html = str_replace("<p> </p>", "", $html);
        $html = trim($html);
        return $html;
    }

    /** Search and replace in the paragraph on one line
     * Convert the separators, the emphasis, the inline code, the links, the
     * images and the one-line titles. A backslash before a marker disables the
     * conversion and is removed from the result.
     * @param string $line The line to analyze (already in HTML entities)
     * @return string The line converted in HTML
     */
    private function searchReplace($line)
    {
        if ($this->debug) {
            echo "CALL searchReplace ($line)\n";
        }
        // REMEMBER : THE $line is already in HTML ENTITIES !
        // Quotes : &quot;
        $res = $line;
        // Manage the <hr/> separators
        $search = array("***", "---", "___", "* * *", "- - -", "_ _ _");
        foreach ($search as $pattern) {
            $start = 0;
            while (1) {
                $start = strpos($res, $pattern, $start);
                if ($start === false) {
                    break;
                }
                if ($start > 0 && $res[$start - 1] === "\\") {
                    // Escaped separator : keep it and remove the backslash
                    $res = substr($res, 0, $start - 1) . substr($res, $start);
                } else {
                    $res = substr($res, 0, $start) . "<hr/>" .
                        substr($res, $start + strlen($pattern));
                }
            }
        }
        // Manage the emphasis and code correctly with the backslash
        // The double markers must be tested before the single ones
        $search = array();
        $replace = array();
        $search[] = "__";
        $replace[] = "<strong>\\1</strong>";
        $search[] = "_";
        $replace[] = "<em>\\1</em>";
        $search[] = "**";
        $replace[] = "<strong>\\1</strong>";
        $search[] = "*";
        $replace[] = "<em>\\1</em>";
        $search[] = "`";
        $replace[] = "<code>\\1</code>";
        foreach ($search as $key => $pattern) {
            $start = 0;
            while (1) {
                $start = strpos($res, $pattern, $start);
                if ($start === false) {
                    break;
                }
                $end = strpos($res, $pattern, $start + strlen($pattern));
                if ($end === false) {
                    break;
                }
                if ($res[$start + 1] === $pattern) {
                    // Pattern too long, not this test : skip it
                    // (can only be true for the one-character markers)
                    $start += strlen($pattern) +
                        strspn($res, $pattern, $start + 1);
                    continue;
                }
                if ($start > 0 && $res[$start - 1] === "\\") {
                    // Escaped marker : remove the backslash and skip the
                    // ending pattern. The text is one byte shorter, so the
                    // position of the ending pattern must be moved too, or the
                    // next search can start after the end of the string
                    $res = substr($res, 0, $start - 1) . substr($res, $start);
                    $end--;
                } else {
                    // It is the real pattern found, without backslash. Replace
                    // by the $replace value
                    $content = substr(
                        $res,
                        $start + strlen($pattern),
                        $end - $start - strlen($pattern)
                    );
                    if (trim($content) !== "") {
                        $cut = strpos($replace[$key], "\\1");
                        $first = substr($replace[$key], 0, $cut);
                        $second = substr($replace[$key], $cut + 2);
                        $res = substr($res, 0, $start) . $first . $content .
                            $second . substr($res, $end + strlen($pattern));
                    }
                }
                $start = $end + strlen($pattern);
            }
        }
        // Manage the others cases
        $search = array();
        $replace = array();
        // Titles short
        // == TITLE1
        $search[] = '~^([^\\\\]|^)(==+ (.+)( ==+)?)$~Um';
        $replace[] = '</p>' . "\n" . '<h1>\3</h1>' . "\n" . '<p>';
        // -- TITLE2
        // The newlines must be real ones : in a single-quoted string, \n is
        // written as is in the result
        $search[] = '~^([^\\\\]|^)(--+ (.+)( --+)?)$~Um';
        $replace[] = '</p>' . "\n" . '<h2>\3</h2>' . "\n" . '<p>';
        // LINKS (can be relative)
        // images
        $search[] = '~([^\\\\]|^)(!\[(.+)\]\((.+)\))~';
        $replace[] = '\1<img src=\'\4\' alt=\'\3\'/>';
        // [Google Site](http://google.fr/ "With help bubble")
        $search[] = '~([^\\\\!]|^)(\[(.+)\]\((.+) &quot;(.+)&quot;\))~';
        $replace[] = '\1<a href=\'\4\' title=\'\5\'>\3</a>';
        // [Google Site](http://google.fr/)
        $search[] = '~([^\\\\!]|^)(\[(.+)\]\((.+)\))~U';
        $replace[] = '\1<a href=\'\4\'>\3</a>';
        // Automatics links :
        // <http://dominique.fournier38.fr>
        // <dominique@fournier38.fr>
        $search[] = '~([^\\\\]|^)(&lt;(https?://.+)&gt;)~U';
        $replace[] = '\1<a href=\'\3\'>\3</a>';
        $search[] = '~([^\\\\]|^)(&lt;(.+@.+)&gt;)~U';
        $replace[] = '\1<a href=\'mailto:\3\'>\3</a>';
        // The links must not allow the <em> : redo the conversion
        $search[] =
            '~(<a href=\'.*)<em>(.*)</em>(.*\'>.*)<em>(.*)</em>(.*</a>)~';
        $replace[] = '\1_\2_\3_\4_\5';
        // TODO : Links by reference :
        // Here is a short text written by [Michel Fortin][mf].
        // [mf]: http://michelf.ca/ "My web site"
        // TITLES
        // Titles ATX (Optional sharp at the end)
        // ###### Title6
        $search[] = '~^([^\\\\]|^)?(###### (.+)( +#+)?)$~Um';
        $replace[] = '</p><h6>\3</h6><p>';
        // ##### Title5
        $search[] = '~^([^\\\\]|^)?(##### (.+)( +#+)?)$~Um';
        $replace[] = '</p><h5>\3</h5><p>';
        // #### Title4
        $search[] = '~^([^\\\\]|^)?(#### (.+)( +#+)?)$~Um';
        $replace[] = '</p><h4>\3</h4><p>';
        // ### Title3
        $search[] = '~^([^\\\\]|^)?(### (.+)( +#+)?)$~Um';
        $replace[] = '</p><h3>\3</h3><p>';
        // ## Title2
        $search[] = '~^([^\\\\]|^)?(## (.+)( +#+)?)$~Um';
        $replace[] = '</p><h2>\3</h2><p>';
        // # Title1
        $search[] = '~^([^\\\\]|^)?(# (.+)( +#+)?)$~Um';
        $replace[] = '</p><h1>\3</h1><p>';
        // Remove the backslashes on the existing regex : for each pattern
        // starting by the "not escaped" prefix, add the "escaped" variant,
        // which keeps the text as is without the backslash
        $prefixes = array(
            '([^\\\\]|^)?',
            '([^\\\\]|^)',
            '([^\\\\!]|^)',
            '([^\\\\*]|^)',
            '([^\\\\_]|^)',
        );
        $unescaped = $search;
        foreach ($unescaped as $s) {
            $escaped = str_replace($prefixes, '([\\\\])', $s);
            if ($escaped === $s) {
                // No prefix in this pattern : there is no escaped variant.
                // Adding it unchanged would replace a second match by its
                // second group only, and lose the rest of the text
                continue;
            }
            $search[] = $escaped;
            $replace[] = '\2';
        }
        $res = preg_replace($search, $replace, $res);
        return $res;
    }

    /** Return HTML code corresponding to the code block
     * @param array $text The Markdown text to translate split by \n
     * @param integer $depth The depth of current bloc (in number of space)
     * @param integer &$pos The start line number of the bloc. Set to the first
     * line after the bloc on return
     * @return string The HTML code
     */
    private function typeCode($text, $depth, &$pos)
    {
        if ($this->debug) {
            echo "CALL typeCode (\$text, $depth, $pos)\n";
        }
        $posStart = $pos;
        $content = "";
        // End of code block : end of markdown text / depth lighter than $depth
        while (
            isset($text[$pos]) &&
            $this->depth($text[$pos]) >= $depth
        ) {
            // The Code blocks can't be imbricated
            if ($pos > $posStart) {
                $content .= "\n";
            }
            // Remove the indentation of the bloc, keep the deeper one
            $content .= substr($text[$pos], $depth);
            $pos++;
        }
        // Insert Geshi on $content
        if ($this->debug) {
            echo "RETURN typeCode : <pre><code>$content</code></pre>\n";
        }
        return "<pre><code>$content</code></pre>\n";
    }

    /** Return HTML code corresponding to the OL block
     * @param array $text The Markdown text to translate split by \n
     * @param integer $depth The depth of current bloc (in number of space)
     * @param integer &$pos The start line number of the bloc
     * @return string The HTML code
     */
    private function typeOL($text, $depth, &$pos)
    {
        if ($this->debug) {
            echo "CALL typeOL (\$text, $depth, $pos)\n";
        }
        $content = $this->typeOLUL($text, $depth, $pos, "ol");
        if ($this->debug) {
            echo "RETURN typeOL : $content\n";
        }
        return $content;
    }

    /** Return HTML code corresponding to the UL block
     * @param array $text The Markdown text to translate split by \n
     * @param integer $depth The depth of current bloc (in number of space)
     * @param integer &$pos The start line number of the bloc
     * @return string The HTML code
     */
    private function typeUL($text, $depth, &$pos)
    {
        if ($this->debug) {
            echo "CALL typeUL (\$text, $depth, $pos)\n";
        }
        $content = $this->typeOLUL($text, $depth, $pos, "ul");
        if ($this->debug) {
            echo "RETURN typeUL : $content\n";
        }
        return $content;
    }

    /** Return the HTML code corresponding to the OL/UL block
     * @param array $text The Markdown text to translate split by \n
     * @param integer $depth The depth of current bloc (in number of space)
     * @param integer &$pos The start line number of the bloc. Set to the first
     * line after the bloc on return
     * @param string $type The block type : "ul" or "ol"
     * @return string The HTML code
     */
    private function typeOLUL($text, $depth, &$pos, $type)
    {
        if ($this->debug) {
            echo "CALL typeOLUL (\$text, $depth, $pos, $type)\n";
        }
        $content = "";
        // End of OL/UL block : end of markdown text / depth lighter than
        // $depth / linetype changed
        while (
            isset($text[$pos]) &&
            $this->depth($text[$pos]) >= $depth &&
            $this->lineType($text[$pos]) === $type
        ) {
            if ($this->debug) {
                echo "Start while $pos\n";
            }
            $content .= str_repeat(" ", ($depth + 2)) . "<li>";
            $blockContent = $text[$pos];
            $pos++;
            // Look at continuous lines : not empty, not a new item and at the
            // same depth. They are joined to the item with a space
            while (
                isset($text[$pos]) &&
                $this->lineType($text[$pos]) !== "NONE" &&
                $this->lineType($text[$pos]) !== $type &&
                $this->depth($text[$pos]) === $depth
            ) {
                if ($this->debug) {
                    echo "Continuous line : " . $pos . "\n";
                }
                $blockContent .= " " . $text[$pos];
                $pos++;
            }
            // Indent the li and remove the number and dot and space at start
            if ($type === "ol") {
                preg_match("/^( *)[0-9]+\. +(.*)/", $blockContent, $matches);
            } else {
                preg_match("/^( *)[-+*] +(.*)/", $blockContent, $matches);
            }
            if (!isset($matches[2])) {
                $lineTxt = $blockContent;
            } else {
                $lineTxt = $matches[2];
            }
            $content .= $this->searchReplace($lineTxt);
            if (isset($text[$pos]) && $this->depth($text[$pos]) > $depth) {
                // The next line is deeper : it is a bloc inside the item
                if ($this->debug) {
                    echo "Detect Block\n";
                }
                $content .= "\n" .
                    $this->detectBlock($text, $this->depth($text[$pos]), $pos) .
                    str_repeat(" ", ($depth + 2)) . "</li>\n";
            } else {
                $content .= "</li>\n";
            }
        }
        if ($this->debug) {
            echo "RETURN typeOLUL : <$type>\n$content</$type>\n";
        }
        return "<$type>\n$content" . str_repeat(" ", $depth) . "</$type>\n";
    }

    /** Return HTML code corresponding to the NONE block
     * The NONE type exists only on empty strings. Just skip the current and
     * empty line, and return an empty string
     * @param array $text The Markdown text to translate split by \n
     * @param integer $depth The depth of the current bloc (in number of space)
     * @param integer &$pos The start line number of the bloc
     * @return string Always an empty string
     */
    private function typeNONE($text, $depth, &$pos)
    {
        if ($this->debug) {
            echo "CALL typeNONE (\$text, $depth, $pos)\n";
        }
        $pos++;
        return "";
    }

    /** Return HTML code corresponding to the P block
     * @param array $text The Markdown text to translate split by \n
     * @param integer $depth The depth of current bloc (in number of space)
     * @param integer &$pos The start line number of the bloc. Set to the first
     * line after the bloc on return
     * @return string The HTML code
     */
    private function typeP($text, $depth, &$pos)
    {
        if ($this->debug) {
            echo "CALL typeP (\$text, $depth, $pos)\n";
        }
        $content = "";
        // End of P block : end of markdown text / depth lighter than $depth /
        // linetype changed
        $Pinc = $pos;
        while (
            isset($text[$pos]) &&
            $this->depth($text[$pos]) == $depth &&
            $this->lineType($text[$pos]) === "p"
        ) {
            if (substr($text[$pos], -2) === "  ") {
                // Two spaces at end of line : add <br/>
                $content .= $this->searchReplace(substr($text[$pos], 0, -2)) .
                    "<br/>";
            } elseif ($pos > $Pinc && substr($content, -5) !== "<br/>") {
                // Add a space between two lines from the same block, if this
                // is not the continuity of the block
                $content .= " " . $this->searchReplace($text[$pos]);
            } else {
                $content .= $this->searchReplace($text[$pos]);
            }
            $pos++;
        }
        if ($this->debug) {
            echo "RETURN typeP : <p>$content</p>\n";
        }
        return "<p>$content</p>\n";
    }

    /** Detect the type of the text and call the appropriate function
     * @param array $text The Markdown text to translate split by \n
     * @param integer $depth The depth of current bloc (in number of space)
     * @param integer &$pos The start line number of the bloc. Set to the first
     * line not managed by this call on return
     * @return string The HTML code
     */
    private function detectBlock($text, $depth, &$pos)
    {
        if ($this->debug) {
            echo "CALL detectBlock (\$text, $depth, $pos)\n";
        }
        $content = "";
        // detect the type and call the right type function
        while (isset($text[$pos])) {
            if ($this->depth($text[$pos]) > $depth && $depth === 0) {
                // New block code
                if ($this->debug) {
                    echo "New block code\n";
                }
                $content .= $this->typeCode(
                    $text,
                    $this->depth($text[$pos]),
                    $pos
                );
                continue;
            } elseif ($this->depth($text[$pos]) > $depth) {
                if ($this->debug) {
                    echo "CALL DEPTH > MINDEPTH (" .
                        $this->depth($text[$pos]) . " > $depth)\n";
                }
                $content .= $this->detectBlock(
                    $text,
                    $this->depth($text[$pos]),
                    $pos
                );
                continue;
            } elseif ($this->depth($text[$pos]) < $depth) {
                // Back to a shallower bloc : the caller will manage the line
                if ($this->debug) {
                    echo "CALL DEPTH < MINDEPTH (" .
                        $this->depth($text[$pos]) . " < $depth)\n";
                }
                return $content;
            }
            $type = $this->lineType($text[$pos]);
            // The method names are case insensitive : "typeul" calls typeUL
            $func = "type$type";
            if ($this->debug) {
                echo "FROM DETECT : CALL $func (line=" . $text[$pos] . ")\n";
            }
            $content .= str_repeat(" ", $depth) .
                $this->$func($text, $depth, $pos);
        }
        return $content;
    }

    /** Return the Type of object in the provided line
     * @param string $line The line to get the type
     * @return string One of "NONE" (empty line), "ul", "ol", "code" or "p"
     */
    private function lineType($line)
    {
        if (! isset($line[0])) {
            return "NONE";
        }
        if (preg_match("/^[ \t]*[+*-] /", $line) === 1) {
            return "ul";
        }
        if (preg_match("/^[ \t]*[0-9]+\. /", $line) === 1) {
            return "ol";
        }
        // NOTE(review): any line starting by one space or tab is seen as code,
        // where Markdown usually asks for four spaces - confirm the pattern
        if (preg_match("/^( |\t)+/", $line) === 1) {
            return "code";
        }
        return "p";
    }

    /** Return the depth of the provided line
     * @param string $line Line to analyze
     * @return integer The number of spaces at the start of the line (the
     * tabulations are not counted)
     */
    private function depth($line)
    {
        return strspn($line, " ");
    }
}