Files
DomFramework/markdown.php
2020-09-07 14:13:56 +00:00

472 lines
16 KiB
PHP

<?php
/** DomFramework
* @package domframework
* @author Dominique Fournier <dominique@fournier38.fr>
* @license BSD
*/
/** Convert the Markdown text to html format
 *
 * The conversion is done in two stages :
 * - html () escapes the text and handles the constructs which span several
 *   lines (SeText titles, separator lines),
 * - detectBlock () walks the lines and dispatches each one to the type*
 *   method matching its kind (paragraph, list, code block, empty line).
 *   The inline markup (emphasis, links, ATX titles...) is handled line by
 *   line by searchReplace ().
 */
class markdown
{
  /** To debug the markdown analyzer, activate the option.
   * When enabled, each analyzer step echoes a trace line
   */
  public $debug = false;

  /** The list of the HTML elements used by block */
  private $blockid = array ("<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>",
                            "<hr/>");

  /** Convert the markdown text to html
   * @param string $markdown The markdown to convert
   * @return string The generated HTML code
   */
  public function html ($markdown)
  {
    $markdown = rtrim ($markdown);
    // The flags are explicit so the result does not depend on the PHP
    // version defaults. ENT_QUOTES is needed because the links and images
    // are generated inside single-quoted attributes : a raw quote coming
    // from the text would close the attribute.
    $markdown = htmlentities ($markdown, ENT_QUOTES | ENT_SUBSTITUTE);
    // Here are the regexp on multilines
    $search = array ();
    $replace = array ();
    // Titles with underline (SeText)
    // Title1
    // ======
    $search[] = "/(.+)\\n==+$/Um";
    $replace[] = "</p><h1>\\1</h1>\n<p>";
    // Title2
    // ------
    $search[] = "/(.+)\\n--+$/Um";
    $replace[] = "</p><h2>\\1</h2>\n<p>";
    // SEPARATORS : *** --- ___ * * * - - - _ _ _
    // Must be placed before EMPHASIS
    $search[] = "/^[*_-] ?[*_-] ?[*_-]$/Um";
    $replace[] = "</p><hr/>\n<p>";
    $markdown = preg_replace ($search, $replace, $markdown);
    $textArray = explode ("\n", $markdown);
    $pos = 0;
    $html = $this->detectBlock ($textArray, 0, $pos);
    // The block replacements close and reopen a paragraph around them :
    // remove the empty paragraphs left behind
    $html = str_replace ("<p></p>", "", $html);
    $html = str_replace ("<p> </p>", "", $html);
    $html = trim ($html);
    return $html;
  }

  /** Search and replace in the paragraph on one line
   * @param string $line The line to analyze (already HTML-escaped)
   * @return string The line with the inline markdown converted to HTML
   */
  private function searchReplace ($line)
  {
    if ($this->debug)
      echo "CALL searchReplace ($line)\n";
    // REMEMBER : THE $line is already in HTML ENTITIES !
    // Quotes : &quot; and &#039;
    $res = $line;
    // Manage the <hr/> separators
    $search = array ("***", "---", "___", "* * *", "- - -", "_ _ _");
    foreach ($search as $pattern)
    {
      $start = 0;
      while (1)
      {
        $start = strpos ($res, $pattern, $start);
        if ($start === false)
          break;
        if ($start > 0 && $res[$start-1] === "\\")
        {
          // Escaped separator : remove the backslash, keep the pattern as
          // text and restart the search after it
          $res = substr ($res, 0, $start - 1) . substr ($res, $start);
          $start += strlen ($pattern) - 1;
        }
        else
        {
          $res = substr ($res, 0, $start) . "<hr/>" .
                 substr ($res, $start+strlen ($pattern));
        }
      }
    }
    // Manage the emphasis and code correctly with the backslash.
    // The order matters : the double markers must be handled before the
    // single ones. Each entry is marker => (opening tag, closing tag)
    $emphasis = array (
      "__" => array ("<strong>", "</strong>"),
      "_"  => array ("<em>", "</em>"),
      "**" => array ("<strong>", "</strong>"),
      "*"  => array ("<em>", "</em>"),
      "`"  => array ("<code>", "</code>"),
    );
    foreach ($emphasis as $pattern=>$tags)
    {
      $patternLen = strlen ($pattern);
      $start = 0;
      while (1)
      {
        $start = strpos ($res, $pattern, $start);
        if ($start === false)
          break;
        $end = strpos ($res, $pattern, $start + $patternLen);
        if ($end === false)
          break;
        if ($res[$start+1] === $pattern)
        {
          // Pattern too long, not this test : skip it
          // (can only be true for the one-character markers)
          $start += $patternLen + strspn ($res, $pattern, $start+1);
          continue;
        }
        if ($start > 0 && $res[$start-1] === "\\")
        {
          // Escaped marker : remove the backslash and skip the ending
          // pattern. The text is now one byte shorter : shift $end too, so
          // the next search offset stays inside the string
          $res = substr ($res, 0, $start - 1) . substr ($res, $start);
          $end--;
        }
        else
        {
          // It is the real pattern found, without backslash. Wrap the
          // content in the tags, unless there is only spaces inside
          $content = substr ($res, $start + $patternLen,
                             $end - $start - $patternLen);
          if (trim ($content) !== "")
          {
            $res = substr ($res, 0, $start).$tags[0].$content.$tags[1].
                   substr ($res, $end + $patternLen);
          }
        }
        $start = $end + $patternLen;
      }
    }
    // Manage the others cases
    $search = array ();
    $replace = array ();
    // Titles short
    // == TITLE1
    $search[] = '~^([^\\\\]|^)(==+ (.+)( ==+)?)$~Um';
    $replace[] = '</p>'."\n".'<h1>\3</h1>'."\n".'<p>';
    // -- TITLE2
    // The newlines must be real ones : a \n inside single quotes would be
    // copied as a backslash and a n in the HTML
    $search[] = '~^([^\\\\]|^)(--+ (.+)( --+)?)$~Um';
    $replace[] = '</p>'."\n".'<h2>\3</h2>'."\n".'<p>';
    // LINKS (can be relative)
    // images
    $search[] = '~([^\\\\]|^)(!\[(.+)\]\((.+)\))~';
    $replace[] = '\1<img src=\'\4\' alt=\'\3\'/>';
    // [Google Site](http://google.fr/ "With help bubble")
    $search[] = '~([^\\\\!]|^)(\[(.+)\]\((.+) &quot;(.+)&quot;\))~';
    $replace[] = '\1<a href=\'\4\' title=\'\5\'>\3</a>';
    // [Google Site](http://google.fr/)
    $search[] = '~([^\\\\!]|^)(\[(.+)\]\((.+)\))~U';
    $replace[] = '\1<a href=\'\4\'>\3</a>';
    // Automatics links :
    // <http://dominique.fournier38.fr>
    // <dominique@fournier38.fr>
    $search[] = '~([^\\\\]|^)(&lt;(https?://.+)&gt;)~U';
    $replace[] = '\1<a href=\'\3\'>\3</a>';
    $search[] = '~([^\\\\]|^)(&lt;(.+@.+)&gt;)~U';
    $replace[] = '\1<a href=\'mailto:\3\'>\3</a>';
    // The links must not allow the <em> : redo the conversion
    $search[] = '~(<a href=\'.*)<em>(.*)</em>(.*\'>.*)<em>(.*)</em>(.*</a>)~';
    $replace[] = '\1_\2_\3_\4_\5';
    // TODO : Links by reference :
    // Here is a short text written by [Michel Fortin][mf].
    // [mf]: http://michelf.ca/ "My web site"
    // TITLES
    // Titles ATX (Optional sharp at the end)
    // ###### Title6
    $search[] = '~^([^\\\\]|^)?(###### (.+)( +#+)?)$~Um';
    $replace[] = '</p><h6>\3</h6><p>';
    // ##### Title5
    $search[] = '~^([^\\\\]|^)?(##### (.+)( +#+)?)$~Um';
    $replace[] = '</p><h5>\3</h5><p>';
    // #### Title4
    $search[] = '~^([^\\\\]|^)?(#### (.+)( +#+)?)$~Um';
    $replace[] = '</p><h4>\3</h4><p>';
    // ### Title3
    $search[] = '~^([^\\\\]|^)?(### (.+)( +#+)?)$~Um';
    $replace[] = '</p><h3>\3</h3><p>';
    // ## Title2
    $search[] = '~^([^\\\\]|^)?(## (.+)( +#+)?)$~Um';
    $replace[] = '</p><h2>\3</h2><p>';
    // # Title1
    $search[] = '~^([^\\\\]|^)?(# (.+)( +#+)?)$~Um';
    $replace[] = '</p><h1>\3</h1><p>';
    // Remove the backslashes on the existing regex : for each rule above,
    // add a twin rule matching the escaped form (backslash before the
    // construct) and replacing it by the construct itself, without the
    // backslash. foreach iterates on a copy of $search, so the rules added
    // here are not visited by this loop
    foreach ($search as $s)
    {
      $s = str_replace ('([^\\\\]|^)?', '([\\\\])', $s);
      $s = str_replace ('([^\\\\]|^)', '([\\\\])', $s);
      $s = str_replace ('([^\\\\!]|^)', '([\\\\])', $s);
      $s = str_replace ('([^\\\\*]|^)', '([\\\\])', $s);
      $s = str_replace ('([^\\\\_]|^)', '([\\\\])', $s);
      $search[] = $s;
      $replace[] = '\2';
    }
    $res = preg_replace ($search, $replace, $res);
    return $res;
  }

  /** Return HTML code corresponding to the code block
   * @param array $text The Markdown text to translate split by \n
   * @param integer $depth The depth of current bloc (in number of space)
   * @param integer &$pos The start line number of the bloc. Updated to the
   * first line after the bloc
   * @return string The <pre><code> HTML block
   */
  private function typeCode ($text, $depth, &$pos)
  {
    if ($this->debug) echo "CALL typeCode (\$text, $depth, $pos)\n";
    $posStart = $pos;
    $content = "";
    // End of code block : end of markdown text / depth lighter than $depth
    while (isset ($text[$pos]) &&
           $this->depth ($text[$pos]) >= $depth)
    {
      // The Code blocks can't be nested : the lines are copied verbatim,
      // only the indentation of the block is removed
      if ($pos > $posStart)
        $content .= "\n";
      $content .= substr ($text[$pos], $depth);
      $pos++;
    }
    // Insert Geshi on $content
    if ($this->debug)
      echo "RETURN typeCode : <pre><code>$content</code></pre>\n";
    return "<pre><code>$content</code></pre>\n";
  }

  /** Return HTML code corresponding to the OL block
   * @param array $text The Markdown text to translate split by \n
   * @param integer $depth The depth of current bloc (in number of space)
   * @param integer &$pos The start line number of the bloc
   * @return string The <ol> HTML block
   */
  private function typeOL ($text, $depth, &$pos)
  {
    if ($this->debug) echo "CALL typeOL (\$text, $depth, $pos)\n";
    $content = $this->typeOLUL ($text, $depth, $pos, "ol");
    if ($this->debug) echo "RETURN typeOL : $content\n";
    return $content;
  }

  /** Return HTML code corresponding to the UL block
   * @param array $text The Markdown text to translate split by \n
   * @param integer $depth The depth of current bloc (in number of space)
   * @param integer &$pos The start line number of the bloc
   * @return string The <ul> HTML block
   */
  private function typeUL ($text, $depth, &$pos)
  {
    if ($this->debug) echo "CALL typeUL (\$text, $depth, $pos)\n";
    $content = $this->typeOLUL ($text, $depth, $pos, "ul");
    if ($this->debug) echo "RETURN typeUL : $content\n";
    return $content;
  }

  /** Return the HTML code corresponding to the OL/UL block
   * @param array $text The Markdown text to translate split by \n
   * @param integer $depth The depth of current bloc (in number of space)
   * @param integer &$pos The start line number of the bloc. Updated to the
   * first line after the bloc
   * @param string $type The block type : "ul" or "ol"
   * @return string The list HTML block
   */
  private function typeOLUL ($text, $depth, &$pos, $type)
  {
    if ($this->debug) echo "CALL typeOLUL (\$text, $depth, $pos, $type)\n";
    $content = "";
    // End of OL/UL block : end of markdown text / depth lighter than $depth /
    // linetype changed
    while (isset ($text[$pos]) &&
           $this->depth ($text[$pos]) >= $depth &&
           $this->lineType ($text[$pos]) === $type)
    {
      if ($this->debug)
        echo "Start while $pos\n";
      $content .= str_repeat (" ", ($depth+2))."<li>";
      $blockContent = $text[$pos];
      $pos++;
      // Look at continuous lines : non empty lines of another type at the
      // same depth are the continuation of the current item
      while (isset ($text[$pos]) &&
             $this->lineType ($text[$pos]) !== "NONE" &&
             $this->lineType ($text[$pos]) !== $type &&
             $this->depth ($text[$pos]) === $depth)
      {
        if ($this->debug)
          echo "Continuous line : ".$pos."\n";
        $blockContent .= " ".$text[$pos];
        $pos++;
      }
      // Indent the li and remove the number and dot and space at start
      // (or the bullet and space for the unordered lists)
      $matches = array ();
      if ($type === "ol")
        preg_match ("/^( *)[0-9]+\. +(.*)/", $blockContent, $matches);
      else
        preg_match ("/^( *)[-+*] +(.*)/", $blockContent, $matches);
      $lineTxt = isset ($matches[2]) ? $matches[2] : $blockContent;
      $content .= $this->searchReplace ($lineTxt);
      if (isset ($text[$pos]) && $this->depth ($text[$pos]) > $depth)
      {
        // A deeper line follows : it is a block nested in the current item
        if ($this->debug)
          echo "Detect Block\n";
        $content .= "\n".
                    $this->detectBlock ($text, $this->depth ($text[$pos]), $pos).
                    str_repeat (" ", ($depth+2))."</li>\n";
      }
      else
      {
        $content .= "</li>\n";
      }
    }
    if ($this->debug) echo "RETURN typeOLUL : <$type>\n$content</$type>\n";
    return "<$type>\n$content".str_repeat (" ", $depth)."</$type>\n";
  }

  /** Return HTML code corresponding to the NONE block
   * The NONE type exists only on empty strings. Just skip the current and
   * empty line, and return an empty string
   * @param array $text The Markdown text to translate split by \n
   * @param integer $depth The depth of the current bloc (in number of space)
   * @param integer &$pos The start line number of the bloc. Incremented by 1
   * @return string Always an empty string
   */
  private function typeNONE ($text, $depth, &$pos)
  {
    if ($this->debug) echo "CALL typeNONE (\$text, $depth, $pos)\n";
    $pos++;
    return "";
  }

  /** Return HTML code corresponding to the P block
   * @param array $text The Markdown text to translate split by \n
   * @param integer $depth The depth of current bloc (in number of space)
   * @param integer &$pos The start line number of the bloc. Updated to the
   * first line after the bloc
   * @return string The <p> HTML block
   */
  private function typeP ($text, $depth, &$pos)
  {
    if ($this->debug) echo "CALL typeP (\$text, $depth, $pos)\n";
    $content = "";
    // End of P block : end of markdown text / depth lighter than $depth /
    // linetype changed
    $Pinc = $pos;
    while (isset ($text[$pos]) &&
           $this->depth ($text[$pos]) === $depth &&
           $this->lineType ($text[$pos]) === "p")
    {
      if (substr ($text[$pos], -2) === "  ")
      {
        // Two spaces at end of line : add <br/>
        $content .= $this->searchReplace (substr ($text[$pos], 0, -2)) ."<br/>";
      }
      elseif ($pos > $Pinc && substr ($content, -5) !== "<br/>")
      {
        // Add a space between two lines from the same block, if this is not
        // the continuity of the block
        $content .= " ".$this->searchReplace ($text[$pos]);
      }
      else
      {
        $content .= $this->searchReplace ($text[$pos]);
      }
      $pos++;
    }
    if ($this->debug) echo "RETURN typeP : <p>$content</p>\n";
    return "<p>$content</p>\n";
  }

  /** Detect the type of the text and call the appropriate function
   * @param array $text The Markdown text to translate split by \n
   * @param integer $depth The depth of current bloc (in number of space)
   * @param integer &$pos The start line number of the bloc. Updated to the
   * first line which is not consumed
   * @return string the HTML code
   */
  private function detectBlock ($text, $depth, &$pos)
  {
    if ($this->debug) echo "CALL detectBlock (\$text, $depth, $pos)\n";
    $content = "";
    // detect the type and call the right type function
    while (isset ($text[$pos]))
    {
      $lineDepth = $this->depth ($text[$pos]);
      if ($lineDepth > $depth && $depth === 0)
      {
        // New block code : an indented line at the top level
        if ($this->debug)
          echo "New block code\n";
        $content .= $this->typeCode ($text, $lineDepth, $pos);
        continue;
      }
      elseif ($lineDepth > $depth)
      {
        if ($this->debug)
          echo "CALL DEPTH > MINDEPTH (".$lineDepth." > $depth)\n";
        $content .= $this->detectBlock ($text, $lineDepth, $pos);
        continue;
      }
      elseif ($lineDepth < $depth)
      {
        // Back to a shallower level : the caller handles this line
        if ($this->debug)
          echo "CALL DEPTH < MINDEPTH (".$lineDepth." < $depth)\n";
        return $content;
      }
      // Dispatch to typeP / typeUL / typeOL / typeCode / typeNONE. The
      // method names are case-insensitive in PHP, so "typep" calls typeP
      $type = $this->lineType ($text[$pos]);
      $func = "type$type";
      if ($this->debug)
        echo "FROM DETECT : CALL $func (line=".$text[$pos].")\n";
      $content .= str_repeat (" ", $depth). $this->$func ($text, $depth, $pos);
    }
    return $content;
  }

  /** Return the Type of object in the provided line
   * p, ul, ol, code or NONE (empty line)
   * @param string $line The line to get the type
   * @return string The type of the line
   */
  private function lineType ($line)
  {
    // The $line{0} syntax was removed in PHP 8.0 : use the brackets
    if (! isset ($line[0]))
      return "NONE";
    if (preg_match ("/^[ \t]*[+*-] /", $line) === 1)
      return "ul";
    if (preg_match ("/^[ \t]*[0-9]+\. /", $line) === 1)
      return "ol";
    if (preg_match ("/^( |\t)+/", $line) === 1)
      return "code";
    return "p";
  }

  /** Return the depth of the provided line
   * @param string $line Line to analyze
   * @return integer the depth of the line : the number of spaces at the
   * beginning of the line (the tabs are not counted)
   */
  private function depth ($line)
  {
    return strspn ($line, " ");
  }
}