514 lines
18 KiB
PHP
514 lines
18 KiB
PHP
<?php
|
|
|
|
/**
|
|
* DomFramework
|
|
* @package domframework
|
|
* @author Dominique Fournier <dominique@fournier38.fr>
|
|
* @license BSD
|
|
*/
|
|
|
|
namespace Domframework;
|
|
|
|
/**
 * Convert the Markdown text to html format
 *
 * The conversion runs in three stages:
 *  1. html() entity-encodes the whole text and applies the multi-line rules
 *     (SeText titles, separators);
 *  2. detectBlock() walks the lines and dispatches each one, based on its
 *     indentation and on lineType(), to a type*() block renderer;
 *  3. searchReplace() applies the inline rules (emphasis, links, ATX
 *     titles...) to the text of a paragraph or of a list item.
 */
class Markdown
{
    /**
     * To debug the markdown analyzer, activate the option.
     * When true, the methods echo a trace of their calls and results.
     */
    public $debug = false;

    /**
     * The list of the HTML elements used by block
     * (not referenced by any method of this class)
     */
    private $blockid = ["<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>",
        "<hr/>"];

    /**
     * Convert the markdown text to html
     * @param string $markdown The markdown to convert
     * @return string The HTML code
     */
    public function html($markdown)
    {
        $markdown = rtrim((string) $markdown);
        // The flags are explicit so that the result does not depend on the PHP
        // version: the generated attributes are delimited by single quotes, so
        // the single quotes of the text must always be encoded too
        $markdown = htmlentities($markdown, ENT_QUOTES | ENT_SUBSTITUTE);

        // Here are the regexp on multilines
        $search = [];
        $replace = [];
        // Titles with underline (SeText)
        // Title1
        // ======
        $search[] = "/(.+)\\n==+$/Um";
        $replace[] = "</p><h1>\\1</h1>\n<p>";
        // Title2
        // ------
        $search[] = "/(.+)\\n--+$/Um";
        $replace[] = "</p><h2>\\1</h2>\n<p>";

        // SEPARATORS : *** --- ___ * * * - - - _ _ _
        // Must be placed before EMPHASIS
        $search[] = "/^[*_-] ?[*_-] ?[*_-]$/Um";
        $replace[] = "</p><hr/>\n<p>";

        $markdown = preg_replace($search, $replace, $markdown);

        $textArray = explode("\n", $markdown);
        $pos = 0;
        $html = $this->detectBlock($textArray, 0, $pos);
        // The rules above close and reopen paragraphs blindly : remove the
        // empty paragraphs they leave behind
        $html = str_replace("<p></p>", "", $html);
        $html = str_replace("<p> </p>", "", $html);
        return trim($html);
    }

    /**
     * Search and replace in the paragraph on one line
     * @param string $line The line to analyze (already in HTML entities)
     * @return string The line with the inline Markdown converted to HTML
     */
    private function searchReplace($line)
    {
        if ($this->debug) {
            echo "CALL searchReplace ($line)\n";
        }
        // REMEMBER : THE $line is already in HTML ENTITIES !
        // The double quote is &quot;, < is &lt; and > is &gt;
        $res = $line;

        // Manage the <hr/> separators
        $separators = ["***", "---", "___", "* * *", "- - -", "_ _ _"];
        foreach ($separators as $pattern) {
            $start = 0;
            while (true) {
                $start = strpos($res, $pattern, $start);
                if ($start === false) {
                    break;
                }
                if ($start > 0 && $res[$start - 1] === "\\") {
                    // Escaped separator : keep it, remove the backslash only.
                    // The text is shifted by one char, so the next search
                    // starts after the beginning of the kept pattern
                    $res = substr($res, 0, $start - 1) . substr($res, $start);
                } else {
                    $res = substr($res, 0, $start) . "<hr/>" .
                        substr($res, $start + strlen($pattern));
                }
            }
        }

        // Manage the emphasis and code correctly with the backslash.
        // Each rule is : delimiter, opening tag, closing tag. The order is
        // important : the double delimiters are handled before the single ones
        $emphasis = [
            ["__", "<strong>", "</strong>"],
            ["_", "<em>", "</em>"],
            ["**", "<strong>", "</strong>"],
            ["*", "<em>", "</em>"],
            ["`", "<code>", "</code>"],
        ];
        foreach ($emphasis as $rule) {
            $pattern = $rule[0];
            $open = $rule[1];
            $close = $rule[2];
            $len = strlen($pattern);
            $start = 0;
            while (true) {
                $start = strpos($res, $pattern, $start);
                if ($start === false) {
                    break;
                }
                $end = strpos($res, $pattern, $start + $len);
                if ($end === false) {
                    break;
                }
                if ($res[$start + 1] === $pattern) {
                    // Pattern too long, not this test : skip the whole run.
                    // Only the one char delimiters can satisfy this test
                    $start += $len + strspn($res, $pattern, $start + 1);
                    continue;
                }
                if ($start > 0 && $res[$start - 1] === "\\") {
                    // Escaped delimiter : remove the backslash and skip the
                    // ending pattern, which is now one char nearer
                    $res = substr($res, 0, $start - 1) . substr($res, $start);
                    $end--;
                } else {
                    // It is the real pattern found, without backslash. Wrap
                    // the content in the tags, unless it is blank
                    $content = substr($res, $start + $len, $end - $start - $len);
                    if (trim($content) !== "") {
                        $res = substr($res, 0, $start) . $open . $content .
                            $close . substr($res, $end + $len);
                    }
                }
                $start = $end + $len;
            }
        }

        // Manage the others cases
        $search = [];
        $replace = [];
        // Titles short
        // == TITLE1
        $search[] = '~^([^\\\\]|^)(==+ (.+)( ==+)?)$~Um';
        $replace[] = '</p>' . "\n" . '<h1>\3</h1>' . "\n" . '<p>';
        // -- TITLE2
        $search[] = '~^([^\\\\]|^)(--+ (.+)( --+)?)$~Um';
        $replace[] = '</p>' . "\n" . '<h2>\3</h2>' . "\n" . '<p>';

        // LINKS (can be relative)
        // images
        $search[] = '~([^\\\\]|^)(!\[(.+)\]\((.+)\))~';
        $replace[] = '\1<img src=\'\4\' alt=\'\3\'/>';
        // [Google Site](http://google.fr/ "With help bubble")
        // The quotes around the title are received as &quot;
        $search[] = '~([^\\\\!]|^)(\[(.+)\]\((.+) &quot;(.+)&quot;\))~';
        $replace[] = '\1<a href=\'\4\' title=\'\5\'>\3</a>';
        // [Google Site](http://google.fr/)
        $search[] = '~([^\\\\!]|^)(\[(.+)\]\((.+)\))~U';
        $replace[] = '\1<a href=\'\4\'>\3</a>';

        // Automatics links :
        // <http://dominique.fournier38.fr>
        // <dominique@fournier38.fr>
        // The angle brackets are received as &lt; and &gt;
        $search[] = '~([^\\\\]|^)(&lt;(https?://.+)&gt;)~U';
        $replace[] = '\1<a href=\'\3\'>\3</a>';
        $search[] = '~([^\\\\]|^)(&lt;(.+@.+)&gt;)~U';
        $replace[] = '\1<a href=\'mailto:\3\'>\3</a>';
        // The links must not allow the <em> : redo the conversion
        $search[] = '~(<a href=\'.*)<em>(.*)</em>(.*\'>.*)<em>(.*)</em>(.*</a>)~';
        $replace[] = '\1_\2_\3_\4_\5';
        // TODO : Links by reference :
        // Here is a small text written by [Michel Fortin][mf].
        // [mf]: http://michelf.ca/ "My web site"

        // TITLES
        // Titles ATX (Optional sharp at the end)
        // ###### Title6
        $search[] = '~^([^\\\\]|^)?(###### (.+)( +#+)?)$~Um';
        $replace[] = '</p><h6>\3</h6><p>';
        // ##### Title5
        $search[] = '~^([^\\\\]|^)?(##### (.+)( +#+)?)$~Um';
        $replace[] = '</p><h5>\3</h5><p>';
        // #### Title4
        $search[] = '~^([^\\\\]|^)?(#### (.+)( +#+)?)$~Um';
        $replace[] = '</p><h4>\3</h4><p>';
        // ### Title3
        $search[] = '~^([^\\\\]|^)?(### (.+)( +#+)?)$~Um';
        $replace[] = '</p><h3>\3</h3><p>';
        // ## Title2
        $search[] = '~^([^\\\\]|^)?(## (.+)( +#+)?)$~Um';
        $replace[] = '</p><h2>\3</h2><p>';
        // # Title1
        $search[] = '~^([^\\\\]|^)?(# (.+)( +#+)?)$~Um';
        $replace[] = '</p><h1>\3</h1><p>';

        // Remove the backslashes : for each rule above, add a twin rule which
        // matches the same markup preceded by a backslash, and keeps the
        // markup as plain text (group 2) without the backslash
        $prefixes = [
            '([^\\\\]|^)?',
            '([^\\\\]|^)',
            '([^\\\\!]|^)',
            '([^\\\\*]|^)',
            '([^\\\\_]|^)',
        ];
        $escapedSearch = [];
        $escapedReplace = [];
        foreach ($search as $s) {
            $escaped = str_replace($prefixes, '([\\\\])', $s);
            if ($escaped === $s) {
                // The rule has no "not escaped" prefix : it has no twin.
                // Running it again with '\2' would destroy its matches
                continue;
            }
            $escapedSearch[] = $escaped;
            $escapedReplace[] = '\2';
        }
        $search = array_merge($search, $escapedSearch);
        $replace = array_merge($replace, $escapedReplace);

        return preg_replace($search, $replace, $res);
    }

    /**
     * Return HTML code corresponding to the code block
     * @param array $text The Markdown text to translate split by \n
     * @param integer $depth The depth of current bloc (in number of space)
     * @param integer &$pos The start line number of the bloc. Moved to the
     * first line after the bloc
     * @return string The HTML code
     */
    private function typeCode($text, $depth, &$pos)
    {
        if ($this->debug) {
            echo "CALL typeCode (\$text, $depth, $pos)\n";
        }
        $posStart = $pos;
        $content = "";
        // End of code block : end of markdown text / depth lighter than $depth
        // The Code blocks can't be imbricated
        while (isset($text[$pos]) && $this->depth($text[$pos]) >= $depth) {
            if ($pos > $posStart) {
                $content .= "\n";
            }
            // Remove the indentation of the block
            $content .= substr($text[$pos], $depth);
            $pos++;
        }
        // Insert Geshi on $content
        if ($this->debug) {
            echo "RETURN typeCode : <pre><code>$content</code></pre>\n";
        }
        return "<pre><code>$content</code></pre>\n";
    }

    /**
     * Return HTML code corresponding to the OL block
     * @param array $text The Markdown text to translate split by \n
     * @param integer $depth The depth of current bloc (in number of space)
     * @param integer &$pos The start line number of the bloc
     * @return string The HTML code
     */
    private function typeOL($text, $depth, &$pos)
    {
        if ($this->debug) {
            echo "CALL typeOL (\$text, $depth, $pos)\n";
        }
        $content = $this->typeOLUL($text, $depth, $pos, "ol");
        if ($this->debug) {
            echo "RETURN typeOL : $content\n";
        }
        return $content;
    }

    /**
     * Return HTML code corresponding to the UL block
     * @param array $text The Markdown text to translate split by \n
     * @param integer $depth The depth of current bloc (in number of space)
     * @param integer &$pos The start line number of the bloc
     * @return string The HTML code
     */
    private function typeUL($text, $depth, &$pos)
    {
        if ($this->debug) {
            echo "CALL typeUL (\$text, $depth, $pos)\n";
        }
        $content = $this->typeOLUL($text, $depth, $pos, "ul");
        if ($this->debug) {
            echo "RETURN typeUL : $content\n";
        }
        return $content;
    }

    /**
     * Return the HTML code corresponding to the OL/UL block
     * @param array $text The Markdown text to translate split by \n
     * @param integer $depth The depth of current bloc (in number of space)
     * @param integer &$pos The start line number of the bloc. Moved to the
     * first line after the bloc
     * @param string $type The block type : "ul" or "ol"
     * @return string The HTML code
     */
    private function typeOLUL($text, $depth, &$pos, $type)
    {
        if ($this->debug) {
            echo "CALL typeOLUL (\$text, $depth, $pos, $type)\n";
        }
        $content = "";
        // End of OL/UL block : end of markdown text / depth lighter than $depth /
        // linetype changed
        while (
            isset($text[$pos]) &&
            $this->depth($text[$pos]) >= $depth &&
            $this->lineType($text[$pos]) === $type
        ) {
            if ($this->debug) {
                echo "Start while $pos\n";
            }
            $content .= str_repeat(" ", ($depth + 2)) . "<li>";
            $blockContent = $text[$pos];
            $pos++;
            // Look at continuous lines : not empty, not a new item of this
            // list, and with the same depth. They are joined to the item
            while (
                isset($text[$pos]) &&
                $this->lineType($text[$pos]) !== "NONE" &&
                $this->lineType($text[$pos]) !== $type &&
                $this->depth($text[$pos]) === $depth
            ) {
                if ($this->debug) {
                    echo "Continuous line : " . $pos . "\n";
                }
                $blockContent .= " " . $text[$pos];
                $pos++;
            }
            // Remove the marker (number and dot, or bullet) and the spaces at
            // the start of the item
            $matches = [];
            if ($type === "ol") {
                preg_match("/^( *)[0-9]+\. +(.*)/", $blockContent, $matches);
            } else {
                preg_match("/^( *)[-+*] +(.*)/", $blockContent, $matches);
            }
            $lineTxt = isset($matches[2]) ? $matches[2] : $blockContent;
            $content .= $this->searchReplace($lineTxt);

            if (isset($text[$pos]) && $this->depth($text[$pos]) > $depth) {
                // A deeper line follows : it is a sub-block of this item
                if ($this->debug) {
                    echo "Detect Block\n";
                }
                $content .= "\n" .
                    $this->detectBlock($text, $this->depth($text[$pos]), $pos) .
                    str_repeat(" ", ($depth + 2)) . "</li>\n";
            } else {
                $content .= "</li>\n";
            }
        }
        if ($this->debug) {
            echo "RETURN typeOLUL : <$type>\n$content</$type>\n";
        }
        return "<$type>\n$content" . str_repeat(" ", $depth) . "</$type>\n";
    }

    /**
     * Return HTML code corresponding to the NONE block
     * The NONE type exists only on empty strings. Just skip the current and
     * empty line, and return an empty string
     * @param array $text The Markdown text to translate split by \n
     * @param integer $depth The depth of the current bloc (in number of space)
     * @param integer &$pos The start line number of the bloc
     * @return string Always an empty string
     */
    private function typeNONE($text, $depth, &$pos)
    {
        if ($this->debug) {
            echo "CALL typeNONE (\$text, $depth, $pos)\n";
        }
        $pos++;
        return "";
    }

    /**
     * Return HTML code corresponding to the P block
     * @param array $text The Markdown text to translate split by \n
     * @param integer $depth The depth of current bloc (in number of space)
     * @param integer &$pos The start line number of the bloc. Moved to the
     * first line after the bloc
     * @return string The HTML code
     */
    private function typeP($text, $depth, &$pos)
    {
        if ($this->debug) {
            echo "CALL typeP (\$text, $depth, $pos)\n";
        }
        $content = "";
        // End of P block : end of markdown text / depth lighter than $depth /
        // linetype changed
        $Pinc = $pos;
        while (
            isset($text[$pos]) &&
            $this->depth($text[$pos]) === $depth &&
            $this->lineType($text[$pos]) === "p"
        ) {
            // "\x20\x20" is exactly two spaces
            if (substr($text[$pos], -2) === "\x20\x20") {
                // Two spaces at end of line : add <br/>
                $content .= $this->searchReplace(substr($text[$pos], 0, -2)) . "<br/>";
            } elseif ($pos > $Pinc && substr($content, -5) !== "<br/>") {
                // Add a space between two lines from the same block, if the
                // previous line does not end with a forced line break
                $content .= " " . $this->searchReplace($text[$pos]);
            } else {
                $content .= $this->searchReplace($text[$pos]);
            }
            $pos++;
        }
        if ($this->debug) {
            echo "RETURN typeP : <p>$content</p>\n";
        }
        return "<p>$content</p>\n";
    }

    /**
     * Detect the type of the text and call the appropriate function
     * @param array $text The Markdown text to translate split by \n
     * @param integer $depth The depth of current bloc (in number of space)
     * @param integer &$pos The start line number of the bloc. Moved to the
     * first line which is not handled by this call
     * @return string The HTML code
     */
    private function detectBlock($text, $depth, &$pos)
    {
        if ($this->debug) {
            echo "CALL detectBlock (\$text, $depth, $pos)\n";
        }
        $content = "";
        // detect the type and call the right type function
        while (isset($text[$pos])) {
            $lineDepth = $this->depth($text[$pos]);
            if ($lineDepth > $depth && $depth === 0) {
                // New block code : an indented line found from the root level
                if ($this->debug) {
                    echo "New block code\n";
                }
                $content .= $this->typeCode($text, $lineDepth, $pos);
                continue;
            }
            if ($lineDepth > $depth) {
                // Deeper line inside a nested block : analyze it recursively
                if ($this->debug) {
                    echo "CALL DEPTH > MINDEPTH (" . $this->depth($text[$pos]) .
                        " > $depth)\n";
                }
                $content .= $this->detectBlock($text, $lineDepth, $pos);
                continue;
            }
            if ($lineDepth < $depth) {
                // Back to a lighter depth : the caller handles the line
                if ($this->debug) {
                    echo "CALL DEPTH > MINDEPTH (" . $this->depth($text[$pos]) .
                        " < $depth)\n";
                }
                return $content;
            }

            // Same depth : dispatch to typeP, typeUL, typeOL, typeCode or
            // typeNONE (the PHP method names are case-insensitive)
            $type = $this->lineType($text[$pos]);
            $func = "type$type";
            if ($this->debug) {
                echo "FROM DETECT : CALL $func (line=" . $text[$pos] . ")\n";
            }
            $content .= str_repeat(" ", $depth) . $this->$func($text, $depth, $pos);
        }
        return $content;
    }

    /**
     * Return the Type of object in the provided line
     * p, ul, ol, code, or NONE if the line is empty
     * @param string $line The line to get the type
     * @return string The type of the line
     */
    private function lineType($line)
    {
        if (! isset($line[0])) {
            return "NONE";
        }
        if (preg_match("/^[ \t]*[+*-] /", $line) === 1) {
            return "ul";
        }
        if (preg_match("/^[ \t]*[0-9]+\. /", $line) === 1) {
            return "ol";
        }
        if (preg_match("/^( |\t)+/", $line) === 1) {
            return "code";
        }
        return "p";
    }

    /**
     * Return the depth of the provided line
     * @param string $line Line to analyze
     * @return integer The number of spaces at the start of the line (the
     * tabulations are not counted)
     */
    private function depth($line)
    {
        return strspn($line, " ");
    }
}
|