markdown: support nested ul/ol entries with indentation

git-svn-id: https://svn.fournier38.fr/svn/ProgSVN/trunk@1798 bf3deb0d-5f1a-0410-827f-c0cc1f45334c
This commit is contained in:
2014-09-09 11:43:04 +00:00
parent 70166c82d6
commit 0d0a366657

View File

@@ -1,25 +1,17 @@
<?php <?php
/** DomFramework
@package domframework
@author Dominique Fournier <dominique@fournier38.fr> */
error_reporting (E_ALL);
/** Markdown management */
class markdown class markdown
{ {
/** To debug the markdown analyzer, activate the option */
/** debug variable */
public $debug = false; public $debug = false;
/** Convert the markdown language to HTML private $blockid = array ("<h1>","<h2>","<h3>","<h4>","<h5>","<h6>",
Return the HTML string "<hr/>");
@param string $mark Message in markdown syntax to display */
public function html ($mark) /** Convert the markdown text to html */
public function html ($markdown)
{ {
if ($this->debug && file_exists ("/tmp/debugMD")) $markdown = rtrim ($markdown);
unlink ("/tmp/debugMD"); $markdown = htmlentities ($markdown);
$res = "";
$mark = htmlentities ($mark, ENT_QUOTES);
// Here are the regexp on multilines // Here are the regexp on multilines
$search = array (); $search = array ();
@@ -27,44 +19,34 @@ class markdown
// Titles with underline (SeText) // Titles with underline (SeText)
// Titre1 // Titre1
// ====== // ======
$search[] = "/^(.+)\\n==+$\\n/Um"; $search[] = "/(.+)\\n==+$/Um";
$replace[] = "\n<h1>\\1</h1>\n"; $replace[] = "</p><h1>\\1</h1>\n<p>";
// Titre2 // Titre2
// ------ // ------
$search[] = "/^(.+)\\n--+$\\n/Um"; $search[] = "/(.+)\\n--+$/Um";
$replace[] = "\n<h2>\\1</h2>\n"; $replace[] = "</p><h2>\\1</h2>\n<p>";
// SEPARATORS : *** --- ___ * * * - - - _ _ _ // SEPARATORS : *** --- ___ * * * - - - _ _ _
// Must be placed before EMPHASIS // Must be placed before EMPHASIS
$search[] = "/^[*_-] ?[*_-] ?[*_-]$/Um"; $search[] = "/^[*_-] ?[*_-] ?[*_-]$/Um";
$replace[] = "\n<hr/>\n"; $replace[] = "</p><hr/>\n<p>";
// End of line with continuous on second line, without double spaces : add $markdown = preg_replace ($search, $replace, $markdown);
// space
//$search[] = "/(\w+)\\n(\w+)/Um"; $replace[] = "\\1 \\2";
$mark = preg_replace ($search, $replace, $mark);
$res = $this->paragraph ($mark); $textArray = explode ("\n", $markdown);
$pos = 0;
$res = str_replace ("\n<p></p>", "", $res); $html = $this->detectBlock ($textArray, 0, $pos);
$res = str_replace ("<p></p>", "", $res); $html = str_replace ("<p></p>", "", $html);
$res = str_replace ("<p> </p>", "", $res); $html = str_replace ("<p> </p>", "", $html);
$res = str_replace ("<p>\n</p>\n", "", $res); $html = trim ($html);
return $res; return $html;
} }
/** Translate the Markdown paragraph in HTML /** Search and replace in the paragraph on one line */
return the html */ private function searchReplace ($line)
private function paragraph ($mark)
{ {
// Think thereis already htmlentities passed on $mark !!!
$timeStart = microtime (TRUE);
$timeregex = 0;
// Initialization of convertions
$search = array (); $search = array ();
$replace = array (); $replace = array ();
// Titles short // Titles short
// == TITRE1 // == TITRE1
$search[] = "/^==+ (.+)( ==+)?$/Um"; $search[] = "/^==+ (.+)( ==+)?$/Um";
@@ -122,294 +104,212 @@ class markdown
// # Title1 // # Title1
$search[] = "/^# (.+)( +#+)?$/Um"; $search[] = "/^# (.+)( +#+)?$/Um";
$replace[] = "</p><h1>\\1</h1><p>"; $replace[] = "</p><h1>\\1</h1><p>";
// End of line with double space : <br/> return preg_replace ($search, $replace, $line);
$search[] = "/( )$/Um"; $replace[] = "<br/>"; }
// Cleanning the markdown text /** Return HTML code corresponding to the code block
$mark = str_replace ("\t", " ", $mark); @param array $text The Markdown text to translate split by \n
if (trim ($mark) === "") @param integer $depth The depth of current bloc (in number of space)
return ""; @param integer $pos The start line number of the bloc */
private function typeCode ($text, $depth, &$pos)
$spacer = " "; {
$res = ""; if ($this->debug) echo "CALL typeCode (\$text, $depth, $pos)\n";
// P, OL, UL (but not LI !) $posStart = $pos;
// Use to found the changing of types $content = "";
$typeStack = array (); // End of code block : end of markdown text / depth lighter than $depth
// Number of spaces while (isset ($text[$pos]) &&
$indentStack = array (-1); $this->depth($text[$pos]) >= $depth)
// All the HTML stack (with LI)
$htmlStack = array ();
$lines = explode ("\n", $mark);
$timeInit = microtime (TRUE) - $timeStart;
$blockLI = false;
foreach ($lines as $nb=>$line)
{ {
$this->debugMKD ("DEBUT:$line"); // The Code blocks can't be imbricated
if (substr (ltrim ($line), 0, 1) === "<") if ($pos > $posStart)
$content .= "\n";
$content .= substr ($text[$pos], $depth);
$pos++;
}
// Insert Geshi on $content
if ($this->debug)
echo "RETURN typeCode : <pre><code>$content</code></pre>\n";
return "<pre><code>$content</code></pre>\n";
}
/** Render an ordered list (OL) block as HTML.
    Thin wrapper around typeOLUL() with the list type fixed to "ol"; the only
    thing added here is the optional debug trace.
    @param array $text The Markdown text to translate, split by \n
    @param integer $depth The depth of the current block (in number of spaces)
    @param integer $pos The line number where the block starts. Handed by
    reference to typeOLUL(), which advances it
    @return string The value returned by typeOLUL() */
private function typeOL ($text, $depth, &$pos)
{
  if ($this->debug)
    echo "CALL typeOL (\$text, $depth, $pos)\n";
  // All the list parsing is shared with the UL variant
  $html = $this->typeOLUL ($text, $depth, $pos, "ol");
  if ($this->debug)
    echo "RETURN typeOL : ".$html."\n";
  return $html;
}
/** Render an unordered list (UL) block as HTML.
    Thin wrapper around typeOLUL() with the list type fixed to "ul"; the only
    thing added here is the optional debug trace.
    @param array $text The Markdown text to translate, split by \n
    @param integer $depth The depth of the current block (in number of spaces)
    @param integer $pos The line number where the block starts. Handed by
    reference to typeOLUL(), which advances it
    @return string The value returned by typeOLUL() */
private function typeUL ($text, $depth, &$pos)
{
  if ($this->debug)
    echo "CALL typeUL (\$text, $depth, $pos)\n";
  // All the list parsing is shared with the OL variant
  $html = $this->typeOLUL ($text, $depth, $pos, "ul");
  if ($this->debug)
    echo "RETURN typeUL : ".$html."\n";
  return $html;
}
private function typeOLUL ($text, $depth, &$pos, $type)
{
if ($this->debug) echo "CALL typeOLUL (\$text, $depth, $pos, $type)\n";
$content = "";
// End of OL/UL block : end of markdown text / depth lighter than $depth /
// linetype changed
$blockStart = $pos;
$blockContent = "";
while (isset ($text[$pos]) &&
$this->depth($text[$pos]) >= $depth &&
$this->lineType ($text[$pos]) === $type)
{
if ($this->debug)
echo "Start while $pos\n";
if (1)
{ {
$this->debugMKD ("HTML : Skipped"); $content .= str_repeat (" ", ($depth+2))."<li>";
$res .= $line; $blockContent .= $text[$pos];
continue; $pos++;
// Look at continuous lines
while (isset ($text[$pos]) &&
$this->lineType ($text[$pos]) !== "NONE" &&
$this->lineType ($text[$pos]) !== $type &&
$this->depth($text[$pos]) === $depth)
{
if ($this->debug)
echo "Continuous line : ".$pos."\n";
$blockContent .= " ".$text[$pos];
$pos++;
continue;
}
// Indent the li and remove the number and dot and space at start
if ($type === "ol")
preg_match ("/^( *)[0-9]+\. +(.*)/", $blockContent, $matches);
else
preg_match ("/^( *)[-+*] +(.*)/", $blockContent, $matches);
if (!isset ($matches[2]))
$lineTxt = $blockContent;
else
$lineTxt = $matches[2];
$lineTxt = $this->searchReplace ($lineTxt);
$content .= $lineTxt;
$blockStart = $pos;
$blockContent = "";
} }
$type = $this->paragraphType ($line); if (isset ($text[$pos]) && $this->depth($text[$pos]) > $depth)
$this->debugMKD ("DEBUT: Type='$type'");
$matches = array ();
$indent = strspn ($line, " ");
if (end ($typeStack) === "code" && $indent >= end ($indentStack))
{ {
$this->debugMKD ("DEBUT: Force type=CODE"); if ($this->debug)
$type = "code"; echo "Detect Block\n";
$lineTxt = $line; $content .= "\n".
$indent = end ($indentStack); $this->detectBlock ($text, $this->depth($text[$pos]), $pos).
str_repeat (" ", ($depth+2))."</li>\n";
} }
else else
{ {
switch ($type) $content .= "</li>\n";
{
case "ol" :
preg_match ("/^( *)[0-9]+\. +(.*)/", $line, $matches);
if (!isset ($matches[2]))
$lineTxt = $line;
else
$lineTxt = $matches[2];
break ;
case "ul" :
preg_match ("/^( *)[-+*] +(.*)/", $line, $matches);
if (!isset ($matches[2]))
$lineTxt = $line;
else
$lineTxt = $matches[2];
break ;
default:
$lineTxt = $line;
}
} }
$this->debugMKD ("DEBUT: Indent='$indent'");
$this->debugMKD ("DEBUT: indentStack=".print_r ($indentStack, TRUE));
$this->debugMKD ("DEBUT: typeStack=".print_r ($typeStack, TRUE));
// Spacing
if ($indent < end ($indentStack))
{
$this->debugMKD ("DEB1 : Ending of block");
if (end ($htmlStack) === "li")
{
$this->debugMKD ("Pending <Li> : closing");
$this->debugMKD ("</li>");
$res .= "</li>\n";
array_pop ($htmlStack);
}
if (strlen ($res) && in_array ($type, array ("ul","ol","p")))
{
$this->debugMKD ("Add \\n because of new block and not at start");
$res .= "\n";
}
// Remove last \n to put closing tag at the end of line
$res = substr ($res, 0, -1);
$oldType = array_pop ($typeStack);
if ($oldType === "code")
{
$this->debugMKD ("</code></pre>");
$res .= "</code></pre>";
array_pop ($htmlStack);
array_pop ($indentStack);
}
else
{
$this->debugMKD (str_repeat (" ", end ($indentStack))."</$oldType>");
$res .= str_repeat (" ", end ($indentStack))."</$oldType>";
}
array_pop ($indentStack);
array_pop ($htmlStack);
if ($type === "ol" || $type === "ul")
{
$this->debugMKD ("DEB2 : Pending <Li> : closing");
$this->debugMKD ("</li>");
$res .= "</li>";
array_pop ($htmlStack);
}
if ($type === "")
{
$this->debugMKD ("DEB3 : End of block");
array_pop ($indentStack);
$res .= "\n";
continue;
}
}
if ($indent == end ($indentStack))
{
$this->debugMKD ("DEB1 : Continuous block $type/".end ($typeStack));
if (end ($htmlStack) === "li" && $type !== "p")
{
$this->debugMKD ("Pending <Li> : closing");
$this->debugMKD ("</li>");
$res .= "</li>";
array_pop ($htmlStack);
$blockLI = false;
}
elseif (end ($htmlStack) === "li" && $type === "p")
{
$this->debugMKD ("DEB2 : Continuous LI block");
// Suppress \n on last line
// $res = substr ($res, 0, -1);
$type = prev ($htmlStack);
$blockLI = true;
$lineTxt = " ".$lineTxt;
}
if (end ($htmlStack) === "p" && $type === "p" &&
substr ($res, -5) !== "<br/>")
{
// Add a space to agregate two lines
$this->debugMKD ("DEB2 : Adding aggragate space");
$res .= " ";
}
if ($type !== end ($typeStack))
{
$this->debugMKD ("DEB2 : Continuous Block but type change");
if (end ($typeStack) !== FALSE)
{
// Remove last \n to put closing tag at the end of line
//$res = substr ($res, 0, -1);
$oldType = array_pop ($typeStack);
$this->debugMKD (str_repeat (" ", end ($indentStack)).
"</$oldType>");
$res .= str_repeat (" ", end ($indentStack))."</$oldType>";
array_pop ($indentStack);
array_pop ($htmlStack);
}
if ($type !== "")
{
$this->debugMKD (str_repeat (" ", end ($indentStack))."<$type>");
$res .= str_repeat (" ", end ($indentStack))."<$type>";
$htmlStack[] = $type;
array_push ($indentStack, $indent);
array_push ($typeStack, $type);
}
}
if (($type === "ol" || $type === "ul") && $blockLI === false)
{
$this->debugMKD ("DEB2 : Adding li");
$htmlStack[] = "li";
$this->debugMKD (str_repeat (" ", $indent)."<li>");
$res .= "\n".str_repeat (" ", $indent)."<li>";
}
}
if ($indent > end ($indentStack) && end ($typeStack) !== "code")
{
// The code indentation should not be parsed as a new code : the
// <code></code> continue until the end of paragraph
$this->debugMKD ("DEB1 : Starting a new block");
if ($type === "")
{
$this->debugMKD ("No type : skipped");
continue;
}
if (strlen ($res) && in_array ($type, array ("ul","ol","p")))
{
$this->debugMKD ("Add \\n because of new block");
$res .= "\n";
}
if (end ($indentStack))
array_pop ($indentStack);
if ($type === "code")
{
// Code need a pre before code
if (end ($typeStack))
{
$this->debugMKD ("DEB2 : CODE : Close older HTML");
$oldType = array_pop ($typeStack);
$this->debugMKD (str_repeat (" ", end ($indentStack)).
"</$oldType>");
$res .= str_repeat (" ", end ($indentStack))."</$oldType>";
array_pop ($indentStack);
array_pop ($htmlStack);
}
$typetmp = "pre><code";
$htmlStack[] = "pre";
$htmlStack[] = "code";
array_push ($typeStack, "code");
}
else
{
$typetmp = $type;
$htmlStack[] = $type;
array_push ($typeStack, $type);
}
array_push ($indentStack, $indent);
$this->debugMKD (str_repeat (" ", $indent)."<$typetmp>");
$res .= str_repeat (" ", $indent)."<$typetmp>";
if ($type === "ol" || $type === "ul")
{
$this->debugMKD ("DEB2 : Adding li");
$htmlStack[] = "li";
$this->debugMKD (str_repeat (" ", $indent)."<li>");
$res .= "\n".str_repeat (" ", $indent)."<li>";
}
}
if ($type === "" && end ($indentStack))
{
$this->debugMKD ("DEB2 : Empty type");
// Remove last \n to put closing tag at the end of line
$res = substr ($res, 0, -1);
$oldType = array_pop ($typeStack);
$this->debugMKD (str_repeat (" ", end ($indentStack))."</$oldType>");
$res .= "\n".str_repeat (" ", end ($indentStack))."</$oldType>";
array_pop ($htmlStack);
}
// If code, there is no emphasis, email, and other conversions
if ($type !== "code")
{
$timetmp = microtime (TRUE);
$lineTxt = preg_replace ($search, $replace, $lineTxt);
$timeregex += (microtime (TRUE) - $timetmp);
}
$this->debugMKD (substr ($lineTxt, end ($indentStack))."");
$res .= substr ($lineTxt, end ($indentStack))."";
if ($type === "code")
$res .= "\n";
} }
if ($this->debug) echo "RETURN typeOLUL : <$type>\n$content</$type>\n";
return "<$type>\n$content".str_repeat (" ", $depth)."</$type>\n";
$this->debugMKD ("DEB1 : End of loop"); }
$htmlStack = array_reverse ($htmlStack);
foreach ($htmlStack as $i=>$type) /** Return HTML code corresponding to the NONE block
The NONE type exists only on empty strings. Just skip the current and
empty line, and return an empty string */
private function typeNONE ($text, $depth, &$pos)
{
  // $text and $depth are only used by the debug trace : the signature is
  // the same as the other type* handlers
  if ($this->debug)
    echo "CALL typeNONE (\$text, $depth, $pos)\n";
  // Step over the current line so the caller resumes on the next one
  ++$pos;
  // Nothing is produced for this line
  return "";
}
/** Return HTML code corresponding to the P block
@param array $text The Markdown text to translate split by \n
@param integer $depth The depth of current bloc (in number of space)
@param integer $pos The start line number of the bloc */
private function typeP ($text, $depth, &$pos)
{
if ($this->debug) echo "CALL typeP (\$text, $depth, $pos)\n";
$content = "";
// End of P block : end of markdown text / depth lighter than $depth /
// linetype changed
$Pinc = $pos;
while (isset ($text[$pos]) &&
$this->depth($text[$pos]) == $depth &&
$this->lineType ($text[$pos]) === "p")
{ {
if ($type === "code") if (substr ($text[$pos], -2) === " ")
$res = substr ($res, 0, -1); {
$this->debugMKD ("FIN</$type>"); // Two spaces at end of line : add <br/>
$res .= "</$type>"; $content .= $this->searchReplace (substr ($text[$pos], 0, -2)) ."<br/>";
if (($i+1) < count ($htmlStack) && $type !== "code") }
$res .= "\n"; elseif ($pos > $Pinc && substr ($content, -5) !== "<br/>")
{
// Add a space between two lines from the same block, if this is not
// the continuity of the block
$content .= " ".$this->searchReplace ($text[$pos]);
}
else
{
$content .= $this->searchReplace ($text[$pos]);
}
$pos++;
} }
if ($this->debug) echo "RETURN typeP : <p>$content</p>\n";
return "<p>$content</p>\n";
}
$this->debugMKD ("TimeInit=".($timeInit*1000)."ms"); /** Detect the type of the text and call the appropriate function *
$this->debugMKD ("TimeRegex=".($timeregex*1000)."ms"); @param array $text The Markdown text to translate split by \n
$this->debugMKD ("TimeAll=".((microtime (TRUE) - $timeStart)*1000)."ms"); @param integer $depth The depth of current bloc (in number of space)
$this->debugMKD ("-----------\n"); @param integer $pos The start line number of the bloc
return $res; @return the HTML code */
private function detectBlock ($text, $depth, &$pos)
{
if ($this->debug) echo "CALL detectBlock (\$text, $depth, $pos)\n";
$content = "";
$blockContent = "";
// detect the type and call the right type function
while (isset ($text[$pos]))
{
if ($this->depth ($text[$pos]) > $depth)
{
if ($this->debug)
echo "CALL DEPTH > MINDEPTH (".$this->depth ($text[$pos]).
" > $depth)\n";
$content .= $this->detectBlock ($text, $this->depth ($text[$pos]),
$pos);
continue;
}
elseif ($this->depth ($text[$pos]) < $depth)
{
if ($this->debug)
echo "CALL DEPTH > MINDEPTH (".$this->depth ($text[$pos]).
" < $depth)\n";
return $content;
}
$type = $this->lineType ($text[$pos]);
$func = "type$type";
if ($this->debug)
echo "FROM DETECT : CALL $func (line=".$text[$pos].")\n";
$content .= str_repeat (" ", $depth). $this->$func ($text, $depth, $pos);
}
return $content;
} }
/** Return the Type of object in the provided line /** Return the Type of object in the provided line
p, ul, ol, code */ p, ul, ol, code */
private function paragraphType ($line) private function lineType ($line)
{ {
if (! isset ($line{0})) if (! isset ($line{0}))
return ""; return "NONE";
if (preg_match ("/^[ \t]*[+*-] /", $line) === 1) if (preg_match ("/^[ \t]*[+*-] /", $line) === 1)
return "ul"; return "ul";
if (preg_match ("/^[ \t]*[0-9]+\. /", $line) === 1) if (preg_match ("/^[ \t]*[0-9]+\. /", $line) === 1)
@@ -419,13 +319,11 @@ class markdown
return "p"; return "p";
} }
/** Function to display the MarkDown debug */ /** Return the depth of the provided line
private function debugMKD ($msg) @param $line Line to analyze
@return the depth of the line */
private function depth ($line)
{ {
if ($this->debug === false) return; return strspn ($line, " ");
$trace = debug_backtrace();
$back = reset ($trace);
file_put_contents ("/tmp/debugMD", "[".$back["line"]."] $msg\n",
FILE_APPEND);
} }
} }