From 0d0a3666573461f0946b161dca1126a8e328449d Mon Sep 17 00:00:00 2001 From: Dominique Fournier Date: Tue, 9 Sep 2014 11:43:04 +0000 Subject: [PATCH] markdown : support of imbricated ul/ol entries with indentation git-svn-id: https://svn.fournier38.fr/svn/ProgSVN/trunk@1798 bf3deb0d-5f1a-0410-827f-c0cc1f45334c --- markdown.php | 530 +++++++++++++++++++++------------------------------ 1 file changed, 214 insertions(+), 316 deletions(-) diff --git a/markdown.php b/markdown.php index 4d98117..2d53dd2 100644 --- a/markdown.php +++ b/markdown.php @@ -1,25 +1,17 @@ */ - -error_reporting (E_ALL); -/** Markdown management */ class markdown { - - /** debug variable */ + /** To debug the markdown analyzer, activate the option */ public $debug = false; - /** Convert the markdown language to HTML - Return the HTML string - @param string $mark Message in markdown syntax to display */ - public function html ($mark) + private $blockid = array ("

","

","

","

","

","
", + "
"); + + /** Convert the markdown text to html */ + public function html ($markdown) { - if ($this->debug && file_exists ("/tmp/debugMD")) - unlink ("/tmp/debugMD"); - $res = ""; - $mark = htmlentities ($mark, ENT_QUOTES); + $markdown = rtrim ($markdown); + $markdown = htmlentities ($markdown); // Here are the regexp on multilines $search = array (); @@ -27,44 +19,34 @@ class markdown // Titles with underline (SeText) // Titre1 // ====== - $search[] = "/^(.+)\\n==+$\\n/Um"; - $replace[] = "\n

\\1

\n"; + $search[] = "/(.+)\\n==+$/Um"; + $replace[] = "

\\1

\n

"; // Titre2 // ------ - $search[] = "/^(.+)\\n--+$\\n/Um"; - $replace[] = "\n

\\1

\n"; + $search[] = "/(.+)\\n--+$/Um"; + $replace[] = "

\\1

\n

"; // SEPARATORS : *** --- ___ * * * - - - _ _ _ // Must be placed before EMPHASIS $search[] = "/^[*_-] ?[*_-] ?[*_-]$/Um"; - $replace[] = "\n


\n"; + $replace[] = "


\n

"; - // End of line with continuous on second line, without double spaces : add - // space - //$search[] = "/(\w+)\\n(\w+)/Um"; $replace[] = "\\1 \\2"; - $mark = preg_replace ($search, $replace, $mark); + $markdown = preg_replace ($search, $replace, $markdown); - $res = $this->paragraph ($mark); - - $res = str_replace ("\n

", "", $res); - $res = str_replace ("

", "", $res); - $res = str_replace ("

", "", $res); - $res = str_replace ("

\n

\n", "", $res); - return $res; + $textArray = explode ("\n", $markdown); + $pos = 0; + $html = $this->detectBlock ($textArray, 0, $pos); + $html = str_replace ("

", "", $html); + $html = str_replace ("

", "", $html); + $html = trim ($html); + return $html; } - /** Translate the Markdown paragraph in HTML - return the html */ - private function paragraph ($mark) + /** Search and replace in the paragraph on one line */ + private function searchReplace ($line) { - // Think thereis already htmlentities passed on $mark !!! - $timeStart = microtime (TRUE); - $timeregex = 0; - // Initialization of convertions $search = array (); $replace = array (); - - // Titles short // == TITRE1 $search[] = "/^==+ (.+)( ==+)?$/Um"; @@ -122,294 +104,212 @@ class markdown // # Title1 $search[] = "/^# (.+)( +#+)?$/Um"; $replace[] = "

\\1

"; - // End of line with double space :
- $search[] = "/( )$/Um"; $replace[] = "
"; + return preg_replace ($search, $replace, $line); + } - // Cleanning the markdown text - $mark = str_replace ("\t", " ", $mark); - if (trim ($mark) === "") - return ""; - - $spacer = " "; - $res = ""; - // P, OL, UL (but not LI !) - // Use to found the changing of types - $typeStack = array (); - // Number of spaces - $indentStack = array (-1); - // All the HTML stack (with LI) - $htmlStack = array (); - $lines = explode ("\n", $mark); - $timeInit = microtime (TRUE) - $timeStart; - $blockLI = false; - foreach ($lines as $nb=>$line) + /** Return HTML code corresponding to the code block + @param array $text The Markdown text to translate split by \n + @param integer $depth The depth of current bloc (in number of space) + @param integer $pos The start line number of the bloc */ + private function typeCode ($text, $depth, &$pos) + { + if ($this->debug) echo "CALL typeCode (\$text, $depth, $pos)\n"; + $posStart = $pos; + $content = ""; + // End of code block : end of markdown text / depth lighter than $depth + while (isset ($text[$pos]) && + $this->depth($text[$pos]) >= $depth) { - $this->debugMKD ("DEBUT:$line"); - if (substr (ltrim ($line), 0, 1) === "<") + // The Code blocks can't be imbricated + if ($pos > $posStart) + $content .= "\n"; + $content .= substr ($text[$pos], $depth); + $pos++; + } + // Insert Geshi on $content + if ($this->debug) + echo "RETURN typeCode :

$content
\n"; + return "
$content
\n"; + } + + /** Return HTML code corresponding to the OL block + @param array $text The Markdown text to translate split by \n + @param integer $depth The depth of current bloc (in number of space) + @param integer $pos The start line number of the bloc */ + private function typeOL ($text, $depth, &$pos) + { + if ($this->debug) echo "CALL typeOL (\$text, $depth, $pos)\n"; + $content = $this->typeOLUL ($text, $depth, $pos, "ol"); + if ($this->debug) echo "RETURN typeOL : $content\n"; + return $content; + } + + /** Return HTML code corresponding to the UL block + @param array $text The Markdown text to translate split by \n + @param integer $depth The depth of current bloc (in number of space) + @param integer $pos The start line number of the bloc */ + private function typeUL ($text, $depth, &$pos) + { + if ($this->debug) echo "CALL typeUL (\$text, $depth, $pos)\n"; + $content = $this->typeOLUL ($text, $depth, $pos, "ul"); + if ($this->debug) echo "RETURN typeUL : $content\n"; + return $content; + } + + private function typeOLUL ($text, $depth, &$pos, $type) + { + if ($this->debug) echo "CALL typeOLUL (\$text, $depth, $pos, $type)\n"; + $content = ""; + // End of OL/UL block : end of markdown text / depth lighter than $depth / + // linetype changed + $blockStart = $pos; + $blockContent = ""; + while (isset ($text[$pos]) && + $this->depth($text[$pos]) >= $depth && + $this->lineType ($text[$pos]) === $type) + { + if ($this->debug) + echo "Start while $pos\n"; + if (1) { - $this->debugMKD ("HTML : Skipped"); - $res .= $line; - continue; + $content .= str_repeat (" ", ($depth+2))."
  • "; + $blockContent .= $text[$pos]; + $pos++; + // Look at continuous lines + while (isset ($text[$pos]) && + $this->lineType ($text[$pos]) !== "NONE" && + $this->lineType ($text[$pos]) !== $type && + $this->depth($text[$pos]) === $depth) + { + if ($this->debug) + echo "Continuous line : ".$pos."\n"; + $blockContent .= " ".$text[$pos]; + $pos++; + continue; + } + // Indent the li and remove the number and dot and space at start + if ($type === "ol") + preg_match ("/^( *)[0-9]+\. +(.*)/", $blockContent, $matches); + else + preg_match ("/^( *)[-+*] +(.*)/", $blockContent, $matches); + if (!isset ($matches[2])) + $lineTxt = $blockContent; + else + $lineTxt = $matches[2]; + $lineTxt = $this->searchReplace ($lineTxt); + $content .= $lineTxt; + $blockStart = $pos; + $blockContent = ""; } - $type = $this->paragraphType ($line); - $this->debugMKD ("DEBUT: Type='$type'"); - $matches = array (); - $indent = strspn ($line, " "); - if (end ($typeStack) === "code" && $indent >= end ($indentStack)) + if (isset ($text[$pos]) && $this->depth($text[$pos]) > $depth) { - $this->debugMKD ("DEBUT: Force type=CODE"); - $type = "code"; - $lineTxt = $line; - $indent = end ($indentStack); + if ($this->debug) + echo "Detect Block\n"; + $content .= "\n". + $this->detectBlock ($text, $this->depth($text[$pos]), $pos). + str_repeat (" ", ($depth+2))."
  • \n"; } else { - switch ($type) - { - case "ol" : - preg_match ("/^( *)[0-9]+\. +(.*)/", $line, $matches); - if (!isset ($matches[2])) - $lineTxt = $line; - else - $lineTxt = $matches[2]; - break ; - case "ul" : - preg_match ("/^( *)[-+*] +(.*)/", $line, $matches); - if (!isset ($matches[2])) - $lineTxt = $line; - else - $lineTxt = $matches[2]; - break ; - default: - $lineTxt = $line; - } + $content .= "\n"; } - - $this->debugMKD ("DEBUT: Indent='$indent'"); - $this->debugMKD ("DEBUT: indentStack=".print_r ($indentStack, TRUE)); - $this->debugMKD ("DEBUT: typeStack=".print_r ($typeStack, TRUE)); - - // Spacing - if ($indent < end ($indentStack)) - { - $this->debugMKD ("DEB1 : Ending of block"); - if (end ($htmlStack) === "li") - { - $this->debugMKD ("Pending
  • : closing"); - $this->debugMKD ("
  • "); - $res .= "\n"; - array_pop ($htmlStack); - } - if (strlen ($res) && in_array ($type, array ("ul","ol","p"))) - { - $this->debugMKD ("Add \\n because of new block and not at start"); - $res .= "\n"; - } - - - // Remove last \n to put closing tag at the end of line - $res = substr ($res, 0, -1); - $oldType = array_pop ($typeStack); - if ($oldType === "code") - { - $this->debugMKD (""); - $res .= ""; - array_pop ($htmlStack); - array_pop ($indentStack); - } - else - { - $this->debugMKD (str_repeat (" ", end ($indentStack)).""); - $res .= str_repeat (" ", end ($indentStack)).""; - } - array_pop ($indentStack); - array_pop ($htmlStack); - if ($type === "ol" || $type === "ul") - { - $this->debugMKD ("DEB2 : Pending
  • : closing"); - $this->debugMKD ("
  • "); - $res .= ""; - array_pop ($htmlStack); - } - - if ($type === "") - { - $this->debugMKD ("DEB3 : End of block"); - array_pop ($indentStack); - $res .= "\n"; - continue; - } - } - - if ($indent == end ($indentStack)) - { - $this->debugMKD ("DEB1 : Continuous block $type/".end ($typeStack)); - if (end ($htmlStack) === "li" && $type !== "p") - { - $this->debugMKD ("Pending
  • : closing"); - $this->debugMKD ("
  • "); - $res .= ""; - array_pop ($htmlStack); - $blockLI = false; - } - elseif (end ($htmlStack) === "li" && $type === "p") - { - $this->debugMKD ("DEB2 : Continuous LI block"); - // Suppress \n on last line -// $res = substr ($res, 0, -1); - $type = prev ($htmlStack); - $blockLI = true; - $lineTxt = " ".$lineTxt; - } - - if (end ($htmlStack) === "p" && $type === "p" && - substr ($res, -5) !== "
    ") - { - // Add a space to agregate two lines - $this->debugMKD ("DEB2 : Adding aggragate space"); - $res .= " "; - } - - if ($type !== end ($typeStack)) - { - $this->debugMKD ("DEB2 : Continuous Block but type change"); - if (end ($typeStack) !== FALSE) - { - // Remove last \n to put closing tag at the end of line - //$res = substr ($res, 0, -1); - $oldType = array_pop ($typeStack); - $this->debugMKD (str_repeat (" ", end ($indentStack)). - ""); - $res .= str_repeat (" ", end ($indentStack)).""; - array_pop ($indentStack); - array_pop ($htmlStack); - } - - if ($type !== "") - { - $this->debugMKD (str_repeat (" ", end ($indentStack))."<$type>"); - $res .= str_repeat (" ", end ($indentStack))."<$type>"; - $htmlStack[] = $type; - array_push ($indentStack, $indent); - array_push ($typeStack, $type); - } - } - - if (($type === "ol" || $type === "ul") && $blockLI === false) - { - $this->debugMKD ("DEB2 : Adding li"); - $htmlStack[] = "li"; - $this->debugMKD (str_repeat (" ", $indent)."
  • "); - $res .= "\n".str_repeat (" ", $indent)."
  • "; - } - - } - - if ($indent > end ($indentStack) && end ($typeStack) !== "code") - { - // The code indentation should not be parsed as a new code : the - // continue until the end of paragraph - $this->debugMKD ("DEB1 : Starting a new block"); - if ($type === "") - { - $this->debugMKD ("No type : skipped"); - continue; - } - - if (strlen ($res) && in_array ($type, array ("ul","ol","p"))) - { - $this->debugMKD ("Add \\n because of new block"); - $res .= "\n"; - } - - if (end ($indentStack)) - array_pop ($indentStack); - if ($type === "code") - { - // Code need a pre before code - if (end ($typeStack)) - { - $this->debugMKD ("DEB2 : CODE : Close older HTML"); - $oldType = array_pop ($typeStack); - $this->debugMKD (str_repeat (" ", end ($indentStack)). - ""); - $res .= str_repeat (" ", end ($indentStack)).""; - array_pop ($indentStack); - array_pop ($htmlStack); - } - $typetmp = "pre>debugMKD (str_repeat (" ", $indent)."<$typetmp>"); - $res .= str_repeat (" ", $indent)."<$typetmp>"; - if ($type === "ol" || $type === "ul") - { - $this->debugMKD ("DEB2 : Adding li"); - $htmlStack[] = "li"; - $this->debugMKD (str_repeat (" ", $indent)."
  • "); - $res .= "\n".str_repeat (" ", $indent)."
  • "; - } - } - - if ($type === "" && end ($indentStack)) - { - $this->debugMKD ("DEB2 : Empty type"); - // Remove last \n to put closing tag at the end of line - $res = substr ($res, 0, -1); - $oldType = array_pop ($typeStack); - $this->debugMKD (str_repeat (" ", end ($indentStack)).""); - $res .= "\n".str_repeat (" ", end ($indentStack)).""; - array_pop ($htmlStack); - } - - // If code, there is no emphasis, email, and other conversions - if ($type !== "code") - { - $timetmp = microtime (TRUE); - $lineTxt = preg_replace ($search, $replace, $lineTxt); - $timeregex += (microtime (TRUE) - $timetmp); - } - - $this->debugMKD (substr ($lineTxt, end ($indentStack)).""); - $res .= substr ($lineTxt, end ($indentStack)).""; - if ($type === "code") - $res .= "\n"; } + if ($this->debug) echo "RETURN typeOLUL : <$type>\n$content\n"; + return "<$type>\n$content".str_repeat (" ", $depth)."\n"; - $this->debugMKD ("DEB1 : End of loop"); - $htmlStack = array_reverse ($htmlStack); - foreach ($htmlStack as $i=>$type) + } + + /** Return HTML code corresponding to the NONE block + The NONE type exists only on empty strings. Just skip the current and + empty line, and return an empty string */ + private function typeNONE ($text, $depth, &$pos) + { + if ($this->debug) echo "CALL typeNONE (\$text, $depth, $pos)\n"; + $pos++; + return ""; + } + + /** Return HTML code corresponding to the P block + @param array $text The Markdown text to translate split by \n + @param integer $depth The depth of current bloc (in number of space) + @param integer $pos The start line number of the bloc */ + private function typeP ($text, $depth, &$pos) + { + if ($this->debug) echo "CALL typeP (\$text, $depth, $pos)\n"; + $content = ""; + // End of P block : end of markdown text / depth lighter than $depth / + // linetype changed + $Pinc = $pos; + while (isset ($text[$pos]) && + $this->depth($text[$pos]) == $depth && + $this->lineType ($text[$pos]) === "p") { - if ($type === "code") - $res = substr ($res, 0, -1); - $this->debugMKD ("FIN"); - $res .= ""; - if (($i+1) < count ($htmlStack) && $type !== "code") - $res .= "\n"; + if (substr ($text[$pos], -2) === " ") + { + // Two spaces at end of line : add
    + $content .= $this->searchReplace (substr ($text[$pos], 0, -2)) ."
    "; + } + elseif ($pos > $Pinc && substr ($content, -5) !== "
    ") + { + // Add a space between two lines from the same block, if this is not + // the continuity of the block + $content .= " ".$this->searchReplace ($text[$pos]); + } + else + { + $content .= $this->searchReplace ($text[$pos]); + } + $pos++; } + if ($this->debug) echo "RETURN typeP :

    $content

    \n"; + return "

    $content

    \n"; + } - $this->debugMKD ("TimeInit=".($timeInit*1000)."ms"); - $this->debugMKD ("TimeRegex=".($timeregex*1000)."ms"); - $this->debugMKD ("TimeAll=".((microtime (TRUE) - $timeStart)*1000)."ms"); - $this->debugMKD ("-----------\n"); - return $res; + /** Detect the type of the text and call the appropriate function * + @param array $text The Markdown text to translate split by \n + @param integer $depth The depth of current bloc (in number of space) + @param integer $pos The start line number of the bloc + @return the HTML code */ + private function detectBlock ($text, $depth, &$pos) + { + if ($this->debug) echo "CALL detectBlock (\$text, $depth, $pos)\n"; + $content = ""; + $blockContent = ""; + // detect the type and call the right type function + while (isset ($text[$pos])) + { + if ($this->depth ($text[$pos]) > $depth) + { + if ($this->debug) + echo "CALL DEPTH > MINDEPTH (".$this->depth ($text[$pos]). + " > $depth)\n"; + $content .= $this->detectBlock ($text, $this->depth ($text[$pos]), + $pos); + continue; + } + elseif ($this->depth ($text[$pos]) < $depth) + { + if ($this->debug) + echo "CALL DEPTH > MINDEPTH (".$this->depth ($text[$pos]). + " < $depth)\n"; + return $content; + } + + $type = $this->lineType ($text[$pos]); + $func = "type$type"; + if ($this->debug) + echo "FROM DETECT : CALL $func (line=".$text[$pos].")\n"; + $content .= str_repeat (" ", $depth). $this->$func ($text, $depth, $pos); + } + return $content; } /** Return the Type of object in the provided line p, ul, ol, code */ - private function paragraphType ($line) + private function lineType ($line) { if (! isset ($line{0})) - return ""; + return "NONE"; if (preg_match ("/^[ \t]*[+*-] /", $line) === 1) return "ul"; if (preg_match ("/^[ \t]*[0-9]+\. /", $line) === 1) @@ -419,13 +319,11 @@ class markdown return "p"; } - /** Function to display the MarkDown debug */ - private function debugMKD ($msg) + /** Return the depth of the provided line + @param $line Line to analyze + @return the depth of the line */ + private function depth ($line) { - if ($this->debug === false) return; - $trace = debug_backtrace(); - $back = reset ($trace); - file_put_contents ("/tmp/debugMD", "[".$back["line"]."] $msg\n", - FILE_APPEND); + return strspn ($line, " "); } }