* @license BSD */ namespace Domframework; /** * Convert Markdown text to HTML */ class Markdown { /** * Debug switch: when true, the analyzer echoes its CALL / RETURN trace messages */ public $debug = false; /** * The list of the HTML elements used by blocks */ private $blockid = ["

", "

", "

", "

", "

", "
", "
"]; /** * Convert the markdown text to html. * The text is right-trimmed and passed through htmlentities(), the multi-line * rules (SeText titles, separators) are applied with preg_replace(), then the * lines are handed to detectBlock() starting at depth 0. * @param string $markdown The markdown to convert * @return string The generated HTML, trimmed */ public function html($markdown) { $markdown = rtrim($markdown); $markdown = htmlentities($markdown); // Multi-line regular expressions $search = []; $replace = []; // Titles with underline (SeText) // Title1 // ====== $search[] = "/(.+)\\n==+$/Um"; $replace[] = "

\\1

\n

"; // Title2 // ------ $search[] = "/(.+)\\n--+$/Um"; $replace[] = "

\\1

\n

"; // SEPARATORS : *** --- ___ * * * - - - _ _ _ // Must be placed before EMPHASIS $search[] = "/^[*_-] ?[*_-] ?[*_-]$/Um"; $replace[] = "


\n

"; $markdown = preg_replace($search, $replace, $markdown); $textArray = explode("\n", $markdown); $pos = 0; $html = $this->detectBlock($textArray, 0, $pos); $html = str_replace("

", "", $html); $html = str_replace("

", "", $html); $html = trim($html); return $html; } /** * Apply the single-line replacements to a line: separators, then emphasis and * code markers (with backslash escaping), then the regex rules (short titles, * images and links, automatic links, ATX titles). * NOTE(review): both escape tests use $start > 1, so a backslash at offset 0 * is not treated as an escape - confirm whether $start > 0 was intended. * NOTE(review): $res[$start + 1] === $pattern compares one character with the * whole pattern, so it can only be true for the one-character patterns - * confirm the intent for the longer ones. * @param string $line The line to analyze, already converted to HTML entities * @return string|null The value returned by the final preg_replace() call */ private function searchReplace($line) { if ($this->debug) { echo "CALL searchReplace ($line)\n"; } // REMEMBER : THE $line is already in HTML ENTITIES ! // Quotes : " $res = $line; // Manage the
separators $search = ["***", "---", "___", "* * *", "- - -", "_ _ _"]; foreach ($search as $key => $pattern) { $start = 0; while (1) { $start = strpos($res, $pattern, $start); if ($start === false) { break; } if ($res[$start + 1] === $pattern) { // Pattern too long, not this test : skip it $start += strlen($pattern) + 1; continue; } if ($start > 1 && $res[$start - 1] === "\\") { // Search the ending pattern to skip it. Remove the backslash $res = substr($res, 0, $start - 1) . substr($res, $start); } else { $res = substr($res, 0, $start) . "
" . substr($res, $start + strlen($pattern)); } } } // Manage the emphasis and code correctly with the backslash $search = []; $replace = []; $search[] = "__"; $replace[] = "\\1"; $search[] = "_"; $replace[] = "\\1"; $search[] = "**"; $replace[] = "\\1"; $search[] = "*"; $replace[] = "\\1"; $search[] = "`"; $replace[] = "\\1"; foreach ($search as $key => $pattern) { $start = 0; while (1) { $start = strpos($res, $pattern, $start); if ($start === false) { break; } $end = strpos($res, $pattern, $start + strlen($pattern)); if ($end === false) { break; } if ($res[$start + 1] === $pattern) { // Pattern too long, not this test : skip it $start += strlen($pattern) + strspn($res, $pattern, $start + 1); continue; } if ($start > 1 && $res[$start - 1] === "\\") { // Search the ending pattern to skip it. Remove the backslash $res = substr($res, 0, $start - 1) . substr($res, $start); } else { // It is the real pattern found, without backslash. Replace by the // $replace value $content = substr( $res, $start + strlen($pattern), $end - $start - strlen($pattern) ); if (trim($content) !== "") { $first = substr($replace[$key], 0, strpos($replace[$key], "\\1")); $second = substr($replace[$key], strpos($replace[$key], "\\1") + 2); $res = substr($res, 0, $start) . $first . $content . $second . substr($res, $end + strlen($pattern)); } } $start = $end + strlen($pattern); } } // Manage the other cases $search = []; $replace = []; // Titles short // == TITLE1 $search[] = '~^([^\\\\]|^)(==+ (.+)( ==+)?)$~Um'; $replace[] = '

' . "\n" . '

\3

' . "\n" . '

'; // -- TITLE2 $search[] = '~^([^\\\\]|^)(--+ (.+)( --+)?)$~Um'; $replace[] = '

\n

\3

\n

'; // LINKS (can be relative) // images $search[] = '~([^\\\\]|^)(!\[(.+)\]\((.+)\))~'; $replace[] = '\1\'\3\'/'; // [Google Site](http://google.fr/ "With help bubble") $search[] = '~([^\\\\!]|^)(\[(.+)\]\((.+) "(.+)"\))~'; $replace[] = '\1\3'; // [Google Site](http://google.fr/) $search[] = '~([^\\\\!]|^)(\[(.+)\]\((.+)\))~U'; $replace[] = '\1\3'; // Automatic links : // // $search[] = '~([^\\\\]|^)(<(https?://.+)>)~U'; $replace[] = '\1\3'; $search[] = '~([^\\\\]|^)(<(.+@.+)>)~U'; $replace[] = '\1\3'; // The links must not allow the : redo the conversion $search[] = '~((.*)(.*\'>.*)(.*)(.*)~'; $replace[] = '\1_\2_\3_\4_\5'; // TODO : Links by reference : // Here is a short text written by [Michel Fortin][mf]. // [mf]: http://michelf.ca/ "My website" // TITLES // Titles ATX (Optional hash signs at the end) // ###### Title6 $search[] = '~^([^\\\\]|^)?(###### (.+)( +#+)?)$~Um'; $replace[] = '

\3

'; // ##### Title5 $search[] = '~^([^\\\\]|^)?(##### (.+)( +#+)?)$~Um'; $replace[] = '

\3

'; // #### Title4 $search[] = '~^([^\\\\]|^)?(#### (.+)( +#+)?)$~Um'; $replace[] = '

\3

'; // ### Title3 $search[] = '~^([^\\\\]|^)?(### (.+)( +#+)?)$~Um'; $replace[] = '

\3

'; // ## Title2 $search[] = '~^([^\\\\]|^)?(## (.+)( +#+)?)$~Um'; $replace[] = '

\3

'; // # Title1 $search[] = '~^([^\\\\]|^)?(# (.+)( +#+)?)$~Um'; $replace[] = '

\3

'; // Remove the backslashes: every rule above gets an escaped variant whose replacement keeps group 2 only foreach ($search as $s) { $s = str_replace('([^\\\\]|^)?', '([\\\\])', $s); $s = str_replace('([^\\\\]|^)', '([\\\\])', $s); $s = str_replace('([^\\\\!]|^)', '([\\\\])', $s); $s = str_replace('([^\\\\*]|^)', '([\\\\])', $s); $s = str_replace('([^\\\\_]|^)', '([\\\\])', $s); $search[] = $s; $replace[] = '\2'; } /*foreach ($search as $key=>$s) { echo "$key => $s\n"; $res = preg_replace ($s, $replace[$key], $res); echo "$res\n"; } */ $res = preg_replace($search, $replace, $res); return $res; } /** * Return HTML code corresponding to the code block. * Consecutive lines whose depth is at least $depth are collected, stripped of * their first $depth characters and joined with line feeds. * @param array $text The Markdown text to translate split by \n * @param int $depth The depth of current block (in number of spaces) * @param int &$pos The start line number of the block; advanced past the consumed lines * @return string The HTML built around the collected content */ private function typeCode($text, $depth, &$pos) { if ($this->debug) { echo "CALL typeCode (\$text, $depth, $pos)\n"; } $posStart = $pos; $content = ""; // End of code block : end of markdown text / depth lighter than $depth while ( isset($text[$pos]) && $this->depth($text[$pos]) >= $depth ) { // Code blocks cannot be nested: lines are joined with a line feed if ($pos > $posStart) { $content .= "\n"; } $content .= substr($text[$pos], $depth); $pos++; } // Insert Geshi on $content if ($this->debug) { echo "RETURN typeCode :

$content
\n"; } return "
$content
\n"; } /** * Return HTML code corresponding to the OL block * @param array $text The Markdown text to translate split by \n * @param int $depth The depth of current block (in number of spaces) * @param int &$pos The start line number of the block; advanced by typeOLUL() * @return string The HTML returned by typeOLUL() for the ol type */ private function typeOL($text, $depth, &$pos) { if ($this->debug) { echo "CALL typeOL (\$text, $depth, $pos)\n"; } $content = $this->typeOLUL($text, $depth, $pos, "ol"); if ($this->debug) { echo "RETURN typeOL : $content\n"; } return $content; } /** * Return HTML code corresponding to the UL block * @param array $text The Markdown text to translate split by \n * @param int $depth The depth of current block (in number of spaces) * @param int &$pos The start line number of the block; advanced by typeOLUL() * @return string The HTML returned by typeOLUL() for the ul type */ private function typeUL($text, $depth, &$pos) { if ($this->debug) { echo "CALL typeUL (\$text, $depth, $pos)\n"; } $content = $this->typeOLUL($text, $depth, $pos, "ul"); if ($this->debug) { echo "RETURN typeUL : $content\n"; } return $content; } /** * Return the HTML code corresponding to the OL/UL block. * Each item line is merged with its continuation lines, stripped of its list * marker, passed to searchReplace() and appended; deeper lines that follow * an item are delegated to detectBlock(). * NOTE(review): $blockStart is assigned but never read, and the if (1) test * is always true - both look like leftovers. * @param array $text The Markdown text to translate split by \n * @param int $depth The depth of current block (in number of spaces) * @param int &$pos The start line number of the block; advanced past the consumed lines * @param string $type The block type : "ul" or "ol" * @return string The HTML of the list, starting with the <$type> line */ private function typeOLUL($text, $depth, &$pos, $type) { if ($this->debug) { echo "CALL typeOLUL (\$text, $depth, $pos, $type)\n"; } $content = ""; // End of OL/UL block : end of markdown text / depth lighter than $depth / // linetype changed $blockStart = $pos; $blockContent = ""; while ( isset($text[$pos]) && $this->depth($text[$pos]) >= $depth && $this->lineType($text[$pos]) === $type ) { if ($this->debug) { echo "Start while $pos\n"; } if (1) { $content .= str_repeat(" ", ($depth + 2)) . "
  • "; $blockContent .= $text[$pos]; $pos++; // Look at continuous lines while ( isset($text[$pos]) && $this->lineType($text[$pos]) !== "NONE" && $this->lineType($text[$pos]) !== $type && $this->depth($text[$pos]) === $depth ) { if ($this->debug) { echo "Continuous line : " . $pos . "\n"; } $blockContent .= " " . $text[$pos]; $pos++; continue; } // Indent the li and remove the number and dot and space at start if ($type === "ol") { preg_match("/^( *)[0-9]+\. +(.*)/", $blockContent, $matches); } else { preg_match("/^( *)[-+*] +(.*)/", $blockContent, $matches); } if (!isset($matches[2])) { $lineTxt = $blockContent; } else { $lineTxt = $matches[2]; } $lineTxt = $this->searchReplace($lineTxt); $content .= $lineTxt; $blockStart = $pos; $blockContent = ""; } if (isset($text[$pos]) && $this->depth($text[$pos]) > $depth) { if ($this->debug) { echo "Detect Block\n"; } $content .= "\n" . $this->detectBlock($text, $this->depth($text[$pos]), $pos) . str_repeat(" ", ($depth + 2)) . "
  • \n"; } else { $content .= "\n"; } } if ($this->debug) { echo "RETURN typeOLUL : <$type>\n$content\n"; } return "<$type>\n$content" . str_repeat(" ", $depth) . "\n"; } /** * Return HTML code corresponding to the NONE block * The NONE type exists only on empty strings. Just skip the current and * empty line, and return an empty string * @param array $text The Markdown text to translate split by \n * @param int $depth The depth of the current block (in number of spaces) * @param int &$pos The start line number of the block; incremented by one * @return string Always an empty string */ private function typeNONE($text, $depth, &$pos) { if ($this->debug) { echo "CALL typeNONE (\$text, $depth, $pos)\n"; } $pos++; return ""; } /** * Return HTML code corresponding to the P block. * Consecutive lines of type p at exactly $depth are passed to searchReplace() * and concatenated. * @param array $text The Markdown text to translate split by \n * @param int $depth The depth of current block (in number of spaces) * @param int &$pos The start line number of the block; advanced past the consumed lines * @return string The HTML built around the paragraph content */ private function typeP($text, $depth, &$pos) { if ($this->debug) { echo "CALL typeP (\$text, $depth, $pos)\n"; } $content = ""; // End of P block : end of markdown text / depth lighter than $depth / // linetype changed $Pinc = $pos; while ( isset($text[$pos]) && $this->depth($text[$pos]) == $depth && $this->lineType($text[$pos]) === "p" ) { if (substr($text[$pos], -2) === " ") { // Two spaces at end of line : add
    $content .= $this->searchReplace(substr($text[$pos], 0, -2)) . "
    "; } elseif ($pos > $Pinc && substr($content, -5) !== "
    ") { // Add a space between two lines from the same block, if this is not // the continuity of the block $content .= " " . $this->searchReplace($text[$pos]); } else { $content .= $this->searchReplace($text[$pos]); } $pos++; } if ($this->debug) { echo "RETURN typeP :

    $content

    \n"; } return "

    $content

    \n"; } /** * Detect the type of the text and call the appropriate function. * A line deeper than $depth starts a code block when $depth is 0, otherwise a * nested detectBlock() call; a shallower line ends this call. Any other line * is dispatched to the method named type followed by the lineType() result. * NOTE(review): $blockContent is assigned but never read here, and the debug * message of the depth < $depth branch also starts with CALL DEPTH > MINDEPTH * - presumably a copy/paste leftover. * * @param array $text The Markdown text to translate split by \n * @param int $depth The depth of current block (in number of spaces) * @param int &$pos The start line number of the block; advanced by the called handlers * @return string The HTML code */ private function detectBlock($text, $depth, &$pos) { if ($this->debug) { echo "CALL detectBlock (\$text, $depth, $pos)\n"; } $content = ""; $blockContent = ""; // detect the type and call the right type function while (isset($text[$pos])) { if ($this->depth($text[$pos]) > $depth && $depth === 0) { // New block code if ($this->debug) { echo "New block code\n"; } $content .= $this->typeCode($text, $this->depth($text[$pos]), $pos); continue; } elseif ($this->depth($text[$pos]) > $depth) { if ($this->debug) { echo "CALL DEPTH > MINDEPTH (" . $this->depth($text[$pos]) . " > $depth)\n"; } $content .= $this->detectBlock( $text, $this->depth($text[$pos]), $pos ); continue; } elseif ($this->depth($text[$pos]) < $depth) { if ($this->debug) { echo "CALL DEPTH > MINDEPTH (" . $this->depth($text[$pos]) . " < $depth)\n"; } return $content; } $type = $this->lineType($text[$pos]); $func = "type$type"; if ($this->debug) { echo "FROM DETECT : CALL $func (line=" . $text[$pos] . ")\n"; } $content .= str_repeat(" ", $depth) . $this->$func($text, $depth, $pos); } return $content; } /** * Return the type of object found in the provided line: * NONE (empty line), ul, ol, code or p * @param string $line The line to get the type * @return string The detected type */ private function lineType($line) { if (! isset($line[0])) { return "NONE"; } if (preg_match("/^[ \t]*[+*-] /", $line) === 1) { return "ul"; } if (preg_match("/^[ \t]*[0-9]+\. /", $line) === 1) { return "ol"; } if (preg_match("/^( |\t)+/", $line) === 1) { return "code"; } return "p"; } /** * Return the depth of the provided line, i.e. the length of its leading run * of spaces as measured by strspn() * @param string $line Line to analyze * @return int The depth of the line */ private function depth($line) { return strspn($line, " "); } }