* @license BSD */

//namespace Domframework;

/** Convert the Markdown text to html format
 *
 * The text is first passed through htmlentities(), so every pattern handled
 * here works on entity-encoded text (&lt; &gt; &quot;), never on raw < > ".
 *
 * NOTE(review): the HTML tags emitted by this class were rebuilt from the
 * comments and the structure of the code; confirm the exact markup against
 * the expected output of the project before release.
 */
class markdown
{
  /** To debug the markdown analyzer, activate the option */
  public $debug = false;

  /** The list of the HTML elements used by block
   * NOTE(review): not referenced anywhere in this class - confirm the
   * values or remove the property.
   */
  private $blockid = array ("<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>",
                            "<hr/>");

  /** Convert the markdown text to html
   * @param string $markdown The markdown to convert
   * @return string The HTML code, trimmed
   */
  public function html ($markdown)
  {
    $markdown = rtrim ($markdown);
    // ENT_QUOTES : the URLs are written in single-quoted attributes, so the
    // single quote must be encoded too (it is the default since PHP 8.1)
    $markdown = htmlentities ($markdown, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8");

    // Here are the regexp on multilines
    $search = array ();
    $replace = array ();
    // Titles with underline (SeText)
    // Title1
    // ======
    $search[] = "/(.+)\\n==+$/Um";
    $replace[] = "</p>\n<h1>\\1</h1>\n<p>";
    // Title2
    // ------
    $search[] = "/(.+)\\n--+$/Um";
    $replace[] = "</p>\n<h2>\\1</h2>\n<p>";
    // SEPARATORS : *** --- ___ * * * - - - _ _ _
    // Must be placed before EMPHASIS
    $search[] = "/^[*_-] ?[*_-] ?[*_-]$/Um";
    $replace[] = "</p>\n<hr/>\n<p>";
    $converted = preg_replace ($search, $replace, $markdown);
    if ($converted !== null)
      $markdown = $converted;

    $textArray = explode ("\n", $markdown);
    $pos = 0;
    $html = $this->detectBlock ($textArray, 0, $pos);
    // The titles and separators close and reopen the current paragraph :
    // remove the empty paragraphs this leaves behind
    $html = str_replace ("<p></p>", "", $html);
    $html = trim ($html);
    return $html;
  }

  /** Search and replace in the paragraph on one line
   * @param string $line The line to analyze (already in HTML entities)
   * @return string The line with the inline markdown converted to HTML
   */
  private function searchReplace ($line)
  {
    if ($this->debug)
      echo "CALL searchReplace ($line)\n";
    // REMEMBER : THE $line is already in HTML ENTITIES !
    // Quotes : &quot;
    $res = $line;

    // Manage the <hr/> separators
    $separators = array ("***", "---", "___", "* * *", "- - -", "_ _ _");
    foreach ($separators as $pattern)
    {
      $start = 0;
      while (($start = strpos ($res, $pattern, $start)) !== false)
      {
        if ($start > 0 && $res[$start-1] === "\\")
        {
          // Escaped separator : remove the backslash and skip the pattern,
          // which now starts one byte earlier
          $res = substr ($res, 0, $start - 1) . substr ($res, $start);
          $start += strlen ($pattern) - 1;
        }
        else
        {
          $res = substr ($res, 0, $start) . "<hr/>" .
                 substr ($res, $start + strlen ($pattern));
        }
      }
    }

    // Manage the emphasis and code correctly with the backslash
    // The order matters : the double markers must be handled before the
    // single ones
    $emphasis = array ("__" => "strong", "_" => "em",
                       "**" => "strong", "*" => "em",
                       "`" => "code");
    foreach ($emphasis as $pattern => $tag)
    {
      $len = strlen ($pattern);
      $start = 0;
      while (1)
      {
        $start = strpos ($res, $pattern, $start);
        if ($start === false)
          break;
        $end = strpos ($res, $pattern, $start + $len);
        if ($end === false)
          break;
        if ($res[$start+1] === $pattern)
        {
          // Pattern too long, not this test : skip it
          $start += $len + strspn ($res, $pattern, $start+1);
          continue;
        }
        if ($start > 0 && $res[$start-1] === "\\")
        {
          // Escaped marker : remove the backslash and skip the ending
          // pattern, which moved one byte to the left
          $res = substr ($res, 0, $start - 1) . substr ($res, $start);
          $end--;
        }
        else
        {
          // It is the real pattern found, without backslash
          $content = substr ($res, $start + $len, $end - $start - $len);
          if (trim ($content) !== "")
          {
            $res = substr ($res, 0, $start) . "<$tag>$content</$tag>" .
                   substr ($res, $end + $len);
          }
        }
        $start = $end + $len;
      }
    }

    // Manage the others cases
    $search = array ();
    $replace = array ();
    // Titles short
    // == TITLE1
    $search[] = '~^([^\\\\]|^)(==+ (.+)( ==+)?)$~Um';
    $replace[] = "</p>\n<h1>\\3</h1>\n<p>";
    // -- TITLE2
    $search[] = '~^([^\\\\]|^)(--+ (.+)( --+)?)$~Um';
    $replace[] = "</p>\n<h2>\\3</h2>\n<p>";

    // LINKS (can be relative)
    // images : ![Alternative text](/path/to/image.png)
    $search[] = '~([^\\\\]|^)(!\[(.+)\]\((.+)\))~';
    $replace[] = "\\1<img src='\\4' alt='\\3'/>";
    // [Google Site](http://google.fr/ "With help bubble")
    // The double quotes are already converted to &quot;
    $search[] = '~([^\\\\!]|^)(\[(.+)\]\((.+) &quot;(.+)&quot;\))~';
    $replace[] = "\\1<a href='\\4' title='\\5'>\\3</a>";
    // [Google Site](http://google.fr/)
    $search[] = '~([^\\\\!]|^)(\[(.+)\]\((.+)\))~U';
    $replace[] = "\\1<a href='\\4'>\\3</a>";
    // Automatics links (the < and > are already converted to entities) :
    // <http://dominique.fournier38.fr>
    // <user@example.com>
    $search[] = '~([^\\\\]|^)(&lt;(https?://.+)&gt;)~U';
    $replace[] = "\\1<a href='\\3'>\\3</a>";
    $search[] = '~([^\\\\]|^)(&lt;(.+@.+)&gt;)~U';
    $replace[] = "\\1<a href='mailto:\\3'>\\3</a>";
    // The links must not allow the <em> : redo the conversion
    $search[] = "~(<a href='.*)<em>(.*)</em>(.*'>.*)<em>(.*)</em>(.*</a>)~";
    $replace[] = '\1_\2_\3_\4_\5';
    // TODO : Links by reference :
    // Here is a short text written by [Michel Fortin][mf].
    // [mf]: http://michelf.ca/ "My web site"

    // TITLES
    // Titles ATX (Optional sharps at the end)
    // The longest marker must be tested first : ###### Title6 ... # Title1
    for ($level = 6 ; $level >= 1 ; $level--)
    {
      $search[] = '~^([^\\\\]|^)?('.str_repeat ("#", $level).
                  ' (.+)( +#+)?)$~Um';
      $replace[] = "</p><h$level>\\3</h$level><p>";
    }

    // Remove the backslashes on the existing regex : each construct preceded
    // by a backslash is replaced by the construct alone
    $prefixes = array ('([^\\\\]|^)?', '([^\\\\]|^)', '([^\\\\!]|^)',
                       '([^\\\\*]|^)', '([^\\\\_]|^)');
    foreach ($search as $pattern)
    {
      $escaped = str_replace ($prefixes, '([\\\\])', $pattern);
      if ($escaped === $pattern)
      {
        // No "not escaped" prefix in this pattern : there is no backslash
        // variant to add (adding it would replace the match by its group 2)
        continue;
      }
      $search[] = $escaped;
      $replace[] = '\2';
    }

    $converted = preg_replace ($search, $replace, $res);
    // preg_replace returns null on PCRE error : keep the text in this case
    return ($converted === null) ? $res : $converted;
  }

  /** Return HTML code corresponding to the code block
   * @param array $text The Markdown text to translate split by \n
   * @param integer $depth The depth of current bloc (in number of space)
   * @param integer &$pos The start line number of the bloc
   * @return string The HTML code
   */
  private function typeCode ($text, $depth, &$pos)
  {
    if ($this->debug)
      echo "CALL typeCode (\$text, $depth, $pos)\n";
    $posStart = $pos;
    $content = "";
    // End of code block : end of markdown text / depth lighter than $depth
    // The Code blocks can't be nested
    while (isset ($text[$pos]) && $this->depth ($text[$pos]) >= $depth)
    {
      if ($pos > $posStart)
        $content .= "\n";
      $content .= substr ($text[$pos], $depth);
      $pos++;
    }
    if ($this->debug)
      echo "RETURN typeCode : <pre><code>$content</code></pre>\n";
    return "<pre><code>$content</code></pre>\n";
  }

  /** Return HTML code corresponding to the OL block
   * @param array $text The Markdown text to translate split by \n
   * @param integer $depth The depth of current bloc (in number of space)
   * @param integer &$pos The start line number of the bloc
   * @return string The HTML code
   */
  private function typeOL ($text, $depth, &$pos)
  {
    if ($this->debug)
      echo "CALL typeOL (\$text, $depth, $pos)\n";
    $content = $this->typeOLUL ($text, $depth, $pos, "ol");
    if ($this->debug)
      echo "RETURN typeOL : $content\n";
    return $content;
  }

  /** Return HTML code corresponding to the UL block
   * @param array $text The Markdown text to translate split by \n
   * @param integer $depth The depth of current bloc (in number of space)
   * @param integer &$pos The start line number of the bloc
   * @return string The HTML code
   */
  private function typeUL ($text, $depth, &$pos)
  {
    if ($this->debug)
      echo "CALL typeUL (\$text, $depth, $pos)\n";
    $content = $this->typeOLUL ($text, $depth, $pos, "ul");
    if ($this->debug)
      echo "RETURN typeUL : $content\n";
    return $content;
  }

  /** Return the HTML code corresponding to the OL/UL block
   * @param array $text The Markdown text to translate split by \n
   * @param integer $depth The depth of current bloc (in number of space)
   * @param integer &$pos The start line number of the bloc
   * @param string $type The block type : "ul" or "ol"
   * @return string The HTML code
   */
  private function typeOLUL ($text, $depth, &$pos, $type)
  {
    if ($this->debug)
      echo "CALL typeOLUL (\$text, $depth, $pos, $type)\n";
    $content = "";
    $itemIndent = str_repeat (" ", $depth + 2);
    // End of OL/UL block : end of markdown text / depth lighter than $depth /
    // linetype changed
    while (isset ($text[$pos]) &&
           $this->depth ($text[$pos]) >= $depth &&
           $this->lineType ($text[$pos]) === $type)
    {
      if ($this->debug)
        echo "Start while $pos\n";
      $content .= $itemIndent."<li>";
      $blockContent = $text[$pos];
      $pos++;
      // Look at continuous lines : not empty, not a new item, same depth
      while (isset ($text[$pos]) &&
             $this->lineType ($text[$pos]) !== "NONE" &&
             $this->lineType ($text[$pos]) !== $type &&
             $this->depth ($text[$pos]) === $depth)
      {
        if ($this->debug)
          echo "Continuous line : ".$pos."\n";
        $blockContent .= " ".$text[$pos];
        $pos++;
      }
      // Remove the indent and the item marker (number and dot for ol,
      // - + * for ul) and the spaces at start
      if ($type === "ol")
        preg_match ("/^( *)[0-9]+\. +(.*)/", $blockContent, $matches);
      else
        preg_match ("/^( *)[-+*] +(.*)/", $blockContent, $matches);
      $lineTxt = isset ($matches[2]) ? $matches[2] : $blockContent;
      $content .= $this->searchReplace ($lineTxt);

      if (isset ($text[$pos]) && $this->depth ($text[$pos]) > $depth)
      {
        // A deeper block follows : it is nested inside the current item
        if ($this->debug)
          echo "Detect Block\n";
        $content .= "\n".
                    $this->detectBlock ($text, $this->depth ($text[$pos]),
                                        $pos).
                    $itemIndent."</li>\n";
      }
      else
      {
        $content .= "</li>\n";
      }
    }
    if ($this->debug)
      echo "RETURN typeOLUL : <$type>\n$content</$type>\n";
    return "<$type>\n$content".str_repeat (" ", $depth)."</$type>\n";
  }

  /** Return HTML code corresponding to the NONE block
   * The NONE type exists only on empty strings. Just skip the current and
   * empty line, and return an empty string
   * @param array $text The Markdown text to translate split by \n
   * @param integer $depth The depth of the current bloc (in number of space)
   * @param integer &$pos The start line number of the bloc
   * @return string Always an empty string
   */
  private function typeNONE ($text, $depth, &$pos)
  {
    if ($this->debug)
      echo "CALL typeNONE (\$text, $depth, $pos)\n";
    $pos++;
    return "";
  }

  /** Return HTML code corresponding to the P block
   * @param array $text The Markdown text to translate split by \n
   * @param integer $depth The depth of current bloc (in number of space)
   * @param integer &$pos The start line number of the bloc
   * @return string The HTML code
   */
  private function typeP ($text, $depth, &$pos)
  {
    if ($this->debug)
      echo "CALL typeP (\$text, $depth, $pos)\n";
    $content = "";
    // End of P block : end of markdown text / depth changed /
    // linetype changed
    $Pinc = $pos;
    while (isset ($text[$pos]) &&
           $this->depth ($text[$pos]) == $depth &&
           $this->lineType ($text[$pos]) === "p")
    {
      $line = $text[$pos];
      // Two spaces at end of line : add <br/>
      $lineBreak = (substr ($line, -2) === "  ");
      if ($lineBreak)
        $line = substr ($line, 0, -2);
      // Add a space between two lines from the same block, except just
      // after a forced line break
      if ($pos > $Pinc && substr ($content, -5) !== "<br/>")
        $content .= " ";
      $content .= $this->searchReplace ($line);
      if ($lineBreak)
        $content .= "<br/>";
      $pos++;
    }
    if ($this->debug)
      echo "RETURN typeP : <p>$content</p>\n";
    return "<p>$content</p>\n";
  }

  /** Detect the type of the text and call the appropriate function
   *
   * @param array $text The Markdown text to translate split by \n
   * @param integer $depth The depth of current bloc (in number of space)
   * @param integer &$pos The start line number of the bloc
   * @return string the HTML code
   */
  private function detectBlock ($text, $depth, &$pos)
  {
    if ($this->debug)
      echo "CALL detectBlock (\$text, $depth, $pos)\n";
    $content = "";
    // detect the type and call the right type function
    while (isset ($text[$pos]))
    {
      $lineDepth = $this->depth ($text[$pos]);
      if ($lineDepth > $depth && $depth === 0)
      {
        // Indented text at the top level : new block code
        if ($this->debug)
          echo "New block code\n";
        $content .= $this->typeCode ($text, $lineDepth, $pos);
        continue;
      }
      if ($lineDepth > $depth)
      {
        if ($this->debug)
          echo "CALL DEPTH > MINDEPTH (".$lineDepth." > $depth)\n";
        $content .= $this->detectBlock ($text, $lineDepth, $pos);
        continue;
      }
      if ($lineDepth < $depth)
      {
        // Back to a parent block : let the caller manage this line
        if ($this->debug)
          echo "CALL DEPTH > MINDEPTH (".$lineDepth." < $depth)\n";
        return $content;
      }
      $type = $this->lineType ($text[$pos]);
      if ($this->debug)
        echo "FROM DETECT : CALL type$type (line=".$text[$pos].")\n";
      switch ($type)
      {
        case "NONE":
          $block = $this->typeNONE ($text, $depth, $pos);
          break;
        case "ul":
          $block = $this->typeUL ($text, $depth, $pos);
          break;
        case "ol":
          $block = $this->typeOL ($text, $depth, $pos);
          break;
        case "code":
          $block = $this->typeCode ($text, $depth, $pos);
          break;
        default:
          $block = $this->typeP ($text, $depth, $pos);
      }
      $content .= str_repeat (" ", $depth).$block;
    }
    return $content;
  }

  /** Return the Type of object in the provided line
   * NONE (empty line), ul, ol, code, p
   * @param string $line The line to get the type
   * @return string The type of the line
   */
  private function lineType ($line)
  {
    // The $line{0} syntax is not supported anymore by PHP 8
    if (! isset ($line[0]))
      return "NONE";
    if (preg_match ("/^[ \t]*[+*-] /", $line) === 1)
      return "ul";
    if (preg_match ("/^[ \t]*[0-9]+\. /", $line) === 1)
      return "ol";
    if (preg_match ("/^( |\t)+/", $line) === 1)
      return "code";
    return "p";
  }

  /** Return the depth of the provided line
   * @param string $line Line to analyze
   * @return integer the number of spaces at the start of the line
   */
  private function depth ($line)
  {
    return strspn ($line, " ");
  }
}