Files
DomFramework/markdown.php

387 lines
12 KiB
PHP

<?php
/** DomFramework
@package domframework
@author Dominique Fournier <dominique@fournier38.fr> */
// Report every PHP error level from this point on.
// NOTE(review): this runs as a side effect of including the file, so it also
// changes the reporting level of the including script - confirm it is wanted.
error_reporting (E_ALL);
/** Markdown management : convert a subset of the Markdown syntax to HTML */
class markdown
{
  /** When not false, the conversion appends a trace of each step to
      /tmp/debug */
  public $debug = false;

  /** Convert the markdown language to HTML
      The text is first HTML-escaped, then the constructs spanning several
      lines (underlined titles, separators) are converted, and finally the
      text is handed line by line to paragraph ()
      @param string $mark Message in markdown syntax to display
      @return string The HTML string */
  public function html ($mark)
  {
    // Start each debug run with a fresh trace file. The file does not exist
    // on the first run, so only remove it when it is there
    if ($this->debug && file_exists ("/tmp/debug"))
      unlink ("/tmp/debug");
    // Escape the HTML special chars and both kinds of quotes : from here on
    // a user-typed < is &lt; and a user-typed " is &quot;
    $mark = htmlentities ($mark, ENT_QUOTES);
    // Here are the regexp on multilines
    $search = array ();
    $replace = array ();
    // Titles with underline (SeText)
    // Title1
    // ======
    $search[] = "/^(.+)\\n==+$\\n/Um";
    $replace[] = "\n<h1>\\1</h1>\n";
    // Title2
    // ------
    $search[] = "/^(.+)\\n--+$\\n/Um";
    $replace[] = "\n<h2>\\1</h2>\n";
    // SEPARATORS : *** --- ___ * * * - - - _ _ _
    // Must be placed before EMPHASIS
    $search[] = "/^[*_-] ?[*_-] ?[*_-]$/Um";
    $replace[] = "\n<hr/>\n";
    $mark = preg_replace ($search, $replace, $mark);
    $res = $this->paragraph ($mark);
    // The title conversions done in paragraph () close and reopen the
    // current <p> : remove the empty paragraphs they leave behind
    $res = str_replace ("<p></p>", "", $res);
    $res = str_replace ("<p>\n</p>", "", $res);
    return $res;
  }

  /** Translate the Markdown paragraphs in HTML
      The text is processed line by line. Three stacks follow the opened
      blocks, and the indentation of each line (number of leading spaces) is
      compared with the indentation of the last opened block to decide if a
      block is closed, continued or opened
      @param string $mark The markdown text, already passed through
      htmlentities by html ()
      @return string The HTML */
  private function paragraph ($mark)
  {
    // Remember that htmlentities has already been applied on $mark : the
    // patterns below must match &lt; &gt; &quot; and not < > "
    $timeStart = microtime (TRUE);
    $timeregex = 0;
    // Initialization of the conversions. They are applied on ONE line at a
    // time (see the preg_replace in the loop), so no pattern can rely on a
    // newline being present in the subject
    $search = array ();
    $replace = array ();
    // Titles short
    // == TITLE1
    $search[] = "/^==+ (.+)( ==+)?$/Um";
    $replace[] = "</p>\n<h1>\\1</h1>\n<p>";
    // -- TITLE2
    $search[] = "/^--+ (.+)( --+)?$/Um";
    $replace[] = "</p>\n<h2>\\1</h2>\n<p>";
    // EMPHASIS : _ or * for normal, __ or ** for strong
    $search[] = "/__(.+)__/";
    $replace[] = "<strong>\\1</strong>";
    $search[] = "/_(.+)_/";
    $replace[] = "<em>\\1</em>";
    $search[] = "/\*\*(.+)\*\*/";
    $replace[] = "<strong>\\1</strong>";
    $search[] = "/\*(.+)\*/";
    $replace[] = "<em>\\1</em>";
    // CODE : `code` -> <code>
    $search[] = "/\\n?\`((\\n|.)+)\`/Um";
    $replace[] = " <code>\\1</code>";
    // LINKS
    // [Google Site](http://google.fr/ "With help bubble")
    // The quotes around the title arrive as &quot; because of the
    // htmlentities (ENT_QUOTES) done by html () ; the raw quote is accepted
    // too
    $search[] = "#\[(.+)\]\((https?://.+) (?:&quot;|\")(.+)(?:&quot;|\")\)#";
    $replace[] = "<a href='\\2' title='\\3'>\\1</a>";
    // [Google Site](http://google.fr/)
    $search[] = "(\[(.+)\]\((http.+)\))";
    $replace[] = "<a href='\\2'>\\1</a>";
    // Automatics links :
    // <http://dominique.fournier38.fr>
    // <dominique@fournier38.fr>
    $search[] = "#&lt;(https?://.+)&gt;#";
    $replace[] = "<a href='\\1'>\\1</a>";
    // The whole &gt; entity (with its semicolon) must be consumed, else a
    // stray ";" is left after the link
    $search[] = "#&lt;(.+@.+)&gt;#";
    $replace[] = "<a href='mailto:\\1'>\\1</a>";
    // TODO : Links by reference :
    // Here is a short text written by [Michel Fortin][mf].
    // [mf]: http://michelf.ca/ "My web site"
    // TITLES
    // Titles ATX (Optional sharps at the end)
    // Anchored on the line only (no surrounding newline required) as the
    // subject is a single line. The longest prefix is tried first
    // ###### Title6
    $search[] = "/^###### ([^#]+)(#*)$/Um";
    $replace[] = "</p>\n<h6>\\1</h6>\n<p>";
    // ##### Title5
    $search[] = "/^##### ([^#]+)(#*)$/Um";
    $replace[] = "</p>\n<h5>\\1</h5>\n<p>";
    // #### Title4
    $search[] = "/^#### ([^#]+)(#*)$/Um";
    $replace[] = "</p>\n<h4>\\1</h4>\n<p>";
    // ### Title3
    $search[] = "/^### ([^#]+)(#*)$/Um";
    $replace[] = "</p>\n<h3>\\1</h3>\n<p>";
    // ## Title2
    $search[] = "/^## ([^#]+)(#*)$/Um";
    $replace[] = "</p>\n<h2>\\1</h2>\n<p>";
    // # Title1
    $search[] = "/^# ([^#]+)(#*)$/Um";
    $replace[] = "</p>\n<h1>\\1</h1>\n<p>";
    // End of line with two spaces or more : <br/>
    $search[] = "/ {2,}$/m";
    $replace[] = "<br/>";
    // End of line with continuous on second line : add blank
    // $search[] = "/(.)\\n([A-Za-z0-9])/Um"; $replace[] = "\\1 \\2";

    // Cleaning the markdown text
    $mark = str_replace ("\t", " ", $mark);
    $mark = trim ($mark);
    if ($mark === "")
      return "";
    $res = "";
    // P, OL, UL, CODE (but not LI !)
    // Used to detect the changes of block type
    $typeStack = array ();
    // Number of leading spaces of the opened blocks. -1 is the initial
    // value, lower than the indentation of any line
    $indentStack = array (-1);
    // All the opened HTML tags (with LI), closed in reverse order at the end
    $htmlStack = array ();
    $lines = explode ("\n", $mark);
    $timeInit = microtime (TRUE) - $timeStart;
    // True while a paragraph line is being appended to the pending <li>
    $blockLI = false;
    foreach ($lines as $line)
    {
      $this->debugMKD ("DEBUT:$line");
      // A user-typed < is &lt; at this point, so a line starting with < is
      // HTML generated by the multi-lines conversions of html () : copy it
      // unchanged (no newline is appended)
      if (substr (ltrim ($line), 0, 1) === "<")
      {
        $this->debugMKD ("HTML : Skipped");
        $res .= $line;
        continue;
      }
      $type = $this->paragraphType ($line);
      $this->debugMKD ("DEBUT: Type='$type'");
      // Remove the list marker (number or bullet) from the text of the line
      $matches = array ();
      switch ($type)
      {
        case "ol" :
          preg_match ("/^( *)[0-9]+\. +(.*)/", $line, $matches);
          if (!isset ($matches[2]))
            $lineTxt = $line;
          else
            $lineTxt = $matches[2];
          break ;
        case "ul" :
          preg_match ("/^( *)[-+*] +(.*)/", $line, $matches);
          if (!isset ($matches[2]))
            $lineTxt = $line;
          else
            $lineTxt = $matches[2];
          break ;
        default:
          $lineTxt = $line;
      }
      $indent = strspn ($line, " ");
      $this->debugMKD ("DEBUT: Indent='$indent'");
      $this->debugMKD ("DEBUT: indentStack=".print_r ($indentStack, TRUE));
      $this->debugMKD ("DEBUT: typeStack=".print_r ($typeStack, TRUE));
      // Spacing
      // Case 1 : the line is less indented than the last opened block : that
      // block is finished
      if ($indent < end ($indentStack))
      {
        $this->debugMKD ("DEB1 : Ending of block");
        if (end ($htmlStack) === "li")
        {
          $this->debugMKD ("Pending <Li> : closing");
          $this->debugMKD ("</li>");
          $res .= "</li>";
          array_pop ($htmlStack);
        }
        // Remove last \n to put closing tag at the end of line
        $res = substr ($res, 0, -1);
        $oldType = array_pop ($typeStack);
        if ($oldType === "code")
        {
          // A code block pushed two tags (pre and code) : one extra pop here
          // in addition to the common pops below
          $this->debugMKD ("</code></pre>");
          $res .= "</code></pre>";
          array_pop ($htmlStack);
          array_pop ($indentStack);
        }
        else
        {
          $this->debugMKD (str_repeat (" ", end ($indentStack))."</$oldType>");
          $res .= str_repeat (" ", end ($indentStack))."</$oldType>";
        }
        array_pop ($indentStack);
        array_pop ($htmlStack);
        if ($type === "ol" || $type === "ul")
        {
          $this->debugMKD ("DEB2 : Pending <Li> : closing");
          $this->debugMKD ("</li>");
          $res .= "\n</li>";
          array_pop ($htmlStack);
        }
        if ($type === "")
        {
          $this->debugMKD ("DEB3 : End of block");
          array_pop ($indentStack);
          $res .= "\n";
          continue;
        }
      }
      // Case 2 : same indentation as the last opened block : the block is
      // continued, or replaced by a block of another type
      if ($indent == end ($indentStack))
      {
        $this->debugMKD ("DEB1 : Continuous block $type/".end ($typeStack));
        if (end ($htmlStack) === "li" && $type !== "p")
        {
          $this->debugMKD ("Pending <Li> : closing");
          $this->debugMKD ("</li>");
          $res .= "</li>";
          array_pop ($htmlStack);
          $blockLI = false;
        }
        elseif (end ($htmlStack) === "li" && $type === "p")
        {
          // A plain line following a list item is appended to that item
          $this->debugMKD ("DEB2 : Continuous LI block");
          // Suppress \n on last line
          $res = substr ($res, 0, -1);
          $type = "ul";
          $blockLI = true;
          $lineTxt = " ".$lineTxt;
        }
        if ($type !== end ($typeStack))
        {
          $this->debugMKD ("DEB2 : Continuous Block but type change");
          if (end ($typeStack) !== FALSE)
          {
            // Remove last \n to put closing tag at the end of line
            //$res = substr ($res, 0, -1);
            $oldType = array_pop ($typeStack);
            $this->debugMKD (str_repeat (" ", end ($indentStack)).
                             "</$oldType>");
            $res .= str_repeat (" ", end ($indentStack))."</$oldType>";
            array_pop ($indentStack);
            array_pop ($htmlStack);
          }
          if ($type !== "")
          {
            $this->debugMKD (str_repeat (" ", end ($indentStack))."<$type>");
            $res .= "\n".str_repeat (" ", end ($indentStack))."<$type>";
            $htmlStack[] = $type;
            array_push ($indentStack, $indent);
            array_push ($typeStack, $type);
          }
        }
        if (($type === "ol" || $type === "ul") && $blockLI === false)
        {
          $this->debugMKD ("DEB2 : Adding li");
          $htmlStack[] = "li";
          $this->debugMKD (str_repeat (" ", $indent)."<li>");
          $res .= "\n".str_repeat (" ", $indent)."<li>";
        }
      }
      // Case 3 : the line is more indented than the last opened block : a
      // new block is opened
      if ($indent > end ($indentStack) && end ($typeStack) !== "code")
      {
        // The code indentation should not be parsed as a new code : the
        // <code></code> continue until the end of paragraph
        $this->debugMKD ("DEB1 : Starting a new block");
        if ($type === "")
        {
          $this->debugMKD ("No type : skipped");
          continue;
        }
        if (end ($indentStack))
          array_pop ($indentStack);
        if ($type === "code")
        {
          // Code need a pre before code
          if (end ($typeStack))
          {
            $this->debugMKD ("DEB2 : CODE : Close older HTML");
            // Remove last \n to put closing tag at the end of line
            $res = substr ($res, 0, -1);
            $oldType = array_pop ($typeStack);
            $this->debugMKD (str_repeat (" ", end ($indentStack)).
                             "</$oldType>");
            $res .= str_repeat (" ", end ($indentStack))."</$oldType>";
            array_pop ($indentStack);
            array_pop ($htmlStack);
          }
          // Written between < and > below, this opens <pre><code>
          $typetmp = "pre><code";
          $htmlStack[] = "pre";
          $htmlStack[] = "code";
          array_push ($typeStack, "code");
        }
        else
        {
          $typetmp = $type;
          $htmlStack[] = $type;
          array_push ($typeStack, $type);
        }
        array_push ($indentStack, $indent);
        $this->debugMKD (str_repeat (" ", $indent)."<$typetmp>");
        $res .= "\n".str_repeat (" ", $indent)."<$typetmp>";
        if ($type === "ol" || $type === "ul")
        {
          $this->debugMKD ("DEB2 : Adding li");
          $htmlStack[] = "li";
          $this->debugMKD (str_repeat (" ", $indent)."<li>");
          $res .= "\n".str_repeat (" ", $indent)."<li>";
        }
      }
      // Case 4 : empty line while an indented block is opened : close it
      if ($type === "" && end ($indentStack))
      {
        $this->debugMKD ("DEB2 : Empty type");
        // Remove last \n to put closing tag at the end of line
        $res = substr ($res, 0, -1);
        $oldType = array_pop ($typeStack);
        $this->debugMKD (str_repeat (" ", end ($indentStack))."</$oldType>");
        $res .= "\n".str_repeat (" ", end ($indentStack))."</$oldType>";
        array_pop ($htmlStack);
      }
      // If code, there is no emphasis, email, and other conversions
      if ($type !== "code")
      {
        $timetmp = microtime (TRUE);
        $lineTxt = preg_replace ($search, $replace, $lineTxt);
        $timeregex += (microtime (TRUE) - $timetmp);
      }
      $this->debugMKD ("$lineTxt");
      // Drop as many leading chars as the indentation of the current block
      $res .= substr ($lineTxt, end ($indentStack))."\n";
    }
    $this->debugMKD ("DEB1 : End of loop");
    // Close all the tags still opened, the last opened first
    $htmlStack = array_reverse ($htmlStack);
    foreach ($htmlStack as $type)
    {
      $this->debugMKD ("FIN</$type>");
      $res .= "</$type>\n";
    }
    $this->debugMKD ("TimeInit=".($timeInit*1000)."ms");
    $this->debugMKD ("TimeRegex=".($timeregex*1000)."ms");
    $this->debugMKD ("TimeAll=".((microtime (TRUE) - $timeStart)*1000)."ms");
    $this->debugMKD ("-----------\n");
    return $res;
  }

  /** Return the type of object in the provided line
      @param string $line One line of markdown text
      @return string "ul" for a bullet item, "ol" for a numbered item, "code"
      for a line starting by a space or a tab, "p" for any other non empty
      line, "" for an empty line */
  private function paragraphType ($line)
  {
    // Square brackets : the curly brace offset syntax ($line{0}) has been
    // removed in PHP 8.0
    if (! isset ($line[0]))
      return "";
    if (preg_match ("/^[ \t]*[+*-] /", $line) === 1)
      return "ul";
    if (preg_match ("/^[ \t]*[0-9]+\. /", $line) === 1)
      return "ol";
    if (preg_match ("/^( |\t)+/", $line) === 1)
      return "code";
    return "p";
  }

  /** Append a message to the MarkDown debug file /tmp/debug, prefixed by the
      line number of the call. Do nothing if the debug is not activated
      @param string $msg The message to log */
  private function debugMKD ($msg)
  {
    if ($this->debug === false) return;
    // The first frame of the backtrace holds the line of the debugMKD call
    $trace = debug_backtrace();
    $back = reset ($trace);
    file_put_contents ("/tmp/debug", "[".$back["line"]."] $msg\n", FILE_APPEND);
  }
}