<?php
/*
Copyright (c) 2009-2014 F3::Factory/Bong Cosca, All rights reserved.
This file is part of the Fat-Free Framework (http://fatfree.sf.net).
THE SOFTWARE AND DOCUMENTATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF
ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
PURPOSE.
Please see the license.txt file for more information.
*/
//! Markdown-to-HTML converter
class Markdown extends Prefab {

	protected
		//! Parsing rules: block handler suffix => regex (filled lazily by build())
		$blocks,
		//! Special characters: typographic shortcut => HTML entity (filled lazily by esc())
		$special;

	/**
	*	Process blockquote: strip one level of '>' markers and render
	*	the remaining text as nested block-level markdown
	*	@return string
	*	@param $str string
	**/
	protected function _blockquote($str) {
		$str=preg_replace('/(?<=^|\n)\h?>\h?(.*?(?:\n+|$))/','\1',$str);
		return strlen($str)?
			('<blockquote>'.$this->build($str).'</blockquote>'."\n\n"):'';
	}

	/**
	*	Process whitespace-prefixed code block: remove the four-space/tab
	*	indent from every line and wrap the escaped text in <pre><code>
	*	@return string
	*	@param $str string
	**/
	protected function _pre($str) {
		$str=preg_replace('/(?<=^|\n)(?: {4}|\t)(.+?(?:\n+|$))/','\1',
			$this->esc($str));
		// The second esc() pass cannot double-encode: esc() calls
		// htmlspecialchars() with double_encode disabled
		return strlen($str)?
			('<pre><code>'.
				$this->esc($this->snip($str)).
			'</code></pre>'."\n\n"):
			'';
	}

	/**
	*	Process fenced code block. When the HIGHLIGHT hive key is truthy,
	*	the language hint selects a syntax highlighter (php, apache, html,
	*	ini); any other hint, or highlighting turned off, yields escaped
	*	plain text
	*	@return string
	*	@param $hint string
	*	@param $str string
	**/
	protected function _fence($hint,$str) {
		$str=$this->snip($str);
		$fw=Base::instance();
		if ($fw->get('HIGHLIGHT')) {
			switch (strtolower($hint)) {
				case 'php':
					// Delegated to the framework's own highlighter
					$str=$fw->highlight($str);
					break;
				case 'apache':
					// Per line: 1=indent; 2..5=section tag parts
					// ('<' or '</', name, arguments, '>');
					// 6,7=directive name and arguments; 8=trailing EOLs
					preg_match_all('/(?<=^|\n)(\h*)'.
						'(?:(<\/?)(\w+)((?:\h+[^>]+)*)(>)|'.
						'(?:(\w+)(\h.+?)))(\h*(?:\n+|$))/',
						$str,$matches,PREG_SET_ORDER);
					$out='';
					foreach ($matches as $match)
						$out.=$match[1].
							($match[3]?
								('<span class="section">'.
									$this->esc($match[2]).$match[3].
								'</span>'.
								($match[4]?
									('<span class="data">'.
										$this->esc($match[4]).
									'</span>'):
									'').
								'<span class="section">'.
									$this->esc($match[5]).
								'</span>'):
								('<span class="directive">'.
									$match[6].
								'</span>'.
								'<span class="data">'.
									$this->esc($match[7]).
								'</span>')).
							$match[8];
					$str='<code>'.$out.'</code>';
					break;
				case 'html':
					// Per match: 1=optional '/', 2=tag name, 3=attributes,
					// 4=optional self-closing slash; 5=one character of
					// text outside any tag
					preg_match_all(
						'/(?:(?:<(\/?)(\w+)'.
						'((?:\h+(?:\w+\h*=\h*)?".+?"|[^>]+)*|'.
						'\h+.+?)(\h*\/?)>)|(.+?))/s',
						$str,$matches,PREG_SET_ORDER
					);
					$out='';
					foreach ($matches as $match) {
						if ($match[2]) {
							// The angle brackets must be entity-encoded;
							// written literally they would turn the
							// highlighted tag into live markup
							$out.='<span class="xml_tag">&lt;'.
								$match[1].$match[2].'</span>';
							if ($match[3]) {
								// 1=attribute name, 2=quoted value,
								// 3=anything else
								preg_match_all(
									'/(?:\h+(?:(?:(\w+)\h*=\h*)?'.
									'(".+?")|(.+)))/',
									$match[3],$parts,PREG_SET_ORDER
								);
								foreach ($parts as $part)
									$out.=' '.
										(empty($part[3])?
											((empty($part[1])?
												'':
												('<span class="xml_attr">'.
													$part[1].'</span>=')).
											'<span class="xml_data">'.
												$this->esc($part[2]).
											'</span>'):
											('<span class="xml_tag">'.
												$this->esc($part[3]).
											'</span>'));
							}
							$out.='<span class="xml_tag">'.
								$match[4].'&gt;</span>';
						}
						else
							$out.=$this->esc($match[5]);
					}
					$str='<code>'.$out.'</code>';
					break;
				case 'ini':
					// Per line: 1=comment, 2=section name, 3=key,
					// 4=value (may continue after a trailing backslash),
					// 5=trailing EOLs; embedded PHP tags match without
					// filling groups 1-4
					preg_match_all(
						'/(?<=^|\n)(?:'.
						'(;[^\n]*)|(?:<\?php.+?\?>?)|'.
						'(?:\[(.+?)\])|'.
						'(.+?)\h*=\h*'.
						'((?:\\\\\h*\r?\n|.+?)*)'.
						')((?:\r?\n)+|$)/',
						$str,$matches,PREG_SET_ORDER
					);
					$out='';
					foreach ($matches as $match) {
						// Captured text is escaped so that markup inside
						// the code sample is displayed, not interpreted
						if ($match[1])
							$out.='<span class="comment">'.
								$this->esc($match[1]).'</span>';
						elseif ($match[2])
							$out.='<span class="ini_section">['.
								$this->esc($match[2]).']'.'</span>';
						elseif ($match[3])
							$out.='<span class="ini_key">'.
								$this->esc($match[3]).'</span>='.
								($match[4]?
									('<span class="ini_value">'.
										$this->esc($match[4]).
									'</span>'):'');
						else
							$out.=$this->esc($match[0]);
						if (isset($match[5]))
							$out.=$match[5];
					}
					$str='<code>'.$out.'</code>';
					break;
				default:
					$str='<code>'.$this->esc($str).'</code>';
					break;
			}
		}
		else
			$str='<code>'.$this->esc($str).'</code>';
		return '<pre>'.$str.'</pre>'."\n\n";
	}

	/**
	*	Process horizontal rule
	*	@return string
	**/
	protected function _hr() {
		return '<hr />'."\n\n";
	}

	/**
	*	Process atx-style heading; the number of '#' characters is the
	*	heading level and the slug of the heading text becomes its id
	*	@return string
	*	@param $type string
	*	@param $str string
	**/
	protected function _atx($type,$str) {
		$level=strlen($type);
		return '<h'.$level.' id="'.Web::instance()->slug($str).'">'.
			$this->scan($str).'</h'.$level.'>'."\n\n";
	}

	/**
	*	Process setext-style heading; '=' underline gives level 1,
	*	'-' underline gives level 2
	*	@return string
	*	@param $str string
	*	@param $type string
	**/
	protected function _setext($str,$type) {
		// Position of the underline character in '=-' (0 or 1) plus one
		$level=strpos('=-',$type)+1;
		return '<h'.$level.' id="'.Web::instance()->slug($str).'">'.
			$this->scan($str).'</h'.$level.'>'."\n\n";
	}

	/**
	*	Process ordered/unordered list. The first item decides the list
	*	type (ol/ul) and whether the list is tight (inline items) or
	*	loose (items rendered as blocks). A horizontal rule found between
	*	items closes the list; the remainder is rendered as regular blocks
	*	@return string
	*	@param $str string
	**/
	protected function _li($str) {
		// Initialize list parser
		$len=strlen($str);
		$ptr=0;
		$dst='';
		$tail='';
		$first=TRUE;
		$tight=TRUE;
		$type='ul';
		// Main loop
		while ($ptr<$len) {
			$rest=substr($str,$ptr);
			if (preg_match('/^\h*[*-](?:\h?[*-]){2,}(?:\n+|$)/',
				$rest,$match)) {
				$ptr+=strlen($match[0]);
				// Embedded horizontal rule
				return (strlen($dst)?
					('<'.$type.'>'."\n".$dst.'</'.$type.'>'."\n\n"):'').
					'<hr />'."\n\n".$this->build(substr($str,$ptr));
			}
			elseif (preg_match('/(?<=^|\n)([*+-]|\d+\.)\h'.
				'(.+?(?:\n+|$))((?:(?: {4}|\t)+.+?(?:\n+|$))*)/s',
				$rest,$match)) {
				// 1=bullet or number, 2=first line of the item,
				// 3=indented continuation lines (one indent level removed)
				$match[3]=preg_replace('/(?<=^|\n)(?: {4}|\t)/','',$match[3]);
				// Does the continuation start with a block other than
				// a plain paragraph (last rule in the table)?
				$found=FALSE;
				foreach (array_slice($this->blocks,0,-1) as $regex)
					if (preg_match($regex,$match[3])) {
						$found=TRUE;
						break;
					}
				// List
				if ($first) {
					// First pass
					if (is_numeric($match[1]))
						$type='ol';
					if (preg_match('/\n{2,}$/',$match[2].
						($found?'':$match[3])))
						// Loose structure; Use paragraphs
						$tight=FALSE;
					$first=FALSE;
				}
				// Strip leading whitespaces
				$ptr+=strlen($match[0]);
				$tmp=$this->snip($match[2].$match[3]);
				if ($tight) {
					if ($found)
						$tmp=$match[2].$this->build($this->snip($match[3]));
				}
				else
					$tmp=$this->build($tmp);
				$dst.='<li>'.$this->scan(trim($tmp)).'</li>'."\n";
			}
			else {
				// Neither pattern applied, so the pointer cannot advance;
				// render what is left as a paragraph instead of looping
				// forever
				$tail=$this->_p($rest);
				break;
			}
		}
		return (strlen($dst)?
			('<'.$type.'>'."\n".$dst.'</'.$type.'>'."\n\n"):'').$tail;
	}

	/**
	*	Ignore raw HTML: block-level markup is passed through untouched
	*	@return string
	*	@param $str string
	**/
	protected function _raw($str) {
		return $str;
	}

	/**
	*	Process paragraph. Text outside tags and inline links is escaped,
	*	tags/links are kept verbatim, then inline spans are converted.
	*	A line starting with '>' or '#' ends the paragraph; the rest is
	*	handed back to the block parser
	*	@return string
	*	@param $str string
	**/
	protected function _p($str) {
		$str=trim($str);
		if (strlen($str)) {
			if (preg_match('/^(.+?\n)([>#].+)$/s',$str,$parts))
				return $this->_p($parts[1]).$this->build($parts[2]);
			$self=$this;
			$str=preg_replace_callback(
				'/([^<>\[]+)?(<[\?%].+?[\?%]>|<.+?>|\[.+?\]\s*\(.+?\))|'.
				'(.+)/s',
				function($expr) use($self) {
					// 1=text preceding a tag/link (possibly empty),
					// 2=tag, server-side block or inline link (verbatim),
					// 3=text containing no tag/link (second alternative
					// only; groups 1 and 2 are empty strings in that case)
					$tmp=$self->esc($expr[1]).$expr[2];
					if (isset($expr[3]))
						$tmp.=$self->esc($expr[3]);
					return $tmp;
				},
				$str
			);
			return '<p>'.$this->scan($str).'</p>'."\n\n";
		}
		return '';
	}

	/**
	*	Process strong/em/strikethrough spans; passes are repeated until
	*	the text stops changing so that nested markers are resolved
	*	@return string
	*	@param $str string
	**/
	protected function _text($str) {
		$tmp='';
		while ($str!=$tmp)
			$str=preg_replace_callback(
				'/(?<!\\\\)([*_]{1,3})(.*?)(?!\\\\)\1(?=[\s[:punct:]]|$)/',
				function($expr) {
					// Marker length: 1=em, 2=strong, 3=both
					switch (strlen($expr[1])) {
						case 1:
							return '<em>'.$expr[2].'</em>';
						case 2:
							return '<strong>'.$expr[2].'</strong>';
						case 3:
							return '<strong><em>'.$expr[2].'</em></strong>';
					}
				},
				// Strikethrough is handled first; $tmp keeps the text
				// as it was before this pass
				preg_replace(
					'/(?<!\\\\)~~(.*?)(?!\\\\)~~(?=[\s[:punct:]]|$)/',
					'<del>\1</del>',
					$tmp=$str
				)
			);
		return $str;
	}

	/**
	*	Process image span: ![alt](url "title"); alt and title are
	*	optional, all attribute values are escaped
	*	@return string
	*	@param $str string
	**/
	protected function _img($str) {
		$self=$this;
		return preg_replace_callback(
			'/!(?:\[(.+?)\])?\h*\(<?(.*?)>?(?:\h*"(.*?)"\h*)?\)/',
			function($expr) use($self) {
				// The URL is escaped the same way _a() escapes href so
				// it cannot break out of the attribute
				return '<img src="'.$self->esc($expr[2]).'"'.
					(empty($expr[1])?
						'':
						(' alt="'.$self->esc($expr[1]).'"')).
					(empty($expr[3])?
						'':
						(' title="'.$self->esc($expr[3]).'"')).' />';
			},
			$str
		);
	}

	/**
	*	Process anchor span: [text](url "title"); the link text is
	*	scanned for nested inline spans
	*	@return string
	*	@param $str string
	**/
	protected function _a($str) {
		$self=$this;
		return preg_replace_callback(
			'/(?<!\\\\)\[(.+?)(?!\\\\)\]\h*\(<?(.*?)>?(?:\h*"(.*?)"\h*)?\)/',
			function($expr) use($self) {
				return '<a href="'.$self->esc($expr[2]).'"'.
					(empty($expr[3])?
						'':
						(' title="'.$self->esc($expr[3]).'"')).
					'>'.$self->scan($expr[1]).'</a>';
			},
			$str
		);
	}

	/**
	*	Auto-convert links: <url> becomes an anchor when the text has
	*	a URL scheme; angle brackets inside backticks are left alone
	*	@return string
	*	@param $str string
	**/
	protected function _auto($str) {
		$self=$this;
		return preg_replace_callback(
			'/`.*?<(.+?)>.*?`|<(.+?)>/',
			function($expr) use($self) {
				// Group 1 is filled only by the backtick alternative
				if (empty($expr[1]) && parse_url($expr[2],PHP_URL_SCHEME)) {
					$expr[2]=$self->esc($expr[2]);
					return '<a href="'.$expr[2].'">'.$expr[2].'</a>';
				}
				return $expr[0];
			},
			$str
		);
	}

	/**
	*	Process code span: `code` or `` code `` (the latter may contain
	*	single backticks); contents are escaped
	*	@return string
	*	@param $str string
	**/
	protected function _code($str) {
		$self=$this;
		return preg_replace_callback(
			'/`` (.+?) ``|(?<!\\\\)`(.+?)(?!\\\\)`/',
			function($expr) use($self) {
				return '<code>'.
					$self->esc(empty($expr[1])?$expr[2]:$expr[1]).'</code>';
			},
			$str
		);
	}

	/**
	*	Convert characters to HTML entities. The typographic shortcuts
	*	'...', '(tm)', '(r)' and '(c)' (case-insensitive) are replaced
	*	first, then HTML special characters are encoded using the
	*	framework's ENCODING setting; existing entities are preserved
	*	@return string
	*	@param $str string
	**/
	function esc($str) {
		if (!$this->special)
			// Named entities keep the output independent of the
			// configured charset; they survive the call below because
			// double_encode is disabled
			$this->special=array(
				'...'=>'&hellip;',
				'(tm)'=>'&trade;',
				'(r)'=>'&reg;',
				'(c)'=>'&copy;'
			);
		foreach ($this->special as $key=>$val)
			$str=preg_replace('/'.preg_quote($key,'/').'/i',$val,$str);
		return htmlspecialchars($str,ENT_COMPAT,
			Base::instance()->get('ENCODING'),FALSE);
	}

	/**
	*	Reduce multiple line feeds: runs of blank lines collapse to a
	*	single line feed, as do trailing line feeds
	*	@return string
	*	@param $str string
	**/
	protected function snip($str) {
		return preg_replace('/(?:(?<=\n)\n+)|\n+$/',"\n",$str);
	}

	/**
	*	Scan line for convertible spans. Handlers run in a fixed order:
	*	images, anchors, emphasis, automatic links, code
	*	@return string
	*	@param $str string
	**/
	function scan($str) {
		$inline=array('img','a','text','auto','code');
		foreach ($inline as $func)
			$str=$this->{'_'.$func}($str);
		return $str;
	}

	/**
	*	Assemble blocks. The input is consumed from the start: reference
	*	definitions are resolved against the output produced so far,
	*	anything else goes to the first block rule whose regex matches,
	*	with the handler named '_' followed by the rule key
	*	@return string
	*	@param $str string
	**/
	protected function build($str) {
		if (!$this->blocks) {
			// Regexes for capturing entire blocks; order matters and
			// the catch-all paragraph rule must stay last
			$this->blocks=array(
				'blockquote'=>'/^(?:\h?>\h?.*?(?:\n+|$))+/',
				'pre'=>'/^(?:(?: {4}|\t).+?(?:\n+|$))+/',
				'fence'=>'/^`{3}\h*(\w+)?.*?[^\n]*\n+(.+?)`{3}[^\n]*'.
					'(?:\n+|$)/s',
				'hr'=>'/^\h*[*_-](?:\h?[\*_-]){2,}\h*(?:\n+|$)/',
				'atx'=>'/^\h*(#{1,6})\h?(.+?)\h*(?:#.*)?(?:\n+|$)/',
				'setext'=>'/^\h*(.+?)\h*\n([=-])+\h*(?:\n+|$)/',
				'li'=>'/^(?:(?:[*+-]|\d+\.)\h.+?(?:\n+|$)'.
					'(?:(?: {4}|\t)+.+?(?:\n+|$))*)+/s',
				'raw'=>'/^((?:<!--.+?-->|'.
					'<(address|article|aside|audio|blockquote|canvas|dd|'.
					'div|dl|fieldset|figcaption|figure|footer|form|h\d|'.
					'header|hgroup|hr|noscript|object|ol|output|p|pre|'.
					'section|table|tfoot|ul|video).*?'.
					'(?:\/>|>(?:(?>[^><]+)|(?R))*<\/\2>))'.
					'\h*(?:\n{2,}|\n*$)|<[\?%].+?[\?%]>\h*(?:\n?$|\n*))/s',
				'p'=>'/^(.+?(?:\n{2,}|\n*$))/s'
			);
		}
		$self=$this;
		// Treat lines with nothing but whitespaces as empty lines
		$str=preg_replace('/\n\h+(?=\n)/',"\n",$str);
		// Initialize block parser
		$len=strlen($str);
		$ptr=0;
		$dst='';
		// Main loop
		while ($ptr<$len) {
			if (preg_match('/^ {0,3}\[([^\[\]]+)\]:\s*<?(.*?)>?\s*'.
				'(?:"([^\n]*)")?(?:\n+|$)/s',substr($str,$ptr),$match)) {
				// Reference-style link; Backtrack
				// $match: 1=label, 2=URL, 3=title (absent if not given)
				$ptr+=strlen($match[0]);
				$tmp='';
				// Let horizontal whitespace in the label match any
				// whitespace, so a reference may wrap across lines
				$ref=preg_replace('/\h/','\s',preg_quote($match[1],'/'));
				// Repeat until the output stops changing
				while ($dst!=$tmp) {
					$dst=preg_replace_callback(
						'/(?<!\\\\)\[('.$ref.')(?!\\\\)\]\s*\[\]|'.
						'(!?)(?:\[([^\[\]]+)\]\s*)?'.
						'(?<!\\\\)\[('.$ref.')(?!\\\\)\]/',
						function($expr) use($match,$self) {
							// $expr: 1=label of a "[label][]" reference,
							// 2='!' for images, 3=link text or alt text,
							// 4=label of a "[text][label]" reference
							return (empty($expr[2]))?
								// Anchor
								('<a href="'.$self->esc($match[2]).'"'.
								(empty($match[3])?
									'':
									(' title="'.
										$self->esc($match[3]).'"')).'>'.
								// Link
								$self->scan(
									empty($expr[3])?
										(empty($expr[1])?
											$expr[4]:
											$expr[1]):
										$expr[3]
								).'</a>'):
								// Image; alt is always written (it is
								// empty when no alt text was supplied)
								('<img src="'.$self->esc($match[2]).'"'.
								' alt="'.$self->esc($expr[3]).'"'.
								(empty($match[3])?
									'':
									(' title="'.
										$self->esc($match[3]).'"')).
								' />');
						},
						$tmp=$dst
					);
				}
			}
			else {
				$matched=FALSE;
				foreach ($this->blocks as $func=>$regex)
					if (preg_match($regex,substr($str,$ptr),$match)) {
						$ptr+=strlen($match[0]);
						// Handlers receive the capture groups, or the
						// whole match when the rule has no groups
						$dst.=call_user_func_array(
							array($this,'_'.$func),
							count($match)>1?array_slice($match,1):$match
						);
						$matched=TRUE;
						break;
					}
				if (!$matched) {
					// No rule applied, so the pointer cannot advance;
					// emit the remainder escaped instead of looping
					// forever
					$dst.=$this->esc(substr($str,$ptr));
					break;
				}
			}
		}
		return $dst;
	}

	/**
	*	Render HTML equivalent of markdown. Line endings are normalized
	*	to LF before parsing; afterwards backslash escapes are resolved
	*	everywhere except inside code elements, tags, parenthesized and
	*	double-quoted text
	*	@return string
	*	@param $txt string
	**/
	function convert($txt) {
		$txt=preg_replace_callback(
			'/(<code.*?>.+?<\/code>|'.
			'<[^>\n]+>|\([^\n\)]+\)|"[^"\n]+")|'.
			'\\\\(.)/s',
			function($expr) {
				// Process escaped characters
				return empty($expr[1])?$expr[2]:$expr[1];
			},
			$this->build(preg_replace('/\r\n|\r/',"\n",$txt))
		);
		return $this->snip($txt);
	}

}