<?php

/*
    Copyright (c) 2009-2014 F3::Factory/Bong Cosca, All rights reserved.

    This file is part of the Fat-Free Framework (http://fatfree.sf.net).

    THE SOFTWARE AND DOCUMENTATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF
    ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
    PURPOSE.

    Please see the license.txt file for more information.
*/

//! Markdown-to-HTML converter
class Markdown extends Prefab {

    protected
        //! Parsing rules (block name => regex); built lazily by build()
        $blocks,
        //! Special characters (text => HTML entity); built lazily by esc()
        $special;

    /**
    *   Process blockquote; the quoted text is parsed again as nested
    *   markdown blocks
    *   @return string
    *   @param $str string
    **/
    protected function _blockquote($str) {
        // Remove the ">" marker (and one optional blank on either side)
        // from the start of every line
        $str=preg_replace('/(?<=^|\n)\h?>\h?(.*?(?:\n+|$))/','\1',$str);
        if (!strlen($str))
            return '';
        return '<blockquote>'.$this->build($str).'</blockquote>'."\n\n";
    }

    /**
    *   Process whitespace-prefixed code block
    *   @return string
    *   @param $str string
    **/
    protected function _pre($str) {
        // Escape first, then strip one level of indentation (four blanks
        // or a tab) from each line. A second esc() pass is unnecessary:
        // esc() never double-encodes, so it would leave the text as is
        $str=preg_replace('/(?<=^|\n)(?: {4}|\t)(.+?(?:\n+|$))/','\1',
            $this->esc($str));
        if (!strlen($str))
            return '';
        return '<pre><code>'.$this->snip($str).'</code></pre>'."\n\n";
    }

    /**
    *   Process fenced code block; when the framework variable HIGHLIGHT
    *   is set, php/apache/html/ini blocks are decorated with span tags,
    *   anything else is merely escaped
    *   @return string
    *   @param $hint string
    *   @param $str string
    **/
    protected function _fence($hint,$str) {
        $str=$this->snip($str);
        $fw=Base::instance();
        if (!$fw->get('HIGHLIGHT'))
            return '<pre><code>'.$this->esc($str).'</code></pre>'."\n\n";
        switch (strtolower($hint)) {
            case 'php':
                $str=$fw->highlight($str);
                break;
            case 'apache':
                // Groups: 1=indent, 2="<" or "</", 3=section name,
                // 4=section arguments, 5=">", 6=directive,
                // 7=directive arguments, 8=trailing blanks/line breaks
                preg_match_all('/(?<=^|\n)(\h*)'.
                    '(?:(<\/?)(\w+)((?:\h+[^>]+)*)(>)|'.
                    '(?:(\w+)(\h.+?)))(\h*(?:\n+|$))/',
                    $str,$matches,PREG_SET_ORDER);
                $out='';
                foreach ($matches as $match) {
                    $out.=$match[1];
                    if ($match[3]) {
                        // Section container tag
                        $out.='<span class="section">'.
                            $this->esc($match[2]).$match[3].'</span>';
                        if ($match[4])
                            $out.='<span class="data">'.
                                $this->esc($match[4]).'</span>';
                        $out.='<span class="section">'.
                            $this->esc($match[5]).'</span>';
                    }
                    else
                        // Plain directive
                        $out.='<span class="directive">'.
                            $match[6].'</span>'.
                            '<span class="data">'.
                            $this->esc($match[7]).'</span>';
                    $out.=$match[8];
                }
                $str='<code>'.$out.'</code>';
                break;
            case 'html':
                // Groups: 1="/" of a closing tag, 2=tag name,
                // 3=attributes, 4=blanks and "/" ahead of ">",
                // 5=a single character outside of any tag
                preg_match_all(
                    '/(?:(?:<(\/?)(\w+)'.
                    '((?:\h+(?:\w+\h*=\h*)?".+?"|[^>]+)*|'.
                    '\h+.+?)(\h*\/?)>)|(.+?))/s',
                    $str,$matches,PREG_SET_ORDER
                );
                $out='';
                foreach ($matches as $match) {
                    if (!$match[2]) {
                        $out.=$this->esc($match[5]);
                        continue;
                    }
                    // The angle brackets must reach the browser as
                    // entities, otherwise the sample becomes live markup
                    $out.='<span class="xml_tag">'.
                        $this->esc('<'.$match[1].$match[2]).'</span>';
                    if ($match[3]) {
                        // Groups: 1=attribute name, 2=quoted value,
                        // 3=anything else
                        preg_match_all(
                            '/(?:\h+(?:(?:(\w+)\h*=\h*)?'.
                            '(".+?")|(.+)))/',
                            $match[3],$parts,PREG_SET_ORDER
                        );
                        foreach ($parts as $part) {
                            $out.=' ';
                            if (empty($part[3])) {
                                if (!empty($part[1]))
                                    $out.='<span class="xml_attr">'.
                                        $part[1].'</span>=';
                                // Values are free text; escape them
                                $out.='<span class="xml_data">'.
                                    $this->esc($part[2]).'</span>';
                            }
                            else
                                $out.='<span class="xml_tag">'.
                                    $this->esc($part[3]).'</span>';
                        }
                    }
                    $out.='<span class="xml_tag">'.
                        $this->esc($match[4].'>').'</span>';
                }
                $str='<code>'.$out.'</code>';
                break;
            case 'ini':
                // Groups: 1=comment, 2=section name, 3=key, 4=value,
                // 5=line break(s); a "<?php ... ?>" guard line matches
                // without filling groups 1-4
                preg_match_all(
                    '/(?<=^|\n)(?:'.
                    '(;[^\n]*)|(?:<\?php.+?\?>?)|'.
                    '(?:\[(.+?)\])|'.
                    '(.+?)\h*=\h*'.
                    '((?:\\\\\h*\r?\n|.+?)*)'.
                    ')((?:\r?\n)+|$)/',
                    $str,$matches,PREG_SET_ORDER
                );
                $out='';
                foreach ($matches as $match) {
                    $eol=isset($match[5])?$match[5]:'';
                    // Test lengths, not truthiness: "0" is a legitimate
                    // key, section name and (very commonly) value
                    if (strlen($match[1]))
                        $out.='<span class="comment">'.
                            $this->esc($match[1]).'</span>';
                    elseif (strlen($match[2]))
                        $out.='<span class="ini_section">['.
                            $this->esc($match[2]).']</span>';
                    elseif (strlen($match[3]))
                        $out.='<span class="ini_key">'.
                            $this->esc($match[3]).'</span>='.
                            (strlen($match[4])?
                                ('<span class="ini_value">'.
                                    $this->esc($match[4]).'</span>'):
                                '');
                    else
                        // Guard line: show it escaped and without its
                        // line break, which is appended once below
                        $out.=$this->esc(substr($match[0],0,
                            strlen($match[0])-strlen($eol)));
                    $out.=$eol;
                }
                $str='<code>'.$out.'</code>';
                break;
            default:
                $str='<code>'.$this->esc($str).'</code>';
                break;
        }
        return '<pre>'.$str.'</pre>'."\n\n";
    }

    /**
    *   Process horizontal rule
    *   @return string
    **/
    protected function _hr() {
        return '<hr />'."\n\n";
    }

    /**
    *   Process atx-style heading; the heading level equals the number
    *   of hash marks and the id attribute is the slug of the title
    *   @return string
    *   @param $type string
    *   @param $str string
    **/
    protected function _atx($type,$str) {
        $tag='h'.strlen($type);
        return '<'.$tag.' id="'.Web::instance()->slug($str).'">'.
            $this->scan($str).'</'.$tag.'>'."\n\n";
    }

    /**
    *   Process setext-style heading; "=" underlines produce h1,
    *   "-" underlines produce h2
    *   @return string
    *   @param $str string
    *   @param $type string
    **/
    protected function _setext($str,$type) {
        $tag='h'.(strpos('=-',$type)+1);
        return '<'.$tag.' id="'.Web::instance()->slug($str).'">'.
            $this->scan($str).'</'.$tag.'>'."\n\n";
    }

    /**
    *   Process ordered/unordered list
    *   @return string
    *   @param $str string
    **/
    protected function _li($str) {
        // Initialize list parser
        $len=strlen($str);
        $ptr=0;
        $dst='';
        // Markup that follows the closing list tag, if any
        $tail='';
        $first=TRUE;
        $tight=TRUE;
        $type='ul';
        // Main loop
        while ($ptr<$len) {
            $rest=substr($str,$ptr);
            if (preg_match('/^\h*[*-](?:\h?[*-]){2,}(?:\n+|$)/',
                $rest,$match)) {
                // Embedded horizontal rule; it ends the list and the
                // remainder is parsed as ordinary blocks
                $ptr+=strlen($match[0]);
                $tail='<hr />'."\n\n".$this->build(substr($str,$ptr));
                break;
            }
            if (!preg_match('/(?<=^|\n)([*+-]|\d+\.)\h'.
                '(.+?(?:\n+|$))((?:(?: {4}|\t)+.+?(?:\n+|$))*)/s',
                $rest,$match)) {
                // Nothing recognizable left (not expected for text
                // captured by the list rule); hand the remainder to the
                // block parser instead of looping forever
                $tail=$this->build($rest);
                break;
            }
            // $match[1]=marker, $match[2]=first line of the item,
            // $match[3]=indented continuation lines
            $ptr+=strlen($match[0]);
            // Strip one level of indentation from the continuation
            $match[3]=preg_replace('/(?<=^|\n)(?: {4}|\t)/','',$match[3]);
            // Does the continuation start with a block other than a
            // plain paragraph (the last rule)?
            $found=FALSE;
            foreach (array_slice($this->blocks,0,-1) as $regex)
                if (preg_match($regex,$match[3])) {
                    $found=TRUE;
                    break;
                }
            if ($first) {
                // First pass; the first item decides list type and
                // whether items are wrapped in paragraphs
                if (is_numeric($match[1]))
                    $type='ol';
                if (preg_match('/\n{2,}$/',$match[2].
                    ($found?'':$match[3])))
                    // Loose structure; Use paragraphs
                    $tight=FALSE;
                $first=FALSE;
            }
            if (!$tight)
                $tmp=$this->build($this->snip($match[2].$match[3]));
            elseif ($found)
                $tmp=$match[2].$this->build($this->snip($match[3]));
            else
                $tmp=$this->snip($match[2].$match[3]);
            $dst.='<li>'.$this->scan(trim($tmp)).'</li>'."\n";
        }
        return (strlen($dst)?
            ('<'.$type.'>'."\n".$dst.'</'.$type.'>'."\n\n"):'').$tail;
    }

    /**
    *   Ignore raw HTML
    *   @return string
    *   @param $str string
    **/
    protected function _raw($str) {
        return $str;
    }

    /**
    *   Process paragraph
    *   @return string
    *   @param $str string
    **/
    protected function _p($str) {
        $str=trim($str);
        if (!strlen($str))
            return '';
        // A line starting with ">" or "#" after the first line ends the
        // paragraph; everything from there on is parsed as new blocks
        if (preg_match('/^(.+?\n)([>#].+)$/s',$str,$parts))
            return $this->_p($parts[1]).$this->build($parts[2]);
        $self=$this;
        // Escape plain text but keep tags, processing instructions and
        // inline links verbatim. Groups: 1=text ahead of a tag/link,
        // 2=the tag/link itself, 3=text with no tag/link after it
        $str=preg_replace_callback(
            '/([^<>\[]+)?(<[\?%].+?[\?%]>|<.+?>|\[.+?\]\s*\(.+?\))|'.
            '(.+)/s',
            function($expr) use($self) {
                // Groups 1 and 2 are always present (possibly empty);
                // group 3 exists only when the second branch matched
                $tmp=$self->esc($expr[1]).$expr[2];
                if (isset($expr[3]))
                    $tmp.=$self->esc($expr[3]);
                return $tmp;
            },
            $str
        );
        return '<p>'.$this->scan($str).'</p>'."\n\n";
    }

    /**
    *   Process strong/em/strikethrough spans
    *   @return string
    *   @param $str string
    **/
    protected function _text($str) {
        // One to three "*" or "_" map to em, strong and strong+em
        $emphasis=function($expr) {
            switch (strlen($expr[1])) {
                case 1:
                    return '<em>'.$expr[2].'</em>';
                case 2:
                    return '<strong>'.$expr[2].'</strong>';
                case 3:
                    return '<strong><em>'.$expr[2].'</em></strong>';
            }
        };
        // Repeat until a pass changes nothing, so nested spans resolve
        $tmp='';
        while ($str!==$tmp) {
            $tmp=$str;
            $str=preg_replace_callback(
                '/(?<!\\\\)([*_]{1,3})(.*?)(?!\\\\)\1(?=[\s[:punct:]]|$)/',
                $emphasis,
                preg_replace(
                    '/(?<!\\\\)~~(.*?)(?!\\\\)~~(?=[\s[:punct:]]|$)/',
                    '<del>\1</del>',
                    $tmp
                )
            );
        }
        return $str;
    }

    /**
    *   Process image span
    *   @return string
    *   @param $str string
    **/
    protected function _img($str) {
        $self=$this;
        // Groups: 1=alternate text, 2=URL, 3=title
        return preg_replace_callback(
            '/!(?:\[(.+?)\])?\h*\(<?(.*?)>?(?:\h*"(.*?)"\h*)?\)/',
            function($expr) use($self) {
                // The URL is escaped like every other attribute value
                // (anchors already do the same for href)
                $tag='<img src="'.$self->esc($expr[2]).'"';
                if (!empty($expr[1]))
                    $tag.=' alt="'.$self->esc($expr[1]).'"';
                if (!empty($expr[3]))
                    $tag.=' title="'.$self->esc($expr[3]).'"';
                return $tag.' />';
            },
            $str
        );
    }

    /**
    *   Process anchor span
    *   @return string
    *   @param $str string
    **/
    protected function _a($str) {
        $self=$this;
        // Groups: 1=link text, 2=URL, 3=title
        return preg_replace_callback(
            '/(?<!\\\\)\[(.+?)(?!\\\\)\]\h*\(<?(.*?)>?(?:\h*"(.*?)"\h*)?\)/',
            function($expr) use($self) {
                $tag='<a href="'.$self->esc($expr[2]).'"';
                if (!empty($expr[3]))
                    $tag.=' title="'.$self->esc($expr[3]).'"';
                return $tag.'>'.$self->scan($expr[1]).'</a>';
            },
            $str
        );
    }

    /**
    *   Auto-convert links
    *   @return string
    *   @param $str string
    **/
    protected function _auto($str) {
        $self=$this;
        // The first branch swallows angle brackets inside code spans so
        // that they are returned untouched; only the second branch
        // (group 2) is a candidate for conversion
        return preg_replace_callback(
            '/`.*?<(.+?)>.*?`|<(.+?)>/',
            function($expr) use($self) {
                if (empty($expr[1]) && parse_url($expr[2],PHP_URL_SCHEME)) {
                    $url=$self->esc($expr[2]);
                    return '<a href="'.$url.'">'.$url.'</a>';
                }
                return $expr[0];
            },
            $str
        );
    }

    /**
    *   Process code span
    *   @return string
    *   @param $str string
    **/
    protected function _code($str) {
        $self=$this;
        // Group 1=content of a double-backtick span, group 2=content of
        // a single-backtick span
        return preg_replace_callback(
            '/`` (.+?) ``|(?<!\\\\)`(.+?)(?!\\\\)`/',
            function($expr) use($self) {
                // Group 2 exists only when the single-backtick branch
                // matched; testing emptiness of group 1 instead would
                // lose a double-backtick span whose content is "0"
                return '<code>'.
                    $self->esc(isset($expr[2])?$expr[2]:$expr[1]).
                    '</code>';
            },
            $str
        );
    }

    /**
    *   Convert characters to HTML entities; existing entities are not
    *   encoded a second time. Kept public because the closures in this
    *   class call it through a captured instance
    *   @return string
    *   @param $str string
    **/
    function esc($str) {
        if (!$this->special)
            // Named entities rather than literal characters: they do not
            // depend on the output encoding, and their leading "&" counts
            // as punctuation for the emphasis look-ahead in _text()
            $this->special=array(
                '...'=>'&hellip;',
                '(tm)'=>'&trade;',
                '(r)'=>'&reg;',
                '(c)'=>'&copy;'
            );
        // Case-insensitive, applied in declaration order
        $str=str_ireplace(array_keys($this->special),
            array_values($this->special),$str);
        return htmlspecialchars($str,ENT_COMPAT,
            Base::instance()->get('ENCODING'),FALSE);
    }

    /**
    *   Reduce multiple line feeds
    *   @return string
    *   @param $str string
    **/
    protected function snip($str) {
        return preg_replace('/(?:(?<=\n)\n+)|\n+$/',"\n",$str);
    }

    /**
    *   Scan line for convertible spans; handlers run in the order
    *   image, anchor, emphasis, automatic link, code. Kept public
    *   because the closures in this class call it through a captured
    *   instance
    *   @return string
    *   @param $str string
    **/
    function scan($str) {
        foreach (array('img','a','text','auto','code') as $func)
            $str=$this->{'_'.$func}($str);
        return $str;
    }

    /**
    *   Assemble blocks
    *   @return string
    *   @param $str string
    **/
    protected function build($str) {
        if (!$this->blocks) {
            // Regexes for capturing entire blocks; tried in this order,
            // each key names the handler method (prefixed with "_")
            $this->blocks=array(
                'blockquote'=>'/^(?:\h?>\h?.*?(?:\n+|$))+/',
                'pre'=>'/^(?:(?: {4}|\t).+?(?:\n+|$))+/',
                'fence'=>'/^`{3}\h*(\w+)?.*?[^\n]*\n+(.+?)`{3}[^\n]*'.
                    '(?:\n+|$)/s',
                'hr'=>'/^\h*[*_-](?:\h?[\*_-]){2,}\h*(?:\n+|$)/',
                'atx'=>'/^\h*(#{1,6})\h?(.+?)\h*(?:#.*)?(?:\n+|$)/',
                'setext'=>'/^\h*(.+?)\h*\n([=-])+\h*(?:\n+|$)/',
                'li'=>'/^(?:(?:[*+-]|\d+\.)\h.+?(?:\n+|$)'.
                    '(?:(?: {4}|\t)+.+?(?:\n+|$))*)+/s',
                'raw'=>'/^((?:<!--.+?-->|'.
                    '<(address|article|aside|audio|blockquote|canvas|dd|'.
                    'div|dl|fieldset|figcaption|figure|footer|form|h\d|'.
                    'header|hgroup|hr|noscript|object|ol|output|p|pre|'.
                    'section|table|tfoot|ul|video).*?'.
                    '(?:\/>|>(?:(?>[^><]+)|(?R))*<\/\2>))'.
                    '\h*(?:\n{2,}|\n*$)|<[\?%].+?[\?%]>\h*(?:\n?$|\n*))/s',
                'p'=>'/^(.+?(?:\n{2,}|\n*$))/s'
            );
        }
        $self=$this;
        // Treat lines with nothing but whitespaces as empty lines
        $str=preg_replace('/\n\h+(?=\n)/',"\n",$str);
        // Initialize block parser
        $len=strlen($str);
        $ptr=0;
        $dst='';
        // Main loop
        while ($ptr<$len) {
            $rest=substr($str,$ptr);
            if (preg_match('/^ {0,3}\[([^\[\]]+)\]:\s*<?(.*?)>?\s*'.
                '(?:"([^\n]*)")?(?:\n+|$)/s',$rest,$match)) {
                // Reference-style link definition ($match[1]=label,
                // $match[2]=URL, $match[3]=title); Backtrack over the
                // output produced so far and resolve its usages
                $ptr+=strlen($match[0]);
                // Let any blank in the label match any whitespace, so a
                // label broken across lines is still found
                $ref=preg_replace('/\h/','\s',preg_quote($match[1],'/'));
                $tmp='';
                while ($dst!==$tmp) {
                    $tmp=$dst;
                    // Groups: 1=label of "[label][]", 2="!" of an image,
                    // 3=explicit text, 4=label of "[text][label]" or of
                    // a bare "[label]"
                    $dst=preg_replace_callback(
                        '/(?<!\\\\)\[('.$ref.')(?!\\\\)\]\s*\[\]|'.
                        '(!?)(?:\[([^\[\]]+)\]\s*)?'.
                        '(?<!\\\\)\[('.$ref.')(?!\\\\)\]/',
                        function($expr) use($match,$self) {
                            $title=empty($match[3])?
                                '':
                                (' title="'.$self->esc($match[3]).'"');
                            if (empty($expr[2])) {
                                // Anchor; explicit text wins over label
                                if (!empty($expr[3]))
                                    $text=$expr[3];
                                elseif (!empty($expr[1]))
                                    $text=$expr[1];
                                else
                                    $text=$expr[4];
                                return '<a href="'.
                                    $self->esc($match[2]).'"'.$title.'>'.
                                    $self->scan($text).'</a>';
                            }
                            // Image; URL escaped just like the href above
                            return '<img src="'.$self->esc($match[2]).'"'.
                                ' alt="'.$self->esc($expr[3]).'"'.
                                $title.' />';
                        },
                        $tmp
                    );
                }
                continue;
            }
            $hit=FALSE;
            foreach ($this->blocks as $func=>$regex)
                if (preg_match($regex,$rest,$match)) {
                    $ptr+=strlen($match[0]);
                    // Handlers receive the capture groups, or the whole
                    // match when the rule has none
                    $dst.=call_user_func_array(
                        array($this,'_'.$func),
                        count($match)>1?array_slice($match,1):$match
                    );
                    $hit=TRUE;
                    break;
                }
            if (!$hit) {
                // No rule matched, e.g. preg_match() failed on a PCRE
                // limit; emit the remainder escaped rather than looping
                // forever on the same offset
                $dst.=$this->esc($rest);
                break;
            }
        }
        return $dst;
    }

    /**
    *   Render HTML equivalent of markdown
    *   @return string
    *   @param $txt string
    **/
    function convert($txt) {
        // Normalize line endings, then assemble the blocks
        $html=$this->build(preg_replace('/\r\n|\r/',"\n",$txt));
        // Drop the backslash of escaped characters, leaving code
        // elements, tags, parenthesized and double-quoted text alone
        $html=preg_replace_callback(
            '/(<code.*?>.+?<\/code>|'.
            '<[^>\n]+>|\([^\n\)]+\)|"[^"\n]+")|'.
            '\\\\(.)/s',
            function($expr) {
                // Process escaped characters
                return empty($expr[1])?$expr[2]:$expr[1];
            },
            $html
        );
        return $this->snip($html);
    }

}