Просмотр файла news/libraries/markdown.php

Размер файла: 13.54Kb
  1. <?php
  2.  
  3. /*
  4. Copyright (c) 2009-2014 F3::Factory/Bong Cosca, All rights reserved.
  5.  
  6. This file is part of the Fat-Free Framework (http://fatfree.sf.net).
  7.  
  8. THE SOFTWARE AND DOCUMENTATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF
  9. ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
  10. IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
  11. PURPOSE.
  12.  
  13. Please see the license.txt file for more information.
  14. */
  15.  
  16. //! Markdown-to-HTML converter
  17. class Markdown extends Prefab {
  18.  
  19. protected
  20. //! Parsing rules
  21. $blocks,
  22. //! Special characters
  23. $special;
  24.  
  /**
  *   Render a blockquote: drop the ">" marker (and one optional horizontal
  *   space on either side of it) from the start of every line, then parse
  *   what is left as a nested markdown document
  *   @return string
  *   @param $str string
  **/
  protected function _blockquote($str) {
      $inner=preg_replace('/(?<=^|\n)\h?>\h?(.*?(?:\n+|$))/','\1',$str);
      // Nothing left once the markers are gone; emit no element at all
      if (!strlen($inner))
          return '';
      return '<blockquote>'.$this->build($inner).'</blockquote>'."\n\n";
  }
  35.  
  /**
  *   Render a code block introduced by four spaces or a tab: the text is
  *   entity-encoded, the indent is removed from every line and the result
  *   is wrapped in <pre><code>
  *   @return string
  *   @param $str string
  **/
  protected function _pre($str) {
      $encoded=$this->esc($str);
      $code=preg_replace('/(?<=^|\n)(?: {4}|\t)(.+?(?:\n+|$))/','\1',
          $encoded);
      if (!strlen($code))
          return '';
      // Collapse surplus line feeds, then run the encoder once more
      $code=$this->esc($this->snip($code));
      return '<pre><code>'.$code.'</code></pre>'."\n\n";
  }
  50.  
  /**
  *   Render a fenced (```) code block inside <pre>. When the framework
  *   variable HIGHLIGHT is truthy, the language hint selects a highlighter
  *   (php, apache, html, ini); any other hint, or HIGHLIGHT being off,
  *   yields the entity-encoded text wrapped in <code>. Every fragment
  *   copied from the input into the highlighted markup is entity-encoded
  *   unless its pattern restricts it to word characters or whitespace
  *   @return string
  *   @param $hint string
  *   @param $str string
  **/
  protected function _fence($hint,$str) {
      $str=$this->snip($str);
      $fw=Base::instance();
      if ($fw->get('HIGHLIGHT')) {
          switch (strtolower($hint)) {
              case 'php':
                  // NOTE(review): no <code> wrapper is added here, so
                  // Base::highlight() presumably returns one - confirm
                  $str=$fw->highlight($str);
                  break;
              case 'apache':
                  // Groups: 1 indent, 2-5 "<"/"</", section name, arguments
                  // and ">", 6-7 directive and its data, 8 line ending
                  preg_match_all('/(?<=^|\n)(\h*)'.
                      '(?:(<\/?)(\w+)((?:\h+[^>]+)*)(>)|'.
                      '(?:(\w+)(\h.+?)))(\h*(?:\n+|$))/',
                      $str,$matches,PREG_SET_ORDER);
                  $out='';
                  foreach ($matches as $match)
                      $out.=$match[1].
                          ($match[3]?
                              ('<span class="section">'.
                                  $this->esc($match[2]).$match[3].
                              '</span>'.
                              ($match[4]?
                                  ('<span class="data">'.
                                      $this->esc($match[4]).
                                  '</span>'):
                                  '').
                              '<span class="section">'.
                                  $this->esc($match[5]).
                              '</span>'):
                              ('<span class="directive">'.
                                  $match[6].
                              '</span>'.
                              '<span class="data">'.
                                  $this->esc($match[7]).
                              '</span>')).
                          $match[8];
                  $str='<code>'.$out.'</code>';
                  break;
              case 'html':
                  // Groups: 1 optional "/", 2 tag name, 3 attribute text,
                  // 4 optional self-closing "/", 5 any non-tag character
                  preg_match_all(
                      '/(?:(?:<(\/?)(\w+)'.
                      '((?:\h+(?:\w+\h*=\h*)?".+?"|[^>]+)*|'.
                      '\h+.+?)(\h*\/?)>)|(.+?))/s',
                      $str,$matches,PREG_SET_ORDER
                  );
                  $out='';
                  foreach ($matches as $match) {
                      if ($match[2]) {
                          $out.='<span class="xml_tag">&lt;'.
                              $match[1].$match[2].'</span>';
                          if ($match[3]) {
                              // Groups: 1 attribute name, 2 quoted value,
                              // 3 anything that is not a quoted value
                              preg_match_all(
                                  '/(?:\h+(?:(?:(\w+)\h*=\h*)?'.
                                  '(".+?")|(.+)))/',
                                  $match[3],$parts,PREG_SET_ORDER
                              );
                              foreach ($parts as $part)
                                  // Quoted values and free-form text may
                                  // hold markup characters; encode them
                                  $out.=' '.
                                      (empty($part[3])?
                                          ((empty($part[1])?
                                              '':
                                              ('<span class="xml_attr">'.
                                                  $part[1].'</span>=')).
                                          '<span class="xml_data">'.
                                              $this->esc($part[2]).
                                          '</span>'):
                                          ('<span class="xml_tag">'.
                                              $this->esc($part[3]).
                                          '</span>'));
                          }
                          $out.='<span class="xml_tag">'.
                              $match[4].'&gt;</span>';
                      }
                      else
                          $out.=$this->esc($match[5]);
                  }
                  $str='<code>'.$out.'</code>';
                  break;
              case 'ini':
                  // Groups: 1 comment, 2 section name, 3 key, 4 value,
                  // 5 line ending; an embedded PHP tag has no group
                  preg_match_all(
                      '/(?<=^|\n)(?:'.
                      '(;[^\n]*)|(?:<\?php.+?\?>?)|'.
                      '(?:\[(.+?)\])|'.
                      '(.+?)\h*=\h*'.
                      '((?:\\\\\h*\r?\n|.+?)*)'.
                      ')((?:\r?\n)+|$)/',
                      $str,$matches,PREG_SET_ORDER
                  );
                  $out='';
                  foreach ($matches as $match) {
                      $tail=isset($match[5])?$match[5]:'';
                      // Test by length, not truthiness, so that a key or
                      // value consisting of "0" is still rendered
                      if (strlen($match[1]))
                          $out.='<span class="comment">'.
                              $this->esc($match[1]).'</span>';
                      elseif (strlen($match[2]))
                          $out.='<span class="ini_section">['.
                              $this->esc($match[2]).']</span>';
                      elseif (strlen($match[3]))
                          $out.='<span class="ini_key">'.
                              $this->esc($match[3]).'</span>='.
                              (strlen($match[4])?
                                  ('<span class="ini_value">'.
                                      $this->esc($match[4]).'</span>'):
                                  '');
                      else
                          // Embedded PHP tag: the whole match already ends
                          // with the line ending, so cut it off here and
                          // let the shared append below add it only once
                          $out.=$this->esc(substr($match[0],0,
                              strlen($match[0])-strlen($tail)));
                      $out.=$tail;
                  }
                  $str='<code>'.$out.'</code>';
                  break;
              default:
                  $str='<code>'.$this->esc($str).'</code>';
                  break;
          }
      }
      else
          $str='<code>'.$this->esc($str).'</code>';
      return '<pre>'.$str.'</pre>'."\n\n";
  }
  172.  
  /**
  *   Render a horizontal rule; the element is always the same, followed
  *   by a blank line
  *   @return string
  **/
  protected function _hr() {
      return "<hr />\n\n";
  }
  180.  
  /**
  *   Render an atx-style ("# Title") heading. The heading level is the
  *   length of the marker string; the id attribute is the slug that
  *   Web::slug() produces for the raw heading text
  *   @return string
  *   @param $type string
  *   @param $str string
  **/
  protected function _atx($type,$str) {
      $tag='h'.strlen($type);
      $anchor=Web::instance()->slug($str);
      $label=$this->scan($str);
      return '<'.$tag.' id="'.$anchor.'">'.$label.'</'.$tag.'>'."\n\n";
  }
  192.  
  /**
  *   Render a setext-style (underlined) heading: "=" gives level 1 and
  *   "-" gives level 2. The id attribute is the slug that Web::slug()
  *   produces for the raw heading text
  *   @return string
  *   @param $str string
  *   @param $type string
  **/
  protected function _setext($str,$type) {
      // Position of the underline character within "=-", made 1-based
      $tag='h'.(strpos('=-',$type)+1);
      $anchor=Web::instance()->slug($str);
      $label=$this->scan($str);
      return '<'.$tag.' id="'.$anchor.'">'.$label.'</'.$tag.'>'."\n\n";
  }
  204.  
  /**
  *   Render an ordered or unordered list. Items are consumed one at a
  *   time; the first item decides the list type (ol for a numeric marker,
  *   ul otherwise) and whether the list is tight or loose. A horizontal
  *   rule found between items closes the list, and everything after the
  *   rule is handed back to the block parser
  *   @return string
  *   @param $str string
  **/
  protected function _li($str) {
      $total=strlen($str);
      $offset=0;
      $items='';
      $tag='ul';
      $compact=TRUE;
      $seen=FALSE;
      while ($offset<$total) {
          $rest=substr($str,$offset);
          if (preg_match('/^\h*[*-](?:\h?[*-]){2,}(?:\n+|$)/',
              $rest,$hit)) {
              // Horizontal rule embedded in the list
              $offset+=strlen($hit[0]);
              $list=strlen($items)?
                  ('<'.$tag.'>'."\n".$items.'</'.$tag.'>'."\n\n"):'';
              return $list.'<hr />'."\n\n".
                  $this->build(substr($str,$offset));
          }
          elseif (preg_match('/(?<=^|\n)([*+-]|\d+\.)\h'.
              '(.+?(?:\n+|$))((?:(?: {4}|\t)+.+?(?:\n+|$))*)/s',
              $rest,$hit)) {
              $marker=$hit[1];
              $head=$hit[2];
              // Continuation lines, with one level of indent removed
              $body=preg_replace('/(?<=^|\n)(?: {4}|\t)/','',$hit[3]);
              // Do the continuation lines start a block of their own?
              // The last rule (paragraph) is left out of the check
              $nested=FALSE;
              foreach (array_slice($this->blocks,0,-1) as $regex)
                  if (preg_match($regex,$body)) {
                      $nested=TRUE;
                      break;
                  }
              if (!$seen) {
                  // The first item fixes the list type and spacing
                  if (is_numeric($marker))
                      $tag='ol';
                  if (preg_match('/\n{2,}$/',$head.($nested?'':$body)))
                      // Blank line after the item; use loose rendering
                      $compact=FALSE;
                  $seen=TRUE;
              }
              $offset+=strlen($hit[0]);
              if (!$compact)
                  $html=$this->build($this->snip($head.$body));
              elseif ($nested)
                  $html=$head.$this->build($this->snip($body));
              else
                  $html=$this->snip($head.$body);
              $items.='<li>'.$this->scan(trim($html)).'</li>'."\n";
          }
      }
      return strlen($items)?
          ('<'.$tag.'>'."\n".$items.'</'.$tag.'>'."\n\n"):'';
  }
  264.  
  /**
  *   Handler for text matched by the raw-HTML block rule: the text is
  *   handed back exactly as received, with no encoding or span processing
  *   @return string
  *   @param $str string
  **/
  protected function _raw($str) {
      return $str;
  }
  273.  
  /**
  *   Render a paragraph. If a later line of the text starts with ">" or
  *   "#", the paragraph ends there and the remainder is handed back to
  *   the block parser. Plain text is entity-encoded, while HTML tags,
  *   <? ?> / <% %> sections and inline [text](url) links are passed
  *   through unencoded so that the span parsers can still recognise them
  *   @return string
  *   @param $str string
  **/
  protected function _p($str) {
      $str=trim($str);
      if (!strlen($str))
          return '';
      if (preg_match('/^(.+?\n)([>#].+)$/s',$str,$parts))
          return $this->_p($parts[1]).$this->build($parts[2]);
      $self=$this;
      $str=preg_replace_callback(
          '/([^<>\[]+)?(<[\?%].+?[\?%]>|<.+?>|\[.+?\]\s*\(.+?\))|'.
          '(.+)/s',
          function($expr) use($self) {
              // Group 3 is the catch-all alternative; it is present only
              // when no tag or link was found in the rest of the text
              if (isset($expr[3]))
                  return $self->esc($expr[3]);
              // Otherwise group 1 is the text in front of the tag/link
              // (possibly empty) and group 2 the tag/link, kept verbatim
              return $self->esc($expr[1]).$expr[2];
          },
          $str
      );
      return '<p>'.$this->scan($str).'</p>'."\n\n";
  }
  307.  
  /**
  *   Convert emphasis spans: ~~text~~ becomes <del>, and text wrapped in
  *   one, two or three "*" or "_" becomes <em>, <strong> or
  *   <strong><em>. A closing marker only counts when it is followed by
  *   whitespace, punctuation or the end of the string. Both passes are
  *   repeated until the text no longer changes
  *   @return string
  *   @param $str string
  **/
  protected function _text($str) {
      $emphasize=function($expr) {
          $inner=$expr[2];
          $size=strlen($expr[1]);
          if ($size==1)
              return '<em>'.$inner.'</em>';
          if ($size==2)
              return '<strong>'.$inner.'</strong>';
          if ($size==3)
              return '<strong><em>'.$inner.'</em></strong>';
      };
      $prev='';
      while ($str!=$prev) {
          $prev=$str;
          // Strikethrough first, then bold/italic on the result
          $struck=preg_replace(
              '/(?<!\\\\)~~(.*?)(?!\\\\)~~(?=[\s[:punct:]]|$)/',
              '<del>\1</del>',
              $prev
          );
          $str=preg_replace_callback(
              '/(?<!\\\\)([*_]{1,3})(.*?)(?!\\\\)\1(?=[\s[:punct:]]|$)/',
              $emphasize,
              $struck
          );
      }
      return $str;
  }
  336.  
  /**
  *   Convert inline images of the form ![alt](url "title") to <img>
  *   elements. The alt and title attributes are emitted only when they
  *   are non-empty. The URL is entity-encoded before it is written into
  *   the src attribute, so a quote or angle bracket inside it cannot
  *   terminate the attribute and inject markup
  *   @return string
  *   @param $str string
  **/
  protected function _img($str) {
      $self=$this;
      $charset=Base::instance()->get('ENCODING');
      return preg_replace_callback(
          '/!(?:\[(.+?)\])?\h*\(<?(.*?)>?(?:\h*"(.*?)"\h*)?\)/',
          function($expr) use($self,$charset) {
              // htmlspecialchars() is called directly instead of esc():
              // esc() also rewrites "...", "(c)" and similar sequences,
              // which would alter the address. Existing entities are
              // left alone (double_encode off), as esc() does
              $src=htmlspecialchars($expr[2],ENT_COMPAT,$charset,FALSE);
              return '<img src="'.$src.'"'.
                  (empty($expr[1])?
                      '':
                      (' alt="'.$self->esc($expr[1]).'"')).
                  (empty($expr[3])?
                      '':
                      (' title="'.$self->esc($expr[3]).'"')).' />';
          },
          $str
      );
  }
  358.  
  /**
  *   Convert inline links of the form [text](url "title") to anchors.
  *   The URL and the optional title are entity-encoded; the link text is
  *   run through the span parsers again. Brackets preceded by a backslash
  *   do not open a link
  *   @return string
  *   @param $str string
  **/
  protected function _a($str) {
      $md=$this;
      $render=function($expr) use($md) {
          $attrs=' href="'.$md->esc($expr[2]).'"';
          if (!empty($expr[3]))
              $attrs.=' title="'.$md->esc($expr[3]).'"';
          return '<a'.$attrs.'>'.$md->scan($expr[1]).'</a>';
      };
      return preg_replace_callback(
          '/(?<!\\\\)\[(.+?)(?!\\\\)\]\h*\(<?(.*?)>?(?:\h*"(.*?)"\h*)?\)/',
          $render,
          $str
      );
  }
  378.  
  /**
  *   Turn <url> into a link when the bracketed text has a URL scheme
  *   according to parse_url(). Angle brackets found between backticks,
  *   and bracketed text without a scheme, are returned unchanged
  *   @return string
  *   @param $str string
  **/
  protected function _auto($str) {
      $md=$this;
      $linkify=function($expr) use($md) {
          // Group 1 is filled only by the backtick alternative
          if (!empty($expr[1]) || !parse_url($expr[2],PHP_URL_SCHEME))
              return $expr[0];
          $url=$md->esc($expr[2]);
          return '<a href="'.$url.'">'.$url.'</a>';
      };
      return preg_replace_callback(
          '/`.*?<(.+?)>.*?`|<(.+?)>/',
          $linkify,
          $str
      );
  }
  398.  
  /**
  *   Convert code spans to <code> elements with entity-encoded content.
  *   Two forms are recognised: `` text `` (double backticks padded with a
  *   space) and `text` (single backticks not preceded by a backslash)
  *   @return string
  *   @param $str string
  **/
  protected function _code($str) {
      $md=$this;
      $wrap=function($expr) use($md) {
          // Group 1 holds the double-backtick form, group 2 the single
          if (empty($expr[1]))
              $code=$expr[2];
          else
              $code=$expr[1];
          return '<code>'.$md->esc($code).'</code>';
      };
      return preg_replace_callback(
          '/`` (.+?) ``|(?<!\\\\)`(.+?)(?!\\\\)`/',
          $wrap,
          $str
      );
  }
  415.  
  /**
  *   Entity-encode a string. The sequences "...", "(tm)", "(r)" and "(c)"
  *   are first replaced, case-insensitively, by their named entities; the
  *   result is then passed through htmlspecialchars() using the
  *   framework's ENCODING setting, leaving existing entities untouched
  *   @return string
  *   @param $str string
  **/
  function esc($str) {
      if (!$this->special)
          // Replacement table is created on first use
          $this->special=array(
              '...'=>'&hellip;',
              '(tm)'=>'&trade;',
              '(r)'=>'&reg;',
              '(c)'=>'&copy;'
          );
      foreach ($this->special as $token=>$entity) {
          $pattern='/'.preg_quote($token,'/').'/i';
          $str=preg_replace($pattern,$entity,$str);
      }
      $charset=Base::instance()->get('ENCODING');
      return htmlspecialchars($str,ENT_COMPAT,$charset,FALSE);
  }
  434.  
  /**
  *   Trim surplus line feeds: line feeds that follow another line feed
  *   are replaced by a single one (so a run of three or more shrinks to
  *   two), and a run of line feeds at the very end of the string is
  *   replaced by exactly one
  *   @return string
  *   @param $str string
  **/
  protected function snip($str) {
      $surplus='/(?:(?<=\n)\n+)|\n+$/';
      return preg_replace($surplus,"\n",$str);
  }
  443.  
  /**
  *   Run every span-level converter over a piece of text, in a fixed
  *   order: images, links, emphasis, automatic links, code spans
  *   @return string
  *   @param $str string
  **/
  function scan($str) {
      $str=$this->_img($str);
      $str=$this->_a($str);
      $str=$this->_text($str);
      $str=$this->_auto($str);
      $str=$this->_code($str);
      return $str;
  }
  455.  
  /**
  *   Block-level parser. Starting at the beginning of the text, the first
  *   rule in the table that matches at the current position consumes its
  *   match and the corresponding "_"-prefixed handler renders it. A
  *   reference definition ("[id]: url "title"") produces no output of its
  *   own; instead, references to that id in the output generated so far
  *   are replaced by anchors or images. URLs written into src are
  *   entity-encoded, as those written into href already are
  *   @return string
  *   @param $str string
  **/
  protected function build($str) {
      if (!$this->blocks) {
          // Regexes for capturing entire blocks; order matters, and the
          // paragraph rule comes last as the fallback
          $this->blocks=array(
              'blockquote'=>'/^(?:\h?>\h?.*?(?:\n+|$))+/',
              'pre'=>'/^(?:(?: {4}|\t).+?(?:\n+|$))+/',
              'fence'=>'/^`{3}\h*(\w+)?.*?[^\n]*\n+(.+?)`{3}[^\n]*'.
                  '(?:\n+|$)/s',
              'hr'=>'/^\h*[*_-](?:\h?[\*_-]){2,}\h*(?:\n+|$)/',
              'atx'=>'/^\h*(#{1,6})\h?(.+?)\h*(?:#.*)?(?:\n+|$)/',
              'setext'=>'/^\h*(.+?)\h*\n([=-])+\h*(?:\n+|$)/',
              'li'=>'/^(?:(?:[*+-]|\d+\.)\h.+?(?:\n+|$)'.
                  '(?:(?: {4}|\t)+.+?(?:\n+|$))*)+/s',
              'raw'=>'/^((?:<!--.+?-->|'.
                  '<(address|article|aside|audio|blockquote|canvas|dd|'.
                  'div|dl|fieldset|figcaption|figure|footer|form|h\d|'.
                  'header|hgroup|hr|noscript|object|ol|output|p|pre|'.
                  'section|table|tfoot|ul|video).*?'.
                  '(?:\/>|>(?:(?>[^><]+)|(?R))*<\/\2>))'.
                  '\h*(?:\n{2,}|\n*$)|<[\?%].+?[\?%]>\h*(?:\n?$|\n*))/s',
              'p'=>'/^(.+?(?:\n{2,}|\n*$))/s'
          );
      }
      $self=$this;
      $charset=Base::instance()->get('ENCODING');
      // Treat lines with nothing but whitespaces as empty lines
      $str=preg_replace('/\n\h+(?=\n)/',"\n",$str);
      // Initialize block parser
      $len=strlen($str);
      $ptr=0;
      $dst='';
      // Main loop
      while ($ptr<$len) {
          if (preg_match('/^ {0,3}\[([^\[\]]+)\]:\s*<?(.*?)>?\s*'.
              '(?:"([^\n]*)")?(?:\n+|$)/s',substr($str,$ptr),$match)) {
              // Reference definition: 1 id, 2 URL, 3 optional title.
              // Go back over the output produced so far
              $ptr+=strlen($match[0]);
              $tmp='';
              // Let any whitespace (line breaks included) stand in for
              // horizontal whitespace inside the id
              $ref=preg_replace('/\h/','\s',preg_quote($match[1],'/'));
              // Repeat until a pass changes nothing
              while ($dst!=$tmp) {
                  $dst=preg_replace_callback(
                      '/(?<!\\\\)\[('.$ref.')(?!\\\\)\]\s*\[\]|'.
                      '(!?)(?:\[([^\[\]]+)\]\s*)?'.
                      '(?<!\\\\)\[('.$ref.')(?!\\\\)\]/',
                      function($expr) use($match,$self,$charset) {
                          // Groups: 1 id in "[id][]", 2 "!" for images,
                          // 3 text in "[text][id]", 4 id otherwise
                          if (empty($expr[2]))
                              // Anchor
                              return '<a href="'.
                                  $self->esc($match[2]).'"'.
                                  (empty($match[3])?
                                      '':
                                      (' title="'.
                                          $self->esc($match[3]).'"')).
                                  '>'.
                                  // Link text: explicit text first,
                                  // else the id itself
                                  $self->scan(
                                      empty($expr[3])?
                                          (empty($expr[1])?
                                              $expr[4]:
                                              $expr[1]):
                                          $expr[3]
                                  ).'</a>';
                          // Image; encode the address without esc(),
                          // which would also rewrite "...", "(c)" etc.
                          return '<img src="'.
                              htmlspecialchars($match[2],ENT_COMPAT,
                                  $charset,FALSE).'"'.
                              // alt is always written, possibly empty
                              ' alt="'.$self->esc($expr[3]).'"'.
                              (empty($match[3])?
                                  '':
                                  (' title="'.
                                      $self->esc($match[3]).'"')).
                              ' />';
                      },
                      $tmp=$dst
                  );
              }
          }
          else
              foreach ($this->blocks as $func=>$regex)
                  if (preg_match($regex,substr($str,$ptr),$match)) {
                      $ptr+=strlen($match[0]);
                      // Handlers get the capture groups as arguments,
                      // or the whole match when the rule has none
                      $dst.=call_user_func_array(
                          array($this,'_'.$func),
                          count($match)>1?array_slice($match,1):$match
                      );
                      break;
                  }
      }
      return $dst;
  }
  550.  
  /**
  *   Convert a markdown document to HTML. Line endings are normalised to
  *   line feeds, the block parser is run, and backslash escapes are then
  *   resolved: a backslash followed by a character becomes that character,
  *   except inside <code> elements, tags, parentheses and double-quoted
  *   strings, which are copied through unchanged. Surplus line feeds are
  *   trimmed from the final result
  *   @return string
  *   @param $txt string
  **/
  function convert($txt) {
      $normalized=preg_replace('/\r\n|\r/',"\n",$txt);
      $html=$this->build($normalized);
      $html=preg_replace_callback(
          '/(<code.*?>.+?<\/code>|'.
          '<[^>\n]+>|\([^\n\)]+\)|"[^"\n]+")|'.
          '\\\\(.)/s',
          function($expr) {
              // Group 1: protected span; group 2: the escaped character
              if (empty($expr[1]))
                  return $expr[2];
              return $expr[1];
          },
          $html
      );
      return $this->snip($html);
  }
  569.  
  570. }