markdown.php (119729B)
1 <?php 2 // Markdown extension, https://github.com/annaesvensson/yellow-markdown 3 4 class YellowMarkdown { 5 const VERSION = "0.9.8"; 6 public $yellow; // access to API 7 8 // Handle initialisation 9 public function onLoad($yellow) { 10 $this->yellow = $yellow; 11 } 12 13 // Handle page content in raw format 14 public function onParseContentRaw($page, $text) { 15 $markdown = new YellowMarkdownParser($this->yellow, $page); 16 $text = $markdown->transform($text); 17 $text = $this->yellow->lookup->normaliseData($text, "html"); 18 return $text; 19 } 20 } 21 22 // PHP Markdown Lib 23 // Copyright (c) 2004-2021 Michel Fortin 24 // <https://michelf.ca/> 25 // All rights reserved. 26 // 27 // Original Markdown 28 // Copyright (c) 2004-2006 John Gruber 29 // <https://daringfireball.net/> 30 // All rights reserved. 31 // 32 // Redistribution and use in source and binary forms, with or without 33 // modification, are permitted provided that the following conditions are 34 // met: 35 // 36 // * Redistributions of source code must retain the above copyright notice, 37 // this list of conditions and the following disclaimer. 38 // 39 // * Redistributions in binary form must reproduce the above copyright 40 // notice, this list of conditions and the following disclaimer in the 41 // documentation and/or other materials provided with the distribution. 42 // 43 // * Neither the name "Markdown" nor the names of its contributors may 44 // be used to endorse or promote products derived from this software 45 // without specific prior written permission. 46 // 47 // This software is provided by the copyright holders and contributors "as 48 // is" and any express or implied warranties, including, but not limited 49 // to, the implied warranties of merchantability and fitness for a 50 // particular purpose are disclaimed. In no event shall the copyright owner 51 // or contributors be liable for any direct, indirect, incidental, special, 52 // exemplary, or consequential damages (including, but not limited to, 53 // procurement of substitute goods or services; loss of use, data, or 54 // profits; or business interruption) however caused and on any theory of 55 // liability, whether in contract, strict liability, or tort (including 56 // negligence or otherwise) arising in any way out of the use of this 57 // software, even if advised of the possibility of such damage. 58 59 class MarkdownParser { 60 /** 61 * Define the package version 62 * @var string 63 */ 64 const MARKDOWNLIB_VERSION = "1.9.1"; 65 66 /** 67 * Simple function interface - Initialize the parser and return the result 68 * of its transform method. This will work fine for derived classes too. 69 * 70 * @api 71 * 72 * @param string $text 73 * @return string 74 */ 75 public static function defaultTransform($text) { 76 // Take parser class on which this function was called. 77 $parser_class = \get_called_class(); 78 79 // Try to take parser from the static parser list 80 static $parser_list; 81 $parser =& $parser_list[$parser_class]; 82 83 // Create the parser it not already set 84 if (!$parser) { 85 $parser = new $parser_class; 86 } 87 88 // Transform text using parser. 89 return $parser->transform($text); 90 } 91 92 /** 93 * Configuration variables 94 */ 95 96 /** 97 * Change to ">" for HTML output. 98 * @var string 99 */ 100 public $empty_element_suffix = " />"; 101 102 /** 103 * The width of indentation of the output markup 104 * @var int 105 */ 106 public $tab_width = 4; 107 108 /** 109 * Change to `true` to disallow markup or entities. 110 * @var boolean 111 */ 112 public $no_markup = false; 113 public $no_entities = false; 114 115 116 /** 117 * Change to `true` to enable line breaks on \n without two trailling spaces 118 * @var boolean 119 */ 120 public $hard_wrap = false; 121 122 /** 123 * Predefined URLs and titles for reference links and images. 124 * @var array 125 */ 126 public $predef_urls = array(); 127 public $predef_titles = array(); 128 129 /** 130 * Optional filter function for URLs 131 * @var callable|null 132 */ 133 public $url_filter_func = null; 134 135 /** 136 * Optional header id="" generation callback function. 137 * @var callable|null 138 */ 139 public $header_id_func = null; 140 141 /** 142 * Optional function for converting code block content to HTML 143 * @var callable|null 144 */ 145 public $code_block_content_func = null; 146 147 /** 148 * Optional function for converting code span content to HTML. 149 * @var callable|null 150 */ 151 public $code_span_content_func = null; 152 153 /** 154 * Class attribute to toggle "enhanced ordered list" behaviour 155 * setting this to true will allow ordered lists to start from the index 156 * number that is defined first. 157 * 158 * For example: 159 * 2. List item two 160 * 3. List item three 161 * 162 * Becomes: 163 * <ol start="2"> 164 * <li>List item two</li> 165 * <li>List item three</li> 166 * </ol> 167 * 168 * @var bool 169 */ 170 public $enhanced_ordered_list = false; 171 172 /** 173 * Parser implementation 174 */ 175 176 /** 177 * Regex to match balanced [brackets]. 178 * Needed to insert a maximum bracked depth while converting to PHP. 179 * @var int 180 */ 181 protected $nested_brackets_depth = 6; 182 protected $nested_brackets_re; 183 184 protected $nested_url_parenthesis_depth = 4; 185 protected $nested_url_parenthesis_re; 186 187 /** 188 * Table of hash values for escaped characters: 189 * @var string 190 */ 191 protected $escape_chars = '\`*_{}[]()>#+-.!'; 192 protected $escape_chars_re; 193 194 /** 195 * Constructor function. Initialize appropriate member variables. 196 * @return void 197 */ 198 public function __construct() { 199 $this->_initDetab(); 200 $this->prepareItalicsAndBold(); 201 202 $this->nested_brackets_re = 203 str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth). 204 str_repeat('\])*', $this->nested_brackets_depth); 205 206 $this->nested_url_parenthesis_re = 207 str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth). 208 str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth); 209 210 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']'; 211 212 // Sort document, block, and span gamut in ascendent priority order. 213 asort($this->document_gamut); 214 asort($this->block_gamut); 215 asort($this->span_gamut); 216 } 217 218 219 /** 220 * Internal hashes used during transformation. 221 * @var array 222 */ 223 protected $urls = array(); 224 protected $titles = array(); 225 protected $html_hashes = array(); 226 227 /** 228 * Status flag to avoid invalid nesting. 229 * @var boolean 230 */ 231 protected $in_anchor = false; 232 233 /** 234 * Status flag to avoid invalid nesting. 235 * @var boolean 236 */ 237 protected $in_emphasis_processing = false; 238 239 /** 240 * Called before the transformation process starts to setup parser states. 241 * @return void 242 */ 243 protected function setup() { 244 // Clear global hashes. 245 $this->urls = $this->predef_urls; 246 $this->titles = $this->predef_titles; 247 $this->html_hashes = array(); 248 $this->in_anchor = false; 249 $this->in_emphasis_processing = false; 250 } 251 252 /** 253 * Called after the transformation process to clear any variable which may 254 * be taking up memory unnecessarly. 255 * @return void 256 */ 257 protected function teardown() { 258 $this->urls = array(); 259 $this->titles = array(); 260 $this->html_hashes = array(); 261 } 262 263 /** 264 * Main function. Performs some preprocessing on the input text and pass 265 * it through the document gamut. 266 * 267 * @api 268 * 269 * @param string $text 270 * @return string 271 */ 272 public function transform($text) { 273 $this->setup(); 274 275 # Remove UTF-8 BOM and marker character in input, if present. 276 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text); 277 278 # Standardize line endings: 279 # DOS to Unix and Mac to Unix 280 $text = preg_replace('{\r\n?}', "\n", $text); 281 282 # Make sure $text ends with a couple of newlines: 283 $text .= "\n\n"; 284 285 # Convert all tabs to spaces. 286 $text = $this->detab($text); 287 288 # Turn block-level HTML blocks into hash entries 289 $text = $this->hashHTMLBlocks($text); 290 291 # Strip any lines consisting only of spaces and tabs. 292 # This makes subsequent regexen easier to write, because we can 293 # match consecutive blank lines with /\n+/ instead of something 294 # contorted like /[ ]*\n+/ . 295 $text = preg_replace('/^[ ]+$/m', '', $text); 296 297 # Run document gamut methods. 298 foreach ($this->document_gamut as $method => $priority) { 299 $text = $this->$method($text); 300 } 301 302 $this->teardown(); 303 304 return $text . "\n"; 305 } 306 307 /** 308 * Define the document gamut 309 * @var array 310 */ 311 protected $document_gamut = array( 312 // Strip link definitions, store in hashes. 313 "stripLinkDefinitions" => 20, 314 "runBasicBlockGamut" => 30, 315 ); 316 317 /** 318 * Strips link definitions from text, stores the URLs and titles in 319 * hash references 320 * @param string $text 321 * @return string 322 */ 323 protected function stripLinkDefinitions($text) { 324 325 $less_than_tab = $this->tab_width - 1; 326 327 // Link defs are in the form: ^[id]: url "optional title" 328 $text = preg_replace_callback('{ 329 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 330 [ ]* 331 \n? # maybe *one* newline 332 [ ]* 333 (?: 334 <(.+?)> # url = $2 335 | 336 (\S+?) # url = $3 337 ) 338 [ ]* 339 \n? # maybe one newline 340 [ ]* 341 (?: 342 (?<=\s) # lookbehind for whitespace 343 ["(] 344 (.*?) # title = $4 345 [")] 346 [ ]* 347 )? # title is optional 348 (?:\n+|\Z) 349 }xm', 350 array($this, '_stripLinkDefinitions_callback'), 351 $text 352 ); 353 return $text; 354 } 355 356 /** 357 * The callback to strip link definitions 358 * @param array $matches 359 * @return string 360 */ 361 protected function _stripLinkDefinitions_callback($matches) { 362 $link_id = strtolower($matches[1]); 363 $url = $matches[2] == '' ? $matches[3] : $matches[2]; 364 $this->urls[$link_id] = $url; 365 $this->titles[$link_id] =& $matches[4]; 366 return ''; // String that will replace the block 367 } 368 369 /** 370 * Hashify HTML blocks 371 * @param string $text 372 * @return string 373 */ 374 protected function hashHTMLBlocks($text) { 375 if ($this->no_markup) { 376 return $text; 377 } 378 379 $less_than_tab = $this->tab_width - 1; 380 381 /** 382 * Hashify HTML blocks: 383 * 384 * We only want to do this for block-level HTML tags, such as headers, 385 * lists, and tables. That's because we still want to wrap <p>s around 386 * "paragraphs" that are wrapped in non-block-level tags, such as 387 * anchors, phrase emphasis, and spans. The list of tags we're looking 388 * for is hard-coded: 389 * 390 * * List "a" is made of tags which can be both inline or block-level. 391 * These will be treated block-level when the start tag is alone on 392 * its line, otherwise they're not matched here and will be taken as 393 * inline later. 394 * * List "b" is made of tags which are always block-level; 395 */ 396 $block_tags_a_re = 'ins|del'; 397 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 398 'script|noscript|style|form|fieldset|iframe|math|svg|'. 399 'article|section|nav|aside|hgroup|header|footer|'. 400 'figure|details|summary'; 401 402 // Regular expression for the content of a block tag. 403 $nested_tags_level = 4; 404 $attr = ' 405 (?> # optional tag attributes 406 \s # starts with whitespace 407 (?> 408 [^>"/]+ # text outside quotes 409 | 410 /+(?!>) # slash not followed by ">" 411 | 412 "[^"]*" # text inside double quotes (tolerate ">") 413 | 414 \'[^\']*\' # text inside single quotes (tolerate ">") 415 )* 416 )? 417 '; 418 $content = 419 str_repeat(' 420 (?> 421 [^<]+ # content without tag 422 | 423 <\2 # nested opening tag 424 '.$attr.' # attributes 425 (?> 426 /> 427 | 428 >', $nested_tags_level). // end of opening tag 429 '.*?'. // last level nested tag content 430 str_repeat(' 431 </\2\s*> # closing nested tag 432 ) 433 | 434 <(?!/\2\s*> # other tags with a different name 435 ) 436 )*', 437 $nested_tags_level); 438 $content2 = str_replace('\2', '\3', $content); 439 440 /** 441 * First, look for nested blocks, e.g.: 442 * <div> 443 * <div> 444 * tags for inner block must be indented. 445 * </div> 446 * </div> 447 * 448 * The outermost tags must start at the left margin for this to match, 449 * and the inner nested divs must be indented. 450 * We need to do this before the next, more liberal match, because the 451 * next match will start at the first `<div>` and stop at the 452 * first `</div>`. 453 */ 454 $text = preg_replace_callback('{(?> 455 (?> 456 (?<=\n) # Starting on its own line 457 | # or 458 \A\n? # the at beginning of the doc 459 ) 460 ( # save in $1 461 462 # Match from `\n<tag>` to `</tag>\n`, handling nested tags 463 # in between. 464 465 [ ]{0,'.$less_than_tab.'} 466 <('.$block_tags_b_re.')# start tag = $2 467 '.$attr.'> # attributes followed by > and \n 468 '.$content.' # content, support nesting 469 </\2> # the matching end tag 470 [ ]* # trailing spaces/tabs 471 (?=\n+|\Z) # followed by a newline or end of document 472 473 | # Special version for tags of group a. 474 475 [ ]{0,'.$less_than_tab.'} 476 <('.$block_tags_a_re.')# start tag = $3 477 '.$attr.'>[ ]*\n # attributes followed by > 478 '.$content2.' # content, support nesting 479 </\3> # the matching end tag 480 [ ]* # trailing spaces/tabs 481 (?=\n+|\Z) # followed by a newline or end of document 482 483 | # Special case just for <hr />. It was easier to make a special 484 # case than to make the other regex more complicated. 485 486 [ ]{0,'.$less_than_tab.'} 487 <(hr) # start tag = $2 488 '.$attr.' # attributes 489 /?> # the matching end tag 490 [ ]* 491 (?=\n{2,}|\Z) # followed by a blank line or end of document 492 493 | # Special case for standalone HTML comments: 494 495 [ ]{0,'.$less_than_tab.'} 496 (?s: 497 <!-- .*? --> 498 ) 499 [ ]* 500 (?=\n{2,}|\Z) # followed by a blank line or end of document 501 502 | # PHP and ASP-style processor instructions (<? and <%) 503 504 [ ]{0,'.$less_than_tab.'} 505 (?s: 506 <([?%]) # $2 507 .*? 508 \2> 509 ) 510 [ ]* 511 (?=\n{2,}|\Z) # followed by a blank line or end of document 512 513 ) 514 )}Sxmi', 515 array($this, '_hashHTMLBlocks_callback'), 516 $text 517 ); 518 519 return $text; 520 } 521 522 /** 523 * The callback for hashing HTML blocks 524 * @param string $matches 525 * @return string 526 */ 527 protected function _hashHTMLBlocks_callback($matches) { 528 $text = $matches[1]; 529 $key = $this->hashBlock($text); 530 return "\n\n$key\n\n"; 531 } 532 533 /** 534 * Called whenever a tag must be hashed when a function insert an atomic 535 * element in the text stream. Passing $text to through this function gives 536 * a unique text-token which will be reverted back when calling unhash. 537 * 538 * The $boundary argument specify what character should be used to surround 539 * the token. By convension, "B" is used for block elements that needs not 540 * to be wrapped into paragraph tags at the end, ":" is used for elements 541 * that are word separators and "X" is used in the general case. 542 * 543 * @param string $text 544 * @param string $boundary 545 * @return string 546 */ 547 protected function hashPart($text, $boundary = 'X') { 548 // Swap back any tag hash found in $text so we do not have to `unhash` 549 // multiple times at the end. 550 $text = $this->unhash($text); 551 552 // Then hash the block. 553 static $i = 0; 554 $key = "$boundary\x1A" . ++$i . $boundary; 555 $this->html_hashes[$key] = $text; 556 return $key; // String that will replace the tag. 557 } 558 559 /** 560 * Shortcut function for hashPart with block-level boundaries. 561 * @param string $text 562 * @return string 563 */ 564 protected function hashBlock($text) { 565 return $this->hashPart($text, 'B'); 566 } 567 568 /** 569 * Define the block gamut - these are all the transformations that form 570 * block-level tags like paragraphs, headers, and list items. 571 * @var array 572 */ 573 protected $block_gamut = array( 574 "doHeaders" => 10, 575 "doHorizontalRules" => 20, 576 "doLists" => 40, 577 "doCodeBlocks" => 50, 578 "doBlockQuotes" => 60, 579 ); 580 581 /** 582 * Run block gamut tranformations. 583 * 584 * We need to escape raw HTML in Markdown source before doing anything 585 * else. This need to be done for each block, and not only at the 586 * begining in the Markdown function since hashed blocks can be part of 587 * list items and could have been indented. Indented blocks would have 588 * been seen as a code block in a previous pass of hashHTMLBlocks. 589 * 590 * @param string $text 591 * @return string 592 */ 593 protected function runBlockGamut($text) { 594 $text = $this->hashHTMLBlocks($text); 595 return $this->runBasicBlockGamut($text); 596 } 597 598 /** 599 * Run block gamut tranformations, without hashing HTML blocks. This is 600 * useful when HTML blocks are known to be already hashed, like in the first 601 * whole-document pass. 602 * 603 * @param string $text 604 * @return string 605 */ 606 protected function runBasicBlockGamut($text) { 607 608 foreach ($this->block_gamut as $method => $priority) { 609 $text = $this->$method($text); 610 } 611 612 // Finally form paragraph and restore hashed blocks. 613 $text = $this->formParagraphs($text); 614 615 return $text; 616 } 617 618 /** 619 * Convert horizontal rules 620 * @param string $text 621 * @return string 622 */ 623 protected function doHorizontalRules($text) { 624 return preg_replace( 625 '{ 626 ^[ ]{0,3} # Leading space 627 ([-*_]) # $1: First marker 628 (?> # Repeated marker group 629 [ ]{0,2} # Zero, one, or two spaces. 630 \1 # Marker character 631 ){2,} # Group repeated at least twice 632 [ ]* # Tailing spaces 633 $ # End of line. 634 }mx', 635 "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 636 $text 637 ); 638 } 639 640 /** 641 * These are all the transformations that occur *within* block-level 642 * tags like paragraphs, headers, and list items. 643 * @var array 644 */ 645 protected $span_gamut = array( 646 // Process character escapes, code spans, and inline HTML 647 // in one shot. 648 "parseSpan" => -30, 649 // Process anchor and image tags. Images must come first, 650 // because ![foo][f] looks like an anchor. 651 "doImages" => 10, 652 "doAnchors" => 20, 653 // Make links out of things like `<https://example.com/>` 654 // Must come after doAnchors, because you can use < and > 655 // delimiters in inline links like [this](<url>). 656 "doAutoLinks" => 30, 657 "encodeAmpsAndAngles" => 40, 658 "doItalicsAndBold" => 50, 659 "doHardBreaks" => 60, 660 ); 661 662 /** 663 * Run span gamut transformations 664 * @param string $text 665 * @return string 666 */ 667 protected function runSpanGamut($text) { 668 foreach ($this->span_gamut as $method => $priority) { 669 $text = $this->$method($text); 670 } 671 672 return $text; 673 } 674 675 /** 676 * Do hard breaks 677 * @param string $text 678 * @return string 679 */ 680 protected function doHardBreaks($text) { 681 if ($this->hard_wrap) { 682 return preg_replace_callback('/ *\n/', 683 array($this, '_doHardBreaks_callback'), $text); 684 } else { 685 return preg_replace_callback('/ {2,}\n/', 686 array($this, '_doHardBreaks_callback'), $text); 687 } 688 } 689 690 /** 691 * Trigger part hashing for the hard break (callback method) 692 * @param array $matches 693 * @return string 694 */ 695 protected function _doHardBreaks_callback($matches) { 696 return $this->hashPart("<br$this->empty_element_suffix\n"); 697 } 698 699 /** 700 * Turn Markdown link shortcuts into XHTML <a> tags. 701 * @param string $text 702 * @return string 703 */ 704 protected function doAnchors($text) { 705 if ($this->in_anchor) { 706 return $text; 707 } 708 $this->in_anchor = true; 709 710 // First, handle reference-style links: [link text] [id] 711 $text = preg_replace_callback('{ 712 ( # wrap whole match in $1 713 \[ 714 ('.$this->nested_brackets_re.') # link text = $2 715 \] 716 717 [ ]? # one optional space 718 (?:\n[ ]*)? # one optional newline followed by spaces 719 720 \[ 721 (.*?) # id = $3 722 \] 723 ) 724 }xs', 725 array($this, '_doAnchors_reference_callback'), $text); 726 727 // Next, inline-style links: [link text](url "optional title") 728 $text = preg_replace_callback('{ 729 ( # wrap whole match in $1 730 \[ 731 ('.$this->nested_brackets_re.') # link text = $2 732 \] 733 \( # literal paren 734 [ \n]* 735 (?: 736 <(.+?)> # href = $3 737 | 738 ('.$this->nested_url_parenthesis_re.') # href = $4 739 ) 740 [ \n]* 741 ( # $5 742 ([\'"]) # quote char = $6 743 (.*?) # Title = $7 744 \6 # matching quote 745 [ \n]* # ignore any spaces/tabs between closing quote and ) 746 )? # title is optional 747 \) 748 ) 749 }xs', 750 array($this, '_doAnchors_inline_callback'), $text); 751 752 // Last, handle reference-style shortcuts: [link text] 753 // These must come last in case you've also got [link text][1] 754 // or [link text](/foo) 755 $text = preg_replace_callback('{ 756 ( # wrap whole match in $1 757 \[ 758 ([^\[\]]+) # link text = $2; can\'t contain [ or ] 759 \] 760 ) 761 }xs', 762 array($this, '_doAnchors_reference_callback'), $text); 763 764 $this->in_anchor = false; 765 return $text; 766 } 767 768 /** 769 * Callback method to parse referenced anchors 770 * @param string $matches 771 * @return string 772 */ 773 protected function _doAnchors_reference_callback($matches) { 774 $whole_match = $matches[1]; 775 $link_text = $matches[2]; 776 $link_id =& $matches[3]; 777 778 if ($link_id == "") { 779 // for shortcut links like [this][] or [this]. 780 $link_id = $link_text; 781 } 782 783 // lower-case and turn embedded newlines into spaces 784 $link_id = strtolower($link_id); 785 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); 786 787 if (isset($this->urls[$link_id])) { 788 $url = $this->urls[$link_id]; 789 $url = $this->encodeURLAttribute($url); 790 791 $result = "<a href=\"$url\""; 792 if ( isset( $this->titles[$link_id] ) ) { 793 $title = $this->titles[$link_id]; 794 $title = $this->encodeAttribute($title); 795 $result .= " title=\"$title\""; 796 } 797 798 $link_text = $this->runSpanGamut($link_text); 799 $result .= ">$link_text</a>"; 800 $result = $this->hashPart($result); 801 } else { 802 $result = $whole_match; 803 } 804 return $result; 805 } 806 807 /** 808 * Callback method to parse inline anchors 809 * @param string $matches 810 * @return string 811 */ 812 protected function _doAnchors_inline_callback($matches) { 813 $link_text = $this->runSpanGamut($matches[2]); 814 $url = $matches[3] === '' ? $matches[4] : $matches[3]; 815 $title =& $matches[7]; 816 817 // If the URL was of the form <s p a c e s> it got caught by the HTML 818 // tag parser and hashed. Need to reverse the process before using 819 // the URL. 820 $unhashed = $this->unhash($url); 821 if ($unhashed !== $url) 822 $url = preg_replace('/^<(.*)>$/', '\1', $unhashed); 823 824 $url = $this->encodeURLAttribute($url); 825 826 $result = "<a href=\"$url\""; 827 if (isset($title)) { 828 $title = $this->encodeAttribute($title); 829 $result .= " title=\"$title\""; 830 } 831 832 $link_text = $this->runSpanGamut($link_text); 833 $result .= ">$link_text</a>"; 834 835 return $this->hashPart($result); 836 } 837 838 /** 839 * Turn Markdown image shortcuts into <img> tags. 840 * @param string $text 841 * @return string 842 */ 843 protected function doImages($text) { 844 // First, handle reference-style labeled images: ![alt text][id] 845 $text = preg_replace_callback('{ 846 ( # wrap whole match in $1 847 !\[ 848 ('.$this->nested_brackets_re.') # alt text = $2 849 \] 850 851 [ ]? # one optional space 852 (?:\n[ ]*)? # one optional newline followed by spaces 853 854 \[ 855 (.*?) # id = $3 856 \] 857 858 ) 859 }xs', 860 array($this, '_doImages_reference_callback'), $text); 861 862 // Next, handle inline images:  863 // Don't forget: encode * and _ 864 $text = preg_replace_callback('{ 865 ( # wrap whole match in $1 866 !\[ 867 ('.$this->nested_brackets_re.') # alt text = $2 868 \] 869 \s? # One optional whitespace character 870 \( # literal paren 871 [ \n]* 872 (?: 873 <(\S*)> # src url = $3 874 | 875 ('.$this->nested_url_parenthesis_re.') # src url = $4 876 ) 877 [ \n]* 878 ( # $5 879 ([\'"]) # quote char = $6 880 (.*?) # title = $7 881 \6 # matching quote 882 [ \n]* 883 )? # title is optional 884 \) 885 ) 886 }xs', 887 array($this, '_doImages_inline_callback'), $text); 888 889 return $text; 890 } 891 892 /** 893 * Callback to parse references image tags 894 * @param array $matches 895 * @return string 896 */ 897 protected function _doImages_reference_callback($matches) { 898 $whole_match = $matches[1]; 899 $alt_text = $matches[2]; 900 $link_id = strtolower($matches[3]); 901 902 if ($link_id == "") { 903 $link_id = strtolower($alt_text); // for shortcut links like ![this][]. 904 } 905 906 $alt_text = $this->encodeAttribute($alt_text); 907 if (isset($this->urls[$link_id])) { 908 $url = $this->encodeURLAttribute($this->urls[$link_id]); 909 $result = "<img src=\"$url\" alt=\"$alt_text\""; 910 if (isset($this->titles[$link_id])) { 911 $title = $this->titles[$link_id]; 912 $title = $this->encodeAttribute($title); 913 $result .= " title=\"$title\""; 914 } 915 $result .= $this->empty_element_suffix; 916 $result = $this->hashPart($result); 917 } else { 918 // If there's no such link ID, leave intact: 919 $result = $whole_match; 920 } 921 922 return $result; 923 } 924 925 /** 926 * Callback to parse inline image tags 927 * @param array $matches 928 * @return string 929 */ 930 protected function _doImages_inline_callback($matches) { 931 $whole_match = $matches[1]; 932 $alt_text = $matches[2]; 933 $url = $matches[3] == '' ? $matches[4] : $matches[3]; 934 $title =& $matches[7]; 935 936 $alt_text = $this->encodeAttribute($alt_text); 937 $url = $this->encodeURLAttribute($url); 938 $result = "<img src=\"$url\" alt=\"$alt_text\""; 939 if (isset($title)) { 940 $title = $this->encodeAttribute($title); 941 $result .= " title=\"$title\""; // $title already quoted 942 } 943 $result .= $this->empty_element_suffix; 944 945 return $this->hashPart($result); 946 } 947 948 /** 949 * Parse Markdown heading elements to HTML 950 * @param string $text 951 * @return string 952 */ 953 protected function doHeaders($text) { 954 /** 955 * Setext-style headers: 956 * Header 1 957 * ======== 958 * 959 * Header 2 960 * -------- 961 */ 962 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx', 963 array($this, '_doHeaders_callback_setext'), $text); 964 965 /** 966 * atx-style headers: 967 * # Header 1 968 * ## Header 2 969 * ## Header 2 with closing hashes ## 970 * ... 971 * ###### Header 6 972 */ 973 $text = preg_replace_callback('{ 974 ^(\#{1,6}) # $1 = string of #\'s 975 [ ]* 976 (.+?) # $2 = Header text 977 [ ]* 978 \#* # optional closing #\'s (not counted) 979 \n+ 980 }xm', 981 array($this, '_doHeaders_callback_atx'), $text); 982 983 return $text; 984 } 985 986 /** 987 * Setext header parsing callback 988 * @param array $matches 989 * @return string 990 */ 991 protected function _doHeaders_callback_setext($matches) { 992 // Terrible hack to check we haven't found an empty list item. 993 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) { 994 return $matches[0]; 995 } 996 997 $level = $matches[2][0] == '=' ? 1 : 2; 998 999 // ID attribute generation 1000 $idAtt = $this->_generateIdFromHeaderValue($matches[1]); 1001 1002 $block = "<h$level$idAtt>".$this->runSpanGamut($matches[1])."</h$level>"; 1003 return "\n" . $this->hashBlock($block) . "\n\n"; 1004 } 1005 1006 /** 1007 * ATX header parsing callback 1008 * @param array $matches 1009 * @return string 1010 */ 1011 protected function _doHeaders_callback_atx($matches) { 1012 // ID attribute generation 1013 $idAtt = $this->_generateIdFromHeaderValue($matches[2]); 1014 1015 $level = strlen($matches[1]); 1016 $block = "<h$level$idAtt>".$this->runSpanGamut($matches[2])."</h$level>"; 1017 return "\n" . $this->hashBlock($block) . "\n\n"; 1018 } 1019 1020 /** 1021 * If a header_id_func property is set, we can use it to automatically 1022 * generate an id attribute. 1023 * 1024 * This method returns a string in the form id="foo", or an empty string 1025 * otherwise. 1026 * @param string $headerValue 1027 * @return string 1028 */ 1029 protected function _generateIdFromHeaderValue($headerValue) { 1030 if (!is_callable($this->header_id_func)) { 1031 return ""; 1032 } 1033 1034 $idValue = call_user_func($this->header_id_func, $headerValue); 1035 if (!$idValue) { 1036 return ""; 1037 } 1038 1039 return ' id="' . $this->encodeAttribute($idValue) . '"'; 1040 } 1041 1042 /** 1043 * Form HTML ordered (numbered) and unordered (bulleted) lists. 1044 * @param string $text 1045 * @return string 1046 */ 1047 protected function doLists($text) { 1048 $less_than_tab = $this->tab_width - 1; 1049 1050 // Re-usable patterns to match list item bullets and number markers: 1051 $marker_ul_re = '[*+-]'; 1052 $marker_ol_re = '\d+[\.]'; 1053 1054 $markers_relist = array( 1055 $marker_ul_re => $marker_ol_re, 1056 $marker_ol_re => $marker_ul_re, 1057 ); 1058 1059 foreach ($markers_relist as $marker_re => $other_marker_re) { 1060 // Re-usable pattern to match any entirel ul or ol list: 1061 $whole_list_re = ' 1062 ( # $1 = whole list 1063 ( # $2 1064 ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces 1065 ('.$marker_re.') # $4 = first list item marker 1066 [ ]+ 1067 ) 1068 (?s:.+?) 1069 ( # $5 1070 \z 1071 | 1072 \n{2,} 1073 (?=\S) 1074 (?! # Negative lookahead for another list item marker 1075 [ ]* 1076 '.$marker_re.'[ ]+ 1077 ) 1078 | 1079 (?= # Lookahead for another kind of list 1080 \n 1081 \3 # Must have the same indentation 1082 '.$other_marker_re.'[ ]+ 1083 ) 1084 ) 1085 ) 1086 '; // mx 1087 1088 // We use a different prefix before nested lists than top-level lists. 1089 //See extended comment in _ProcessListItems(). 1090 1091 if ($this->list_level) { 1092 $text = preg_replace_callback('{ 1093 ^ 1094 '.$whole_list_re.' 1095 }mx', 1096 array($this, '_doLists_callback'), $text); 1097 } else { 1098 $text = preg_replace_callback('{ 1099 (?:(?<=\n)\n|\A\n?) # Must eat the newline 1100 '.$whole_list_re.' 1101 }mx', 1102 array($this, '_doLists_callback'), $text); 1103 } 1104 } 1105 1106 return $text; 1107 } 1108 1109 /** 1110 * List parsing callback 1111 * @param array $matches 1112 * @return string 1113 */ 1114 protected function _doLists_callback($matches) { 1115 // Re-usable patterns to match list item bullets and number markers: 1116 $marker_ul_re = '[*+-]'; 1117 $marker_ol_re = '\d+[\.]'; 1118 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)"; 1119 $marker_ol_start_re = '[0-9]+'; 1120 1121 $list = $matches[1]; 1122 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol"; 1123 1124 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re ); 1125 1126 $list .= "\n"; 1127 $result = $this->processListItems($list, $marker_any_re); 1128 1129 $ol_start = 1; 1130 if ($this->enhanced_ordered_list) { 1131 // Get the start number for ordered list. 1132 if ($list_type == 'ol') { 1133 $ol_start_array = array(); 1134 $ol_start_check = preg_match("/$marker_ol_start_re/", $matches[4], $ol_start_array); 1135 if ($ol_start_check){ 1136 $ol_start = $ol_start_array[0]; 1137 } 1138 } 1139 } 1140 1141 if ($ol_start > 1 && $list_type == 'ol'){ 1142 $result = $this->hashBlock("<$list_type start=\"$ol_start\">\n" . $result . "</$list_type>"); 1143 } else { 1144 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>"); 1145 } 1146 return "\n". $result ."\n\n"; 1147 } 1148 1149 /** 1150 * Nesting tracker for list levels 1151 * @var integer 1152 */ 1153 protected $list_level = 0; 1154 1155 /** 1156 * Process the contents of a single ordered or unordered list, splitting it 1157 * into individual list items. 1158 * @param string $list_str 1159 * @param string $marker_any_re 1160 * @return string 1161 */ 1162 protected function processListItems($list_str, $marker_any_re) { 1163 /** 1164 * The $this->list_level global keeps track of when we're inside a list. 1165 * Each time we enter a list, we increment it; when we leave a list, 1166 * we decrement. If it's zero, we're not in a list anymore. 1167 * 1168 * We do this because when we're not inside a list, we want to treat 1169 * something like this: 1170 * 1171 * I recommend upgrading to version 1172 * 8. Oops, now this line is treated 1173 * as a sub-list. 1174 * 1175 * As a single paragraph, despite the fact that the second line starts 1176 * with a digit-period-space sequence. 1177 * 1178 * Whereas when we're inside a list (or sub-list), that line will be 1179 * treated as the start of a sub-list. What a kludge, huh? This is 1180 * an aspect of Markdown's syntax that's hard to parse perfectly 1181 * without resorting to mind-reading. Perhaps the solution is to 1182 * change the syntax rules such that sub-lists must start with a 1183 * starting cardinal number; e.g. "1." or "a.". 1184 */ 1185 $this->list_level++; 1186 1187 // Trim trailing blank lines: 1188 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 1189 1190 $list_str = preg_replace_callback('{ 1191 (\n)? # leading line = $1 1192 (^[ ]*) # leading whitespace = $2 1193 ('.$marker_any_re.' # list marker and space = $3 1194 (?:[ ]+|(?=\n)) # space only required if item is not empty 1195 ) 1196 ((?s:.*?)) # list item text = $4 1197 (?:(\n+(?=\n))|\n) # tailing blank line = $5 1198 (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n)))) 1199 }xm', 1200 array($this, '_processListItems_callback'), $list_str); 1201 1202 $this->list_level--; 1203 return $list_str; 1204 } 1205 1206 /** 1207 * List item parsing callback 1208 * @param array $matches 1209 * @return string 1210 */ 1211 protected function _processListItems_callback($matches) { 1212 $item = $matches[4]; 1213 $leading_line =& $matches[1]; 1214 $leading_space =& $matches[2]; 1215 $marker_space = $matches[3]; 1216 $tailing_blank_line =& $matches[5]; 1217 1218 if ($leading_line || $tailing_blank_line || 1219 preg_match('/\n{2,}/', $item)) 1220 { 1221 // Replace marker with the appropriate whitespace indentation 1222 $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item; 1223 $item = $this->runBlockGamut($this->outdent($item)."\n"); 1224 } else { 1225 // Recursion for sub-lists: 1226 $item = $this->doLists($this->outdent($item)); 1227 $item = $this->formParagraphs($item, false); 1228 } 1229 1230 return "<li>" . $item . "</li>\n"; 1231 } 1232 1233 /** 1234 * Process Markdown `<pre><code>` blocks. 1235 * @param string $text 1236 * @return string 1237 */ 1238 protected function doCodeBlocks($text) { 1239 $text = preg_replace_callback('{ 1240 (?:\n\n|\A\n?) 1241 ( # $1 = the code block -- one or more lines, starting with a space/tab 1242 (?> 1243 [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces 1244 .*\n+ 1245 )+ 1246 ) 1247 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc 1248 }xm', 1249 array($this, '_doCodeBlocks_callback'), $text); 1250 1251 return $text; 1252 } 1253 1254 /** 1255 * Code block parsing callback 1256 * @param array $matches 1257 * @return string 1258 */ 1259 protected function _doCodeBlocks_callback($matches) { 1260 $codeblock = $matches[1]; 1261 1262 $codeblock = $this->outdent($codeblock); 1263 if (is_callable($this->code_block_content_func)) { 1264 $codeblock = call_user_func($this->code_block_content_func, $codeblock, ""); 1265 } else { 1266 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); 1267 } 1268 1269 # trim leading newlines and trailing newlines 1270 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock); 1271 1272 $codeblock = "<pre><code>$codeblock\n</code></pre>"; 1273 return "\n\n" . $this->hashBlock($codeblock) . "\n\n"; 1274 } 1275 1276 /** 1277 * Create a code span markup for $code. Called from handleSpanToken. 1278 * @param string $code 1279 * @return string 1280 */ 1281 protected function makeCodeSpan($code) { 1282 if (is_callable($this->code_span_content_func)) { 1283 $code = call_user_func($this->code_span_content_func, $code); 1284 } else { 1285 $code = htmlspecialchars(trim($code), ENT_NOQUOTES); 1286 } 1287 return $this->hashPart("<code>$code</code>"); 1288 } 1289 1290 /** 1291 * Define the emphasis operators with their regex matches 1292 * @var array 1293 */ 1294 protected $em_relist = array( 1295 '' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)', 1296 '*' => '(?<![\s*])\*(?!\*)', 1297 '_' => '(?<![\s_])_(?!_)', 1298 ); 1299 1300 /** 1301 * Define the strong operators with their regex matches 1302 * @var array 1303 */ 1304 protected $strong_relist = array( 1305 '' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)', 1306 '**' => '(?<![\s*])\*\*(?!\*)', 1307 '__' => '(?<![\s_])__(?!_)', 1308 ); 1309 1310 /** 1311 * Define the emphasis + strong operators with their regex matches 1312 * @var array 1313 */ 1314 protected $em_strong_relist = array( 1315 '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)', 1316 '***' => '(?<![\s*])\*\*\*(?!\*)', 1317 '___' => '(?<![\s_])___(?!_)', 1318 ); 1319 1320 /** 1321 * Container for prepared regular expressions 1322 * @var array 1323 */ 1324 protected $em_strong_prepared_relist; 1325 1326 /** 1327 * Prepare regular expressions for searching emphasis tokens in any 1328 * context. 1329 * @return void 1330 */ 1331 protected function prepareItalicsAndBold() { 1332 foreach ($this->em_relist as $em => $em_re) { 1333 foreach ($this->strong_relist as $strong => $strong_re) { 1334 // Construct list of allowed token expressions. 1335 $token_relist = array(); 1336 if (isset($this->em_strong_relist["$em$strong"])) { 1337 $token_relist[] = $this->em_strong_relist["$em$strong"]; 1338 } 1339 $token_relist[] = $em_re; 1340 $token_relist[] = $strong_re; 1341 1342 // Construct master expression from list. 1343 $token_re = '{(' . implode('|', $token_relist) . ')}'; 1344 $this->em_strong_prepared_relist["$em$strong"] = $token_re; 1345 } 1346 } 1347 } 1348 1349 /** 1350 * Convert Markdown italics (emphasis) and bold (strong) to HTML 1351 * @param string $text 1352 * @return string 1353 */ 1354 protected function doItalicsAndBold($text) { 1355 if ($this->in_emphasis_processing) { 1356 return $text; // avoid reentrency 1357 } 1358 $this->in_emphasis_processing = true; 1359 1360 $token_stack = array(''); 1361 $text_stack = array(''); 1362 $em = ''; 1363 $strong = ''; 1364 $tree_char_em = false; 1365 1366 while (1) { 1367 // Get prepared regular expression for seraching emphasis tokens 1368 // in current context. 1369 $token_re = $this->em_strong_prepared_relist["$em$strong"]; 1370 1371 // Each loop iteration search for the next emphasis token. 1372 // Each token is then passed to handleSpanToken. 1373 $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 1374 $text_stack[0] .= $parts[0]; 1375 $token =& $parts[1]; 1376 $text =& $parts[2]; 1377 1378 if (empty($token)) { 1379 // Reached end of text span: empty stack without emitting. 1380 // any more emphasis. 1381 while ($token_stack[0]) { 1382 $text_stack[1] .= array_shift($token_stack); 1383 $text_stack[0] .= array_shift($text_stack); 1384 } 1385 break; 1386 } 1387 1388 $token_len = strlen($token); 1389 if ($tree_char_em) { 1390 // Reached closing marker while inside a three-char emphasis. 1391 if ($token_len == 3) { 1392 // Three-char closing marker, close em and strong. 1393 array_shift($token_stack); 1394 $span = array_shift($text_stack); 1395 $span = $this->runSpanGamut($span); 1396 $span = "<strong><em>$span</em></strong>"; 1397 $text_stack[0] .= $this->hashPart($span); 1398 $em = ''; 1399 $strong = ''; 1400 } else { 1401 // Other closing marker: close one em or strong and 1402 // change current token state to match the other 1403 $token_stack[0] = str_repeat($token[0], 3-$token_len); 1404 $tag = $token_len == 2 ? "strong" : "em"; 1405 $span = $text_stack[0]; 1406 $span = $this->runSpanGamut($span); 1407 $span = "<$tag>$span</$tag>"; 1408 $text_stack[0] = $this->hashPart($span); 1409 $$tag = ''; // $$tag stands for $em or $strong 1410 } 1411 $tree_char_em = false; 1412 } else if ($token_len == 3) { 1413 if ($em) { 1414 // Reached closing marker for both em and strong. 1415 // Closing strong marker: 1416 for ($i = 0; $i < 2; ++$i) { 1417 $shifted_token = array_shift($token_stack); 1418 $tag = strlen($shifted_token) == 2 ? "strong" : "em"; 1419 $span = array_shift($text_stack); 1420 $span = $this->runSpanGamut($span); 1421 $span = "<$tag>$span</$tag>"; 1422 $text_stack[0] .= $this->hashPart($span); 1423 $$tag = ''; // $$tag stands for $em or $strong 1424 } 1425 } else { 1426 // Reached opening three-char emphasis marker. Push on token 1427 // stack; will be handled by the special condition above. 1428 $em = $token[0]; 1429 $strong = "$em$em"; 1430 array_unshift($token_stack, $token); 1431 array_unshift($text_stack, ''); 1432 $tree_char_em = true; 1433 } 1434 } else if ($token_len == 2) { 1435 if ($strong) { 1436 // Unwind any dangling emphasis marker: 1437 if (strlen($token_stack[0]) == 1) { 1438 $text_stack[1] .= array_shift($token_stack); 1439 $text_stack[0] .= array_shift($text_stack); 1440 $em = ''; 1441 } 1442 // Closing strong marker: 1443 array_shift($token_stack); 1444 $span = array_shift($text_stack); 1445 $span = $this->runSpanGamut($span); 1446 $span = "<strong>$span</strong>"; 1447 $text_stack[0] .= $this->hashPart($span); 1448 $strong = ''; 1449 } else { 1450 array_unshift($token_stack, $token); 1451 array_unshift($text_stack, ''); 1452 $strong = $token; 1453 } 1454 } else { 1455 // Here $token_len == 1 1456 if ($em) { 1457 if (strlen($token_stack[0]) == 1) { 1458 // Closing emphasis marker: 1459 array_shift($token_stack); 1460 $span = array_shift($text_stack); 1461 $span = $this->runSpanGamut($span); 1462 $span = "<em>$span</em>"; 1463 $text_stack[0] .= $this->hashPart($span); 1464 $em = ''; 1465 } else { 1466 $text_stack[0] .= $token; 1467 } 1468 } else { 1469 array_unshift($token_stack, $token); 1470 array_unshift($text_stack, ''); 1471 $em = $token; 1472 } 1473 } 1474 } 1475 $this->in_emphasis_processing = false; 1476 return $text_stack[0]; 1477 } 1478 1479 /** 1480 * Parse Markdown blockquotes to HTML 1481 * @param string $text 1482 * @return string 1483 */ 1484 protected function doBlockQuotes($text) { 1485 $text = preg_replace_callback('/ 1486 ( # Wrap whole match in $1 1487 (?> 1488 ^[ ]*>[ ]? # ">" at the start of a line 1489 .+\n # rest of the first line 1490 (.+\n)* # subsequent consecutive lines 1491 \n* # blanks 1492 )+ 1493 ) 1494 /xm', 1495 array($this, '_doBlockQuotes_callback'), $text); 1496 1497 return $text; 1498 } 1499 1500 /** 1501 * Blockquote parsing callback 1502 * @param array $matches 1503 * @return string 1504 */ 1505 protected function _doBlockQuotes_callback($matches) { 1506 $bq = $matches[1]; 1507 // trim one level of quoting - trim whitespace-only lines 1508 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq); 1509 $bq = $this->runBlockGamut($bq); // recurse 1510 1511 $bq = preg_replace('/^/m', " ", $bq); 1512 // These leading spaces cause problem with <pre> content, 1513 // so we need to fix that: 1514 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 1515 array($this, '_doBlockQuotes_callback2'), $bq); 1516 1517 return "\n" . $this->hashBlock("<blockquote>\n$bq\n</blockquote>") . "\n\n"; 1518 } 1519 1520 /** 1521 * Blockquote parsing callback 1522 * @param array $matches 1523 * @return string 1524 */ 1525 protected function _doBlockQuotes_callback2($matches) { 1526 $pre = $matches[1]; 1527 $pre = preg_replace('/^ /m', '', $pre); 1528 return $pre; 1529 } 1530 1531 /** 1532 * Parse paragraphs 1533 * 1534 * @param string $text String to process in paragraphs 1535 * @param boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags 1536 * @return string 1537 */ 1538 protected function formParagraphs($text, $wrap_in_p = true) { 1539 // Strip leading and trailing lines: 1540 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 1541 1542 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 1543 1544 // Wrap <p> tags and unhashify HTML blocks 1545 foreach ($grafs as $key => $value) { 1546 if (!preg_match('/^B\x1A[0-9]+B$/', $value)) { 1547 // Is a paragraph. 1548 $value = $this->runSpanGamut($value); 1549 if ($wrap_in_p) { 1550 $value = preg_replace('/^([ ]*)/', "<p>", $value); 1551 $value .= "</p>"; 1552 } 1553 $grafs[$key] = $this->unhash($value); 1554 } else { 1555 // Is a block. 1556 // Modify elements of @grafs in-place... 1557 $graf = $value; 1558 $block = $this->html_hashes[$graf]; 1559 $graf = $block; 1560 // if (preg_match('{ 1561 // \A 1562 // ( # $1 = <div> tag 1563 // <div \s+ 1564 // [^>]* 1565 // \b 1566 // markdown\s*=\s* ([\'"]) # $2 = attr quote char 1567 // 1 1568 // \2 1569 // [^>]* 1570 // > 1571 // ) 1572 // ( # $3 = contents 1573 // .* 1574 // ) 1575 // (</div>) # $4 = closing tag 1576 // \z 1577 // }xs', $block, $matches)) 1578 // { 1579 // list(, $div_open, , $div_content, $div_close) = $matches; 1580 // 1581 // // We can't call Markdown(), because that resets the hash; 1582 // // that initialization code should be pulled into its own sub, though. 1583 // $div_content = $this->hashHTMLBlocks($div_content); 1584 // 1585 // // Run document gamut methods on the content. 1586 // foreach ($this->document_gamut as $method => $priority) { 1587 // $div_content = $this->$method($div_content); 1588 // } 1589 // 1590 // $div_open = preg_replace( 1591 // '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open); 1592 // 1593 // $graf = $div_open . "\n" . $div_content . "\n" . $div_close; 1594 // } 1595 $grafs[$key] = $graf; 1596 } 1597 } 1598 1599 return implode("\n\n", $grafs); 1600 } 1601 1602 /** 1603 * Encode text for a double-quoted HTML attribute. This function 1604 * is *not* suitable for attributes enclosed in single quotes. 1605 * @param string $text 1606 * @return string 1607 */ 1608 protected function encodeAttribute($text) { 1609 $text = $this->encodeAmpsAndAngles($text); 1610 $text = str_replace('"', '"', $text); 1611 return $text; 1612 } 1613 1614 /** 1615 * Encode text for a double-quoted HTML attribute containing a URL, 1616 * applying the URL filter if set. Also generates the textual 1617 * representation for the URL (removing mailto: or tel:) storing it in $text. 1618 * This function is *not* suitable for attributes enclosed in single quotes. 1619 * 1620 * @param string $url 1621 * @param string $text Passed by reference 1622 * @return string URL 1623 */ 1624 protected function encodeURLAttribute($url, &$text = null) { 1625 if (is_callable($this->url_filter_func)) { 1626 $url = call_user_func($this->url_filter_func, $url); 1627 } 1628 1629 if (preg_match('{^mailto:}i', $url)) { 1630 $url = $this->encodeEntityObfuscatedAttribute($url, $text, 7); 1631 } else if (preg_match('{^tel:}i', $url)) { 1632 $url = $this->encodeAttribute($url); 1633 $text = substr($url, 4); 1634 } else { 1635 $url = $this->encodeAttribute($url); 1636 $text = $url; 1637 } 1638 1639 return $url; 1640 } 1641 1642 /** 1643 * Smart processing for ampersands and angle brackets that need to 1644 * be encoded. Valid character entities are left alone unless the 1645 * no-entities mode is set. 1646 * @param string $text 1647 * @return string 1648 */ 1649 protected function encodeAmpsAndAngles($text) { 1650 if ($this->no_entities) { 1651 $text = str_replace('&', '&', $text); 1652 } else { 1653 // Ampersand-encoding based entirely on Nat Irons's Amputator 1654 // MT plugin: <http://bumppo.net/projects/amputator/> 1655 $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 1656 '&', $text); 1657 } 1658 // Encode remaining <'s 1659 $text = str_replace('<', '<', $text); 1660 1661 return $text; 1662 } 1663 1664 /** 1665 * Parse Markdown automatic links to anchor HTML tags 1666 * @param string $text 1667 * @return string 1668 */ 1669 protected function doAutoLinks($text) { 1670 $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i', 1671 array($this, '_doAutoLinks_url_callback'), $text); 1672 1673 // Email addresses: <address@domain.foo> 1674 $text = preg_replace_callback('{ 1675 < 1676 (?:mailto:)? 1677 ( 1678 (?: 1679 [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+ 1680 | 1681 ".*?" 1682 ) 1683 \@ 1684 (?: 1685 [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+ 1686 | 1687 \[[\d.a-fA-F:]+\] # IPv4 & IPv6 1688 ) 1689 ) 1690 > 1691 }xi', 1692 array($this, '_doAutoLinks_email_callback'), $text); 1693 1694 return $text; 1695 } 1696 1697 /** 1698 * Parse URL callback 1699 * @param array $matches 1700 * @return string 1701 */ 1702 protected function _doAutoLinks_url_callback($matches) { 1703 $url = $this->encodeURLAttribute($matches[1], $text); 1704 $link = "<a href=\"$url\">$text</a>"; 1705 return $this->hashPart($link); 1706 } 1707 1708 /** 1709 * Parse email address callback 1710 * @param array $matches 1711 * @return string 1712 */ 1713 protected function _doAutoLinks_email_callback($matches) { 1714 $addr = $matches[1]; 1715 $url = $this->encodeURLAttribute("mailto:$addr", $text); 1716 $link = "<a href=\"$url\">$text</a>"; 1717 return $this->hashPart($link); 1718 } 1719 1720 /** 1721 * Input: some text to obfuscate, e.g. "mailto:foo@example.com" 1722 * 1723 * Output: the same text but with most characters encoded as either a 1724 * decimal or hex entity, in the hopes of foiling most address 1725 * harvesting spam bots. E.g.: 1726 * 1727 * mailto:foo 1728 * @example.co 1729 * m 1730 * 1731 * Note: the additional output $tail is assigned the same value as the 1732 * ouput, minus the number of characters specified by $head_length. 1733 * 1734 * Based by a filter by Matthew Wickline, posted to BBEdit-Talk. 1735 * With some optimizations by Milian Wolff. Forced encoding of HTML 1736 * attribute special characters by Allan Odgaard. 1737 * 1738 * @param string $text 1739 * @param string $tail Passed by reference 1740 * @param integer $head_length 1741 * @return string 1742 */ 1743 protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) { 1744 if ($text == "") { 1745 return $tail = ""; 1746 } 1747 1748 $chars = preg_split('/(?<!^)(?!$)/', $text); 1749 $seed = (int)abs(crc32($text) / strlen($text)); // Deterministic seed. 1750 1751 foreach ($chars as $key => $char) { 1752 $ord = ord($char); 1753 // Ignore non-ascii chars. 1754 if ($ord < 128) { 1755 $r = ($seed * (1 + $key)) % 100; // Pseudo-random function. 1756 // roughly 10% raw, 45% hex, 45% dec 1757 // '@' *must* be encoded. I insist. 1758 // '"' and '>' have to be encoded inside the attribute 1759 if ($r > 90 && strpos('@"&>', $char) === false) { 1760 /* do nothing */ 1761 } else if ($r < 45) { 1762 $chars[$key] = '&#x'.dechex($ord).';'; 1763 } else { 1764 $chars[$key] = '&#'.$ord.';'; 1765 } 1766 } 1767 } 1768 1769 $text = implode('', $chars); 1770 $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text; 1771 1772 return $text; 1773 } 1774 1775 /** 1776 * Take the string $str and parse it into tokens, hashing embeded HTML, 1777 * escaped characters and handling code spans. 1778 * @param string $str 1779 * @return string 1780 */ 1781 protected function parseSpan($str) { 1782 $output = ''; 1783 1784 $span_re = '{ 1785 ( 1786 \\\\'.$this->escape_chars_re.' 1787 | 1788 (?<![`\\\\]) 1789 `+ # code span marker 1790 '.( $this->no_markup ? '' : ' 1791 | 1792 <!-- .*? --> # comment 1793 | 1794 <\?.*?\?> | <%.*?%> # processing instruction 1795 | 1796 <[!$]?[-a-zA-Z0-9:_]+ # regular tags 1797 (?> 1798 \s 1799 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')* 1800 )? 1801 > 1802 | 1803 <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag 1804 | 1805 </[-a-zA-Z0-9:_]+\s*> # closing tag 1806 ').' 1807 ) 1808 }xs'; 1809 1810 while (1) { 1811 // Each loop iteration seach for either the next tag, the next 1812 // openning code span marker, or the next escaped character. 1813 // Each token is then passed to handleSpanToken. 1814 $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE); 1815 1816 // Create token from text preceding tag. 1817 if ($parts[0] != "") { 1818 $output .= $parts[0]; 1819 } 1820 1821 // Check if we reach the end. 1822 if (isset($parts[1])) { 1823 $output .= $this->handleSpanToken($parts[1], $parts[2]); 1824 $str = $parts[2]; 1825 } else { 1826 break; 1827 } 1828 } 1829 1830 return $output; 1831 } 1832 1833 /** 1834 * Handle $token provided by parseSpan by determining its nature and 1835 * returning the corresponding value that should replace it. 1836 * @param string $token 1837 * @param string $str Passed by reference 1838 * @return string 1839 */ 1840 protected function handleSpanToken($token, &$str) { 1841 switch ($token[0]) { 1842 case "\\": 1843 return $this->hashPart("&#". ord($token[1]). ";"); 1844 case "`": 1845 // Search for end marker in remaining text. 1846 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 1847 $str, $matches)) 1848 { 1849 $str = $matches[2]; 1850 $codespan = $this->makeCodeSpan($matches[1]); 1851 return $this->hashPart($codespan); 1852 } 1853 return $token; // Return as text since no ending marker found. 1854 default: 1855 return $this->hashPart($token); 1856 } 1857 } 1858 1859 /** 1860 * Remove one level of line-leading tabs or spaces 1861 * @param string $text 1862 * @return string 1863 */ 1864 protected function outdent($text) { 1865 return preg_replace('/^(\t|[ ]{1,' . $this->tab_width . '})/m', '', $text); 1866 } 1867 1868 1869 /** 1870 * String length function for detab. `_initDetab` will create a function to 1871 * handle UTF-8 if the default function does not exist. 1872 * @var string 1873 */ 1874 protected $utf8_strlen = 'mb_strlen'; 1875 1876 /** 1877 * Replace tabs with the appropriate amount of spaces. 1878 * 1879 * For each line we separate the line in blocks delemited by tab characters. 1880 * Then we reconstruct every line by adding the appropriate number of space 1881 * between each blocks. 1882 * 1883 * @param string $text 1884 * @return string 1885 */ 1886 protected function detab($text) { 1887 $text = preg_replace_callback('/^.*\t.*$/m', 1888 array($this, '_detab_callback'), $text); 1889 1890 return $text; 1891 } 1892 1893 /** 1894 * Replace tabs callback 1895 * @param string $matches 1896 * @return string 1897 */ 1898 protected function _detab_callback($matches) { 1899 $line = $matches[0]; 1900 $strlen = $this->utf8_strlen; // strlen function for UTF-8. 1901 1902 // Split in blocks. 1903 $blocks = explode("\t", $line); 1904 // Add each blocks to the line. 1905 $line = $blocks[0]; 1906 unset($blocks[0]); // Do not add first block twice. 1907 foreach ($blocks as $block) { 1908 // Calculate amount of space, insert spaces, insert block. 1909 $amount = $this->tab_width - 1910 $strlen($line, 'UTF-8') % $this->tab_width; 1911 $line .= str_repeat(" ", $amount) . $block; 1912 } 1913 return $line; 1914 } 1915 1916 /** 1917 * Check for the availability of the function in the `utf8_strlen` property 1918 * (initially `mb_strlen`). If the function is not available, create a 1919 * function that will loosely count the number of UTF-8 characters with a 1920 * regular expression. 1921 * @return void 1922 */ 1923 protected function _initDetab() { 1924 1925 if (function_exists($this->utf8_strlen)) { 1926 return; 1927 } 1928 1929 $this->utf8_strlen = function($text) { 1930 return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', $text, $m); 1931 }; 1932 } 1933 1934 /** 1935 * Swap back in all the tags hashed by _HashHTMLBlocks. 1936 * @param string $text 1937 * @return string 1938 */ 1939 protected function unhash($text) { 1940 return preg_replace_callback('/(.)\x1A[0-9]+\1/', 1941 array($this, '_unhash_callback'), $text); 1942 } 1943 1944 /** 1945 * Unhashing callback 1946 * @param array $matches 1947 * @return string 1948 */ 1949 protected function _unhash_callback($matches) { 1950 return $this->html_hashes[$matches[0]]; 1951 } 1952 } 1953 1954 class MarkdownExtraParser extends MarkdownParser { 1955 /** 1956 * Configuration variables 1957 */ 1958 1959 /** 1960 * Prefix for footnote ids. 1961 * @var string 1962 */ 1963 public $fn_id_prefix = ""; 1964 1965 /** 1966 * Optional title attribute for footnote links. 1967 * @var string 1968 */ 1969 public $fn_link_title = ""; 1970 1971 /** 1972 * Optional class attribute for footnote links and backlinks. 1973 * @var string 1974 */ 1975 public $fn_link_class = "footnote-ref"; 1976 public $fn_backlink_class = "footnote-backref"; 1977 1978 /** 1979 * Content to be displayed within footnote backlinks. The default is '↩'; 1980 * the U+FE0E on the end is a Unicode variant selector used to prevent iOS 1981 * from displaying the arrow character as an emoji. 1982 * Optionally use '^^' and '%%' to refer to the footnote number and 1983 * reference number respectively. {@see parseFootnotePlaceholders()} 1984 * @var string 1985 */ 1986 public $fn_backlink_html = '↩︎'; 1987 1988 /** 1989 * Optional title and aria-label attributes for footnote backlinks for 1990 * added accessibility (to ensure backlink uniqueness). 1991 * Use '^^' and '%%' to refer to the footnote number and reference number 1992 * respectively. {@see parseFootnotePlaceholders()} 1993 * @var string 1994 */ 1995 public $fn_backlink_title = ""; 1996 public $fn_backlink_label = ""; 1997 1998 /** 1999 * Class name for table cell alignment (%% replaced left/center/right) 2000 * For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center' 2001 * If empty, the align attribute is used instead of a class name. 2002 * @var string 2003 */ 2004 public $table_align_class_tmpl = ''; 2005 2006 /** 2007 * Optional class prefix for fenced code block. 2008 * @var string 2009 */ 2010 public $code_class_prefix = ""; 2011 2012 /** 2013 * Class attribute for code blocks goes on the `code` tag; 2014 * setting this to true will put attributes on the `pre` tag instead. 2015 * @var boolean 2016 */ 2017 public $code_attr_on_pre = false; 2018 2019 /** 2020 * Predefined abbreviations. 2021 * @var array 2022 */ 2023 public $predef_abbr = array(); 2024 2025 /** 2026 * Only convert atx-style headers if there's a space between the header and # 2027 * @var boolean 2028 */ 2029 public $hashtag_protection = false; 2030 2031 /** 2032 * Determines whether footnotes should be appended to the end of the document. 2033 * If true, footnote html can be retrieved from $this->footnotes_assembled. 2034 * @var boolean 2035 */ 2036 public $omit_footnotes = false; 2037 2038 2039 /** 2040 * After parsing, the HTML for the list of footnotes appears here. 2041 * This is available only if $omit_footnotes == true. 2042 * 2043 * Note: when placing the content of `footnotes_assembled` on the page, 2044 * consider adding the attribute `role="doc-endnotes"` to the `div` or 2045 * `section` that will enclose the list of footnotes so they are 2046 * reachable to accessibility tools the same way they would be with the 2047 * default HTML output. 2048 * @var null|string 2049 */ 2050 public $footnotes_assembled = null; 2051 2052 /** 2053 * Parser implementation 2054 */ 2055 2056 /** 2057 * Constructor function. Initialize the parser object. 2058 * @return void 2059 */ 2060 public function __construct() { 2061 // Add extra escapable characters before parent constructor 2062 // initialize the table. 2063 $this->escape_chars .= ':|'; 2064 2065 // Insert extra document, block, and span transformations. 2066 // Parent constructor will do the sorting. 2067 $this->document_gamut += array( 2068 "doFencedCodeBlocks" => 5, 2069 "stripFootnotes" => 15, 2070 "stripAbbreviations" => 25, 2071 "appendFootnotes" => 50, 2072 ); 2073 $this->block_gamut += array( 2074 "doFencedCodeBlocks" => 5, 2075 "doTables" => 15, 2076 "doDefLists" => 45, 2077 ); 2078 $this->span_gamut += array( 2079 "doFootnotes" => 5, 2080 "doAbbreviations" => 70, 2081 ); 2082 2083 $this->enhanced_ordered_list = true; 2084 parent::__construct(); 2085 } 2086 2087 2088 /** 2089 * Extra variables used during extra transformations. 2090 * @var array 2091 */ 2092 protected $footnotes = array(); 2093 protected $footnotes_ordered = array(); 2094 protected $footnotes_ref_count = array(); 2095 protected $footnotes_numbers = array(); 2096 protected $abbr_desciptions = array(); 2097 /** @var string */ 2098 protected $abbr_word_re = ''; 2099 2100 /** 2101 * Give the current footnote number. 2102 * @var integer 2103 */ 2104 protected $footnote_counter = 1; 2105 2106 /** 2107 * Ref attribute for links 2108 * @var array 2109 */ 2110 protected $ref_attr = array(); 2111 2112 /** 2113 * Setting up Extra-specific variables. 2114 */ 2115 protected function setup() { 2116 parent::setup(); 2117 2118 $this->footnotes = array(); 2119 $this->footnotes_ordered = array(); 2120 $this->footnotes_ref_count = array(); 2121 $this->footnotes_numbers = array(); 2122 $this->abbr_desciptions = array(); 2123 $this->abbr_word_re = ''; 2124 $this->footnote_counter = 1; 2125 $this->footnotes_assembled = null; 2126 2127 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) { 2128 if ($this->abbr_word_re) 2129 $this->abbr_word_re .= '|'; 2130 $this->abbr_word_re .= preg_quote($abbr_word); 2131 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); 2132 } 2133 } 2134 2135 /** 2136 * Clearing Extra-specific variables. 2137 */ 2138 protected function teardown() { 2139 $this->footnotes = array(); 2140 $this->footnotes_ordered = array(); 2141 $this->footnotes_ref_count = array(); 2142 $this->footnotes_numbers = array(); 2143 $this->abbr_desciptions = array(); 2144 $this->abbr_word_re = ''; 2145 2146 if ( ! $this->omit_footnotes ) 2147 $this->footnotes_assembled = null; 2148 2149 parent::teardown(); 2150 } 2151 2152 2153 /** 2154 * Extra attribute parser 2155 */ 2156 2157 /** 2158 * Expression to use to catch attributes (includes the braces) 2159 * @var string 2160 */ 2161 protected $id_class_attr_catch_re = '\{((?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}'; 2162 2163 /** 2164 * Expression to use when parsing in a context when no capture is desired 2165 * @var string 2166 */ 2167 protected $id_class_attr_nocatch_re = '\{(?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}'; 2168 2169 /** 2170 * Parse attributes caught by the $this->id_class_attr_catch_re expression 2171 * and return the HTML-formatted list of attributes. 2172 * 2173 * Currently supported attributes are .class and #id. 2174 * 2175 * In addition, this method also supports supplying a default Id value, 2176 * which will be used to populate the id attribute in case it was not 2177 * overridden. 2178 * @param string $tag_name 2179 * @param string $attr 2180 * @param mixed $defaultIdValue 2181 * @param array $classes 2182 * @return string 2183 */ 2184 protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null, $classes = array()) { 2185 if (empty($attr) && !$defaultIdValue && empty($classes)) { 2186 return ""; 2187 } 2188 2189 // Split on components 2190 preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches); 2191 $elements = $matches[0]; 2192 2193 // Handle classes and IDs (only first ID taken into account) 2194 $attributes = array(); 2195 $id = false; 2196 foreach ($elements as $element) { 2197 if ($element[0] === '.') { 2198 $classes[] = substr($element, 1); 2199 } else if ($element[0] === '#') { 2200 if ($id === false) $id = substr($element, 1); 2201 } else if (strpos($element, '=') > 0) { 2202 $parts = explode('=', $element, 2); 2203 $attributes[] = $parts[0] . '="' . $parts[1] . '"'; 2204 } 2205 } 2206 2207 if ($id === false || $id === '') { 2208 $id = $defaultIdValue; 2209 } 2210 2211 // Compose attributes as string 2212 $attr_str = ""; 2213 if (!empty($id)) { 2214 $attr_str .= ' id="'.$this->encodeAttribute($id) .'"'; 2215 } 2216 if (!empty($classes)) { 2217 $attr_str .= ' class="'. implode(" ", $classes) . '"'; 2218 } 2219 if (!$this->no_markup && !empty($attributes)) { 2220 $attr_str .= ' '.implode(" ", $attributes); 2221 } 2222 return $attr_str; 2223 } 2224 2225 /** 2226 * Strips link definitions from text, stores the URLs and titles in 2227 * hash references. 2228 * @param string $text 2229 * @return string 2230 */ 2231 protected function stripLinkDefinitions($text) { 2232 $less_than_tab = $this->tab_width - 1; 2233 2234 // Link defs are in the form: ^[id]: url "optional title" 2235 $text = preg_replace_callback('{ 2236 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 2237 [ ]* 2238 \n? # maybe *one* newline 2239 [ ]* 2240 (?: 2241 <(.+?)> # url = $2 2242 | 2243 (\S+?) # url = $3 2244 ) 2245 [ ]* 2246 \n? # maybe one newline 2247 [ ]* 2248 (?: 2249 (?<=\s) # lookbehind for whitespace 2250 ["(] 2251 (.*?) # title = $4 2252 [")] 2253 [ ]* 2254 )? # title is optional 2255 (?:[ ]* '.$this->id_class_attr_catch_re.' )? # $5 = extra id & class attr 2256 (?:\n+|\Z) 2257 }xm', 2258 array($this, '_stripLinkDefinitions_callback'), 2259 $text); 2260 return $text; 2261 } 2262 2263 /** 2264 * Strip link definition callback 2265 * @param array $matches 2266 * @return string 2267 */ 2268 protected function _stripLinkDefinitions_callback($matches) { 2269 $link_id = strtolower($matches[1]); 2270 $url = $matches[2] == '' ? $matches[3] : $matches[2]; 2271 $this->urls[$link_id] = $url; 2272 $this->titles[$link_id] =& $matches[4]; 2273 $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]); 2274 return ''; // String that will replace the block 2275 } 2276 2277 2278 /** 2279 * HTML block parser 2280 */ 2281 2282 /** 2283 * Tags that are always treated as block tags 2284 * @var string 2285 */ 2286 protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure|details|summary'; 2287 2288 /** 2289 * Tags treated as block tags only if the opening tag is alone on its line 2290 * @var string 2291 */ 2292 protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video'; 2293 2294 /** 2295 * Tags where markdown="1" default to span mode: 2296 * @var string 2297 */ 2298 protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address'; 2299 2300 /** 2301 * Tags which must not have their contents modified, no matter where 2302 * they appear 2303 * @var string 2304 */ 2305 protected $clean_tags_re = 'script|style|math|svg'; 2306 2307 /** 2308 * Tags that do not need to be closed. 2309 * @var string 2310 */ 2311 protected $auto_close_tags_re = 'hr|img|param|source|track'; 2312 2313 /** 2314 * Hashify HTML Blocks and "clean tags". 2315 * 2316 * We only want to do this for block-level HTML tags, such as headers, 2317 * lists, and tables. That's because we still want to wrap <p>s around 2318 * "paragraphs" that are wrapped in non-block-level tags, such as anchors, 2319 * phrase emphasis, and spans. The list of tags we're looking for is 2320 * hard-coded. 2321 * 2322 * This works by calling _HashHTMLBlocks_InMarkdown, which then calls 2323 * _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 2324 * attribute is found within a tag, _HashHTMLBlocks_InHTML calls back 2325 * _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag. 2326 * These two functions are calling each other. It's recursive! 2327 * @param string $text 2328 * @return string 2329 */ 2330 protected function hashHTMLBlocks($text) { 2331 if ($this->no_markup) { 2332 return $text; 2333 } 2334 2335 // Call the HTML-in-Markdown hasher. 2336 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text); 2337 2338 return $text; 2339 } 2340 2341 /** 2342 * Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags. 2343 * 2344 * * $indent is the number of space to be ignored when checking for code 2345 * blocks. This is important because if we don't take the indent into 2346 * account, something like this (which looks right) won't work as expected: 2347 * 2348 * <div> 2349 * <div markdown="1"> 2350 * Hello World. <-- Is this a Markdown code block or text? 2351 * </div> <-- Is this a Markdown code block or a real tag? 2352 * <div> 2353 * 2354 * If you don't like this, just don't indent the tag on which 2355 * you apply the markdown="1" attribute. 2356 * 2357 * * If $enclosing_tag_re is not empty, stops at the first unmatched closing 2358 * tag with that name. Nested tags supported. 2359 * 2360 * * If $span is true, text inside must treated as span. So any double 2361 * newline will be replaced by a single newline so that it does not create 2362 * paragraphs. 2363 * 2364 * Returns an array of that form: ( processed text , remaining text ) 2365 * 2366 * @param string $text 2367 * @param integer $indent 2368 * @param string $enclosing_tag_re 2369 * @param boolean $span 2370 * @return array 2371 */ 2372 protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0, 2373 $enclosing_tag_re = '', $span = false) 2374 { 2375 2376 if ($text === '') return array('', ''); 2377 2378 // Regex to check for the presense of newlines around a block tag. 2379 $newline_before_re = '/(?:^\n?|\n\n)*$/'; 2380 $newline_after_re = 2381 '{ 2382 ^ # Start of text following the tag. 2383 (?>[ ]*<!--.*?-->)? # Optional comment. 2384 [ ]*\n # Must be followed by newline. 2385 }xs'; 2386 2387 // Regex to match any tag. 2388 $block_tag_re = 2389 '{ 2390 ( # $2: Capture whole tag. 2391 </? # Any opening or closing tag. 2392 (?> # Tag name. 2393 ' . $this->block_tags_re . ' | 2394 ' . $this->context_block_tags_re . ' | 2395 ' . $this->clean_tags_re . ' | 2396 (?!\s)'.$enclosing_tag_re . ' 2397 ) 2398 (?: 2399 (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. 2400 (?> 2401 ".*?" | # Double quotes (can contain `>`) 2402 \'.*?\' | # Single quotes (can contain `>`) 2403 .+? # Anything but quotes and `>`. 2404 )*? 2405 )? 2406 > # End of tag. 2407 | 2408 <!-- .*? --> # HTML Comment 2409 | 2410 <\?.*?\?> | <%.*?%> # Processing instruction 2411 | 2412 <!\[CDATA\[.*?\]\]> # CData Block 2413 ' . ( !$span ? ' # If not in span. 2414 | 2415 # Indented code block 2416 (?: ^[ ]*\n | ^ | \n[ ]*\n ) 2417 [ ]{' . ($indent + 4) . '}[^\n]* \n 2418 (?> 2419 (?: [ ]{' . ($indent + 4) . '}[^\n]* | [ ]* ) \n 2420 )* 2421 | 2422 # Fenced code block marker 2423 (?<= ^ | \n ) 2424 [ ]{0,' . ($indent + 3) . '}(?:~{3,}|`{3,}) 2425 [ ]* 2426 (?: \.?[-_:a-zA-Z0-9]+ )? # standalone class name 2427 [ ]* 2428 (?: ' . $this->id_class_attr_nocatch_re . ' )? # extra attributes 2429 [ ]* 2430 (?= \n ) 2431 ' : '' ) . ' # End (if not is span). 2432 | 2433 # Code span marker 2434 # Note, this regex needs to go after backtick fenced 2435 # code blocks but it should also be kept outside of the 2436 # "if not in span" condition adding backticks to the parser 2437 `+ 2438 ) 2439 }xs'; 2440 2441 2442 $depth = 0; // Current depth inside the tag tree. 2443 $parsed = ""; // Parsed text that will be returned. 2444 2445 // Loop through every tag until we find the closing tag of the parent 2446 // or loop until reaching the end of text if no parent tag specified. 2447 do { 2448 // Split the text using the first $tag_match pattern found. 2449 // Text before pattern will be first in the array, text after 2450 // pattern will be at the end, and between will be any catches made 2451 // by the pattern. 2452 $parts = preg_split($block_tag_re, $text, 2, 2453 PREG_SPLIT_DELIM_CAPTURE); 2454 2455 // If in Markdown span mode, add a empty-string span-level hash 2456 // after each newline to prevent triggering any block element. 2457 if ($span) { 2458 $void = $this->hashPart("", ':'); 2459 $newline = "\n$void"; 2460 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void; 2461 } 2462 2463 $parsed .= $parts[0]; // Text before current tag. 2464 2465 // If end of $text has been reached. Stop loop. 2466 if (count($parts) < 3) { 2467 $text = ""; 2468 break; 2469 } 2470 2471 $tag = $parts[1]; // Tag to handle. 2472 $text = $parts[2]; // Remaining text after current tag. 2473 2474 // Check for: Fenced code block marker. 2475 // Note: need to recheck the whole tag to disambiguate backtick 2476 // fences from code spans 2477 if (preg_match('{^\n?([ ]{0,' . ($indent + 3) . '})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+)?[ ]*(?:' . $this->id_class_attr_nocatch_re . ')?[ ]*\n?$}', $tag, $capture)) { 2478 // Fenced code block marker: find matching end marker. 2479 $fence_indent = strlen($capture[1]); // use captured indent in re 2480 $fence_re = $capture[2]; // use captured fence in re 2481 if (preg_match('{^(?>.*\n)*?[ ]{' . ($fence_indent) . '}' . $fence_re . '[ ]*(?:\n|$)}', $text, 2482 $matches)) 2483 { 2484 // End marker found: pass text unchanged until marker. 2485 $parsed .= $tag . $matches[0]; 2486 $text = substr($text, strlen($matches[0])); 2487 } 2488 else { 2489 // No end marker: just skip it. 2490 $parsed .= $tag; 2491 } 2492 } 2493 // Check for: Indented code block. 2494 else if ($tag[0] === "\n" || $tag[0] === " ") { 2495 // Indented code block: pass it unchanged, will be handled 2496 // later. 2497 $parsed .= $tag; 2498 } 2499 // Check for: Code span marker 2500 // Note: need to check this after backtick fenced code blocks 2501 else if ($tag[0] === "`") { 2502 // Find corresponding end marker. 2503 $tag_re = preg_quote($tag); 2504 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)' . $tag_re . '(?!`)}', 2505 $text, $matches)) 2506 { 2507 // End marker found: pass text unchanged until marker. 2508 $parsed .= $tag . $matches[0]; 2509 $text = substr($text, strlen($matches[0])); 2510 } 2511 else { 2512 // Unmatched marker: just skip it. 2513 $parsed .= $tag; 2514 } 2515 } 2516 // Check for: Opening Block level tag or 2517 // Opening Context Block tag (like ins and del) 2518 // used as a block tag (tag is alone on it's line). 2519 else if (preg_match('{^<(?:' . $this->block_tags_re . ')\b}', $tag) || 2520 ( preg_match('{^<(?:' . $this->context_block_tags_re . ')\b}', $tag) && 2521 preg_match($newline_before_re, $parsed) && 2522 preg_match($newline_after_re, $text) ) 2523 ) 2524 { 2525 // Need to parse tag and following text using the HTML parser. 2526 list($block_text, $text) = 2527 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true); 2528 2529 // Make sure it stays outside of any paragraph by adding newlines. 2530 $parsed .= "\n\n$block_text\n\n"; 2531 } 2532 // Check for: Clean tag (like script, math) 2533 // HTML Comments, processing instructions. 2534 else if (preg_match('{^<(?:' . $this->clean_tags_re . ')\b}', $tag) || 2535 $tag[1] === '!' || $tag[1] === '?') 2536 { 2537 // Need to parse tag and following text using the HTML parser. 2538 // (don't check for markdown attribute) 2539 list($block_text, $text) = 2540 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false); 2541 2542 $parsed .= $block_text; 2543 } 2544 // Check for: Tag with same name as enclosing tag. 2545 else if ($enclosing_tag_re !== '' && 2546 // Same name as enclosing tag. 2547 preg_match('{^</?(?:' . $enclosing_tag_re . ')\b}', $tag)) 2548 { 2549 // Increase/decrease nested tag count. 2550 if ($tag[1] === '/') { 2551 $depth--; 2552 } else if ($tag[strlen($tag)-2] !== '/') { 2553 $depth++; 2554 } 2555 2556 if ($depth < 0) { 2557 // Going out of parent element. Clean up and break so we 2558 // return to the calling function. 2559 $text = $tag . $text; 2560 break; 2561 } 2562 2563 $parsed .= $tag; 2564 } 2565 else { 2566 $parsed .= $tag; 2567 } 2568 } while ($depth >= 0); 2569 2570 return array($parsed, $text); 2571 } 2572 2573 /** 2574 * Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags. 2575 * 2576 * * Calls $hash_method to convert any blocks. 2577 * * Stops when the first opening tag closes. 2578 * * $md_attr indicate if the use of the `markdown="1"` attribute is allowed. 2579 * (it is not inside clean tags) 2580 * 2581 * Returns an array of that form: ( processed text , remaining text ) 2582 * @param string $text 2583 * @param string $hash_method 2584 * @param bool $md_attr Handle `markdown="1"` attribute 2585 * @return array 2586 */ 2587 protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) { 2588 if ($text === '') return array('', ''); 2589 2590 // Regex to match `markdown` attribute inside of a tag. 2591 $markdown_attr_re = ' 2592 { 2593 \s* # Eat whitespace before the `markdown` attribute 2594 markdown 2595 \s*=\s* 2596 (?> 2597 (["\']) # $1: quote delimiter 2598 (.*?) # $2: attribute value 2599 \1 # matching delimiter 2600 | 2601 ([^\s>]*) # $3: unquoted attribute value 2602 ) 2603 () # $4: make $3 always defined (avoid warnings) 2604 }xs'; 2605 2606 // Regex to match any tag. 2607 $tag_re = '{ 2608 ( # $2: Capture whole tag. 2609 </? # Any opening or closing tag. 2610 [\w:$]+ # Tag name. 2611 (?: 2612 (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. 2613 (?> 2614 ".*?" | # Double quotes (can contain `>`) 2615 \'.*?\' | # Single quotes (can contain `>`) 2616 .+? # Anything but quotes and `>`. 2617 )*? 2618 )? 2619 > # End of tag. 2620 | 2621 <!-- .*? --> # HTML Comment 2622 | 2623 <\?.*?\?> | <%.*?%> # Processing instruction 2624 | 2625 <!\[CDATA\[.*?\]\]> # CData Block 2626 ) 2627 }xs'; 2628 2629 $original_text = $text; // Save original text in case of faliure. 2630 2631 $depth = 0; // Current depth inside the tag tree. 2632 $block_text = ""; // Temporary text holder for current text. 2633 $parsed = ""; // Parsed text that will be returned. 2634 $base_tag_name_re = ''; 2635 2636 // Get the name of the starting tag. 2637 // (This pattern makes $base_tag_name_re safe without quoting.) 2638 if (preg_match('/^<([\w:$]*)\b/', $text, $matches)) 2639 $base_tag_name_re = $matches[1]; 2640 2641 // Loop through every tag until we find the corresponding closing tag. 2642 do { 2643 // Split the text using the first $tag_match pattern found. 2644 // Text before pattern will be first in the array, text after 2645 // pattern will be at the end, and between will be any catches made 2646 // by the pattern. 2647 $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 2648 2649 if (count($parts) < 3) { 2650 // End of $text reached with unbalenced tag(s). 2651 // In that case, we return original text unchanged and pass the 2652 // first character as filtered to prevent an infinite loop in the 2653 // parent function. 2654 return array($original_text[0], substr($original_text, 1)); 2655 } 2656 2657 $block_text .= $parts[0]; // Text before current tag. 2658 $tag = $parts[1]; // Tag to handle. 2659 $text = $parts[2]; // Remaining text after current tag. 2660 2661 // Check for: Auto-close tag (like <hr/>) 2662 // Comments and Processing Instructions. 2663 if (preg_match('{^</?(?:' . $this->auto_close_tags_re . ')\b}', $tag) || 2664 $tag[1] === '!' || $tag[1] === '?') 2665 { 2666 // Just add the tag to the block as if it was text. 2667 $block_text .= $tag; 2668 } 2669 else { 2670 // Increase/decrease nested tag count. Only do so if 2671 // the tag's name match base tag's. 2672 if (preg_match('{^</?' . $base_tag_name_re . '\b}', $tag)) { 2673 if ($tag[1] === '/') { 2674 $depth--; 2675 } else if ($tag[strlen($tag)-2] !== '/') { 2676 $depth++; 2677 } 2678 } 2679 2680 // Check for `markdown="1"` attribute and handle it. 2681 if ($md_attr && 2682 preg_match($markdown_attr_re, $tag, $attr_m) && 2683 preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3])) 2684 { 2685 // Remove `markdown` attribute from opening tag. 2686 $tag = preg_replace($markdown_attr_re, '', $tag); 2687 2688 // Check if text inside this tag must be parsed in span mode. 2689 $mode = $attr_m[2] . $attr_m[3]; 2690 $span_mode = $mode === 'span' || ($mode !== 'block' && 2691 preg_match('{^<(?:' . $this->contain_span_tags_re . ')\b}', $tag)); 2692 2693 // Calculate indent before tag. 2694 if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) { 2695 $strlen = $this->utf8_strlen; 2696 $indent = $strlen($matches[1], 'UTF-8'); 2697 } else { 2698 $indent = 0; 2699 } 2700 2701 // End preceding block with this tag. 2702 $block_text .= $tag; 2703 $parsed .= $this->$hash_method($block_text); 2704 2705 // Get enclosing tag name for the ParseMarkdown function. 2706 // (This pattern makes $tag_name_re safe without quoting.) 2707 preg_match('/^<([\w:$]*)\b/', $tag, $matches); 2708 $tag_name_re = $matches[1]; 2709 2710 // Parse the content using the HTML-in-Markdown parser. 2711 list ($block_text, $text) 2712 = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 2713 $tag_name_re, $span_mode); 2714 2715 // Outdent markdown text. 2716 if ($indent > 0) { 2717 $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 2718 $block_text); 2719 } 2720 2721 // Append tag content to parsed text. 2722 if (!$span_mode) { 2723 $parsed .= "\n\n$block_text\n\n"; 2724 } else { 2725 $parsed .= (string) $block_text; 2726 } 2727 2728 // Start over with a new block. 2729 $block_text = ""; 2730 } 2731 else $block_text .= $tag; 2732 } 2733 2734 } while ($depth > 0); 2735 2736 // Hash last block text that wasn't processed inside the loop. 2737 $parsed .= $this->$hash_method($block_text); 2738 2739 return array($parsed, $text); 2740 } 2741 2742 /** 2743 * Called whenever a tag must be hashed when a function inserts a "clean" tag 2744 * in $text, it passes through this function and is automaticaly escaped, 2745 * blocking invalid nested overlap. 2746 * @param string $text 2747 * @return string 2748 */ 2749 protected function hashClean($text) { 2750 return $this->hashPart($text, 'C'); 2751 } 2752 2753 /** 2754 * Turn Markdown link shortcuts into XHTML <a> tags. 2755 * @param string $text 2756 * @return string 2757 */ 2758 protected function doAnchors($text) { 2759 if ($this->in_anchor) { 2760 return $text; 2761 } 2762 $this->in_anchor = true; 2763 2764 // First, handle reference-style links: [link text] [id] 2765 $text = preg_replace_callback('{ 2766 ( # wrap whole match in $1 2767 \[ 2768 (' . $this->nested_brackets_re . ') # link text = $2 2769 \] 2770 2771 [ ]? # one optional space 2772 (?:\n[ ]*)? # one optional newline followed by spaces 2773 2774 \[ 2775 (.*?) # id = $3 2776 \] 2777 ) 2778 }xs', 2779 array($this, '_doAnchors_reference_callback'), $text); 2780 2781 // Next, inline-style links: [link text](url "optional title") 2782 $text = preg_replace_callback('{ 2783 ( # wrap whole match in $1 2784 \[ 2785 (' . $this->nested_brackets_re . ') # link text = $2 2786 \] 2787 \( # literal paren 2788 [ \n]* 2789 (?: 2790 <(.+?)> # href = $3 2791 | 2792 (' . $this->nested_url_parenthesis_re . ') # href = $4 2793 ) 2794 [ \n]* 2795 ( # $5 2796 ([\'"]) # quote char = $6 2797 (.*?) # Title = $7 2798 \6 # matching quote 2799 [ \n]* # ignore any spaces/tabs between closing quote and ) 2800 )? # title is optional 2801 \) 2802 (?:[ ]? ' . $this->id_class_attr_catch_re . ' )? # $8 = id/class attributes 2803 ) 2804 }xs', 2805 array($this, '_doAnchors_inline_callback'), $text); 2806 2807 // Last, handle reference-style shortcuts: [link text] 2808 // These must come last in case you've also got [link text][1] 2809 // or [link text](/foo) 2810 $text = preg_replace_callback('{ 2811 ( # wrap whole match in $1 2812 \[ 2813 ([^\[\]]+) # link text = $2; can\'t contain [ or ] 2814 \] 2815 ) 2816 }xs', 2817 array($this, '_doAnchors_reference_callback'), $text); 2818 2819 $this->in_anchor = false; 2820 return $text; 2821 } 2822 2823 /** 2824 * Callback for reference anchors 2825 * @param array $matches 2826 * @return string 2827 */ 2828 protected function _doAnchors_reference_callback($matches) { 2829 $whole_match = $matches[1]; 2830 $link_text = $matches[2]; 2831 $link_id =& $matches[3]; 2832 2833 if ($link_id == "") { 2834 // for shortcut links like [this][] or [this]. 2835 $link_id = $link_text; 2836 } 2837 2838 // lower-case and turn embedded newlines into spaces 2839 $link_id = strtolower($link_id); 2840 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); 2841 2842 if (isset($this->urls[$link_id])) { 2843 $url = $this->urls[$link_id]; 2844 $url = $this->encodeURLAttribute($url); 2845 2846 $result = "<a href=\"$url\""; 2847 if ( isset( $this->titles[$link_id] ) ) { 2848 $title = $this->titles[$link_id]; 2849 $title = $this->encodeAttribute($title); 2850 $result .= " title=\"$title\""; 2851 } 2852 if (isset($this->ref_attr[$link_id])) 2853 $result .= $this->ref_attr[$link_id]; 2854 2855 $link_text = $this->runSpanGamut($link_text); 2856 $result .= ">$link_text</a>"; 2857 $result = $this->hashPart($result); 2858 } 2859 else { 2860 $result = $whole_match; 2861 } 2862 return $result; 2863 } 2864 2865 /** 2866 * Callback for inline anchors 2867 * @param array $matches 2868 * @return string 2869 */ 2870 protected function _doAnchors_inline_callback($matches) { 2871 $link_text = $this->runSpanGamut($matches[2]); 2872 $url = $matches[3] === '' ? $matches[4] : $matches[3]; 2873 $title_quote =& $matches[6]; 2874 $title =& $matches[7]; 2875 $attr = $this->doExtraAttributes("a", $dummy =& $matches[8]); 2876 2877 // if the URL was of the form <s p a c e s> it got caught by the HTML 2878 // tag parser and hashed. Need to reverse the process before using the URL. 2879 $unhashed = $this->unhash($url); 2880 if ($unhashed !== $url) 2881 $url = preg_replace('/^<(.*)>$/', '\1', $unhashed); 2882 2883 $url = $this->encodeURLAttribute($url); 2884 2885 $result = "<a href=\"$url\""; 2886 if (isset($title) && $title_quote) { 2887 $title = $this->encodeAttribute($title); 2888 $result .= " title=\"$title\""; 2889 } 2890 $result .= $attr; 2891 2892 $link_text = $this->runSpanGamut($link_text); 2893 $result .= ">$link_text</a>"; 2894 2895 return $this->hashPart($result); 2896 } 2897 2898 /** 2899 * Turn Markdown image shortcuts into <img> tags. 2900 * @param string $text 2901 * @return string 2902 */ 2903 protected function doImages($text) { 2904 // First, handle reference-style labeled images: ![alt text][id] 2905 $text = preg_replace_callback('{ 2906 ( # wrap whole match in $1 2907 !\[ 2908 (' . $this->nested_brackets_re . ') # alt text = $2 2909 \] 2910 2911 [ ]? # one optional space 2912 (?:\n[ ]*)? # one optional newline followed by spaces 2913 2914 \[ 2915 (.*?) # id = $3 2916 \] 2917 2918 ) 2919 }xs', 2920 array($this, '_doImages_reference_callback'), $text); 2921 2922 // Next, handle inline images:  2923 // Don't forget: encode * and _ 2924 $text = preg_replace_callback('{ 2925 ( # wrap whole match in $1 2926 !\[ 2927 (' . $this->nested_brackets_re . ') # alt text = $2 2928 \] 2929 \s? # One optional whitespace character 2930 \( # literal paren 2931 [ \n]* 2932 (?: 2933 <(\S*)> # src url = $3 2934 | 2935 (' . $this->nested_url_parenthesis_re . ') # src url = $4 2936 ) 2937 [ \n]* 2938 ( # $5 2939 ([\'"]) # quote char = $6 2940 (.*?) # title = $7 2941 \6 # matching quote 2942 [ \n]* 2943 )? # title is optional 2944 \) 2945 (?:[ ]? ' . $this->id_class_attr_catch_re . ' )? # $8 = id/class attributes 2946 ) 2947 }xs', 2948 array($this, '_doImages_inline_callback'), $text); 2949 2950 return $text; 2951 } 2952 2953 /** 2954 * Callback for referenced images 2955 * @param array $matches 2956 * @return string 2957 */ 2958 protected function _doImages_reference_callback($matches) { 2959 $whole_match = $matches[1]; 2960 $alt_text = $matches[2]; 2961 $link_id = strtolower($matches[3]); 2962 2963 if ($link_id === "") { 2964 $link_id = strtolower($alt_text); // for shortcut links like ![this][]. 2965 } 2966 2967 $alt_text = $this->encodeAttribute($alt_text); 2968 if (isset($this->urls[$link_id])) { 2969 $url = $this->encodeURLAttribute($this->urls[$link_id]); 2970 $result = "<img src=\"$url\" alt=\"$alt_text\""; 2971 if (isset($this->titles[$link_id])) { 2972 $title = $this->titles[$link_id]; 2973 $title = $this->encodeAttribute($title); 2974 $result .= " title=\"$title\""; 2975 } 2976 if (isset($this->ref_attr[$link_id])) { 2977 $result .= $this->ref_attr[$link_id]; 2978 } 2979 $result .= $this->empty_element_suffix; 2980 $result = $this->hashPart($result); 2981 } 2982 else { 2983 // If there's no such link ID, leave intact: 2984 $result = $whole_match; 2985 } 2986 2987 return $result; 2988 } 2989 2990 /** 2991 * Callback for inline images 2992 * @param array $matches 2993 * @return string 2994 */ 2995 protected function _doImages_inline_callback($matches) { 2996 $alt_text = $matches[2]; 2997 $url = $matches[3] === '' ? $matches[4] : $matches[3]; 2998 $title_quote =& $matches[6]; 2999 $title =& $matches[7]; 3000 $attr = $this->doExtraAttributes("img", $dummy =& $matches[8]); 3001 3002 $alt_text = $this->encodeAttribute($alt_text); 3003 $url = $this->encodeURLAttribute($url); 3004 $result = "<img src=\"$url\" alt=\"$alt_text\""; 3005 if (isset($title) && $title_quote) { 3006 $title = $this->encodeAttribute($title); 3007 $result .= " title=\"$title\""; // $title already quoted 3008 } 3009 $result .= $attr; 3010 $result .= $this->empty_element_suffix; 3011 3012 return $this->hashPart($result); 3013 } 3014 3015 /** 3016 * Process markdown headers. Redefined to add ID and class attribute support. 3017 * @param string $text 3018 * @return string 3019 */ 3020 protected function doHeaders($text) { 3021 // Setext-style headers: 3022 // Header 1 {#header1} 3023 // ======== 3024 // 3025 // Header 2 {#header2 .class1 .class2} 3026 // -------- 3027 // 3028 $text = preg_replace_callback( 3029 '{ 3030 (^.+?) # $1: Header text 3031 (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )? # $3 = id/class attributes 3032 [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer 3033 }mx', 3034 array($this, '_doHeaders_callback_setext'), $text); 3035 3036 // atx-style headers: 3037 // # Header 1 {#header1} 3038 // ## Header 2 {#header2} 3039 // ## Header 2 with closing hashes ## {#header3.class1.class2} 3040 // ... 3041 // ###### Header 6 {.class2} 3042 // 3043 $text = preg_replace_callback('{ 3044 ^(\#{1,6}) # $1 = string of #\'s 3045 [ ]'.($this->hashtag_protection ? '+' : '*').' 3046 (.+?) # $2 = Header text 3047 [ ]* 3048 \#* # optional closing #\'s (not counted) 3049 (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )? # $3 = id/class attributes 3050 [ ]* 3051 \n+ 3052 }xm', 3053 array($this, '_doHeaders_callback_atx'), $text); 3054 3055 return $text; 3056 } 3057 3058 /** 3059 * Callback for setext headers 3060 * @param array $matches 3061 * @return string 3062 */ 3063 protected function _doHeaders_callback_setext($matches) { 3064 if ($matches[3] === '-' && preg_match('{^- }', $matches[1])) { 3065 return $matches[0]; 3066 } 3067 3068 $level = $matches[3][0] === '=' ? 1 : 2; 3069 3070 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[1]) : null; 3071 3072 $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[2], $defaultId); 3073 $block = "<h$level$attr>" . $this->runSpanGamut($matches[1]) . "</h$level>"; 3074 return "\n" . $this->hashBlock($block) . "\n\n"; 3075 } 3076 3077 /** 3078 * Callback for atx headers 3079 * @param array $matches 3080 * @return string 3081 */ 3082 protected function _doHeaders_callback_atx($matches) { 3083 $level = strlen($matches[1]); 3084 3085 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[2]) : null; 3086 $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[3], $defaultId); 3087 $block = "<h$level$attr>" . $this->runSpanGamut($matches[2]) . "</h$level>"; 3088 return "\n" . $this->hashBlock($block) . "\n\n"; 3089 } 3090 3091 /** 3092 * Form HTML tables. 3093 * @param string $text 3094 * @return string 3095 */ 3096 protected function doTables($text) { 3097 $less_than_tab = $this->tab_width - 1; 3098 // Find tables with leading pipe. 3099 // 3100 // | Header 1 | Header 2 3101 // | -------- | -------- 3102 // | Cell 1 | Cell 2 3103 // | Cell 3 | Cell 4 3104 $text = preg_replace_callback(' 3105 { 3106 ^ # Start of a line 3107 [ ]{0,' . $less_than_tab . '} # Allowed whitespace. 3108 [|] # Optional leading pipe (present) 3109 (.+) \n # $1: Header row (at least one pipe) 3110 3111 [ ]{0,' . $less_than_tab . '} # Allowed whitespace. 3112 [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline 3113 3114 ( # $3: Cells 3115 (?> 3116 [ ]* # Allowed whitespace. 3117 [|] .* \n # Row content. 3118 )* 3119 ) 3120 (?=\n|\Z) # Stop at final double newline. 3121 }xm', 3122 array($this, '_doTable_leadingPipe_callback'), $text); 3123 3124 // Find tables without leading pipe. 3125 // 3126 // Header 1 | Header 2 3127 // -------- | -------- 3128 // Cell 1 | Cell 2 3129 // Cell 3 | Cell 4 3130 $text = preg_replace_callback(' 3131 { 3132 ^ # Start of a line 3133 [ ]{0,' . $less_than_tab . '} # Allowed whitespace. 3134 (\S.*[|].*) \n # $1: Header row (at least one pipe) 3135 3136 [ ]{0,' . $less_than_tab . '} # Allowed whitespace. 3137 ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline 3138 3139 ( # $3: Cells 3140 (?> 3141 .* [|] .* \n # Row content 3142 )* 3143 ) 3144 (?=\n|\Z) # Stop at final double newline. 3145 }xm', 3146 array($this, '_DoTable_callback'), $text); 3147 3148 return $text; 3149 } 3150 3151 /** 3152 * Callback for removing the leading pipe for each row 3153 * @param array $matches 3154 * @return string 3155 */ 3156 protected function _doTable_leadingPipe_callback($matches) { 3157 $head = $matches[1]; 3158 $underline = $matches[2]; 3159 $content = $matches[3]; 3160 3161 $content = preg_replace('/^ *[|]/m', '', $content); 3162 3163 return $this->_doTable_callback(array($matches[0], $head, $underline, $content)); 3164 } 3165 3166 /** 3167 * Make the align attribute in a table 3168 * @param string $alignname 3169 * @return string 3170 */ 3171 protected function _doTable_makeAlignAttr($alignname) { 3172 if (empty($this->table_align_class_tmpl)) { 3173 return " align=\"$alignname\""; 3174 } 3175 3176 $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl); 3177 return " class=\"$classname\""; 3178 } 3179 3180 /** 3181 * Calback for processing tables 3182 * @param array $matches 3183 * @return string 3184 */ 3185 protected function _doTable_callback($matches) { 3186 $head = $matches[1]; 3187 $underline = $matches[2]; 3188 $content = $matches[3]; 3189 3190 // Remove any tailing pipes for each line. 3191 $head = preg_replace('/[|] *$/m', '', $head); 3192 $underline = preg_replace('/[|] *$/m', '', $underline); 3193 $content = preg_replace('/[|] *$/m', '', $content); 3194 3195 // Reading alignement from header underline. 3196 $separators = preg_split('/ *[|] */', $underline); 3197 foreach ($separators as $n => $s) { 3198 if (preg_match('/^ *-+: *$/', $s)) 3199 $attr[$n] = $this->_doTable_makeAlignAttr('right'); 3200 else if (preg_match('/^ *:-+: *$/', $s)) 3201 $attr[$n] = $this->_doTable_makeAlignAttr('center'); 3202 else if (preg_match('/^ *:-+ *$/', $s)) 3203 $attr[$n] = $this->_doTable_makeAlignAttr('left'); 3204 else 3205 $attr[$n] = ''; 3206 } 3207 3208 // Parsing span elements, including code spans, character escapes, 3209 // and inline HTML tags, so that pipes inside those gets ignored. 3210 $head = $this->parseSpan($head); 3211 $headers = preg_split('/ *[|] */', $head); 3212 $col_count = count($headers); 3213 $attr = array_pad($attr, $col_count, ''); 3214 3215 // Write column headers. 3216 $text = "<table>\n"; 3217 $text .= "<thead>\n"; 3218 $text .= "<tr>\n"; 3219 foreach ($headers as $n => $header) { 3220 $text .= " <th$attr[$n]>" . $this->runSpanGamut(trim($header)) . "</th>\n"; 3221 } 3222 $text .= "</tr>\n"; 3223 $text .= "</thead>\n"; 3224 3225 // Split content by row. 3226 $rows = explode("\n", trim($content, "\n")); 3227 3228 $text .= "<tbody>\n"; 3229 foreach ($rows as $row) { 3230 // Parsing span elements, including code spans, character escapes, 3231 // and inline HTML tags, so that pipes inside those gets ignored. 3232 $row = $this->parseSpan($row); 3233 3234 // Split row by cell. 3235 $row_cells = preg_split('/ *[|] */', $row, $col_count); 3236 $row_cells = array_pad($row_cells, $col_count, ''); 3237 3238 $text .= "<tr>\n"; 3239 foreach ($row_cells as $n => $cell) { 3240 $text .= " <td$attr[$n]>" . $this->runSpanGamut(trim($cell)) . "</td>\n"; 3241 } 3242 $text .= "</tr>\n"; 3243 } 3244 $text .= "</tbody>\n"; 3245 $text .= "</table>"; 3246 3247 return $this->hashBlock($text) . "\n"; 3248 } 3249 3250 /** 3251 * Form HTML definition lists. 3252 * @param string $text 3253 * @return string 3254 */ 3255 protected function doDefLists($text) { 3256 $less_than_tab = $this->tab_width - 1; 3257 3258 // Re-usable pattern to match any entire dl list: 3259 $whole_list_re = '(?> 3260 ( # $1 = whole list 3261 ( # $2 3262 [ ]{0,' . $less_than_tab . '} 3263 ((?>.*\S.*\n)+) # $3 = defined term 3264 \n? 3265 [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition 3266 ) 3267 (?s:.+?) 3268 ( # $4 3269 \z 3270 | 3271 \n{2,} 3272 (?=\S) 3273 (?! # Negative lookahead for another term 3274 [ ]{0,' . $less_than_tab . '} 3275 (?: \S.*\n )+? # defined term 3276 \n? 3277 [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition 3278 ) 3279 (?! # Negative lookahead for another definition 3280 [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition 3281 ) 3282 ) 3283 ) 3284 )'; // mx 3285 3286 $text = preg_replace_callback('{ 3287 (?>\A\n?|(?<=\n\n)) 3288 ' . $whole_list_re . ' 3289 }mx', 3290 array($this, '_doDefLists_callback'), $text); 3291 3292 return $text; 3293 } 3294 3295 /** 3296 * Callback for processing definition lists 3297 * @param array $matches 3298 * @return string 3299 */ 3300 protected function _doDefLists_callback($matches) { 3301 // Re-usable patterns to match list item bullets and number markers: 3302 $list = $matches[1]; 3303 3304 // Turn double returns into triple returns, so that we can make a 3305 // paragraph for the last item in a list, if necessary: 3306 $result = trim($this->processDefListItems($list)); 3307 $result = "<dl>\n" . $result . "\n</dl>"; 3308 return $this->hashBlock($result) . "\n\n"; 3309 } 3310 3311 /** 3312 * Process the contents of a single definition list, splitting it 3313 * into individual term and definition list items. 3314 * @param string $list_str 3315 * @return string 3316 */ 3317 protected function processDefListItems($list_str) { 3318 3319 $less_than_tab = $this->tab_width - 1; 3320 3321 // Trim trailing blank lines: 3322 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 3323 3324 // Process definition terms. 3325 $list_str = preg_replace_callback('{ 3326 (?>\A\n?|\n\n+) # leading line 3327 ( # definition terms = $1 3328 [ ]{0,' . $less_than_tab . '} # leading whitespace 3329 (?!\:[ ]|[ ]) # negative lookahead for a definition 3330 # mark (colon) or more whitespace. 3331 (?> \S.* \n)+? # actual term (not whitespace). 3332 ) 3333 (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed 3334 # with a definition mark. 3335 }xm', 3336 array($this, '_processDefListItems_callback_dt'), $list_str); 3337 3338 // Process actual definitions. 3339 $list_str = preg_replace_callback('{ 3340 \n(\n+)? # leading line = $1 3341 ( # marker space = $2 3342 [ ]{0,' . $less_than_tab . '} # whitespace before colon 3343 \:[ ]+ # definition mark (colon) 3344 ) 3345 ((?s:.+?)) # definition text = $3 3346 (?= \n+ # stop at next definition mark, 3347 (?: # next term or end of text 3348 [ ]{0,' . $less_than_tab . '} \:[ ] | 3349 <dt> | \z 3350 ) 3351 ) 3352 }xm', 3353 array($this, '_processDefListItems_callback_dd'), $list_str); 3354 3355 return $list_str; 3356 } 3357 3358 /** 3359 * Callback for <dt> elements in definition lists 3360 * @param array $matches 3361 * @return string 3362 */ 3363 protected function _processDefListItems_callback_dt($matches) { 3364 $terms = explode("\n", trim($matches[1])); 3365 $text = ''; 3366 foreach ($terms as $term) { 3367 $term = $this->runSpanGamut(trim($term)); 3368 $text .= "\n<dt>" . $term . "</dt>"; 3369 } 3370 return $text . "\n"; 3371 } 3372 3373 /** 3374 * Callback for <dd> elements in definition lists 3375 * @param array $matches 3376 * @return string 3377 */ 3378 protected function _processDefListItems_callback_dd($matches) { 3379 $leading_line = $matches[1]; 3380 $marker_space = $matches[2]; 3381 $def = $matches[3]; 3382 3383 if ($leading_line || preg_match('/\n{2,}/', $def)) { 3384 // Replace marker with the appropriate whitespace indentation 3385 $def = str_repeat(' ', strlen($marker_space)) . $def; 3386 $def = $this->runBlockGamut($this->outdent($def . "\n\n")); 3387 $def = "\n". $def ."\n"; 3388 } 3389 else { 3390 $def = rtrim($def); 3391 $def = $this->runSpanGamut($this->outdent($def)); 3392 } 3393 3394 return "\n<dd>" . $def . "</dd>\n"; 3395 } 3396 3397 /** 3398 * Adding the fenced code block syntax to regular Markdown: 3399 * 3400 * ~~~ 3401 * Code block 3402 * ~~~ 3403 * 3404 * @param string $text 3405 * @return string 3406 */ 3407 protected function doFencedCodeBlocks($text) { 3408 3409 $text = preg_replace_callback('{ 3410 (?:\n|\A) 3411 # 1: Opening marker 3412 ( 3413 (?:~{3,}|`{3,}) # 3 or more tildes/backticks. 3414 ) 3415 [ ]* 3416 (?: 3417 \.?([-_:a-zA-Z0-9]+) # 2: standalone class name 3418 )? 3419 [ ]* 3420 (?: 3421 ' . $this->id_class_attr_catch_re . ' # 3: Extra attributes 3422 )? 3423 [ ]* \n # Whitespace and newline following marker. 3424 3425 # 4: Content 3426 ( 3427 (?> 3428 (?!\1 [ ]* \n) # Not a closing marker. 3429 .*\n+ 3430 )+ 3431 ) 3432 3433 # Closing marker. 3434 \1 [ ]* (?= \n ) 3435 }xm', 3436 array($this, '_doFencedCodeBlocks_callback'), $text); 3437 3438 return $text; 3439 } 3440 3441 /** 3442 * Callback to process fenced code blocks 3443 * @param array $matches 3444 * @return string 3445 */ 3446 protected function _doFencedCodeBlocks_callback($matches) { 3447 $classname =& $matches[2]; 3448 $attrs =& $matches[3]; 3449 $codeblock = $matches[4]; 3450 3451 if ($this->code_block_content_func) { 3452 $codeblock = call_user_func($this->code_block_content_func, $codeblock, $classname); 3453 } else { 3454 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); 3455 } 3456 3457 $codeblock = preg_replace_callback('/^\n+/', 3458 array($this, '_doFencedCodeBlocks_newlines'), $codeblock); 3459 3460 $classes = array(); 3461 if ($classname !== "") { 3462 if ($classname[0] === '.') { 3463 $classname = substr($classname, 1); 3464 } 3465 $classes[] = $this->code_class_prefix . $classname; 3466 } 3467 $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs, null, $classes); 3468 $pre_attr_str = $this->code_attr_on_pre ? $attr_str : ''; 3469 $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str; 3470 $codeblock = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>"; 3471 3472 return "\n\n".$this->hashBlock($codeblock)."\n\n"; 3473 } 3474 3475 /** 3476 * Replace new lines in fenced code blocks 3477 * @param array $matches 3478 * @return string 3479 */ 3480 protected function _doFencedCodeBlocks_newlines($matches) { 3481 return str_repeat("<br$this->empty_element_suffix", 3482 strlen($matches[0])); 3483 } 3484 3485 /** 3486 * Redefining emphasis markers so that emphasis by underscore does not 3487 * work in the middle of a word. 3488 * @var array 3489 */ 3490 protected $em_relist = array( 3491 '' => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)', 3492 '*' => '(?<![\s*])\*(?!\*)', 3493 '_' => '(?<![\s_])_(?![a-zA-Z0-9_])', 3494 ); 3495 protected $strong_relist = array( 3496 '' => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)', 3497 '**' => '(?<![\s*])\*\*(?!\*)', 3498 '__' => '(?<![\s_])__(?![a-zA-Z0-9_])', 3499 ); 3500 protected $em_strong_relist = array( 3501 '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)', 3502 '***' => '(?<![\s*])\*\*\*(?!\*)', 3503 '___' => '(?<![\s_])___(?![a-zA-Z0-9_])', 3504 ); 3505 3506 /** 3507 * Parse text into paragraphs 3508 * @param string $text String to process in paragraphs 3509 * @param boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags 3510 * @return string HTML output 3511 */ 3512 protected function formParagraphs($text, $wrap_in_p = true) { 3513 // Strip leading and trailing lines: 3514 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 3515 3516 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 3517 3518 // Wrap <p> tags and unhashify HTML blocks 3519 foreach ($grafs as $key => $value) { 3520 $value = trim($this->runSpanGamut($value)); 3521 3522 // Check if this should be enclosed in a paragraph. 3523 // Clean tag hashes & block tag hashes are left alone. 3524 $is_p = $wrap_in_p && !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value); 3525 3526 if ($is_p) { 3527 $value = "<p>$value</p>"; 3528 } 3529 $grafs[$key] = $value; 3530 } 3531 3532 // Join grafs in one text, then unhash HTML tags. 3533 $text = implode("\n\n", $grafs); 3534 3535 // Finish by removing any tag hashes still present in $text. 3536 $text = $this->unhash($text); 3537 3538 return $text; 3539 } 3540 3541 3542 /** 3543 * Footnotes - Strips link definitions from text, stores the URLs and 3544 * titles in hash references. 3545 * @param string $text 3546 * @return string 3547 */ 3548 protected function stripFootnotes($text) { 3549 $less_than_tab = $this->tab_width - 1; 3550 3551 // Link defs are in the form: [^id]: url "optional title" 3552 $text = preg_replace_callback('{ 3553 ^[ ]{0,' . $less_than_tab . '}\[\^(.+?)\][ ]?: # note_id = $1 3554 [ ]* 3555 \n? # maybe *one* newline 3556 ( # text = $2 (no blank lines allowed) 3557 (?: 3558 .+ # actual text 3559 | 3560 \n # newlines but 3561 (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker. 3562 (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 3563 # by non-indented content 3564 )* 3565 ) 3566 }xm', 3567 array($this, '_stripFootnotes_callback'), 3568 $text); 3569 return $text; 3570 } 3571 3572 /** 3573 * Callback for stripping footnotes 3574 * @param array $matches 3575 * @return string 3576 */ 3577 protected function _stripFootnotes_callback($matches) { 3578 $note_id = $this->fn_id_prefix . $matches[1]; 3579 $this->footnotes[$note_id] = $this->outdent($matches[2]); 3580 return ''; // String that will replace the block 3581 } 3582 3583 /** 3584 * Replace footnote references in $text [^id] with a special text-token 3585 * which will be replaced by the actual footnote marker in appendFootnotes. 3586 * @param string $text 3587 * @return string 3588 */ 3589 protected function doFootnotes($text) { 3590 if (!$this->in_anchor) { 3591 $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text); 3592 } 3593 return $text; 3594 } 3595 3596 /** 3597 * Append footnote list to text 3598 * @param string $text 3599 * @return string 3600 */ 3601 protected function appendFootnotes($text) { 3602 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 3603 array($this, '_appendFootnotes_callback'), $text); 3604 3605 if ( ! empty( $this->footnotes_ordered ) ) { 3606 $this->_doFootnotes(); 3607 if ( ! $this->omit_footnotes ) { 3608 $text .= "\n\n"; 3609 $text .= "<div class=\"footnotes\" role=\"doc-endnotes\">\n"; 3610 $text .= "<hr" . $this->empty_element_suffix . "\n"; 3611 $text .= $this->footnotes_assembled; 3612 $text .= "</div>"; 3613 } 3614 } 3615 return $text; 3616 } 3617 3618 3619 /** 3620 * Generates the HTML for footnotes. Called by appendFootnotes, even if 3621 * footnotes are not being appended. 3622 * @return void 3623 */ 3624 protected function _doFootnotes() { 3625 $attr = array(); 3626 if ($this->fn_backlink_class !== "") { 3627 $class = $this->fn_backlink_class; 3628 $class = $this->encodeAttribute($class); 3629 $attr['class'] = " class=\"$class\""; 3630 } 3631 $attr['role'] = " role=\"doc-backlink\""; 3632 $num = 0; 3633 3634 $text = "<ol>\n\n"; 3635 while (!empty($this->footnotes_ordered)) { 3636 $footnote = reset($this->footnotes_ordered); 3637 $note_id = key($this->footnotes_ordered); 3638 unset($this->footnotes_ordered[$note_id]); 3639 $ref_count = $this->footnotes_ref_count[$note_id]; 3640 unset($this->footnotes_ref_count[$note_id]); 3641 unset($this->footnotes[$note_id]); 3642 3643 $footnote .= "\n"; // Need to append newline before parsing. 3644 $footnote = $this->runBlockGamut("$footnote\n"); 3645 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 3646 array($this, '_appendFootnotes_callback'), $footnote); 3647 3648 $num++; 3649 $note_id = $this->encodeAttribute($note_id); 3650 3651 // Prepare backlink, multiple backlinks if multiple references 3652 // Do not create empty backlinks if the html is blank 3653 $backlink = ""; 3654 if (!empty($this->fn_backlink_html)) { 3655 for ($ref_num = 1; $ref_num <= $ref_count; ++$ref_num) { 3656 if (!empty($this->fn_backlink_title)) { 3657 $attr['title'] = ' title="' . $this->encodeAttribute($this->fn_backlink_title) . '"'; 3658 } 3659 if (!empty($this->fn_backlink_label)) { 3660 $attr['label'] = ' aria-label="' . $this->encodeAttribute($this->fn_backlink_label) . '"'; 3661 } 3662 $parsed_attr = $this->parseFootnotePlaceholders( 3663 implode('', $attr), 3664 $num, 3665 $ref_num 3666 ); 3667 $backlink_text = $this->parseFootnotePlaceholders( 3668 $this->fn_backlink_html, 3669 $num, 3670 $ref_num 3671 ); 3672 $ref_count_mark = $ref_num > 1 ? $ref_num : ''; 3673 $backlink .= " <a href=\"#fnref$ref_count_mark:$note_id\"$parsed_attr>$backlink_text</a>"; 3674 } 3675 $backlink = trim($backlink); 3676 } 3677 3678 // Add backlink to last paragraph; create new paragraph if needed. 3679 if (!empty($backlink)) { 3680 if (preg_match('{</p>$}', $footnote)) { 3681 $footnote = substr($footnote, 0, -4) . " $backlink</p>"; 3682 } else { 3683 $footnote .= "\n\n<p>$backlink</p>"; 3684 } 3685 } 3686 3687 $text .= "<li id=\"fn:$note_id\" role=\"doc-endnote\">\n"; 3688 $text .= $footnote . "\n"; 3689 $text .= "</li>\n\n"; 3690 } 3691 $text .= "</ol>\n"; 3692 3693 $this->footnotes_assembled = $text; 3694 } 3695 3696 /** 3697 * Callback for appending footnotes 3698 * @param array $matches 3699 * @return string 3700 */ 3701 protected function _appendFootnotes_callback($matches) { 3702 $node_id = $this->fn_id_prefix . $matches[1]; 3703 3704 // Create footnote marker only if it has a corresponding footnote *and* 3705 // the footnote hasn't been used by another marker. 3706 if (isset($this->footnotes[$node_id])) { 3707 $num =& $this->footnotes_numbers[$node_id]; 3708 if (!isset($num)) { 3709 // Transfer footnote content to the ordered list and give it its 3710 // number 3711 $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id]; 3712 $this->footnotes_ref_count[$node_id] = 1; 3713 $num = $this->footnote_counter++; 3714 $ref_count_mark = ''; 3715 } else { 3716 $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1; 3717 } 3718 3719 $attr = ""; 3720 if ($this->fn_link_class !== "") { 3721 $class = $this->fn_link_class; 3722 $class = $this->encodeAttribute($class); 3723 $attr .= " class=\"$class\""; 3724 } 3725 if ($this->fn_link_title !== "") { 3726 $title = $this->fn_link_title; 3727 $title = $this->encodeAttribute($title); 3728 $attr .= " title=\"$title\""; 3729 } 3730 $attr .= " role=\"doc-noteref\""; 3731 3732 $attr = str_replace("%%", $num, $attr); 3733 $node_id = $this->encodeAttribute($node_id); 3734 3735 return 3736 "<sup id=\"fnref$ref_count_mark:$node_id\">". 3737 "<a href=\"#fn:$node_id\"$attr>$num</a>". 3738 "</sup>"; 3739 } 3740 3741 return "[^" . $matches[1] . "]"; 3742 } 3743 3744 /** 3745 * Build footnote label by evaluating any placeholders. 3746 * - ^^ footnote number 3747 * - %% footnote reference number (Nth reference to footnote number) 3748 * @param string $label 3749 * @param int $footnote_number 3750 * @param int $reference_number 3751 * @return string 3752 */ 3753 protected function parseFootnotePlaceholders($label, $footnote_number, $reference_number) { 3754 return str_replace( 3755 array('^^', '%%'), 3756 array($footnote_number, $reference_number), 3757 $label 3758 ); 3759 } 3760 3761 3762 /** 3763 * Abbreviations - strips abbreviations from text, stores titles in hash 3764 * references. 3765 * @param string $text 3766 * @return string 3767 */ 3768 protected function stripAbbreviations($text) { 3769 $less_than_tab = $this->tab_width - 1; 3770 3771 // Link defs are in the form: [id]*: url "optional title" 3772 $text = preg_replace_callback('{ 3773 ^[ ]{0,' . $less_than_tab . '}\*\[(.+?)\][ ]?: # abbr_id = $1 3774 (.*) # text = $2 (no blank lines allowed) 3775 }xm', 3776 array($this, '_stripAbbreviations_callback'), 3777 $text); 3778 return $text; 3779 } 3780 3781 /** 3782 * Callback for stripping abbreviations 3783 * @param array $matches 3784 * @return string 3785 */ 3786 protected function _stripAbbreviations_callback($matches) { 3787 $abbr_word = $matches[1]; 3788 $abbr_desc = $matches[2]; 3789 if ($this->abbr_word_re) { 3790 $this->abbr_word_re .= '|'; 3791 } 3792 $this->abbr_word_re .= preg_quote($abbr_word); 3793 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); 3794 return ''; // String that will replace the block 3795 } 3796 3797 /** 3798 * Find defined abbreviations in text and wrap them in <abbr> elements. 3799 * @param string $text 3800 * @return string 3801 */ 3802 protected function doAbbreviations($text) { 3803 if ($this->abbr_word_re) { 3804 // cannot use the /x modifier because abbr_word_re may 3805 // contain significant spaces: 3806 $text = preg_replace_callback('{' . 3807 '(?<![\w\x1A])' . 3808 '(?:' . $this->abbr_word_re . ')' . 3809 '(?![\w\x1A])' . 3810 '}', 3811 array($this, '_doAbbreviations_callback'), $text); 3812 } 3813 return $text; 3814 } 3815 3816 /** 3817 * Callback for processing abbreviations 3818 * @param array $matches 3819 * @return string 3820 */ 3821 protected function _doAbbreviations_callback($matches) { 3822 $abbr = $matches[0]; 3823 if (isset($this->abbr_desciptions[$abbr])) { 3824 $desc = $this->abbr_desciptions[$abbr]; 3825 if (empty($desc)) { 3826 return $this->hashPart("<abbr>$abbr</abbr>"); 3827 } 3828 $desc = $this->encodeAttribute($desc); 3829 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>"); 3830 } 3831 return $matches[0]; 3832 } 3833 } 3834 3835 // Markdown parser, Copyright Datenstrom, License GPLv2 3836 3837 class YellowMarkdownParser extends MarkdownExtraParser { 3838 public $yellow; // access to API 3839 public $page; // access to page 3840 public $idAttributes; // id attributes 3841 3842 public function __construct($yellow, $page) { 3843 $this->yellow = $yellow; 3844 $this->page = $page; 3845 $this->idAttributes = array(); 3846 $this->url_filter_func = function($url) use ($yellow, $page) { 3847 return $yellow->lookup->normaliseLocation($url, $page->getPage("main")->location); 3848 }; 3849 $this->span_gamut += array("doStrikethrough" => 55); 3850 $this->block_gamut += array("doCollapsibleBlocks" => 65); 3851 $this->block_gamut += array("doGeneralBlocks" => 75); 3852 $this->document_gamut += array("doFootnotesLinks" => 55); 3853 $this->escape_chars .= "~?"; 3854 parent::__construct(); 3855 } 3856 3857 // Handle striketrough 3858 public function doStrikethrough($text) { 3859 $parts = preg_split("/(?<![~])(~~)(?![~])/", $text, -1, PREG_SPLIT_DELIM_CAPTURE); 3860 if (count($parts)>3) { 3861 $text = ""; 3862 $open = false; 3863 foreach ($parts as $part) { 3864 if ($part=="~~") { 3865 $text .= $open ? "</del>" : "<del>"; 3866 $open = !$open; 3867 } else { 3868 $text .= $part; 3869 } 3870 } 3871 if ($open) $text .= "</del>"; 3872 } 3873 return $text; 3874 } 3875 3876 // Handle links 3877 public function doAutoLinks($text) { 3878 $text = preg_replace_callback("/<(\w+:[^\'\">\s]+)>/", array($this, "_doAutoLinks_url_callback"), $text); 3879 $text = preg_replace_callback("/<([\w\+\-\.]+@[\w\-\.]+)>/", array($this, "_doAutoLinks_email_callback"), $text); 3880 $text = preg_replace_callback("/^\s*\[(\w+)([^\]]*)\]\s*$/", array($this, "_doAutoLinks_shortcutBlock_callback"), $text); 3881 $text = preg_replace_callback("/\[(\w+)(.*?)\]/", array($this, "_doAutoLinks_shortcutInline_callback"), $text); 3882 $text = preg_replace_callback("/\[\-\-(.*?)\-\-\]/", array($this, "_doAutoLinks_shortcutComment_callback"), $text); 3883 $text = preg_replace_callback("/\:([\w\+\-\_]+)\:/", array($this, "_doAutoLinks_shortcutSymbol_callback"), $text); 3884 $text = preg_replace_callback("/((http|https|ftp):\/\/\S+[^\'\"\,\.\;\:\*\~\s]+)/", array($this, "_doAutoLinks_url_callback"), $text); 3885 $text = preg_replace_callback("/([\w\+\-\.]+@[\w\-\.]+\.[\w]+)/", array($this, "_doAutoLinks_email_callback"), $text); 3886 return $text; 3887 } 3888 3889 // Handle shortcuts, block style 3890 public function _doAutoLinks_shortcutBlock_callback($matches) { 3891 $output = $this->page->parseContentElement($matches[1], trim($matches[2]), "", "block"); 3892 return is_null($output) ? $matches[0] : $this->hashBlock($output); 3893 } 3894 3895 // Handle shortcuts, inline style 3896 public function _doAutoLinks_shortcutInline_callback($matches) { 3897 $output = $this->page->parseContentElement($matches[1], trim($matches[2]), "", "inline"); 3898 return is_null($output) ? $matches[0] : $this->hashPart($output); 3899 } 3900 3901 // Handle shortcuts, comment style 3902 public function _doAutoLinks_shortcutComment_callback($matches) { 3903 $output = "<!--".htmlspecialchars($matches[1], ENT_NOQUOTES)."-->"; 3904 return $this->hashBlock($output); 3905 } 3906 3907 // Handle shortcuts, symbol style 3908 public function _doAutoLinks_shortcutSymbol_callback($matches) { 3909 $output = $this->page->parseContentElement("", $matches[1], "", "symbol"); 3910 return is_null($output) ? $matches[0] : $this->hashPart($output); 3911 } 3912 3913 // Handle fenced code blocks 3914 public function _doFencedCodeBlocks_callback($matches) { 3915 $name = $this->getBlockName($matches[2], $matches[3]); 3916 $text = $matches[4]; 3917 $attributes = $matches[3]; 3918 $output = $this->page->parseContentElement($name, $text, $attributes, "code"); 3919 if (is_null($output)) { 3920 $attr = $this->doExtraAttributes("pre", ".$matches[2] $matches[3]"); 3921 $output = "<pre$attr><code>".htmlspecialchars($text, ENT_NOQUOTES)."</code></pre>"; 3922 } 3923 return "\n\n".$this->hashBlock($output)."\n\n"; 3924 } 3925 3926 // Handle headers, text style 3927 public function _doHeaders_callback_setext($matches) { 3928 if ($matches[3]=="-" && preg_match('{^- }', $matches[1])) return $matches[0]; 3929 $text = $matches[1]; 3930 $level = $matches[3][0]=="=" ? 1 : 2; 3931 $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[2]); 3932 if (is_string_empty($attr) && $level>=2) $attr = $this->getIdAttribute($text); 3933 $output = "<h$level$attr>".$this->runSpanGamut($text)."</h$level>"; 3934 return "\n".$this->hashBlock($output)."\n\n"; 3935 } 3936 3937 // Handle headers, atx style 3938 public function _doHeaders_callback_atx($matches) { 3939 $text = $matches[2]; 3940 $level = strlen($matches[1]); 3941 $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[3]); 3942 if (is_string_empty($attr) && $level>=2) $attr = $this->getIdAttribute($text); 3943 $output = "<h$level$attr>".$this->runSpanGamut($text)."</h$level>"; 3944 return "\n".$this->hashBlock($output)."\n\n"; 3945 } 3946 3947 // Handle inline links 3948 public function _doAnchors_inline_callback($matches) { 3949 $url = $matches[3]=="" ? $matches[4] : $matches[3]; 3950 $text = $matches[2]; 3951 $title = isset($matches[7]) ? $matches[7] : ""; 3952 $attr = $this->doExtraAttributes("a", $dummy =& $matches[8]); 3953 $output = "<a href=\"".$this->encodeURLAttribute($url)."\""; 3954 if (!is_string_empty($title)) $output .= " title=\"".$this->encodeAttribute($title)."\""; 3955 $output .= $attr; 3956 $output .= ">".$this->runSpanGamut($text)."</a>"; 3957 return $this->hashPart($output); 3958 } 3959 3960 // Handle inline images 3961 public function _doImages_inline_callback($matches) { 3962 $src = $matches[3]=="" ? $matches[4] : $matches[3]; 3963 if (!preg_match("/^\w+:/", $src)) { 3964 $src = $this->yellow->system->get("coreServerBase").$this->yellow->system->get("coreImageLocation").$src; 3965 } 3966 $alt = $matches[2]; 3967 $title = isset($matches[7]) ? $matches[7] : $matches[2]; 3968 $attr = $this->doExtraAttributes("img", $dummy =& $matches[8]); 3969 $output = "<img src=\"".$this->encodeURLAttribute($src)."\""; 3970 if (!is_string_empty($alt)) $output .= " alt=\"".$this->encodeAttribute($alt)."\""; 3971 if (!is_string_empty($title)) $output .= " title=\"".$this->encodeAttribute($title)."\""; 3972 $output .= $attr; 3973 $output .= $this->empty_element_suffix; 3974 return $this->hashPart($output); 3975 } 3976 3977 // Handle lists, task list 3978 public function _processListItems_callback($matches) { 3979 $attr = ""; 3980 $item = $matches[4]; 3981 $leadingLine = $matches[1]; 3982 $tailingLine = $matches[5]; 3983 if ($leadingLine || $tailingLine || preg_match('/\n{2,}/', $item)) 3984 { 3985 $item = $matches[2].str_repeat(' ', strlen($matches[3])).$item; 3986 $item = $this->runBlockGamut($this->outdent($item)."\n"); 3987 } else { 3988 $item = $this->doLists($this->outdent($item)); 3989 $item = $this->formParagraphs($item, false); 3990 $token = substr($item, 0, 4); 3991 if ($token=="[ ] " || $token=="[x] ") { 3992 $attr = " class=\"task-list-item\""; 3993 $item = ($token=='[ ] ' ? "<input type=\"checkbox\" disabled=\"disabled\" /> " : 3994 "<input type=\"checkbox\" disabled=\"disabled\" checked=\"checked\" /> ").substr($item, 4); 3995 } 3996 } 3997 return "<li$attr>".$item."</li>\n"; 3998 } 3999 4000 // Handle blockquotes, CommonMark compatible 4001 public function doBlockQuotes($text) { 4002 return preg_replace_callback("/((?>^[ ]*>[ ]?.+\n(.+\n)*)+)/m", array($this, "_doBlockQuotes_callback"), $text); 4003 } 4004 4005 // Handle collapsible block elements 4006 public function doCollapsibleBlocks($text) { 4007 return preg_replace_callback("/((?>^[ ]*\?[ ]?.+\n(.+\n)*)+)/m", array($this, "_doCollapsibleBlocks_callback"), $text); 4008 } 4009 4010 // Handle collapsible block elements over multiple lines 4011 public function _doCollapsibleBlocks_callback($matches) { 4012 $name = $attributes = $attr = ""; 4013 $text = preg_replace("/^[ ]*\?[ ]?/m", "", $matches[1]); 4014 if (preg_match("/^[ ]*".$this->id_class_attr_catch_re."[ ]*\n([\S\s]*)$/m", $text, $parts)) { 4015 $name = $this->getBlockName("", $parts[1]); 4016 $text = $parts[2]; 4017 $attributes = $parts[1]; 4018 $attr = $this->doExtraAttributes("details", $parts[1]); 4019 } 4020 if (!is_string_empty($text)) { 4021 $output = $this->page->parseContentElement($name, $text, $attributes, "collapsible"); 4022 if (is_null($output)) { 4023 $summary = ""; 4024 if (preg_match("/^(.*?)\n\n(.*)$/s", $text, $parts)) { 4025 $summary = $parts[1]; 4026 $text = $parts[2]; 4027 } 4028 $output = "<details$attr>\n"; 4029 if (!is_string_empty($summary)) $output .= "<summary>".$this->runSpanGamut($summary)."</summary>\n"; 4030 $output .= $this->runBlockGamut($text)."\n</details>"; 4031 } 4032 } else { 4033 $output = "<details$attr></details>"; 4034 } 4035 return "\n".$this->hashBlock($output)."\n\n"; 4036 } 4037 4038 // Handle general block elements 4039 public function doGeneralBlocks($text) { 4040 return preg_replace_callback("/((?>^[ ]*!(?!\[)[ ]?.+\n(.+\n)*)+)/m", array($this, "_doGeneralBlocks_callback"), $text); 4041 } 4042 4043 // Handle general block elements over multiple lines 4044 public function _doGeneralBlocks_callback($matches) { 4045 $name = $attributes = $attr = ""; 4046 $text = preg_replace("/^[ ]*![ ]?/m", "", $matches[1]); 4047 if (preg_match("/^[ ]*".$this->id_class_attr_catch_re."[ ]*\n([\S\s]*)$/m", $text, $parts)) { 4048 $name = $this->getBlockName("", $parts[1]); 4049 $text = $parts[2]; 4050 $attributes = $parts[1]; 4051 $attr = $this->doExtraAttributes("div", $parts[1]); 4052 } 4053 if (!is_string_empty($text)) { 4054 $output = $this->page->parseContentElement($name, $text, $attributes, "general"); 4055 if (is_null($output)) { 4056 $output = "<div$attr>\n".$this->runBlockGamut($text)."\n</div>"; 4057 } 4058 } else { 4059 $output = "<div$attr></div>"; 4060 } 4061 return "\n".$this->hashBlock($output)."\n\n"; 4062 } 4063 4064 // Handle footnotes links, normalise ids and links 4065 public function doFootnotesLinks($text) { 4066 if (!is_null($this->footnotes_assembled)) { 4067 $callbackId = function ($matches) { 4068 $id = str_replace(":", "-", $matches[2]); 4069 return "<$matches[1] id=\"$id\" $matches[3]>"; 4070 }; 4071 $text = preg_replace_callback("/<(li|sup) id=\"(fn:\d+)\"(.*?)>/", $callbackId, $text); 4072 $text = preg_replace_callback("/<(li|sup) id=\"(fnref\d*:\d+)\"(.*?)>/", $callbackId, $text); 4073 $callbackHref = function ($matches) { 4074 $href = $this->page->base.$this->page->location.str_replace(":", "-", $matches[2]); 4075 return "<$matches[1] href=\"$href\" $matches[3]>"; 4076 }; 4077 $text = preg_replace_callback("/<(a) href=\"(#fn:\d+)\"(.*?)>/", $callbackHref, $text); 4078 $text = preg_replace_callback("/<(a) href=\"(#fnref\d*:\d+)\"(.*?)>/", $callbackHref, $text); 4079 } 4080 return $text; 4081 } 4082 4083 // Return suitable name for block element 4084 public function getBlockName($language, $attributes) { 4085 if (!is_string_empty($language)) { 4086 $name = ltrim($language, "."); 4087 } else { 4088 $name = ""; 4089 foreach (explode(" ", $attributes) as $token) { 4090 if (substru($token, 0, 1)==".") { $name = substru($token, 1); break; } 4091 } 4092 } 4093 return $name; 4094 } 4095 4096 // Return unique id attribute 4097 public function getIdAttribute($text) { 4098 $attr = ""; 4099 $text = $this->yellow->lookup->normaliseName($text, true, false, true); 4100 $text = trim(preg_replace("/-+/", "-", $text), "-"); 4101 if (!isset($this->idAttributes[$text])) { 4102 $this->idAttributes[$text] = $text; 4103 $attr = " id=\"$text\""; 4104 } else { 4105 $attr = " id=\"error-duplicate-heading\""; 4106 } 4107 return $attr; 4108 } 4109 }