source: documentation/trunk/packages/dokuwiki-2011-05-25a/lib/plugins/code/syntax.php@ 25027

Last change on this file since 25027 was 25027, checked in by jmt12, 12 years ago

Adding the packages directory, and within it a configured version of dokuwiki all ready to run

File size: 34.7 KB
Line 
1<?php
2if (! class_exists('syntax_plugin_code')) {
3 if (! defined('DOKU_PLUGIN')) {
4 if (! defined('DOKU_INC')) {
5 define('DOKU_INC',
6 realpath(dirname(__FILE__) . '/../../') . '/');
7 } // if
8 define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');
9 } // if
10 // Include parent class:
11 require_once(DOKU_PLUGIN . 'syntax.php');
12 // We're dealing with "GeSHi" here, hence include it:
13 require_once(DOKU_INC . 'inc/geshi.php');
14
15/**
16 * <tt>syntax_plugin_code.php </tt>- A PHP4 class that implements the
17 * <tt>DokuWiki</tt> plugin for <tt>highlighting</tt> code fragments.
18 *
19 * <p>
20 * Usage:<br>
21 * <tt>&#60;code [language startno |[fh] text |[hs]]&#62;...&#60;/code&#62;</tt>
22 * </p><pre>
23 * Copyright (C) 2006, 2008 M.Watermann, D-10247 Berlin, FRG
24 * All rights reserved
25 * EMail : &lt;[email protected]&gt;
26 * </pre><div class="disclaimer">
27 * This program is free software; you can redistribute it and/or modify
28 * it under the terms of the GNU General Public License as published by
29 * the Free Software Foundation; either
30 * <a href="http://www.gnu.org/licenses/gpl.html">version 3</a> of the
31 * License, or (at your option) any later version.<br>
32 * This software is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 * </div>
37 * @author <a href="mailto:[email protected]">Matthias Watermann</a>
38 * @version <tt>$Id: syntax_plugin_code.php,v 1.29 2008/07/22 09:22:47 matthias Exp $</tt>
39 * @since created 24-Dec-2006
40 */
41class syntax_plugin_code extends DokuWiki_Syntax_Plugin {
42
43 /**
44 * @privatesection
45 */
46 //@{
47
48 /**
49 * Additional markup used with older DokuWiki installations.
50 *
51 * @private
52 * @see _fixJS()
53 */
54 var $_JSmarkup = FALSE;
55
56 /**
57 * Indentation "text" used by <tt>_addLines()</tt>.
58 *
59 * <p>
60 * Note that we're using raw <em>UTF-8 NonBreakable Spaces</em> here.
61 * </p>
62 * @private
63 * @see _addLines()
64 */
65 var $_lead = array('', ' ', '  ', '   ', '    ',
66 '     ', '      ', '       ');
67
68
69 /**
70 * Section counter for ODT export
71 *
72 * @private
73 * @see render()
74 * @since created 08-Jun-2008
75 */
76 var $_odtSect = 0;
77
78 /**
79 * Prepare the markup to render the DIFF text.
80 *
81 * @param $aText String The DIFF text to markup.
82 * @param $aFormat String The DIFF format used ("u", "c", "n|r", "s").
83 * @param $aDoc String Reference to the current renderer's
84 * <tt>doc</tt> property.
85 * @return Boolean <tt>TRUE</tt>.
86 * @private
87 * @see render()
88 */
function _addDiff(&$aText, &$aFormat, &$aDoc) {
    // Wraps the header, added and deleted lines of the given DIFF text
    // in SPAN tags and appends the result to $aDoc; always returns TRUE.
    //
    // Every opening tag carries a leading LF: the expressions below
    // consume the LF in front of the line they match, and inside a PRE
    // block that LF has to be put back.
    $spanAdd = "\n" . '<span class="diff_addedline">';
    $spanDel = "\n" . '<span class="diff_deletedline">';
    $spanHead = "\n" . '<span class="diff_blockheader">';
    $spanEnd = '</span>';
    // Ready-made replacements wrapping the first RegEx group:
    $asAdd = $spanAdd . '\1' . $spanEnd;
    $asDel = $spanDel . '\1' . $spanEnd;
    $asHead = $spanHead . '\1' . $spanEnd;
    // Header lines common to all formats. The expression is left open
    // on purpose: every user must append at least ")#".
    $headRE = '#\n((?:diff\s[^\n]*)|(?:Index:\s[^\n]*)|(?:={60,})'
        . '|(?:RCS file:\s[^\n]*)|(?:retrieving revision [0-9][^\n]*)';
    switch ($aFormat) {
        case 'u': // unified output
            $markup = preg_replace(
                array($headRE . '|(?:@@[^\n]*))#',
                    '|\n(\+[^\n]*)|',
                    '|\n(\-[^\n]*)|'),
                array($asHead, $asAdd, $asDel),
                $aText);
            break;
        case 'c': // context output
            // Mark the common headers first, then cut the text at the
            // "*****" hunk separators (the separators are kept).
            $headed = preg_replace($headRE . ')#', $asHead, $aText);
            $chunks = preg_split('|(\n\*{5,})|', $headed,
                -1, PREG_SPLIT_DELIM_CAPTURE);
            // Chunk 0 holds the file header ("***" old / "---" new):
            $chunks[0] = preg_replace(
                array('|\n(\-{3}[^\n]*)|',
                    '|\n(\*{3}[^\n]*)|'),
                array($asAdd, $asDel),
                $chunks[0]);
            foreach ($chunks as $idx => $chunk) {
                if (0 === $idx) {
                    continue; // already handled above
                } // if
                $found = array();
                if (preg_match('|^\n(\*{5,})|', $chunk, $found)) {
                    // a hunk separator line
                    $chunks[$idx] = $spanHead . $found[1] . $spanEnd;
                    continue;
                } // if
                if (! preg_match('|^\n(\x2A{3}\s[^\n]*)(.*)|s',
                    $chunk, $found)) {
                    continue; // leave this chunk as it is
                } // if
                // $found[1] == head of OLD code, $found[2] == the rest
                $halves = preg_split('|\n(\-{3}\s[^\n]*)|',
                    $found[2], -1, PREG_SPLIT_DELIM_CAPTURE);
                // $halves[0] == OLD code
                $halves[0] = preg_replace('|\n([!\-][^\n]*)|',
                    $asDel, $halves[0]);
                // $halves[1] == head of NEW code
                $halves[1] = $spanAdd . $halves[1] . $spanEnd;
                // $halves[2] == NEW code
                $halves[2] = preg_replace(
                    array('|\n([!\x2B][^\n]*)|',
                        '|\n(\x2A{3}[^\n]*)|'),
                    array($asAdd, $asDel),
                    $halves[2]);
                if (isset($halves[3])) {
                    // only present when handling multi-file patches
                    $halves[3] = preg_replace('|^(\x2D{3}[^\n]*)|',
                        $asAdd, $halves[3]);
                } // if
                $chunks[$idx] = $spanDel . $found[1] . $spanEnd
                    . implode('', $halves);
            } // foreach
            $markup = implode('', $chunks);
            break;
        case 'n': // RCS output
            // Only added lines are present, so just the diff
            // indicators get highlighted; the text is left alone.
            $markup = preg_replace(
                array($headRE . ')#',
                    '|\n(d[0-9]+\s+[0-9]+)|',
                    '|\n(a[0-9]+\s+[0-9]+)|'),
                array($asHead, $asDel, $asAdd),
                $aText);
            break;
        case 's': // simple output
            $markup = preg_replace(
                array($headRE
                    . '|((?:[0-9a-z]+(?:,[0-9a-z]+)*)(?:[^\n]*)))#',
                    '|\n(\x26#60;[^\n]*)|',
                    '|\n(\x26#62;[^\n]*)|'),
                array($asHead, $asDel, $asAdd),
                $aText);
            break;
        default: // unknown diff format: pass the text through
            $markup = $aText;
            break;
    } // switch
    $aDoc .= $markup;
    return TRUE;
} // _addDiff()
186
187 /**
188 * Add the lines of the given <tt>$aList</tt> to the specified
189 * <tt>$aDoc</tt> beginning with the given <tt>$aStart</tt> linenumber.
190 *
191 * @param $aList Array [IN] the list of lines as prepared by
192 * <tt>render()</tt>, [OUT] <tt>FALSE</tt>.
193 * @param $aStart Integer The first linenumber to use.
194 * @param $aDoc String Reference to the current renderer's
195 * <tt>doc</tt> property.
196 * @private
197 * @see render()
198 */
function _addLines(&$aList, $aStart, &$aDoc) {
    // Appends every entry of $aList to $aDoc, each one prefixed by a
    // right-aligned line number (starting with $aStart) wrapped in a
    // SPAN of class "lno". On return $aList is set to FALSE.
    //
    // Since we're dealing with monospaced fonts the width of each
    // character (space, NBSP, digit) is the same, hence the length of
    // a digits string gives us its width.
    // The greatest number actually used is "start + count - 1";
    // using "start + count" pads one column too wide whenever that
    // sum reaches the next power of ten.
    $last = $aStart + count($aList) - 1;
    $width = strlen("$last"); // width of greatest number
    // "foreach" instead of "each()": the latter is gone as of PHP 8.
    foreach ($aList as $line) {
        $text = (string)$line;
        // Compare against the empty string explicitly: a plain
        // truthiness test would swallow a code line holding just "0".
        $hasText = (('' !== $text) && ('&nbsp;' !== $text));
        $aDoc .= '<span class="lno">'
            . $this->_lead[$width - strlen("$aStart")]
            . "$aStart:</span>"
            . (($hasText) ? " $text\n" : "\n");
        ++$aStart; // increment line number
    } // foreach
    $aList = FALSE; // release memory
} // _addLines()
216
217 /**
218 * Internal convenience method to replace HTML special characters.
219 *
220 * @param $aString String [IN] The text to handle;
221 * [OUT] the modified text (i.e. the method's result).
222 * @return String The string with HTML special chars replaced.
223 * @private
224 * @since created 05-Feb-2007
225 */
function &_entities(&$aString) {
    // Replaces the HTML meta characters of $aString by numeric
    // entities. The argument itself is modified and handed back by
    // reference.
    $plain = array('&', '<', '>');
    $coded = array('&#38;', '&#60;', '&#62;');
    $aString = str_replace($plain, $coded, $aString);
    // [jmt12] The hidden ids must stay ordinary HTML comments, so
    // the encoding of their delimiters is taken back again:
    $aString = preg_replace('/&#60;!-- id:(.*?) --&#62;/',
        '<!-- id:\1 -->', $aString);
    return $aString;
} // _entities()
233
234 /**
235 * Try to fix some markup error of the GeSHi SHELL highlighting.
236 *
237 * <p>
238 * The GeShi highlighting for type "sh" (i.e. "bash") is, well,
239 * seriously flawed (at least up to version 1.0.7.20 i.e. 2007-07-01).
240 * Especially the handling of comments and embedded strings as well as
241 * keywords is plain wrong.
242 * </p><p>
243 * This internal helper method tries to solve some minor problems by
244 * removing highlight markup embedded in comment markup.
245 * This is, however, by no means a final resolution: GeSHi obviously
246 * keeps a kind of internal state resulting in highlighting markup
247 * spanning (i.e. repeated on) several lines.
248 * Which - if that state is wrong - causes great damage not by
249 * corrupting the data but by confusing the reader with wrong markup.
250 * The easiest way to trigger such a line spawning confusion is to use
251 * solitary doublequotes or singlequotes (apostrophe) in a comment
252 * line ...
253 * </p>
254 * @param $aMarkup String [IN] The highlight markup as returned by GeSHi;
255 * [OUT] <tt>FALSE</tt>.
256 * @param $aDoc String Reference to the current renderer's
257 * <tt>doc</tt> property.
258 * @private
259 * @since created 04-Aug-2007
260 * @see render()
261 */
function _fixGeSHi_Bash(&$aMarkup, &$aDoc) {
    // Appends the GeSHi "bash" markup given in $aMarkup to $aDoc,
    // removing highlight tags nested in comment markup when an old
    // GeSHi release is in use. On return $aMarkup is set to FALSE.
    //
    // GeSHi v1.0.7.21 has the comments bug fixed. "version_compare()"
    // also copes with three-part version strings (e.g. "1.0.8") which
    // a four-number pattern would fail to recognise as newer.
    if (defined('GESHI_VERSION')
        && version_compare(GESHI_VERSION, '1.0.7.20', '>')) {
        $aDoc .= $aMarkup;
        $aMarkup = FALSE; // release memory
        return;
    } // if
    $lines = explode("\n", $aMarkup);
    $aMarkup = FALSE; // release memory
    // "foreach" instead of "each()": the latter is gone as of PHP 8.
    foreach ($lines as $i => $l) {
        $hits = array();
        // GeSHi "bash" module marks up comments with CSS class "re3":
        if (preg_match('|^((.*)<span class="re3">)(.*)$|i', $l, $hits)) {
            if ('#!/bin/' == substr($hits[3], 0, 7)) {
                // a shebang line: drop the comment markup altogether
                $lines[$i] = $hits[2] . strip_tags($hits[3]);
            } else {
                // keep the comment SPAN but nothing nested inside
                $lines[$i] = $hits[1] . strip_tags($hits[3]) . '</span>';
            } // if
        } else if (! preg_match('|^\s*<span|i', $l)) {
            // If a line doesn't start with a highlighted keyword
            // all tags are removed since they're most probably
            // "leftovers" from the GeSHi string/comment bug.
            $lines[$i] = strip_tags($l);
        } // if
    } // foreach
    $aDoc .= implode("\n", $lines);
} // _fixGeSHi_Bash()
294
295 /**
296 * Add markup to load JavaScript file with older DokuWiki versions.
297 *
298 * @param $aRenderer Object The renderer used.
299 * @private
300 * @since created 19-Feb-2007
301 * @see render()
302 */
function _fixJS(&$aRenderer) {
    // Prepends inline CSS and a SCRIPT tag for this plugin to the
    // renderer's document when DokuWiki itself doesn't deliver them.
    //
    //XXX The first test will break if the DokuWiki file gets renamed.
    // If "js.php" is there we assume a fairly recent DokuWiki that
    // handles the plugin files on its own; a set member field means
    // the markup was added already (or isn't needed).
    if (@file_exists(DOKU_INC . 'lib/exe/js.php') || $this->_JSmarkup) {
        return;
    } // if
    $here = realpath(dirname(__FILE__)) . '/';
    $base = DOKU_BASE . 'lib/plugins/code/';
    $style = '';
    if (file_exists($here . 'style.css')) {
        // Capture the stylesheet by including it under output buffering:
        ob_start();
        @include($here . 'style.css');
        // Remove whitespace from CSS and expand IMG paths (patterns
        // lacking a replacement are replaced by nothing):
        $style = preg_replace(
            array('|\s*/\x2A.*?\x2A/\s*|s', '|\s*([:;\{\},+!])\s*|',
                '|(?:url\x28\s*)([^/])|', '|^\s*|', '|\s*$|'),
            array(' ', '\1', 'url(' . $base . '\1'),
            ob_get_clean());
        if ($style) {
            $style = '<style type="text/css">' . $style . '</style>';
        } // if
    } // if
    $script = '';
    if (file_exists($here . 'script.js')) {
        $script = '<script type="text/javascript" src="'
            . $base . 'script.js"></script>';
    } // if
    $head = $style . $script;
    if ($head) {
        // Put our markup in front and drop empty paragraphs:
        $aRenderer->doc = $head
            . preg_replace('|\s*<p>\s*</p>\s*|', '', $aRenderer->doc);
    } // if
    //ELSE: Neither CSS nor JS files found.
    // Set member field to skip tests with next call:
    $this->_JSmarkup = TRUE;
} // _fixJS()
343
344 /**
345 * RegEx callback to markup spaces in ODT mode.
346 *
347 * @param $aList Array A list of RegEx matches.
348 * @private
349 * @static
350 * @since created 07-Jun-2008
351 * @see render()
352 */
function _preserveSpaces($aList) {
    // RegEx callback: turns the run of spaces captured in group 1
    // into the ODT "text:s" element carrying the run's length; an
    // empty capture yields a single blank.
    $count = strlen($aList[1]);
    if ($count) {
        return '<text:s text:c="' . $count . '"/>';
    } // if
    return ' ';
} // _preserveSpaces()
358
359 /**
360 * Add the lines of the given <tt>$aText</tt> to the specified
361 * <tt>$aDoc</tt> beginning with the given <tt>$aStart</tt> linenumber.
362 *
363 * @param $aText String [IN] the text lines as prepared by
364 * <tt>handle()</tt>, [OUT] <tt>FALSE</tt>.
365 * @param $aStart Integer The first linenumber to use;
366 * if <tt>0</tt> (zero) no linenumbers are used.
367 * @param $aDoc String Reference to the current renderer's
368 * <tt>doc</tt> property.
369 * @param $aClass String The CSS class name for the <tt>PRE</tt> tag.
370 * @param $addTags Boolean Used in "ODT" mode to suppress tagging
371 * the line numbers.
372 * @private
373 * @since created 03-Feb-2007
374 * @see render()
375 */
function _rawMarkup(&$aText, $aStart, &$aDoc, $aClass, $addTags = TRUE) {
    // Appends $aText to $aDoc, optionally numbered (when $aStart is
    // not zero) and optionally enclosed in a PRE tag of class $aClass
    // (when $addTags is set). On return $aText is set to FALSE.
    if ($addTags) {
        $aDoc .= '<pre class="' . $aClass . '">' . "\n";
    } // if
    if (! $aStart) {
        // no line numbers requested: take the text as it is
        $aDoc .= $aText;
    } else {
        // Split the prepared data into a list of lines and
        // add the numbered lines to the document:
        $aText = explode("\n", $aText);
        $this->_addLines($aText, $aStart, $aDoc);
    } // if
    if ($addTags) {
        $aDoc .= '</pre>';
    } // if
    $aText = FALSE; // release memory
} // _rawMarkup()
393
394 /**
395 * RegEx callback to replace SPAN tags in ODT mode.
396 *
397 * @param $aList Array A list of RegEx matches.
398 * @private
399 * @static
400 * @since created 07-Jun-2008
401 * @see render()
402 */
function _replaceSpan($aList) {
    // RegEx callback: builds the opening ODT "text:span" tag for the
    // class name captured in group 3 (underscores are encoded as
    // "_5f_"); without a class name a plain span is returned.
    if (! $aList[3]) {
        return '<text:span>';
    } // if
    $style = str_replace('_', '_5f_', $aList[3]);
    return '<text:span text:style-name="Code_5f_' . $style . '">';
} // _replaceSpan()
409
410 //@}
411 /**
412 * @publicsection
413 */
414 //@{
415
416 /**
417 * Tell the parser whether the plugin accepts syntax mode
418 * <tt>$aMode</tt> within its own markup.
419 *
420 * @param $aMode String The requested syntaxmode.
421 * @return Boolean <tt>FALSE</tt> (no nested markup allowed).
422 * @public
423 * @see getAllowedTypes()
424 */
function accepts($aMode) {
    // Whatever mode is asked for: nothing may be nested in our markup.
    $nestingAllowed = FALSE;
    return $nestingAllowed;
} // accepts()
428
429 /**
430 * Connect lookup pattern to lexer.
431 *
432 * @param $aMode String The desired rendermode.
433 * @public
434 * @see render()
435 */
function connectTo($aMode) {
    // Registers the entry pattern of this plugin for the given mode.
    // The look-ahead (opening tag closed on the same line, closing
    // tag on a line of its own further down) minimizes the chance
    // of false matches:
    $entry = '\x3Ccode(?=[^>]*\x3E\r?\n.*\n\x3C\x2Fcode\x3E)';
    $this->Lexer->addEntryPattern($entry, $aMode, 'plugin_code');
} // connectTo()
442
443 /**
444 * Get an array of mode types that may be nested within the
445 * plugin's own markup.
446 *
447 * @return Array Allowed nested types (none).
448 * @public
449 * @see accepts()
450 * @static
451 */
function getAllowedTypes() {
    // No mode types at all may be nested, hence the empty list.
    $none = array();
    return $none;
} // getAllowedTypes()
455
456 /**
457 * Get an associative array with plugin info.
458 *
459 * <p>
460 * The returned array holds the following fields:
461 * <dl>
462 * <dt>author</dt><dd>Author of the plugin</dd>
463 * <dt>email</dt><dd>Email address to contact the author</dd>
464 * <dt>date</dt><dd>Last modified date of the plugin in
465 * <tt>YYYY-MM-DD</tt> format</dd>
466 * <dt>name</dt><dd>Name of the plugin</dd>
467 * <dt>desc</dt><dd>Short description of the plugin (Text only)</dd>
468 * <dt>url</dt><dd>Website with more information on the plugin
469 * (eg. syntax description)</dd>
470 * </dl>
471 * @return Array Information about this plugin class.
472 * @public
473 * @static
474 */
function getInfo() {
    // Returns the plugin's info record (author, email, date, name,
    // desc, url). The tag name is spliced into the description at
    // runtime: a hack to hide the "desc" field from GeSHi.
    $tag = 'code';
    $info = array();
    $info['author'] = 'Matthias Watermann';
    // NOTE(review): this value looks like a placeholder left behind by
    // address obfuscation rather than a real address - confirm.
    $info['email'] = '[email protected]';
    $info['date'] = '2008-07-22';
    $info['name'] = 'Code Syntax Plugin';
    $info['desc'] = sprintf(
        'Syntax highlighting with line numbering <%1$s'
        . ' lang 1 |[fh] text |[hs]> ... </%1$s>', $tag);
    $info['url'] = 'http://wiki.splitbrain.org/plugin:code2';
    return $info;
} // getInfo()
486
487 /**
488 * Define how this plugin is handled regarding paragraphs.
489 *
490 * <p>
491 * This method is important for correct XHTML nesting.
492 * It returns one of the following values:
493 * </p><dl>
494 * <dt>normal</dt><dd>The plugin can be used inside paragraphs.</dd>
495 * <dt>block</dt><dd>Open paragraphs need to be closed before
496 * plugin output.</dd>
497 * <dt>stack</dt><dd>Special case: Plugin wraps other paragraphs.</dd>
498 * </dl>
499 * @return String <tt>"block"</tt>.
500 * @public
501 * @static
502 */
function getPType() {
    // "block": open paragraphs have to be closed before our output.
    $paragraphType = 'block';
    return $paragraphType;
} // getPType()
506
507 /**
508 * Where to sort in?
509 *
510 * @return Integer <tt>194</tt> (below "Doku_Parser_Mode_code").
511 * @public
512 * @static
513 */
function getSort() {
    // Class "Doku_Parser_Mode_code" returns 200;
    // we have to sort in below that.
    $priority = 194;
    return $priority;
} // getSort()
518
519 /**
520 * Get the type of syntax this plugin defines.
521 *
522 * @return String <tt>"protected"</tt>.
523 * @public
524 * @static
525 */
function getType() {
    // The syntax type this plugin registers with the parser.
    $syntaxType = 'protected';
    return $syntaxType;
} // getType()
529
530 /**
531 * Handler to prepare matched data for the rendering process.
532 *
533 * <p>
534 * The <tt>$aState</tt> parameter gives the type of pattern
535 * which triggered the call to this method:
536 * </p><dl>
537 * <dt>DOKU_LEXER_UNMATCHED</dt>
538 * <dd>ordinary text encountered within the plugin's syntax mode
539 * which doesn't match any pattern.</dd>
540 * </dl>
541 * @param $aMatch String The text matched by the patterns.
542 * @param $aState Integer The lexer state for the match.
543 * @param $aPos Integer The character position of the matched text.
544 * @param $aHandler Object Reference to the Doku_Handler object.
545 * @return Array Index <tt>[0]</tt> holds the current <tt>$aState</tt>,
546 * index <tt>[1]</tt> the embedded text to highlight,
547 * index <tt>[2]</tt> the language/dialect (or <tt>FALSE</tt>),
548 * index <tt>[3]</tt> the first line number (or <tt>0</tt>),
549 * index <tt>[4]</tt> the top title (or <tt>FALSE</tt>),
550 * index <tt>[5]</tt> the bottom title (or <tt>FALSE</tt>),
551 * index <tt>[6]</tt> hiding CSS flag (or <tt>""</tt>).
552 * @public
553 * @see render()
554 * @static
555 */
function handle($aMatch, $aState, $aPos, &$aHandler) {
    // Splits the matched text into the tag attributes and the embedded
    // text, optionally fetches an external resource, normalizes the
    // whitespace and parses the attributes (language, line number or
    // diff format, titles, CSS flag) for "render()".
    if (DOKU_LEXER_UNMATCHED != $aState) {
        return array($aState); // nothing to do for "render()"
    } // if
    $aMatch = explode('>', $aMatch, 2);
    // $aMatch[0] : lang etc.
    // $aMatch[1] : text to highlight
    if (! isset($aMatch[1])) {
        $aMatch[1] = ''; // no text at all
    } // if
    $parts = explode('>', trim($aMatch[1]));
    $extern = 'extern'; // external resource requested?
    // Check whether there's an external file to fetch:
    if ($extern == $parts[0]) {
        // The URL may be missing altogether, hence the "isset()":
        $url = (isset($parts[1])) ? trim($parts[1]) : '';
        if ($url) {
            $scheme = @parse_url($url);
            $scheme = (is_array($scheme) && isset($scheme['scheme']))
                ? $scheme['scheme']
                : FALSE;
            if ($scheme) {
                // Don't accept unsecure schemes like
                // "file", "javascript", "mailto" etc.
                // NOTE(review): the URL comes straight from the wiki
                // text and is fetched by the server; consider whether
                // that is acceptable for the installation at hand.
                switch ($scheme) {
                    case 'ftp':
                    case 'http':
                    case 'https':
                        //XXX This might fail due to global PHP setup:
                        if ($handle = @fopen($url, 'rb')) {
                            $aMatch[1] = '';
                            while (! @feof($handle)) {
                                //XXX This might fail due to
                                // memory constraints:
                                $chunk = @fread($handle, 0x8000);
                                if (FALSE === $chunk) {
                                    // A failed read doesn't set EOF;
                                    // stop here instead of looping
                                    // forever.
                                    break;
                                } // if
                                $aMatch[1] .= $chunk;
                            } // while
                            @fclose($handle);
                        } else {
                            $aMatch = array($extern,
                                'Failed to retrieve: ' . $url);
                        } // if
                        break;
                    default:
                        $aMatch = array($extern,
                            'Unsupported URL scheme: ' . $scheme);
                        break;
                } // switch
            } else {
                $aMatch = array($extern, 'Invalid URL: ' . $url);
            } // if
        } else {
            $aMatch = array($extern, 'Missing URL: ' . $aMatch[1]);
        } // if
    } // if
    // Strip leading/trailing/EoL whitespace,
    // replace TABs by four spaces, "&#160;" by NBSP:
    $aMatch[1] = preg_replace(
        array('#(?>\r\n)|\r#', '|^\n\n*|',
            '|[\t ]+\n|', '|\s*\n$|'),
        array("\n", '', "\n", ''),
        str_replace('&#160;', '&nbsp;',
            str_replace("\t", '    ', $aMatch[1])));

    $css = ''; // default: no initial CSS content hiding
    $l = FALSE; // default: no language
    $n = 0; // default: no line numbers (for "diff": the format letter)
    $ht = $ft = FALSE; // default: no (head/foot) title
    $hits = array(); // RegEx matches from the tag attributes
    /*
    The free form of the RegEx to parse the arguments here is:
    /^
        # "eat" leading whitespace:
        \s*
        (?=\S)  # Look ahead: do not match empty lines. This is
                # needed since all other expressions are optional.
        # Make sure, nothing is given away once it matched:
        (?>
            # We need a separate branch for "diff" because it may be
            # followed by a _letter_ (not digit) indicating the format.
            (?>
                (diff)
                #   match 1
                (?>\s+([cnrsu]?))?
                #   match 2
            )
            |
            # Branch for standard language highlighting
            (?>
                # extract language:
                ([a-z][^\x7C\s]*)
                #   match 3
                (?>
                    # extract starting line number:
                    \s+(\d\d*)
                    #   match 4
                )?
            )
            |
            # Branch for line numbering only
            (\d\d*)
            #   match 5
            |
            \s* # dummy needed to match "title only" markup (below)
        )
        # "eat" anything else up to the text delimiter:
        [^\x7C]*
        (?>
            \x7C
            # extract the position flag:
            ([bfht])?\s*
            #   match 6
            # extract the header,footer line:
            ([^\x7C]+)
            #   match 7
            (?>
                # see whether there is a class flag:
                \x7C\s*
                (h|s)?.*
                #   match 8
            )?
        )?
        # Anchored to make sure everything gets matched:
    $/xiu

    Since compiling and applying a free form RegEx slows down the
    overall matching process I've folded it all to a standard RegEx.
    Benchmarking during development gave me
        free form: 20480 loops, 552960 hits, 102400 fails, 12.994689 secs
        standard:  20480 loops, 552960 hits, 102400 fails, 8.357169 secs
    */
    if (preg_match('/^\s*(?=\S)(?>(?>(diff)(?>\s+([cnrsu]?))?)|'
        . '(?>([a-z][^\x7C\s]*)(?>\s+(\d\d*))?)|(\d\d*)|\s*)[^\x7C]*'
        . '(?>\x7C([bfht])?\s*([^\x7C]+)(?>\x7C\s*(h|s)?.*)?)?$/iu',
        $aMatch[0], $hits)) {
        // $hits[1] = "diff"
        // $hits[2] = type (of [1])
        // $hits[3] = LANG
        // $hits[4] = NUM (of [3])
        // $hits[5] = NUM (alone)
        // $hits[6] = Top/Bottom flag (of [7])
        // $hits[7] = TITLE
        // $hits[8] = s/h CSS flag
        if (isset($hits[3]) && ($hits[3])) {
            $l = strtolower($hits[3]);
            if (isset($hits[4]) && ($hits[4])) {
                $n = (int)$hits[4];
            } // if
        } else if (isset($hits[1]) && ($hits[1])) {
            $l = strtolower($hits[1]);
            // The appended "?" guarantees a first character even
            // if no format letter was given:
            $format = (isset($hits[2]))
                ? strtolower($hits[2]) . '?'
                : '?';
            $n = $format[0];
        } else if (isset($hits[5]) && ($hits[5])) {
            $n = (int)$hits[5];
        } // if
        if (isset($hits[7]) && ($hits[7])) {
            // Without a position flag the title goes to the foot:
            $position = (isset($hits[6]))
                ? strtolower($hits[6]) . 'f'
                : 'f';
            switch ($position[0]) {
                case 'h':
                case 't':
                    $ht = trim($hits[7]);
                    break;
                default:
                    $ft = trim($hits[7]);
                    break;
            } // switch
            if (isset($hits[8])) {
                $flag = strtolower($hits[8]) . 's';
                if ('h' == $flag[0]) {
                    // This class is handled by JavaScript (there
                    // _must_not_ be any CSS rules for this):
                    $css = ' HideOnInit';
                } // if
            } // if
        } // if
        // ELSE: no arguments given to CODE tag
    } // if
    switch ($l) {
        case 'console':
            // nothing additional to setup here
            break;
        case 'diff':
            // "substr()" is safe with an empty text where a plain
            // string offset is not.
            if ("\n" !== substr($aMatch[1], 0, 1)) {
                // A leading LF is needed to recognize and handle
                // the very first line with all the REs used.
                $aMatch[1] = "\n" . $aMatch[1];
            } // if
            switch ($n) {
                case 'u': // DIFF cmdline switch for "unified"
                case 'c': // DIFF cmdline switch for "context"
                case 'n': // DIFF cmdline switch for "RCS"
                case 's':
                    // We believe the format hint ...
                    // (or should we be more suspicious?)
                    break;
                case 'r': // Mnemonic for "RCS"
                    $n = 'n';
                    break;
                default: // try to figure out the format actually used
                    if (preg_match(
                        '|\n(?:\x2A{5,}\n\x2A{3}\s[1-9]+.*?\x2A{4}\n.+?)+|s',
                        $aMatch[1])) {
                        $n = 'c';
                    } else if (preg_match(
                        '|\n@@\s\-[0-9]+,[0-9]+[ \+,0-9]+?@@\n.+\n|s',
                        $aMatch[1])) {
                        $n = 'u';
                    } else if (preg_match(
                        '|\n[ad][0-9]+\s+[0-9]+\r?\n|', $aMatch[1])) {
                        // We've to check this _before_ "simple" since
                        // the REs are quite similar (but this one is
                        // slightly more specific).
                        $n = 'n';
                    } else if (preg_match(
                        '|\n(?:[0-9a-z]+(?:,[0-9a-z]+)*)(?:[^\n]*\n.*?)+|',
                        $aMatch[1])) {
                        $n = 's';
                    } else {
                        $n = '?';
                    } // if
                    break;
            } // switch
            break;
        case 'htm': // convenience shortcut
        case 'html': // ditto
            $l = 'html4strict';
            break;
        case 'js': // shortcut
            $l = 'javascript';
            break;
        case 'sh': // shortcut
            $l = 'bash';
            break;
        default:
            if (! $l) {
                // no language: simple PRE markup will get generated
                $l = FALSE;
            } // if
            break;
    } // switch
    return array(DOKU_LEXER_UNMATCHED,
        $aMatch[1], $l, $n, $ht, $ft, $css);
} // handle()
798
799 /**
800 * Add exit pattern to lexer.
801 *
802 * @public
803 */
function postConnect() {
    // Registers the exit pattern of this plugin. The look-behind
    // (closing tag at the very start of a line) minimizes the
    // chance of false matches:
    $exit = '(?<=\n)\x3C\x2Fcode\x3E';
    $this->Lexer->addExitPattern($exit, 'plugin_code');
} // postConnect()
809
810 /**
811 * Handle the actual output (markup) creation.
812 *
813 * <p>
814 * The method checks the given <tt>$aFormat</tt> to decide how to
815 * handle the specified <tt>$aData</tt>.
816 * The standard case (i.e. <tt>"xhtml"</tt>) is handled completely
817 * by this implementation, preparing linenumbers and/or head/foot
818 * lines if they are requested.
819 * For the <tt>"odt"</tt> format all plugin features (incl. linenumbers
820 * and header/footer lines) are supported by generating the appropriate
821 * ODT/XML markup.
822 * All other formats are passed back to the given <tt>$aRenderer</tt>
823 * instance for further handling.
824 * </p><p>
825 * <tt>$aRenderer</tt> contains a reference to the renderer object
826 * which is currently in charge of the rendering.
827 * The contents of the given <tt>$aData</tt> is the return value
828 * of the <tt>handle()</tt> method.
829 * </p>
830 * @param $aFormat String The output format to generate.
831 * @param $aRenderer Object A reference to the renderer object.
832 * @param $aData Array The data created/returned by the
833 * <tt>handle()</tt> method.
834 * @return Boolean <tt>TRUE</tt>.
835 * @public
836 * @see handle()
837 */
838 function render($aFormat, &$aRenderer, &$aData) {
839 if (DOKU_LEXER_UNMATCHED != $aData[0]) {
840 return TRUE;
841 } // if
842 if ('xhtml' == $aFormat) {
843 if ($tdiv = (($aData[4]) || ($aData[5]))) {
844 $this->_fixJS($aRenderer); // check for old DokuWiki versions
845 $aRenderer->doc .= '<div class="code">';
846 if ($aData[4]) {
847 //XXX Note that "_headerToLink()" is supposed to be a
848 // _private_ method of the renderer class; so this code
849 // will fail once DokuWiki is rewritten in PHP5 which
850 // implements encapsulation of private methods and
851 // properties:
852 $aRenderer->doc .= '<p class="codehead' . $aData[6]
853 . '"><a name="' . $aRenderer->_headerToLink($aData[4])
854 . '">' . $this->_entities($aData[4]) . '</a></p>';
855 $aData[4] = $aData[6] = FALSE; // free mem
856 } // if
857 } // if
858 if ($aData[2]) { // lang was given
859 if ('console' == $aData[2]) {
860 $this->_rawMarkup($this->_entities($aData[1]),
861 $aData[3], $aRenderer->doc, $aData[2]);
862 } else if ('diff' == $aData[2]) {
863 $this->_entities($aData[1]);
864 $aRenderer->doc .= '<pre class="code diff">';
865 $this->_addDiff($aData[1], $aData[3], $aRenderer->doc);
866 $aRenderer->doc .= '</pre>';
867 } else {
868 $isSH = ('bash' == $aData[2]);
869 $geshi = new GeSHi($aData[1], $aData[2], GESHI_LANG_ROOT);
870 if ($geshi->error()) {
871 // Language not supported by "GeSHi"
872 $geshi = NULL; // release memory
873 $this->_rawMarkup($this->_entities($aData[1]),
874 $aData[3], $aRenderer->doc, 'code');
875 } else {
876 $aData[1] = FALSE; // free mem
877 $geshi->enable_classes();
878 $geshi->set_encoding('utf-8');
879 $geshi->set_header_type(GESHI_HEADER_PRE);
880 $geshi->set_overall_class('code ' . $aData[2]);
881 global $conf;
882 if ($conf['target']['extern']) {
883 $geshi->set_link_target($conf['target']['extern']);
884 } // if
885 if ($aData[3]) { // line numbers requested
886 // Separate PRE tag from parsed data:
887 $aData[1] = explode('>', $geshi->parse_code(), 2);
888 // [1][0] = leading "<pre"
889 // [1][1] = remaining markup up to trailing "</pre"
890 $geshi = NULL; // release memory
891
892 // Add the open tag to the document:
893 $aRenderer->doc .= $aData[1][0] . '>';
894
895 // Separate trailing PRE tag:
896 $aData[1] = explode('</pre>', $aData[1][1], 2);
897 // [1][0] = GeSHi markup
898 // [1][1] = trailing "</pre"
899
900 if ($isSH) {
901 $aData[1][1] = '';
902 $this->_fixGeSHi_Bash($aData[1][0],
903 $aData[1][1]);
904 } else {
905 // Set reference to fixed markup to sync with
906 // the "bash" execution path (above):
907 $aData[1][1] =& $aData[1][0];
908 } // if
909
910 // Split the parsed data into a list of lines:
911 $aData[2] = explode("\n", $aData[1][1]);
912 $aData[1] = FALSE; // free mem
913
914 // Add the numbered lines to the document:
915 $this->_addLines($aData[2], $aData[3],
916 $aRenderer->doc);
917
918 // Close the preformatted section markup:
919 $aRenderer->doc .= '</pre>';
920 } else { // w/o line numbering
921 if ($isSH) {
922 // Separate trailing PRE tag which
923 // sometimes is "forgotten" by GeSHi:
924 $aData[2] = explode('</pre>',
925 $geshi->parse_code(), 2);
926 // [1][0] = GeSHi markup
927 // [1][1] = trailing "</pre" (if any)
928 $this->_fixGeSHi_Bash($aData[2][0],
929 $aRenderer->doc);
930 $aRenderer->doc .= '</pre>';
931 } else {
932 $aRenderer->doc .= $geshi->parse_code();
933 } // if
934 $geshi = NULL; // release memory
935 } // if
936 } // if
937 } // if
938 } else {
939 $this->_rawMarkup($this->_entities($aData[1]),
940 $aData[3], $aRenderer->doc, 'code');
941 } // if
942 if ($tdiv) {
943 if ($aData[5]) {
944 //XXX See "_headerToLink()" note above.
945 $aRenderer->doc .= '<p class="codefoot'
946 . $aData[6] . '"><a name="'
947 . $aRenderer->_headerToLink($aData[5]) . '">'
948 . $this->_entities($aData[5]) . '</a></p>';
949 } // if
950 $aRenderer->doc .= '</div>';
951 } // if
952 } else if ('odt' == $aFormat) {
953 $inLI = array();
954 if (preg_match('|^<text:p text:style-name="[^"]+">\s*</text:p>\s*(.*)$|si',
955 $aRenderer->doc, $inLI)) {
956 // remove leading whitespace
957 $aRenderer->doc = $inLI[1];
958 } // if
959 // The "renderer_plugin_odt" doesn't clean (close)
960 // its own tags before calling this plugin.
961 // To work around that bug we have to check some
962 // private properties of the renderer instance.
963 $inLI = FALSE;
964 if (is_a($aRenderer, 'renderer_plugin_odt')) {
965 if ($inLI = ($aRenderer->in_list_item)) {
966 // If we're in a list item, we've to close the paragraph:
967 $aRenderer->doc .= '</text:p>';
968 } // if
969 if ($aRenderer->in_paragraph) {
970 $aRenderer->doc .= '</text:p>';
971 $aRenderer->in_paragraph = FALSE;
972 } // if
973 } // if
974
975 // Init (open) our text section:
976 $aRenderer->doc .= "\n"
977 . '<text:section text:style-name="Code_5f_Section" text:name="CodeSnippet'
978 . ++$this->_odtSect . '">';
979
980 if ($tdiv = (($aData[4]) || ($aData[5]))) {
981 // Check whether we need a top caption ("header"):
982 if ($aData[4]) {
983 $aRenderer->doc .=
984 '<text:p text:style-name="Code_5f_Title">'
985 . "<text:line-break/>\n"
986 . $aData[4] . "</text:p>\n";
987 $aData[4] = $aData[6] = FALSE; // free mem
988 } // if
989 } // if
990 // The following code resembles the "xhtml" processing
991 // above except that we're not using "pre" tags here
992 // but ODT/XML markup.
993 $aData[0] = ''; // tmp. container of processed data
994 if ($aData[2]) { // lang was given
995 if ('console' == $aData[2]) {
996 $this->_rawMarkup($this->_entities($aData[1]),
997 $aData[3], $aData[0], $aData[2], FALSE);
998 } else if ('diff' == $aData[2]) {
999 $this->_addDiff($this->_entities($aData[1]),
1000 $aData[3], $aData[0]);
1001 } else {
1002 $isSH = ('bash' == $aData[2]);
1003 $geshi = new GeSHi($aData[1], $aData[2], GESHI_LANG_ROOT);
1004 if ($geshi->error()) {
1005 // Language not supported by "GeSHi"
1006 $geshi = NULL; // release memory
1007 $this->_rawMarkup($this->_entities($aData[1]),
1008 $aData[3], $aData[0], '', FALSE);
1009 } else {
1010 $aData[1] = FALSE; // free mem
1011 $geshi->enable_classes();
1012 $geshi->set_encoding('utf-8');
1013 $geshi->set_header_type(GESHI_HEADER_PRE);
1014 $geshi->set_overall_class('code ' . $aData[2]);
1015 global $conf;
1016 if ($conf['target']['extern']) {
1017 $geshi->set_link_target($conf['target']['extern']);
1018 } // if
1019 // Separate PRE tag from parsed data:
1020 $aData[1] = explode('>', $geshi->parse_code(), 2);
1021 // [1][0] = leading "<pre"
1022 // [1][1] = remaining markup up to trailing "</pre"
1023 $geshi = NULL; // release memory
1024
1025 // Separate trailing PRE tag:
1026 $aData[1] = explode('</pre>', $aData[1][1], 2);
1027 // [1][0] = GeSHi markup
1028 // [1][1] = trailing "</pre"
1029 $aData[1] = $aData[1][0];
1030
1031 if ($isSH) { // work around GeSHI bug
1032 $aData[2] = '';
1033 $this->_fixGeSHi_Bash($aData[1], $aData[2]);
1034 } else {
1035 $aData[2] = $aData[1];
1036 } // if
1037 $aData[1] = FALSE; // release memory
1038
1039 if ($aData[3]) { // line numbers requested
1040 // Split the parsed data into a list of lines:
1041 $aData[1] = explode("\n", $aData[2]);
1042 $aData[2] = FALSE; // release memory
1043
1044 // Add the numbered lines to the document:
1045 $this->_addLines($aData[1], $aData[3], $aData[0]);
1046 } else { // w/o line numbers
1047 $aData[0] = $aData[2];
1048 $aData[2] = FALSE; // release memory
1049 } // if
1050 } // if
1051 } // if
1052 } else {
1053 $this->_rawMarkup($this->_entities($aData[1]),
1054 $aData[3], $aData[0], '', FALSE);
1055 } // if
1056
1057 if ('console' == $aData[2]) {
1058 $aRenderer->doc .=
1059 '<text:p text:style-name="Code_5f_Console">';
1060 } else {
1061 $aRenderer->doc .=
1062 '<text:p text:style-name="Code_5f_Standard">';
1063 } // if
1064 // Replace the HTML "span" tags (for highlighting) by
1065 // the appropriate ODT/XML markup.
1066 // For unknown reasons we need an additional space
1067 // in front of the very first line.
1068 $aData[0] = '<text:s/>'
1069 . preg_replace_callback('|(<span( class="([^"]*)"[^>]*)?>)|',
1070 array('syntax_plugin_code', '_replaceSpan'),
1071 // OOo (v2.3) crashes on "&nbsp;"
1072 str_replace('&nbsp;', chr(194) . chr(160),
1073 str_replace('</span>', '</text:span>',
1074 strip_tags($aData[0], '<span>'))));
1075 // Now append our markup to the renderer's document;
1076 // line feeds and runs of two or more spaces are replaced by
1077 // their respective ODT/XML equivalents:
1078 $aRenderer->doc .= preg_replace_callback('|( {2,})|',
1079 array('syntax_plugin_code', '_preserveSpaces'),
1080 str_replace("\n", "<text:line-break/>\n", $aData[0]));
1081 $aData[0] = FALSE; // release memory
1082
1083 // Check whether we need a bottom caption ("footer"):
1084 if ($tdiv && ($aData[5])) {
1085 $aRenderer->doc .=
1086 '</text:p><text:p text:style-name="Code_5f_Title">'
1087 . $aData[5];
1088 } // if
1089 // Close all our open tags:
1090 $aRenderer->doc .= "</text:p></text:section>\n";
1091
1092 if ($inLI) {
1093 // Workaround (see above): (re-)open a paragraph:
1094 $aRenderer->doc .= '<text:p>';
1095 } // if
1096 } else { // unsupported output format
1097 $aData[0] = $aData[4] = $aData[5] = FALSE; // avoid recursion
1098 // Pass anything else back to the renderer instance
1099 // (which will - hopefully - know how to handle it):
1100 $aRenderer->code($aData[1], $aData[2]);
1101 } // if
1102 $aData = array(FALSE); // don't process this text again
1103 return TRUE;
1104 } // render()
1105
1106 //@}
1107} // class syntax_plugin_code
1108} // if
1109?>
Note: See TracBrowser for help on using the repository browser.