source: documentation/trunk/packages/dokuwiki-2011-05-25a/lib/plugins/code/syntax.php.bak@ 25027

Last change on this file since 25027 was 25027, checked in by jmt12, 12 years ago

Adding the packages directory, and within it a configured version of dokuwiki all ready to run

File size: 34.5 KB
Line 
1<?php
2if (! class_exists('syntax_plugin_code')) {
if (! defined('DOKU_PLUGIN')) {
    if (! defined('DOKU_INC')) {
        // Fallback used only when the file is loaded outside of DokuWiki.
        // This file lives in "<root>/lib/plugins/code/", hence the
        // installation root is THREE levels up (two levels would yield
        // "<root>/lib/" and break every DOKU_INC based path below).
        define('DOKU_INC',
            realpath(dirname(__FILE__) . '/../../../') . '/');
    } // if
    define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');
} // if
10 // Include parent class:
11 require_once(DOKU_PLUGIN . 'syntax.php');
12 // We're dealing with "GeSHi" here, hence include it:
13 require_once(DOKU_INC . 'inc/geshi.php');
14
15/**
16 * <tt>syntax_plugin_code.php </tt>- A PHP4 class that implements the
17 * <tt>DokuWiki</tt> plugin for <tt>highlighting</tt> code fragments.
18 *
19 * <p>
20 * Usage:<br>
21 * <tt>&#60;code [language startno |[fh] text |[hs]]&#62;...&#60;/code&#62;</tt>
22 * </p><pre>
23 * Copyright (C) 2006, 2008 M.Watermann, D-10247 Berlin, FRG
24 * All rights reserved
25 * EMail : &lt;[email protected]&gt;
26 * </pre><div class="disclaimer">
27 * This program is free software; you can redistribute it and/or modify
28 * it under the terms of the GNU General Public License as published by
29 * the Free Software Foundation; either
30 * <a href="http://www.gnu.org/licenses/gpl.html">version 3</a> of the
31 * License, or (at your option) any later version.<br>
32 * This software is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 * </div>
37 * @author <a href="mailto:[email protected]">Matthias Watermann</a>
38 * @version <tt>$Id: syntax_plugin_code.php,v 1.29 2008/07/22 09:22:47 matthias Exp $</tt>
39 * @since created 24-Dec-2006
40 */
41class syntax_plugin_code extends DokuWiki_Syntax_Plugin {
42
43 /**
44 * @privatesection
45 */
46 //@{
47
48 /**
49 * Additional markup used with older DokuWiki installations.
50 *
51 * @private
52 * @see _fixJS()
53 */
54 var $_JSmarkup = FALSE;
55
56 /**
57 * Indentation "text" used by <tt>_addLines()</tt>.
58 *
59 * <p>
60 * Note that we're using raw <em>UTF-8 NonBreakable Spaces</em> here.
61 * </p>
62 * @private
63 * @see _addLines()
64 */
65 var $_lead = array('', ' ', '  ', '   ', '    ',
66 '     ', '      ', '       ');
67
68
69 /**
70 * Section counter for ODT export
71 *
72 * @private
73 * @see render()
74 * @since created 08-Jun-2008
75 */
76 var $_odtSect = 0;
77
78 /**
79 * Prepare the markup to render the DIFF text.
80 *
81 * @param $aText String The DIFF text to markup.
82 * @param $aFormat String The DIFF format used ("u", "c", "n|r", "s").
83 * @param $aDoc String Reference to the current renderer's
84 * <tt>doc</tt> property.
85 * @return Boolean <tt>TRUE</tt>.
86 * @private
87 * @see render()
88 */
function _addDiff(&$aText, &$aFormat, &$aDoc) {
    // The text ends up inside a PRE block and every pattern below
    // consumes the LF in front of a line, so each opening tag has to
    // bring that LF back.
    $tagEnd  = '</span>';
    $tagAdd  = "\n" . '<span class="diff_addedline">';
    $tagDel  = "\n" . '<span class="diff_deletedline">';
    $tagHead = "\n" . '<span class="diff_blockheader">';
    // Replacement strings wrapping the first capture group:
    $wrapAdd  = $tagAdd . '\1' . $tagEnd;
    $wrapDel  = $tagDel . '\1' . $tagEnd;
    $wrapHead = $tagHead . '\1' . $tagEnd;
    // Header lines common to all formats. The expression is left open
    // on purpose so a format can append alternatives; it is only
    // complete once ")#" (at least) got appended.
    $headRE = '#\n((?:diff\s[^\n]*)|(?:Index:\s[^\n]*)|(?:={60,})'
        . '|(?:RCS file:\s[^\n]*)|(?:retrieving revision [0-9][^\n]*)';

    if ('u' == $aFormat) {
        // unified output: "@@" hunk headers, "+" added, "-" removed
        $search = array(
            $headRE . '|(?:@@[^\n]*))#',
            '|\n(\+[^\n]*)|',
            '|\n(\-[^\n]*)|');
        $aDoc .= preg_replace($search,
            array($wrapHead, $wrapAdd, $wrapDel), $aText);
        return TRUE;
    } // if

    if ('c' == $aFormat) {
        // context output: split at the "*****" hunk separators
        // (keeping them) and handle the pieces one by one
        $marked = preg_replace($headRE . ')#', $wrapHead, $aText);
        $sections = preg_split('|(\n\*{5,})|', $marked,
            -1, PREG_SPLIT_DELIM_CAPTURE);
        // Everything before the first separator is the file header:
        $sections[0] = preg_replace(
            array('|\n(\-{3}[^\n]*)|',
                '|\n(\*{3}[^\n]*)|'),
            array($wrapAdd, $wrapDel),
            $sections[0]);
        $total = count($sections);
        for ($idx = 1; $idx < $total; ++$idx) {
            $found = array();
            if (preg_match('|^\n(\*{5,})|', $sections[$idx], $found)) {
                // the separator itself
                $sections[$idx] = $tagHead . $found[1] . $tagEnd;
                continue;
            } // if
            if (! preg_match('|^\n(\x2A{3}\s[^\n]*)(.*)|s',
                $sections[$idx], $found)) {
                continue; // not a hunk: leave the section as is
            } // if
            // $found[1] == head of OLD code, $found[2] == the rest
            $parts = preg_split('|\n(\-{3}\s[^\n]*)|',
                $found[2], -1, PREG_SPLIT_DELIM_CAPTURE);
            // $parts[0] == OLD code
            $parts[0] = preg_replace('|\n([!\-][^\n]*)|',
                $wrapDel, $parts[0]);
            // $parts[1] == head of NEW code
            $parts[1] = $tagAdd . $parts[1] . $tagEnd;
            // $parts[2] == NEW code
            $parts[2] = preg_replace(
                array('|\n([!\x2B][^\n]*)|',
                    '|\n(\x2A{3}[^\n]*)|'),
                array($wrapAdd, $wrapDel),
                $parts[2]);
            if (isset($parts[3])) {
                // only there when handling multi-file patches
                $parts[3] = preg_replace('|^(\x2D{3}[^\n]*)|',
                    $wrapAdd, $parts[3]);
            } // if
            $sections[$idx] = $tagDel . $found[1] . $tagEnd
                . implode('', $parts);
        } // for
        $aDoc .= implode('', $sections);
        return TRUE;
    } // if

    if ('n' == $aFormat) {
        // RCS output: only added lines are present, hence just the
        // diff indicators get highlighted while the text is left alone
        $search = array(
            $headRE . ')#',
            '|\n(d[0-9]+\s+[0-9]+)|',
            '|\n(a[0-9]+\s+[0-9]+)|');
        $aDoc .= preg_replace($search,
            array($wrapHead, $wrapDel, $wrapAdd), $aText);
        return TRUE;
    } // if

    if ('s' == $aFormat) {
        // simple output: the text was entity-encoded before, so the
        // "<" / ">" indicators show up as "&#60;" / "&#62;"
        $search = array(
            $headRE
                . '|((?:[0-9a-z]+(?:,[0-9a-z]+)*)(?:[^\n]*)))#',
            '|\n(\x26#60;[^\n]*)|',
            '|\n(\x26#62;[^\n]*)|');
        $aDoc .= preg_replace($search,
            array($wrapHead, $wrapDel, $wrapAdd), $aText);
        return TRUE;
    } // if

    // unknown diff format: just append the unrecognized text
    $aDoc .= $aText;
    return TRUE;
} // _addDiff()
186
187 /**
188 * Add the lines of the given <tt>$aList</tt> to the specified
189 * <tt>$aDoc</tt> beginning with the given <tt>$aStart</tt> linenumber.
190 *
191 * @param $aList Array [IN] the list of lines as prepared by
192 * <tt>render()</tt>, [OUT] <tt>FALSE</tt>.
193 * @param $aStart Integer The first linenumber to use.
194 * @param $aDoc String Reference to the current renderer's
195 * <tt>doc</tt> property.
196 * @private
197 * @see render()
198 */
function _addLines(&$aList, $aStart, &$aDoc) {
    // Nothing to number (e.g. the list was already released):
    if (! is_array($aList)) {
        $aList = FALSE;
        return;
    } // if
    // Since we're dealing with monospaced fonts here the width of each
    // character (space, NBSP, digit) is the same. Hence the length of
    // a digits string gives us its width i.e. the number of digits.
    $count = count($aList);
    // The LAST line carries number "start + count - 1"; using
    // "start + count" would widen the column by one whenever that
    // sum happens to reach the next power of ten.
    $last = $aStart + ((0 < $count) ? $count - 1 : 0);
    $width = strlen("$last"); // width of greatest number
    // "foreach" instead of the "each()" idiom (removed in PHP 8):
    foreach ($aList as $line) {
        // Compare as string: a code line consisting of "0" is falsy
        // in PHP but must not be rendered as an empty line.
        $hasText = (('' !== (string)$line) && ('&nbsp;' != $line));
        $aDoc .= '<span class="lno">'
            . $this->_lead[$width - strlen("$aStart")]
            . "$aStart:</span>" . (($hasText)
                ? " $line\n"
                : "\n");
        ++$aStart; // increment line number
    } // foreach
    $aList = FALSE; // release memory
} // _addLines()
216
217 /**
218 * Internal convenience method to replace HTML special characters.
219 *
220 * @param $aString String [IN] The text to handle;
221 * [OUT] the modified text (i.e. the method's result).
222 * @return String The string with HTML special chars replaced.
223 * @private
224 * @since created 05-Feb-2007
225 */
function &_entities(&$aString) {
    // Numeric entities for the three HTML special characters.
    // The ampersand has to come first: str_replace() works through the
    // list in order and the other replacements introduce new "&".
    $map = array(
        '&' => '&#38;',
        '<' => '&#60;',
        '>' => '&#62;');
    // The argument itself is modified and handed back by reference.
    $aString = str_replace(array_keys($map), array_values($map), $aString);
    return $aString;
} // _entities()
231
232 /**
233 * Try to fix some markup error of the GeSHi SHELL highlighting.
234 *
235 * <p>
236 * The GeShi highlighting for type "sh" (i.e. "bash") is, well,
237 * seriously flawed (at least up to version 1.0.7.20 i.e. 2007-07-01).
238 * Especially handling of comments and embedded string as well as
239 * keyword is plain wrong.
240 * </p><p>
241 * This internal helper method tries to solve some minor problems by
242 * removing highlight markup embedded in comment markup.
243 * This is, however, by no means a final resolution: GeSHi obviously
244 * keeps a kind of internal state resulting in highlighting markup
245 * spanning (i.e. repeated on) several lines.
246 * Which - if that state is wrong - causes great damage not by
247 * corrupting the data but by confusing the reader with wrong markup.
248 * The easiest way to trigger such a line spawning confusion is to use
249 * solitary doublequotes or singlequotes (apostrophe) in a comment
250 * line ...
251 * </p>
252 * @param $aMarkup String [IN] The highlight markup as returned by GeSHi;
253 * [OUT] <tt>FALSE</tt>.
254 * @param $aDoc String Reference to the current renderer's
255 * <tt>doc</tt> property.
256 * @private
257 * @since created 04-Aug-2007
258 * @see render()
259 */
function _fixGeSHi_Bash(&$aMarkup, &$aDoc) {
    // Pack a four-part GeSHi version ("a.b.c.d") into one digit string
    // so it can be compared against 1.0.7.20, the last release known
    // to show the comments bug.
    $ver = array();
    if (defined('GESHI_VERSION')
        && preg_match('|(\d+)\.(\d+)\.(\d+)\.(\d+)|', GESHI_VERSION, $ver)) {
        $packed = sprintf('%02u%02u%02u%03u',
            $ver[1] * 1, $ver[2] * 1, $ver[3] * 1, $ver[4] * 1);
        if ('010007020' < $packed) {
            // GeSHi v1.0.7.21 has the comments bug fixed
            $aDoc .= $aMarkup;
            $aMarkup = FALSE; // release memory
            return;
        } // if
    } // if
    $lines = explode("\n", $aMarkup);
    $aMarkup = FALSE; // release memory
    // "foreach" replaces the former "each()" loop: each() was removed
    // in PHP 8. Iteration works on a snapshot, so writing back to
    // $lines[$i] inside the loop is safe.
    foreach ($lines as $i => $line) {
        $hits = array();
        // GeSHi "bash" module marks up comments with CSS class "re3":
        if (preg_match('|^((.*)<span class="re3">)(.*)$|i', $line, $hits)) {
            if ('#!/bin/' == substr($hits[3], 0, 7)) {
                // Shebang line: drop the comment markup altogether
                $lines[$i] = $hits[2] . strip_tags($hits[3]);
            } else {
                // Keep the comment SPAN but remove anything nested in it
                $lines[$i] = $hits[1] . strip_tags($hits[3]) . '</span>';
            } // if
        } else if (! preg_match('|^\s*<span|i', $line)) {
            // If a line doesn't start with a highlighted keyword
            // all tags are removed since they're most probably
            // "leftovers" from the GeSHI string/comment bug.
            $lines[$i] = strip_tags($line);
        } // if
    } // foreach
    $aDoc .= implode("\n", $lines);
} // _fixGeSHi_Bash()
292
293 /**
294 * Add markup to load JavaScript file with older DokuWiki versions.
295 *
296 * @param $aRenderer Object The renderer used.
297 * @private
298 * @since created 19-Feb-2007
299 * @see render()
300 */
function _fixJS(&$aRenderer) {
    // Two reasons to leave right away:
    // - "lib/exe/js.php" exists, i.e. the DokuWiki installation is
    //   assumed to deliver the plugin files on its own
    //   (XXX this test breaks if that DokuWiki file gets renamed);
    // - the markup was already added (or found to be not needed).
    if (@file_exists(DOKU_INC . 'lib/exe/js.php') || $this->_JSmarkup) {
        return;
    } // if
    $pluginDir = realpath(dirname(__FILE__)) . '/';
    $pluginURL = DOKU_BASE . 'lib/plugins/code/';

    $styleTag = '';
    if (file_exists($pluginDir . 'style.css')) {
        // Catch the stylesheet's contents by including it while
        // output buffering is active:
        ob_start();
        @include($pluginDir . 'style.css');
        $rawCSS = ob_get_contents();
        ob_end_clean();
        // Remove comments/whitespace from the CSS and expand IMG
        // paths; the two trailing patterns have no replacement
        // string and hence just trim the text:
        $styleTag = preg_replace(
            array('|\s*/\x2A.*?\x2A/\s*|s', '|\s*([:;\{\},+!])\s*|',
                '|(?:url\x28\s*)([^/])|', '|^\s*|', '|\s*$|'),
            array(' ', '\1', 'url(' . $pluginURL . '\1'),
            $rawCSS);
        if ($styleTag) {
            $styleTag = '<style type="text/css">' . $styleTag . '</style>';
        } // if
    } // if

    $scriptTag = '';
    if (file_exists($pluginDir . 'script.js')) {
        $scriptTag = '<script type="text/javascript" src="'
            . $pluginURL . 'script.js"></script>';
    } // if

    $this->_JSmarkup = $styleTag . $scriptTag;
    if ($this->_JSmarkup) {
        // Put the markup in front of the document generated so far
        // (with empty paragraphs removed):
        $aRenderer->doc = $this->_JSmarkup
            . preg_replace('|\s*<p>\s*</p>\s*|', '', $aRenderer->doc);
    } // if
    // ELSE: Neither CSS nor JS files found.

    // Set member field to skip tests with next call:
    $this->_JSmarkup = TRUE;
} // _fixJS()
341
342 /**
343 * RegEx callback to markup spaces in ODT mode.
344 *
345 * @param $aList Array A list of RegEx matches.
346 * @private
347 * @static
348 * @since created 07-Jun-2008
349 * @see render()
350 */
function _preserveSpaces($aList) {
    // $aList[1] is the captured run of spaces; ODT needs an explicit
    // element telling how many of them to render.
    $count = strlen($aList[1]);
    if ($count) {
        return '<text:s text:c="' . $count . '"/>';
    } // if
    // Nothing captured: a single plain space will do.
    return ' ';
} // _preserveSpaces()
356
357 /**
358 * Add the lines of the given <tt>$aText</tt> to the specified
359 * <tt>$aDoc</tt> beginning with the given <tt>$aStart</tt> linenumber.
360 *
361 * @param $aText String [IN] the text lines as prepared by
362 * <tt>handle()</tt>, [OUT] <tt>FALSE</tt>.
363 * @param $aStart Integer The first linenumber to use;
364 * if <tt>0</tt> (zero) no linenumbers are used.
365 * @param $aDoc String Reference to the current renderer's
366 * <tt>doc</tt> property.
367 * @param $aClass String The CSS class name for the <tt>PRE</tt> tag.
368 * @param $addTags Boolean Used in "ODT" mode to suppress tagging
369 * the line numbers.
370 * @private
371 * @since created 03-Feb-2007
372 * @see render()
373 */
function _rawMarkup(&$aText, $aStart, &$aDoc, $aClass, $addTags = TRUE) {
    // Open the preformatted block unless the caller suppressed the tags:
    if ($addTags) {
        $aDoc .= '<pre class="' . $aClass . '">' . "\n";
    } // if

    if (! $aStart) {
        // No line numbers wanted: the text goes out unchanged.
        $aDoc .= $aText;
    } else {
        // Turn the prepared text into a list of lines and let
        // _addLines() append them with their numbers:
        $aText = explode("\n", $aText);
        $this->_addLines($aText, $aStart, $aDoc);
    } // if

    // Close the block again (same condition as above):
    if ($addTags) {
        $aDoc .= '</pre>';
    } // if

    // The text is consumed; release the caller's copy as well.
    $aText = FALSE;
} // _rawMarkup()
391
392 /**
393 * RegEx callback to replace SPAN tags in ODT mode.
394 *
395 * @param $aList Array A list of RegEx matches.
396 * @private
397 * @static
398 * @since created 07-Jun-2008
399 * @see render()
400 */
function _replaceSpan($aList) {
    // $aList[3] is the captured class name of the SPAN (if any).
    if (! $aList[3]) {
        return '<text:span>';
    } // if
    // Underscores are encoded as "_5f_" in the ODT style name:
    $style = str_replace('_', '_5f_', $aList[3]);
    return '<text:span text:style-name="Code_5f_' . $style . '">';
} // _replaceSpan()
407
408 //@}
409 /**
410 * @publicsection
411 */
412 //@{
413
414 /**
415 * Tell the parser whether the plugin accepts syntax mode
416 * <tt>$aMode</tt> within its own markup.
417 *
418 * @param $aMode String The requested syntaxmode.
419 * @return Boolean <tt>FALSE</tt> (no nested markup allowed).
420 * @public
421 * @see getAllowedTypes()
422 */
function accepts($aMode) {
    // Whatever mode is asked for: nothing may be nested in here.
    $nestingAllowed = FALSE;
    return $nestingAllowed;
} // accepts()
426
427 /**
428 * Connect lookup pattern to lexer.
429 *
430 * @param $aMode String The desired rendermode.
431 * @public
432 * @see render()
433 */
function connectTo($aMode) {
    // The opening tag is only accepted if the look-ahead finds the
    // end of that tag, a line break and - later on - a closing tag
    // starting a line; this minimizes the chance of false matches.
    $entryPattern = '\x3Ccode(?=[^>]*\x3E\r?\n.*\n\x3C\x2Fcode\x3E)';
    $this->Lexer->addEntryPattern($entryPattern, $aMode, 'plugin_code');
} // connectTo()
440
441 /**
442 * Get an array of mode types that may be nested within the
443 * plugin's own markup.
444 *
445 * @return Array Allowed nested types (none).
446 * @public
447 * @see accepts()
448 * @static
449 */
function getAllowedTypes() {
    // No mode type at all may be nested within the plugin's markup.
    $allowed = array();
    return $allowed;
} // getAllowedTypes()
453
454 /**
455 * Get an associative array with plugin info.
456 *
457 * <p>
458 * The returned array holds the following fields:
459 * <dl>
460 * <dt>author</dt><dd>Author of the plugin</dd>
461 * <dt>email</dt><dd>Email address to contact the author</dd>
462 * <dt>date</dt><dd>Last modified date of the plugin in
463 * <tt>YYYY-MM-DD</tt> format</dd>
464 * <dt>name</dt><dd>Name of the plugin</dd>
465 * <dt>desc</dt><dd>Short description of the plugin (Text only)</dd>
466 * <dt>url</dt><dd>Website with more information on the plugin
467 * (eg. syntax description)</dd>
468 * </dl>
469 * @return Array Information about this plugin class.
470 * @public
471 * @static
472 */
function getInfo() {
    // The tag name is kept in a variable so the literal markup never
    // shows up in this file (hack to hide "desc" field from GeShi).
    $tag = 'code';
    $info = array();
    $info['author'] = 'Matthias Watermann';
    $info['email'] = '[email protected]';
    $info['date'] = '2008-07-22';
    $info['name'] = 'Code Syntax Plugin';
    $info['desc'] = 'Syntax highlighting with line numbering <'
        . $tag . ' lang 1 |[fh] text |[hs]> ... </' . $tag . '>';
    $info['url'] = 'http://wiki.splitbrain.org/plugin:code2';
    return $info;
} // getInfo()
484
485 /**
486 * Define how this plugin is handled regarding paragraphs.
487 *
488 * <p>
489 * This method is important for correct XHTML nesting.
490 * It returns one of the following values:
491 * </p><dl>
492 * <dt>normal</dt><dd>The plugin can be used inside paragraphs.</dd>
493 * <dt>block</dt><dd>Open paragraphs need to be closed before
494 * plugin output.</dd>
495 * <dt>stack</dt><dd>Special case: Plugin wraps other paragraphs.</dd>
496 * </dl>
497 * @return String <tt>"block"</tt>.
498 * @public
499 * @static
500 */
function getPType() {
    // Open paragraphs have to be closed before the plugin's output.
    $paragraphType = 'block';
    return $paragraphType;
} // getPType()
504
505 /**
506 * Where to sort in?
507 *
508 * @return Integer <tt>194</tt> (below "Doku_Parser_Mode_code").
509 * @public
510 * @static
511 */
function getSort() {
    // Must stay below the 200 returned by class
    // "Doku_Parser_Mode_code" so this plugin is asked first.
    $sortOrder = 194;
    return $sortOrder;
} // getSort()
516
517 /**
518 * Get the type of syntax this plugin defines.
519 *
520 * @return String <tt>"protected"</tt>.
521 * @public
522 * @static
523 */
function getType() {
    // The syntax type this plugin reports to the parser.
    $syntaxType = 'protected';
    return $syntaxType;
} // getType()
527
528 /**
529 * Handler to prepare matched data for the rendering process.
530 *
531 * <p>
532 * The <tt>$aState</tt> parameter gives the type of pattern
533 * which triggered the call to this method:
534 * </p><dl>
535 * <dt>DOKU_LEXER_UNMATCHED</dt>
536 * <dd>ordinary text encountered within the plugin's syntax mode
537 * which doesn't match any pattern.</dd>
538 * </dl>
539 * @param $aMatch String The text matched by the patterns.
540 * @param $aState Integer The lexer state for the match.
541 * @param $aPos Integer The character position of the matched text.
542 * @param $aHandler Object Reference to the Doku_Handler object.
543 * @return Array Index <tt>[0]</tt> holds the current <tt>$aState</tt>,
544 * index <tt>[1]</tt> the embedded text to highlight,
545 * index <tt>[2]</tt> the language/dialect (or <tt>FALSE</tt>),
546 * index <tt>[3]</tt> the first line number (or <tt>0</tt>),
547 * index <tt>[4]</tt> the top title (or <tt>FALSE</tt>),
548 * index <tt>[5]</tt> the bottom title (or <tt>FALSE</tt>),
549 * index <tt>[6]</tt> hiding CSS flag (or <tt>""</tt>).
550 * @public
551 * @see render()
552 * @static
553 */
function handle($aMatch, $aState, $aPos, &$aHandler) {
    if (DOKU_LEXER_UNMATCHED != $aState) {
        return array($aState); // nothing to do for "render()"
    } // if
    $aMatch = explode('>', $aMatch, 2);
    // $aMatch[0] : lang etc.
    // $aMatch[1] : text to highlight
    if (! isset($aMatch[1])) {
        // No tag delimiter at all: treat as "no text" instead of
        // working on an undefined index.
        $aMatch[1] = '';
    } // if
    $n = explode('>', trim($aMatch[1]));
    $l = 'extern'; // external resource requested?
    // Check whether there's an external file to fetch:
    if ($l == $n[0]) {
        // "extern" without any ">" leaves $n[1] undefined:
        $n[1] = (isset($n[1])) ? trim($n[1]) : '';
        if ($n[1]) {
            $n[0] = @parse_url($n[1]);
            // parse_url() omits the "scheme" key for scheme-less URLs:
            if (is_array($n[0]) && isset($n[0]['scheme'])
                && ($n[0] = $n[0]['scheme'])) {
                // Don't accept unsecure schemes like
                // "file", "javascript", "mailto" etc.
                // NOTE(review): the URL comes straight from the wiki
                // markup and is fetched by the server; whether internal
                // hosts need to be blocked should be checked.
                switch ($n[0]) {
                    case 'ftp':
                    case 'http':
                    case 'https':
                        //XXX This might fail due to global PHP setup:
                        if ($handle = @fopen($n[1], 'rb')) {
                            $aMatch[1] = '';
                            while (! @feof($handle)) {
                                //XXX This might fail due to
                                // memory constraints:
                                $aMatch[1] .= @fread($handle, 0x8000);
                            } // while
                            @fclose($handle);
                        } else {
                            $aMatch = array($l,
                                'Failed to retrieve: ' . $n[1]);
                        } // if
                        break;
                    default:
                        $aMatch = array($l,
                            'Unsupported URL scheme: ' . $n[0]);
                        break;
                } // switch
            } else {
                $aMatch = array($l, 'Invalid URL: ' . $n[1]);
            } // if
        } else {
            $aMatch = array($l, 'Missing URL: ' . $aMatch[1]);
        } // if
    } // if
    // Strip leading/trailing/EoL whitespace,
    // replace TABs by four spaces, "&#160;" by NBSP:
    $aMatch[1] = preg_replace(
        array('#(?>\r\n)|\r#', '|^\n\n*|',
            '|[\t ]+\n|', '|\s*\n$|'),
        array("\n", '', "\n", ''),
        str_replace('&#160;', '&nbsp;',
            str_replace("\t", '    ', $aMatch[1])));

    $css = ''; // default: no initial CSS content hiding
    $l = FALSE; // default: no language
    $n = 0; // default: no line numbers
    $ht = $ft = FALSE; // default: no (head/foot) title
    $hits = array(); // RegEx matches from the tag attributes
    /*
    The free form of the RegEx to parse the arguments here is:
    /^
        # "eat" leading whitespace:
        \s*
        (?=\S)  # Look ahead: do not match empty lines. This is
                # needed since all other expressions are optional.
        # Make sure, nothing is given away once it matched:
        (?>
            # We need a separate branch for "diff" because it may be
            # followed by a _letter_ (not digit) indicating the format.
            (?>
                (diff)                  # match 1
                (?>\s+([cnrsu]?))?      # match 2
            )
        |
            # Branch for standard language highlighting
            (?>
                # extract language:
                ([a-z][^\x7C\s]*)       # match 3
                (?>
                    # extract starting line number:
                    \s+(\d\d*)          # match 4
                )?
            )
        |
            # Branch for line numbering only
            (\d\d*)                     # match 5
        |
            \s* # dummy needed to match "title only" markup (below)
        )
        # "eat" anything else up to the text delimiter:
        [^\x7C]*
        (?>
            \x7C
            # extract the position flag:
            ([bfht])?\s*                # match 6
            # extract the header,footer line:
            ([^\x7C]+)                  # match 7
            (?>
                # see whether there is a class flag:
                \x7C\s*
                (h|s)?.*                # match 8
            )?
        )?
    # Anchored to make sure everything gets matched:
    $/xiu

    Since compiling and applying a free form RegEx slows down the
    overall matching process it is folded to a standard RegEx.
    Benchmarking during development gave
    free form: 20480 loops, 552960 hits, 102400 fails, 12.994689 secs
    standard:  20480 loops, 552960 hits, 102400 fails, 8.357169 secs
    */
    if (preg_match('/^\s*(?=\S)(?>(?>(diff)(?>\s+([cnrsu]?))?)|'
        . '(?>([a-z][^\x7C\s]*)(?>\s+(\d\d*))?)|(\d\d*)|\s*)[^\x7C]*'
        . '(?>\x7C([bfht])?\s*([^\x7C]+)(?>\x7C\s*(h|s)?.*)?)?$/iu',
        $aMatch[0], $hits)) {
        unset($hits[0]); // free mem
        // $hits[1] = "diff"
        // $hits[2] = type (of [1])
        // $hits[3] = LANG
        // $hits[4] = NUM (of [3])
        // $hits[5] = NUM (alone)
        // $hits[6] = Top/Bottom flag (of [7])
        // $hits[7] = TITLE
        // $hits[8] = s/h CSS flag
        if (isset($hits[3]) && ($hits[3])) {
            $l = strtolower($hits[3]);
            if (isset($hits[4]) && ($hits[4])) {
                $n = (int)$hits[4];
            } // if
            $hits[3] = $hits[4] = FALSE;
        } else if (isset($hits[1]) && ($hits[1])) {
            $l = strtolower($hits[1]);
            // The appended "?" guarantees a first character to read.
            // Square brackets are used for the string offsets here:
            // the "{0}" form is a parse error as of PHP 8.
            $hits[2] = (isset($hits[2]))
                ? strtolower($hits[2]) . '?'
                : '?';
            $n = $hits[2][0];
            $hits[1] = $hits[2] = FALSE;
        } else if (isset($hits[5]) && ($hits[5])) {
            $n = (int)$hits[5];
        } // if
        if (isset($hits[7]) && ($hits[7])) {
            // Default position is "f" (foot) if no flag was given:
            $hits[6] = (isset($hits[6]))
                ? strtolower($hits[6]) . 'f'
                : 'f';
            switch ($hits[6][0]) {
                case 'h':
                case 't':
                    $ht = trim($hits[7]);
                    break;
                default:
                    $ft = trim($hits[7]);
                    break;
            } // switch
            if (isset($hits[8])) {
                $hits[8] = strtolower($hits[8]) . 's';
                if ('h' == $hits[8][0]) {
                    // This class is handled by JavaScript (there
                    // _must_not_ be any CSS rules for this):
                    $css = ' HideOnInit';
                } // if
            } // if
            $hits[6] = $hits[7] = $hits[8] = FALSE;
        } // if
        // ELSE: no arguments given to CODE tag
    } // if
    switch ($l) {
        case 'console':
            // nothing additional to setup here
            break;
        case 'diff':
            // strncmp() copes with an empty text where reading
            // offset 0 would raise an "uninitialized offset" error.
            if (0 !== strncmp($aMatch[1], "\n", 1)) {
                // A leading LF is needed to recognize and handle
                // the very first line with all the REs used.
                $aMatch[1] = "\n" . $aMatch[1];
            } // if
            switch ($n) {
                case 'u': // DIFF cmdline switch for "unified"
                case 'c': // DIFF cmdline switch for "context"
                case 'n': // DIFF cmdline switch for "RCS"
                case 's':
                    // We believe the format hint ...
                    // (or should we be more suspicious?)
                    break;
                case 'r': // Mnemonic for "RCS"
                    $n = 'n';
                    break;
                default: // try to figure out the format actually used
                    if (preg_match(
                        '|\n(?:\x2A{5,}\n\x2A{3}\s[1-9]+.*?\x2A{4}\n.+?)+|s',
                        $aMatch[1])) {
                        $n = 'c';
                    } else if (preg_match(
                        '|\n@@\s\-[0-9]+,[0-9]+[ \+,0-9]+?@@\n.+\n|s',
                        $aMatch[1])) {
                        $n = 'u';
                    } else if (preg_match(
                        '|\n[ad][0-9]+\s+[0-9]+\r?\n|', $aMatch[1])) {
                        // We've to check this _before_ "simple" since
                        // the REs are quite similar (but this one is
                        // slightly more specific).
                        $n = 'n';
                    } else if (preg_match(
                        '|\n(?:[0-9a-z]+(?:,[0-9a-z]+)*)(?:[^\n]*\n.*?)+|',
                        $aMatch[1])) {
                        $n = 's';
                    } else {
                        $n = '?';
                    } // if
                    break;
            } // switch
            break;
        case 'htm': // convenience shortcut
        case 'html': // ditto
            $l = 'html4strict';
            break;
        case 'js': // shortcut
            $l = 'javascript';
            break;
        case 'sh': // shortcut
            $l = 'bash';
            break;
        default:
            if (! $l) {
                // no language: simple PRE markup will get generated
                $l = FALSE;
            } // if
            break;
    } // switch
    return array(DOKU_LEXER_UNMATCHED,
        $aMatch[1], $l, $n, $ht, $ft, $css);
} // handle()
796
797 /**
798 * Add exit pattern to lexer.
799 *
800 * @public
801 */
function postConnect() {
    // The closing tag only counts if it starts a line (look-behind
    // for the LF); this minimizes the chance of false matches.
    $exitPattern = '(?<=\n)\x3C\x2Fcode\x3E';
    $this->Lexer->addExitPattern($exitPattern, 'plugin_code');
} // postConnect()
807
808 /**
809 * Handle the actual output (markup) creation.
810 *
811 * <p>
812 * The method checks the given <tt>$aFormat</tt> to decide how to
813 * handle the specified <tt>$aData</tt>.
814 * The standard case (i.e. <tt>"xhtml"</tt>) is handled completely
815 * by this implementation, preparing linenumbers and/or head/foot
816 * lines are requested.
817 * For the <tt>"odt"</tt> format all plugin features (incl. linenumbers
818 * and header/footer lines) are supported by generating the appropriate
819 * ODT/XML markup.
820 * All other formats are passed back to the given <tt>$aRenderer</tt>
821 * instance for further handling.
822 * </p><p>
823 * <tt>$aRenderer</tt> contains a reference to the renderer object
824 * which is currently in charge of the rendering.
825 * The contents of the given <tt>$aData</tt> is the return value
826 * of the <tt>handle()</tt> method.
827 * </p>
828 * @param $aFormat String The output format to generate.
829 * @param $aRenderer Object A reference to the renderer object.
830 * @param $aData Array The data created/returned by the
831 * <tt>handle()</tt> method.
832 * @return Boolean <tt>TRUE</tt>.
833 * @public
834 * @see handle()
835 */
836 function render($aFormat, &$aRenderer, &$aData) {
837 if (DOKU_LEXER_UNMATCHED != $aData[0]) {
838 return TRUE;
839 } // if
840 if ('xhtml' == $aFormat) {
841 if ($tdiv = (($aData[4]) || ($aData[5]))) {
842 $this->_fixJS($aRenderer); // check for old DokuWiki versions
843 $aRenderer->doc .= '<div class="code">';
844 if ($aData[4]) {
845 //XXX Note that "_headerToLink()" is supposed to be a
846 // _private_ method of the renderer class; so this code
847 // will fail once DokuWiki is rewritten in PHP5 which
848 // implements encapsulation of private methods and
849 // properties:
850 $aRenderer->doc .= '<p class="codehead' . $aData[6]
851 . '"><a name="' . $aRenderer->_headerToLink($aData[4])
852 . '">' . $this->_entities($aData[4]) . '</a></p>';
853 $aData[4] = $aData[6] = FALSE; // free mem
854 } // if
855 } // if
856 if ($aData[2]) { // lang was given
857 if ('console' == $aData[2]) {
858 $this->_rawMarkup($this->_entities($aData[1]),
859 $aData[3], $aRenderer->doc, $aData[2]);
860 } else if ('diff' == $aData[2]) {
861 $this->_entities($aData[1]);
862 $aRenderer->doc .= '<pre class="code diff">';
863 $this->_addDiff($aData[1], $aData[3], $aRenderer->doc);
864 $aRenderer->doc .= '</pre>';
865 } else {
866 $isSH = ('bash' == $aData[2]);
867 $geshi = new GeSHi($aData[1], $aData[2], GESHI_LANG_ROOT);
868 if ($geshi->error()) {
869 // Language not supported by "GeSHi"
870 $geshi = NULL; // release memory
871 $this->_rawMarkup($this->_entities($aData[1]),
872 $aData[3], $aRenderer->doc, 'code');
873 } else {
874 $aData[1] = FALSE; // free mem
875 $geshi->enable_classes();
876 $geshi->set_encoding('utf-8');
877 $geshi->set_header_type(GESHI_HEADER_PRE);
878 $geshi->set_overall_class('code ' . $aData[2]);
879 global $conf;
880 if ($conf['target']['extern']) {
881 $geshi->set_link_target($conf['target']['extern']);
882 } // if
883 if ($aData[3]) { // line numbers requested
884 // Separate PRE tag from parsed data:
885 $aData[1] = explode('>', $geshi->parse_code(), 2);
886 // [1][0] = leading "<pre"
887 // [1][1] = remaining markup up to trailing "</pre"
888 $geshi = NULL; // release memory
889
890 // Add the open tag to the document:
891 $aRenderer->doc .= $aData[1][0] . '>';
892
893 // Separate trailing PRE tag:
894 $aData[1] = explode('</pre>', $aData[1][1], 2);
895 // [1][0] = GeSHi markup
896 // [1][1] = trailing "</pre"
897
898 if ($isSH) {
899 $aData[1][1] = '';
900 $this->_fixGeSHi_Bash($aData[1][0],
901 $aData[1][1]);
902 } else {
903 // Set reference to fixed markup to sync with
904 // the "bash" execution path (above):
905 $aData[1][1] =& $aData[1][0];
906 } // if
907
908 // Split the parsed data into a list of lines:
909 $aData[2] = explode("\n", $aData[1][1]);
910 $aData[1] = FALSE; // free mem
911
912 // Add the numbered lines to the document:
913 $this->_addLines($aData[2], $aData[3],
914 $aRenderer->doc);
915
916 // Close the preformatted section markup:
917 $aRenderer->doc .= '</pre>';
918 } else { // w/o line numbering
919 if ($isSH) {
920 // Separate trailing PRE tag which
921 // sometimes is "forgotten" by GeSHi:
922 $aData[2] = explode('</pre>',
923 $geshi->parse_code(), 2);
924 // [1][0] = GeSHi markup
925 // [1][1] = trailing "</pre" (if any)
926 $this->_fixGeSHi_Bash($aData[2][0],
927 $aRenderer->doc);
928 $aRenderer->doc .= '</pre>';
929 } else {
930 $aRenderer->doc .= $geshi->parse_code();
931 } // if
932 $geshi = NULL; // release memory
933 } // if
934 } // if
935 } // if
936 } else {
937 $this->_rawMarkup($this->_entities($aData[1]),
938 $aData[3], $aRenderer->doc, 'code');
939 } // if
940 if ($tdiv) {
941 if ($aData[5]) {
942 //XXX See "_headerToLink()" note above.
943 $aRenderer->doc .= '<p class="codefoot'
944 . $aData[6] . '"><a name="'
945 . $aRenderer->_headerToLink($aData[5]) . '">'
946 . $this->_entities($aData[5]) . '</a></p>';
947 } // if
948 $aRenderer->doc .= '</div>';
949 } // if
950 } else if ('odt' == $aFormat) {
951 $inLI = array();
952 if (preg_match('|^<text:p text:style-name="[^"]+">\s*</text:p>\s*(.*)$|si',
953 $aRenderer->doc, $inLI)) {
954 // remove leading whitespace
955 $aRenderer->doc = $inLI[1];
956 } // if
957 // The "renderer_plugin_odt" doesn't clean (close)
958 // its own tags before calling this plugin.
959 // To work around that bug we have to check some
960 // private properties of the renderer instance.
961 $inLI = FALSE;
962 if (is_a($aRenderer, 'renderer_plugin_odt')) {
963 if ($inLI = ($aRenderer->in_list_item)) {
964 // If we're in a list item, we've to close the paragraph:
965 $aRenderer->doc .= '</text:p>';
966 } // if
967 if ($aRenderer->in_paragraph) {
968 $aRenderer->doc .= '</text:p>';
969 $aRenderer->in_paragraph = FALSE;
970 } // if
971 } // if
972
973 // Init (open) our text section:
974 $aRenderer->doc .= "\n"
975 . '<text:section text:style-name="Code_5f_Section" text:name="CodeSnippet'
976 . ++$this->_odtSect . '">';
977
978 if ($tdiv = (($aData[4]) || ($aData[5]))) {
979 // Check whether we need a top caption ("header"):
980 if ($aData[4]) {
981 $aRenderer->doc .=
982 '<text:p text:style-name="Code_5f_Title">'
983 . "<text:line-break/>\n"
984 . $aData[4] . "</text:p>\n";
985 $aData[4] = $aData[6] = FALSE; // free mem
986 } // if
987 } // if
988 // The following code resembles the "xhtml" processing
989 // above except that we're not using "pre" tags here
990 // but ODT/XML markup.
991 $aData[0] = ''; // tmp. container of processed data
992 if ($aData[2]) { // lang was given
993 if ('console' == $aData[2]) {
994 $this->_rawMarkup($this->_entities($aData[1]),
995 $aData[3], $aData[0], $aData[2], FALSE);
996 } else if ('diff' == $aData[2]) {
997 $this->_addDiff($this->_entities($aData[1]),
998 $aData[3], $aData[0]);
999 } else {
1000 $isSH = ('bash' == $aData[2]);
1001 $geshi = new GeSHi($aData[1], $aData[2], GESHI_LANG_ROOT);
1002 if ($geshi->error()) {
1003 // Language not supported by "GeSHi"
1004 $geshi = NULL; // release memory
1005 $this->_rawMarkup($this->_entities($aData[1]),
1006 $aData[3], $aData[0], '', FALSE);
1007 } else {
1008 $aData[1] = FALSE; // free mem
1009 $geshi->enable_classes();
1010 $geshi->set_encoding('utf-8');
1011 $geshi->set_header_type(GESHI_HEADER_PRE);
1012 $geshi->set_overall_class('code ' . $aData[2]);
1013 global $conf;
1014 if ($conf['target']['extern']) {
1015 $geshi->set_link_target($conf['target']['extern']);
1016 } // if
1017 // Separate PRE tag from parsed data:
1018 $aData[1] = explode('>', $geshi->parse_code(), 2);
1019 // [1][0] = leading "<pre"
1020 // [1][1] = remaining markup up to trailing "</pre"
1021 $geshi = NULL; // release memory
1022
1023 // Separate trailing PRE tag:
1024 $aData[1] = explode('</pre>', $aData[1][1], 2);
1025 // [1][0] = GeSHi markup
1026 // [1][1] = trailing "</pre"
1027 $aData[1] = $aData[1][0];
1028
1029 if ($isSH) { // work around GeSHI bug
1030 $aData[2] = '';
1031 $this->_fixGeSHi_Bash($aData[1], $aData[2]);
1032 } else {
1033 $aData[2] = $aData[1];
1034 } // if
1035 $aData[1] = FALSE; // release memory
1036
1037 if ($aData[3]) { // line numbers requested
1038 // Split the parsed data into a list of lines:
1039 $aData[1] = explode("\n", $aData[2]);
1040 $aData[2] = FALSE; // release memory
1041
1042 // Add the numbered lines to the document:
1043 $this->_addLines($aData[1], $aData[3], $aData[0]);
1044 } else { // w/o line numbers
1045 $aData[0] = $aData[2];
1046 $aData[2] = FALSE; // release memory
1047 } // if
1048 } // if
1049 } // if
1050 } else {
1051 $this->_rawMarkup($this->_entities($aData[1]),
1052 $aData[3], $aData[0], '', FALSE);
1053 } // if
1054
1055 if ('console' == $aData[2]) {
1056 $aRenderer->doc .=
1057 '<text:p text:style-name="Code_5f_Console">';
1058 } else {
1059 $aRenderer->doc .=
1060 '<text:p text:style-name="Code_5f_Standard">';
1061 } // if
1062 // Replace the HTML "span" tags (for highlighting) by
1063 // the appropriate ODT/XML markup.
1064 // For unknown reasons we need an additional space
1065 // in front of the very first line.
1066 $aData[0] = '<text:s/>'
1067 . preg_replace_callback('|(<span( class="([^"]*)"[^>]*)?>)|',
1068 array('syntax_plugin_code', '_replaceSpan'),
1069 // OOo (v2.3) crashes on "&nbsp;"
1070 str_replace('&nbsp;', chr(194) . chr(160),
1071 str_replace('</span>', '</text:span>',
1072 strip_tags($aData[0], '<span>'))));
1073 // Now append our markup to the renderer's document;
1074 // TABs, LFs and SPACEs are replaced by their respective
1075 // ODT/XML equivalents:
1076 $aRenderer->doc .= preg_replace_callback('|( {2,})|',
1077 array('syntax_plugin_code', '_preserveSpaces'),
1078 str_replace("\n", "<text:line-break/>\n", $aData[0]));
1079 $aData[0] = FALSE; // release memory
1080
1081 // Check whether we need a bottom caption ("footer"):
1082 if ($tdiv && ($aData[5])) {
1083 $aRenderer->doc .=
1084 '</text:p><text:p text:style-name="Code_5f_Title">'
1085 . $aData[5];
1086 } // if
1087 // Close all our open tags:
1088 $aRenderer->doc .= "</text:p></text:section>\n";
1089
1090 if ($inLI) {
1091 // Workaround (see above): (re-)open a paragraph:
1092 $aRenderer->doc .= '<text:p>';
1093 } // if
1094 } else { // unsupported output format
1095 $aData[0] = $aData[4] = $aData[5] = FALSE; // avoid recursion
1096 // Pass anything else back to the renderer instance
1097 // (which will - hopefully - know how to handle it):
1098 $aRenderer->code($aData[1], $aData[2]);
1099 } // if
1100 $aData = array(FALSE); // don't process this text again
1101 return TRUE;
1102 } // render()
1103
1104 //@}
1105} // class syntax_plugin_code
1106} // if
1107?>
Note: See TracBrowser for help on using the repository browser.