Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: documentation/trunk/packages/dokuwiki-2011-05-25a/lib/plugins/code/syntax.php.bak@ 25027

Last change on this file since 25027 was 25027, checked in by jmt12, 12 years ago
Adding the packages directory, and within it a configured version of dokuwiki all ready to run
File size: 34.5 KB

Line
1	<?php
2	if (! class_exists('syntax_plugin_code')) {
3	if (! defined('DOKU_PLUGIN')) {
4	if (! defined('DOKU_INC')) {
5	define('DOKU_INC',
6	realpath(dirname(__FILE__) . '/../../') . '/');
7	} // if
8	define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');
9	} // if
10	// Include parent class:
11	require_once(DOKU_PLUGIN . 'syntax.php');
12	// We're dealing with "GeSHi" here, hence include it:
13	require_once(DOKU_INC . 'inc/geshi.php');
14
15	/**
16	* <tt>syntax_plugin_code.php </tt>- A PHP4 class that implements the
17	* <tt>DokuWiki</tt> plugin for <tt>highlighting</tt> code fragments.
18	*
19	* <p>
20	* Usage:<br>
21	* <tt><code [language startno |[fh] text |[hs]]>...</code></tt>
22	* </p><pre>
23	* Copyright (C) 2006, 2008 M.Watermann, D-10247 Berlin, FRG
24	* All rights reserved
25	* EMail : <[email protected]>
26	* </pre><div class="disclaimer">
27	* This program is free software; you can redistribute it and/or modify
28	* it under the terms of the GNU General Public License as published by
29	* the Free Software Foundation; either
30	* <a href="http://www.gnu.org/licenses/gpl.html">version 3</a> of the
31	* License, or (at your option) any later version.<br>
32	* This software is distributed in the hope that it will be useful,
33	* but WITHOUT ANY WARRANTY; without even the implied warranty of
34	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35	* General Public License for more details.
36	* </div>
37	* @author <a href="mailto:[email protected]">Matthias Watermann</a>
38	* @version <tt>$Id: syntax_plugin_code.php,v 1.29 2008/07/22 09:22:47 matthias Exp $</tt>
39	* @since created 24-Dec-2006
40	*/
41	class syntax_plugin_code extends DokuWiki_Syntax_Plugin {
42
43	/**
44	* @privatesection
45	*/
46	//@{
47
48	/**
49	* Additional markup used with older DokuWiki installations.
50	*
51	* @private
52	* @see _fixJS()
53	*/
54	var $_JSmarkup = FALSE;
55
56	/**
57	* Indentation "text" used by <tt>_addLines()</tt>.
58	*
59	* <p>
60	* Note that we're using raw <em>UTF-8 NonBreakable Spaces</em> here.
61	* </p>
62	* @private
63	* @see _addLines()
64	*/
// Padding strings indexed by the number of columns to fill in front of a
// line number (index 0 = no padding ... index 7 = seven columns).
// NOTE(review): the listing this was recovered from showed mis-decoded
// NBSP bytes ("Â"); the strings are rebuilt here with explicit UTF-8
// NBSP escapes ("\xC2\xA0") alternating with plain spaces. Only the
// width per index is certain - confirm the exact space/NBSP mix against
// a pristine copy of the plugin.
var $_lead = array('', ' ', " \xC2\xA0", " \xC2\xA0 ",
    " \xC2\xA0 \xC2\xA0", " \xC2\xA0 \xC2\xA0 ",
    " \xC2\xA0 \xC2\xA0 \xC2\xA0", " \xC2\xA0 \xC2\xA0 \xC2\xA0 ");
67
68
69	/**
70	* Section counter for ODT export
71	*
72	* @private
73	* @see render()
74	* @since created 08-Jun-2008
75	*/
76	var $_odtSect = 0;
77
78	/**
79	* Prepare the markup to render the DIFF text.
80	*
81	* @param $aText String The DIFF text to markup.
82	* @param $aFormat String The DIFF format used ("u", "c", "n|r", "s").
83	* @param $aDoc String Reference to the current renderer's
84	* <tt>doc</tt> property.
85	* @return Boolean <tt>TRUE</tt>.
86	* @private
87	* @see render()
88	*/
function _addDiff(&$aText, &$aFormat, &$aDoc) {
    // Appends $aText to $aDoc with every recognised diff line wrapped in
    // a SPAN whose CSS class tells header, added and deleted lines apart.
    // $aFormat selects the rule set ("u", "c", "n" or "s"); any other
    // value appends the text unchanged. Always returns TRUE.
    //
    // All patterns use "|" or "#" as RegEx delimiter and rely on every
    // line - including the first - being preceded by a LF.

    // Since we're inside a PRE block we need the leading LFs:
    $ADD = "\n" . '<span class="diff_addedline">';
    $DEL = "\n" . '<span class="diff_deletedline">';
    $HEAD = "\n" . '<span class="diff_blockheader">';
    $CLOSE = '</span>';
    // Common headers for all formats;
    // the RegEx needs at least ")#" appended!
    $DiffHead = '#\n((?:diff\s[^\n]*)|(?:Index:\s[^\n]*)|(?:={60,})'
        . '|(?:RCS file:\s[^\n]*)|(?:retrieving revision [0-9][^\n]*)';
    switch ($aFormat) {
        case 'u': // unified output
            $aDoc .= preg_replace(
                array($DiffHead . '|(?:@@[^\n]*))#',
                    '|\n(\+[^\n]*)|',
                    '|\n(\-[^\n]*)|'),
                array($HEAD . '\1' . $CLOSE,
                    $ADD . '\1' . $CLOSE,
                    $DEL . '\1' . $CLOSE),
                $aText);
            return TRUE;
        case 'c': // context output
            // Mark the common headers, then cut the text at each
            // "*****" hunk separator (the separators are kept):
            $sections = preg_split('|(\n\*{5,})|',
                preg_replace($DiffHead . ')#',
                    $HEAD . '\1' . $CLOSE,
                    $aText),
                -1, PREG_SPLIT_DELIM_CAPTURE);
            // File header lines in front of the first hunk:
            // "--- new file" and "*** old file"
            $sections[0] = preg_replace(
                array('|\n(\-{3}[^\n]*)|',
                    '|\n(\*{3}[^\n]*)|'),
                array($ADD . '\1' . $CLOSE,
                    $DEL . '\1' . $CLOSE),
                $sections[0]);
            $c = count($sections);
            for ($i = 1; $c > $i; ++$i) {
                $hits = array();
                if (preg_match('|^\n(\*{5,})|',
                    $sections[$i], $hits)) {
                    // the hunk separator itself
                    $sections[$i] = $HEAD . $hits[1] . $CLOSE;
                } else if (preg_match('|^\n(\x2A{3}\s[^\n]*)(.*)|s',
                    $sections[$i], $hits)) {
                    // $hits[1] == "*** from,to ****" head of OLD code
                    // $hits[2] == everything following that line
                    $parts = preg_split('|\n(\-{3}\s[^\n]*)|',
                        $hits[2], -1, PREG_SPLIT_DELIM_CAPTURE);
                    // $parts[0] == OLD code
                    $parts[0] = preg_replace('|\n([!\-][^\n]*)|',
                        $DEL . '\1' . $CLOSE, $parts[0]);
                    // $parts[1] == head of NEW code
                    $parts[1] = $ADD . $parts[1] . $CLOSE;
                    // $parts[2] == NEW code
                    $parts[2] = preg_replace(
                        array('|\n([!\x2B][^\n]*)|',
                            '|\n(\x2A{3}[^\n]*)|'),
                        array($ADD . '\1' . $CLOSE,
                            $DEL . '\1' . $CLOSE),
                        $parts[2]);
                    if (isset($parts[3])) {
                        // TRUE when handling multi-file patches
                        $parts[3] = preg_replace('|^(\x2D{3}[^\n]*)|',
                            $ADD . '\1' . $CLOSE, $parts[3]);
                    } // if
                    $sections[$i] = $DEL . $hits[1] . $CLOSE
                        . implode('', $parts);
                } // if
                // ELSE: leave $sections[$i] as is
            } // for
            $aDoc .= implode('', $sections);
            return TRUE;
        case 'n': // RCS output
            // Only added lines are there so we highlight just the
            // diff indicators while leaving the text alone.
            $aDoc .= preg_replace(
                array($DiffHead . ')#',
                    '|\n(d[0-9]+\s+[0-9]+)|',
                    '|\n(a[0-9]+\s+[0-9]+)|'),
                array($HEAD . '\1' . $CLOSE,
                    $DEL . '\1' . $CLOSE,
                    $ADD . '\1' . $CLOSE),
                $aText);
            return TRUE;
        case 's': // simple output
            // The text was already run through the entity replacement,
            // hence "<" and ">" show up as "&#60;" and "&#62;" here.
            $aDoc .= preg_replace(
                array($DiffHead
                    . '|((?:[0-9a-z]+(?:,[0-9a-z]+)*)(?:[^\n]*)))#',
                    '|\n(\x26#60;[^\n]*)|',
                    '|\n(\x26#62;[^\n]*)|'),
                array($HEAD . '\1' . $CLOSE,
                    $DEL . '\1' . $CLOSE,
                    $ADD . '\1' . $CLOSE),
                $aText);
            return TRUE;
        default: // unknown diff format
            $aDoc .= $aText; // just append any unrecognized text
            return TRUE;
    } // switch
} // _addDiff()
186
187	/**
188	* Add the lines of the given <tt>$aList</tt> to the specified
189	* <tt>$aDoc</tt> beginning with the given <tt>$aStart</tt> linenumber.
190	*
191	* @param $aList Array [IN] the list of lines as prepared by
192	* <tt>render()</tt>, [OUT] <tt>FALSE</tt>.
193	* @param $aStart Integer The first linenumber to use.
194	* @param $aDoc String Reference to the current renderer's
195	* <tt>doc</tt> property.
196	* @private
197	* @see render()
198	*/
function _addLines(&$aList, $aStart, &$aDoc) {
    // Appends one '<span class="lno">NUMBER:</span> TEXT' row per entry
    // of $aList to $aDoc, numbering from $aStart, and finally replaces
    // $aList by FALSE.
    //
    // Since we're dealing with monospaced fonts here the width of each
    // character (space, NBSP, digit) is the same. Hence the length of
    // a digits string gives us its width i.e. the number of digits.
    $i = $aStart + count($aList); // greatest line number
    $g = strlen("$i"); // width of greatest number
    // "foreach" replaces the former "each()" loop: each() was removed
    // in PHP 8. The per-entry unset() is gone as well since the whole
    // list is released below anyway.
    foreach ($aList as $l) {
        // NOTE(review): the ' ' compared against may originally have
        // been an NBSP/entity placeholder for "empty line" - confirm.
        $aDoc .= '<span class="lno">'
            . $this->_lead[$g - strlen("$aStart")]
            . "$aStart:</span>" . ((($l) && (' ' != $l))
                ? " $l\n"
                : "\n");
        ++$aStart; // increment line number
    } // foreach
    $aList = FALSE; // release memory
} // _addLines()
216
217	/**
218	* Internal convenience method to replace HTML special characters.
219	*
220	* @param $aString String [IN] The text to handle;
221	* [OUT] the modified text (i.e. the method's result).
222	* @return String The string with HTML special chars replaced.
223	* @private
224	* @since created 05-Feb-2007
225	*/
function &_entities(&$aString) {
    // Replaces the HTML special characters "&", "<" and ">" in
    // $aString - in place - by numeric character references and
    // returns (a reference to) the modified string.
    // "&" has to come first so the ampersands introduced by the other
    // two replacements are not escaped again. The numeric form is what
    // the "simple" diff patterns of _addDiff() look for ("&#60;").
    $aString = str_replace(array('&', '<', '>'),
        array('&#38;', '&#60;', '&#62;'), $aString);
    return $aString;
} // _entities()
231
232	/**
233	* Try to fix some markup error of the GeSHi SHELL highlighting.
234	*
235	* <p>
236	* The GeSHi highlighting for type "sh" (i.e. "bash") is, well,
237	* seriously flawed (at least up to version 1.0.7.20 i.e. 2007-07-01).
238	* Especially the handling of comments and embedded strings as well as
239	* keywords is plain wrong.
240	* </p><p>
241	* This internal helper method tries to solve some minor problems by
242	* removing highlight markup embedded in comment markup.
243	* This is, however, by no means a final resolution: GeSHi obviously
244	* keeps a kind of internal state resulting in highlighting markup
245	* spanning (i.e. repeated on) several lines.
246	* Which - if that state is wrong - causes great damage not by
247	* corrupting the data but by confusing the reader with wrong markup.
248	* The easiest way to trigger such a line-spanning confusion is to use
249	* solitary double quotes or single quotes (apostrophes) in a comment
250	* line ...
251	* </p>
252	* @param $aMarkup String [IN] The highlight markup as returned by GeSHi;
253	* [OUT] <tt>FALSE</tt>.
254	* @param $aDoc String Reference to the current renderer's
255	* <tt>doc</tt> property.
256	* @private
257	* @since created 04-Aug-2007
258	* @see render()
259	*/
function _fixGeSHi_Bash(&$aMarkup, &$aDoc) {
    // Appends the GeSHi markup $aMarkup to $aDoc - cleaned line by line
    // unless GESHI_VERSION is newer than 1.0.7.20 - and sets $aMarkup
    // to FALSE afterwards.
    $hits = array();
    // The dotted version is folded into a zero padded digit string
    // ("1.0.7.20" => "010007020") so one comparison is sufficient:
    if (defined('GESHI_VERSION')
        && preg_match('|(\d+)\.(\d+)\.(\d+)\.(\d+)|', GESHI_VERSION, $hits)
        && ($hits = sprintf('%02u%02u%02u%03u',
            $hits[1] * 1, $hits[2] * 1, $hits[3] * 1, $hits[4] * 1))
        && ('010007020' < $hits)) {
        // GeSHi v1.0.7.21 has the comments bug fixed
        $aDoc .= $aMarkup;
        $aMarkup = FALSE; // release memory
        return;
    } // if
    $lines = explode("\n", $aMarkup);
    $aMarkup = FALSE; // release memory
    // "foreach" replaces the former "each()" loop (removed in PHP 8);
    // it walks a snapshot, so assigning to $lines[$i] is safe.
    foreach ($lines as $i => $l) {
        $hits = array();
        // GeSHi "bash" module marks up comments with CSS class "re3":
        // $hits[1] = text up to and including the comment's open tag
        // $hits[2] = text in front of that tag
        // $hits[3] = rest of the line
        if (preg_match('|^((.*)<span class="re3">)(.*)$|i', $l, $hits)) {
            if ('#!/bin/' == substr($hits[3], 0, 7)) {
                // shebang line: drop the comment markup altogether
                $lines[$i] = $hits[2] . strip_tags($hits[3]);
            } else {
                // keep the comment SPAN but nothing nested within
                $lines[$i] = $hits[1] . strip_tags($hits[3]) . '</span>';
            } // if
        } else if (! preg_match('|^\s*<span|i', $l)) {
            // If a line doesn't start with a highlighted keyword
            // all tags are removed since they're most probably
            // "leftovers" from the GeSHI string/comment bug.
            $lines[$i] = strip_tags($l);
        } // if
    } // foreach
    $aDoc .= implode("\n", $lines);
} // _fixGeSHi_Bash()
292
293	/**
294	* Add markup to load JavaScript file with older DokuWiki versions.
295	*
296	* @param $aRenderer Object The renderer used.
297	* @private
298	* @since created 19-Feb-2007
299	* @see render()
300	*/
function _fixJS(&$aRenderer) {
    // Prepends inline CSS and/or a SCRIPT tag for the plugin's
    // "style.css"/"script.js" files to the renderer's document - once
    // per instance and only if "lib/exe/js.php" does not exist.

    //XXX This test will break if the DokuWiki file gets renamed:
    if (@file_exists(DOKU_INC . 'lib/exe/js.php')) {
        // Assuming a fairly recent DokuWiki installation
        // handling the plugin files on its own there's
        // nothing to do here ...
        return;
    } // if
    if ($this->_JSmarkup) {
        // Markup already added (or not needed)
        return;
    } // if
    $localdir = realpath(dirname(__FILE__)) . '/';
    $webdir = DOKU_BASE . 'lib/plugins/code/';
    $css = '';
    if (file_exists($localdir . 'style.css')) {
        // The file is include()d with output buffering active, so any
        // PHP code inside "style.css" gets executed.
        ob_start();
        @include($localdir . 'style.css');
        // Remove whitespace from CSS and expand IMG paths:
        //  1. comments (plus surrounding whitespace) => one space
        //  2. whitespace around CSS punctuation      => removed
        //  3. relative "url(" targets                => prefixed by $webdir
        //  4./5. leading/trailing whitespace         => removed
        // (patterns without a replacement entry are replaced by "")
        if ($css = preg_replace(
            array('|\s*/\x2A.*?\x2A/\s*|s', '|\s*([:;\{\},+!])\s*|',
                '|(?:url\x28\s*)([^/])|', '|^\s*|', '|\s*$|'),
            array(' ', '\1', 'url(' . $webdir . '\1'),
            ob_get_contents())) {
            $css = '<style type="text/css">' . $css . '</style>';
        } // if
        ob_end_clean();
    } // if
    $js = (file_exists($localdir . 'script.js'))
        ? '<script type="text/javascript" src="'
            . $webdir . 'script.js"></script>'
        : '';
    if ($this->_JSmarkup = $css . $js) {
        // Drop empty paragraphs while putting our markup in front:
        $aRenderer->doc = $this->_JSmarkup
            . preg_replace('|\s*<p>\s*</p>\s*|', '', $aRenderer->doc);
    //ELSE: Neither CSS nor JS files found.
    } // if
    // Set member field to skip tests with next call:
    $this->_JSmarkup = TRUE;
} // _fixJS()
341
342	/**
343	* RegEx callback to markup spaces in ODT mode.
344	*
345	* @param $aList Array A list of RegEx matches.
346	* @private
347	* @static
348	* @since created 07-Jun-2008
349	* @see render()
350	*/
function _preserveSpaces($aList) {
    // RegEx callback: turns the run of characters captured in
    // $aList[1] into an ODT "<text:s>" element carrying the run's
    // length; an empty capture yields a single blank.
    $count = strlen($aList[1]);
    if (! $count) {
        return ' ';
    } // if
    return '<text:s text:c="' . $count . '"/>';
} // _preserveSpaces()
356
357	/**
358	* Add the lines of the given <tt>$aText</tt> to the specified
359	* <tt>$aDoc</tt> beginning with the given <tt>$aStart</tt> linenumber.
360	*
361	* @param $aText String [IN] the text lines as prepared by
362	* <tt>handle()</tt>, [OUT] <tt>FALSE</tt>.
363	* @param $aStart Integer The first linenumber to use;
364	* if <tt>0</tt> (zero) no linenumbers are used.
365	* @param $aDoc String Reference to the current renderer's
366	* <tt>doc</tt> property.
367	* @param $aClass String The CSS class name for the <tt>PRE</tt> tag.
368	* @param $addTags Boolean Used in "ODT" mode to suppress tagging
369	* the line numbers.
370	* @private
371	* @since created 03-Feb-2007
372	* @see render()
373	*/
function _rawMarkup(&$aText, $aStart, &$aDoc, $aClass, $addTags = TRUE) {
    // Appends $aText to $aDoc, optionally wrapped in a PRE element of
    // CSS class $aClass and - if $aStart is non-zero - with every line
    // numbered by _addLines(). $aText is FALSE afterwards.
    $open = ($addTags) ? '<pre class="' . $aClass . '">' . "\n" : '';
    $close = ($addTags) ? '</pre>' : '';

    $aDoc .= $open;
    if (! $aStart) {
        // no line numbers requested: take the text as it is
        $aDoc .= $aText;
    } else {
        // number the lines one by one
        $aText = explode("\n", $aText);
        $this->_addLines($aText, $aStart, $aDoc);
    } // if
    $aDoc .= $close;
    $aText = FALSE; // release memory
} // _rawMarkup()
391
392	/**
393	* RegEx callback to replace SPAN tags in ODT mode.
394	*
395	* @param $aList Array A list of RegEx matches.
396	* @private
397	* @static
398	* @since created 07-Jun-2008
399	* @see render()
400	*/
function _replaceSpan($aList) {
    // RegEx callback: builds the ODT "<text:span>" open tag for the
    // class name captured in $aList[3]; underscores in that name are
    // encoded as "_5f_". Without a class name a bare tag is returned.
    if (! $aList[3]) {
        return '<text:span>';
    } // if
    $style = str_replace('_', '_5f_', $aList[3]);
    return '<text:span text:style-name="Code_5f_' . $style . '">';
} // _replaceSpan()
407
408	//@}
409	/**
410	* @publicsection
411	*/
412	//@{
413
414	/**
415	* Tell the parser whether the plugin accepts syntax mode
416	* <tt>$aMode</tt> within its own markup.
417	*
418	* @param $aMode String The requested syntaxmode.
419	* @return Boolean <tt>FALSE</tt> (no nested markup allowed).
420	* @public
421	* @see getAllowedTypes()
422	*/
function accepts($aMode) {
    // Whatever mode is asked for: nothing may be nested inside the
    // plugin's own markup.
    $nestingAllowed = FALSE;
    return $nestingAllowed;
} // accepts()
426
427	/**
428	* Connect lookup pattern to lexer.
429	*
430	* @param $aMode String The desired rendermode.
431	* @public
432	* @see render()
433	*/
function connectTo($aMode) {
    // Registers the plugin's entry pattern for mode $aMode.
    // Only the literal "<code" is consumed; the look-ahead demands the
    // rest of the open tag, an EoL and - somewhere below - a closing
    // tag at the very start of a line to minimize the chance of
    // false matches:
    $this->Lexer->addEntryPattern(
        '\x3Ccode(?=[^>]*\x3E\r?\n.*\n\x3C\x2Fcode\x3E)',
        $aMode, 'plugin_code');
} // connectTo()
440
441	/**
442	* Get an array of mode types that may be nested within the
443	* plugin's own markup.
444	*
445	* @return Array Allowed nested types (none).
446	* @public
447	* @see accepts()
448	* @static
449	*/
function getAllowedTypes() {
    // No mode type at all may be nested: hand back an empty list.
    $noTypes = array();
    return $noTypes;
} // getAllowedTypes()
453
454	/**
455	* Get an associative array with plugin info.
456	*
457	* <p>
458	* The returned array holds the following fields:
459	* <dl>
460	* <dt>author</dt><dd>Author of the plugin</dd>
461	* <dt>email</dt><dd>Email address to contact the author</dd>
462	* <dt>date</dt><dd>Last modified date of the plugin in
463	* <tt>YYYY-MM-DD</tt> format</dd>
464	* <dt>name</dt><dd>Name of the plugin</dd>
465	* <dt>desc</dt><dd>Short description of the plugin (Text only)</dd>
466	* <dt>url</dt><dd>Website with more information on the plugin
467	* (eg. syntax description)</dd>
468	* </dl>
469	* @return Array Information about this plugin class.
470	* @public
471	* @static
472	*/
function getInfo() {
    // Returns the plugin's meta data (author, email, date, name, desc,
    // url) as an associative array.
    // The tag name is spliced in from a variable so the description
    // text never contains a literal CODE tag:
    $c = 'code'; // hack to hide "desc" field from GeShi
    return array(
        'author' => 'Matthias Watermann',
        'email' => '[email protected]',
        'date' => '2008-07-22',
        'name' => 'Code Syntax Plugin',
        // plain "|" separators (stray backslashes removed):
        'desc' => 'Syntax highlighting with line numbering <'
            . $c . ' lang 1 |[fh] text |[hs]> ... </' . $c . '>',
        'url' => 'http://wiki.splitbrain.org/plugin:code2');
} // getInfo()
484
485	/**
486	* Define how this plugin is handled regarding paragraphs.
487	*
488	* <p>
489	* This method is important for correct XHTML nesting.
490	* It returns one of the following values:
491	* </p><dl>
492	* <dt>normal</dt><dd>The plugin can be used inside paragraphs.</dd>
493	* <dt>block</dt><dd>Open paragraphs need to be closed before
494	* plugin output.</dd>
495	* <dt>stack</dt><dd>Special case: Plugin wraps other paragraphs.</dd>
496	* </dl>
497	* @return String <tt>"block"</tt>.
498	* @public
499	* @static
500	*/
function getPType() {
    // Open paragraphs have to be closed before the plugin's output.
    $paragraphType = 'block';
    return $paragraphType;
} // getPType()
504
505	/**
506	* Where to sort in?
507	*
508	* @return Integer <tt>194</tt> (below "Doku_Parser_Mode_code").
509	* @public
510	* @static
511	*/
function getSort() {
    // Sort in ahead of class "Doku_Parser_Mode_code" (which returns 200).
    $sortOrder = 194;
    return $sortOrder;
} // getSort()
516
517	/**
518	* Get the type of syntax this plugin defines.
519	*
520	* @return String <tt>"protected"</tt>.
521	* @public
522	* @static
523	*/
function getType() {
    // The syntax type this plugin announces to the parser.
    $syntaxType = 'protected';
    return $syntaxType;
} // getType()
527
528	/**
529	* Handler to prepare matched data for the rendering process.
530	*
531	* <p>
532	* The <tt>$aState</tt> parameter gives the type of pattern
533	* which triggered the call to this method:
534	* </p><dl>
535	* <dt>DOKU_LEXER_UNMATCHED</dt>
536	* <dd>ordinary text encountered within the plugin's syntax mode
537	* which doesn't match any pattern.</dd>
538	* </dl>
539	* @param $aMatch String The text matched by the patterns.
540	* @param $aState Integer The lexer state for the match.
541	* @param $aPos Integer The character position of the matched text.
542	* @param $aHandler Object Reference to the Doku_Handler object.
543	* @return Array Index <tt>[0]</tt> holds the current <tt>$aState</tt>,
544	* index <tt>[1]</tt> the embedded text to highlight,
545	* index <tt>[2]</tt> the language/dialect (or <tt>FALSE</tt>),
546	* index <tt>[3]</tt> the first line number (or <tt>0</tt>),
547	* index <tt>[4]</tt> the top title (or <tt>FALSE</tt>),
548	* index <tt>[5]</tt> the bottom title (or <tt>FALSE</tt>),
549	* index <tt>[6]</tt> hiding CSS flag (or <tt>""</tt>).
550	* @public
551	* @see render()
552	* @static
553	*/
function handle($aMatch, $aState, $aPos, &$aHandler) {
    // Splits the matched markup into the tag arguments and the text,
    // optionally fetches the text from an external URL, normalizes
    // whitespace and parses the arguments (language, first line number,
    // head/foot title, CSS flag) for render().
    if (DOKU_LEXER_UNMATCHED != $aState) {
        return array($aState); // nothing to do for "render()"
    } // if
    $aMatch = explode('>', $aMatch, 2);
    // $aMatch[0] : lang etc.
    // $aMatch[1] : text to highlight
    $n = explode('>', trim($aMatch[1]));
    $l = 'extern'; // external resource requested?
    // Check whether there's an external file to fetch:
    if ($l == $n[0]) {
        // The URL part may be missing altogether ("extern" w/o ">"):
        $n[1] = (isset($n[1])) ? trim($n[1]) : '';
        if ($n[1]) {
            $url = @parse_url($n[1]);
            // parse_url() omits the "scheme" key for relative URLs:
            $n[0] = ((is_array($url)) && (isset($url['scheme'])))
                ? $url['scheme']
                : '';
            if ($n[0]) {
                // Don't accept unsecure schemes like
                // "file", "javascript", "mailto" etc.
                // NOTE(review): the URL comes straight from the wiki
                // text, i.e. the server fetches whatever an editor
                // points it to.
                switch ($n[0]) {
                    case 'ftp':
                    case 'http':
                    case 'https':
                        //XXX This might fail due to global PHP setup:
                        if ($handle = @fopen($n[1], 'rb')) {
                            $aMatch[1] = '';
                            while (! @feof($handle)) {
                                //XXX This might fail due to
                                // memory constraints:
                                $chunk = @fread($handle, 0x8000);
                                if (FALSE === $chunk) {
                                    // read error: feof() might never
                                    // become TRUE, so leave the loop
                                    break;
                                } // if
                                $aMatch[1] .= $chunk;
                            } // while
                            @fclose($handle);
                        } else {
                            $aMatch = array($l,
                                'Failed to retrieve: ' . $n[1]);
                        } // if
                        break;
                    default:
                        $aMatch = array($l,
                            'Unsupported URL scheme: ' . $n[0]);
                        break;
                } // switch
            } else {
                $aMatch = array($l, 'Invalid URL: ' . $n[1]);
            } // if
        } else {
            $aMatch = array($l, 'Missing URL: ' . $aMatch[1]);
        } // if
    } // if
    // Strip leading/trailing/EoL whitespace,
    // replace TABs by four spaces, "&nbsp;" by NBSP:
    // NOTE(review): both replacement strings were collapsed to a single
    // blank in the listing this was recovered from. The four spaces
    // follow the comment above; the "&nbsp;" => raw UTF-8 NBSP pair is
    // an assumption to be confirmed against a pristine copy.
    $aMatch[1] = preg_replace(
        array('#(?>\r\n)|\r#', '|^\n\n*|',
            '|[\t ]+\n|', '|\s*\n$|'),
        array("\n", '', "\n", ''),
        str_replace('&nbsp;', "\xC2\xA0",
            str_replace("\t", '    ', $aMatch[1])));

    $css = ''; // default: no initial CSS content hiding
    $l = FALSE; // default: no language
    $n = 0; // default: no line numbers
    $ht = $ft = FALSE; // default: no (head/foot) title
    $hits = array(); // RegEx matches from the tag attributes
    /*
    The free form of the RegEx to parse the arguments here is:
    /^
        # "eat" leading whitespace:
        \s*
        (?=\S)  # Look ahead: do not match empty lines. This is
                # needed since all other expressions are optional.
        # Make sure, nothing is given away once it matched:
        (?>
            # We need a separate branch for "diff" because it may be
            # followed by a _letter_ (not digit) indicating the format.
            (?>
                (diff)
                #    match 1
                (?>\s+([cnrsu]?))?
                #    match 2
            )
        |
            # Branch for standard language highlighting
            (?>
                # extract language:
                ([a-z][^\x7C\s]*)
                #    match 3
                (?>
                    # extract starting line number:
                    \s+(\d\d*)
                    #    match 4
                )?
            )
        |
            # Branch for line numbering only
            (\d\d*)
            #    match 5
        |
            \s*    # dummy needed to match "title only" markup (below)
        )
        # "eat" anything else up to the text delimiter:
        [^\x7C]*
        (?>
            \x7C
            # extract the position flag:
            ([bfht])?\s*
            #    match 6
            # extract the header,footer line:
            ([^\x7C]+)
            #    match 7
            (?>
                # see whether there is a class flag:
                \x7C\s*
                (h|s)?.*
                #    match 8
            )?
        )?
    # Anchored to make sure everything gets matched:
    $/xiu

    Since compiling and applying a free form RegEx slows down the
    overall matching process I've folded it all to a standard RegEx.
    Benchmarking during development gave me
    free form: 20480 loops, 552960 hits, 102400 fails, 12.994689 secs
    standard:  20480 loops, 552960 hits, 102400 fails, 8.357169 secs
    */
    if (preg_match('/^\s*(?=\S)(?>(?>(diff)(?>\s+([cnrsu]?))?)|'
        . '(?>([a-z][^\x7C\s]*)(?>\s+(\d\d*))?)|(\d\d*)|\s*)[^\x7C]*'
        . '(?>\x7C([bfht])?\s*([^\x7C]+)(?>\x7C\s*(h|s)?.*)?)?$/iu',
        $aMatch[0], $hits)) {
        unset($hits[0]); // free mem
        // $hits[1] = "diff"
        // $hits[2] = type (of [1])
        // $hits[3] = LANG
        // $hits[4] = NUM (of [3])
        // $hits[5] = NUM (alone)
        // $hits[6] = Top/Bottom flag (of [7])
        // $hits[7] = TITLE
        // $hits[8] = s/h CSS flag
        // String offsets below use "[0]": the "{0}" form was removed
        // in PHP 8.
        if (isset($hits[3]) && ($hits[3])) {
            $l = strtolower($hits[3]);
            if (isset($hits[4]) && ($hits[4])) {
                $n = (int)$hits[4];
            } // if
            $hits[3] = $hits[4] = FALSE;
        } else if (isset($hits[1]) && ($hits[1])) {
            $l = strtolower($hits[1]);
            // The appended "?" guarantees a first character even if
            // no format letter was given:
            $hits[2] = (isset($hits[2]))
                ? strtolower($hits[2]) . '?'
                : '?';
            $n = $hits[2][0];
            $hits[1] = $hits[2] = FALSE;
        } else if (isset($hits[5]) && ($hits[5])) {
            $n = (int)$hits[5];
        } // if
        if (isset($hits[7]) && ($hits[7])) {
            // The appended "f" makes "footer" the default position:
            $hits[6] = (isset($hits[6]))
                ? strtolower($hits[6]) . 'f'
                : 'f';
            switch ($hits[6][0]) {
                case 'h':
                case 't':
                    $ht = trim($hits[7]);
                    break;
                default:
                    $ft = trim($hits[7]);
                    break;
            } // switch
            if (isset($hits[8])) {
                // The appended "s" makes "show" the default:
                $hits[8] = strtolower($hits[8]) . 's';
                if ('h' == $hits[8][0]) {
                    // This class is handled by JavaScript (there
                    // _must_not_ be any CSS rules for this):
                    $css = ' HideOnInit';
                } // if
            } // if
            $hits[6] = $hits[7] = $hits[8] = FALSE;
        } // if
        // ELSE: no arguments given to CODE tag
    } // if
    switch ($l) {
        case 'console':
            // nothing additional to setup here
            break;
        case 'diff':
            // substr() instead of an offset: the text may be empty.
            if ("\n" != substr($aMatch[1], 0, 1)) {
                // A leading LF is needed to recognize and handle
                // the very first line with all the REs used.
                $aMatch[1] = "\n" . $aMatch[1];
            } // if
            switch ($n) {
                case 'u': // DIFF cmdline switch for "unified"
                case 'c': // DIFF cmdline switch for "context"
                case 'n': // DIFF cmdline switch for "RCS"
                case 's':
                    // We believe the format hint ...
                    // (or should we be more suspicious?)
                    break;
                case 'r': // Mnemonic for "RCS"
                    $n = 'n';
                    break;
                default: // try to figure out the format actually used
                    if (preg_match(
                        '|\n(?:\x2A{5,}\n\x2A{3}\s[1-9]+.*?\x2A{4}\n.+?)+|s',
                        $aMatch[1])) {
                        $n = 'c';
                    } else if (preg_match(
                        '|\n@@\s\-[0-9]+,[0-9]+[ \+,0-9]+?@@\n.+\n|s',
                        $aMatch[1])) {
                        $n = 'u';
                    } else if (preg_match(
                        '|\n[ad][0-9]+\s+[0-9]+\r?\n|', $aMatch[1])) {
                        // We've to check this _before_ "simple" since
                        // the REs are quite similar (but this one is
                        // slightly more specific).
                        $n = 'n';
                    } else if (preg_match(
                        '|\n(?:[0-9a-z]+(?:,[0-9a-z]+)*)(?:[^\n]*\n.*?)+|',
                        $aMatch[1])) {
                        $n = 's';
                    } else {
                        $n = '?';
                    } // if
                    break;
            } // switch
            break;
        case 'htm': // convenience shortcut
        case 'html': // dito
            $l = 'html4strict';
            break;
        case 'js': // shortcut
            $l = 'javascript';
            break;
        case 'sh': // shortcut
            $l = 'bash';
            break;
        default:
            if (! $l) {
                // no language: simple PRE markup will get generated
                $l = FALSE;
            } // if
            break;
    } // switch
    return array(DOKU_LEXER_UNMATCHED,
        $aMatch[1], $l, $n, $ht, $ft, $css);
} // handle()
796
797	/**
798	* Add exit pattern to lexer.
799	*
800	* @public
801	*/
function postConnect() {
    // The closing tag only counts right behind a LF (look-behind),
    // which minimizes the chance of false matches.
    $exitPattern = '(?<=\n)\x3C\x2Fcode\x3E';
    $this->Lexer->addExitPattern($exitPattern, 'plugin_code');
} // postConnect()
807
808	/**
809	* Handle the actual output (markup) creation.
810	*
811	* <p>
812	* The method checks the given <tt>$aFormat</tt> to decide how to
813	* handle the specified <tt>$aData</tt>.
814	* The standard case (i.e. <tt>"xhtml"</tt>) is handled completely
815	* by this implementation, preparing linenumbers and/or head/foot
816	* lines are requested.
817	* For the <tt>"odt"</tt> format all plugin features (incl. linenumbers
818	* and header/footer lines) are supported by generating the appropriate
819	* ODT/XML markup.
820	* All other formats are passed back to the given <tt>$aRenderer</tt>
821	* instance for further handling.
822	* </p><p>
823	* <tt>$aRenderer</tt> contains a reference to the renderer object
824	* which is currently in charge of the rendering.
825	* The contents of the given <tt>$aData</tt> is the return value
826	* of the <tt>handle()</tt> method.
827	* </p>
828	* @param $aFormat String The output format to generate.
829	* @param $aRenderer Object A reference to the renderer object.
830	* @param $aData Array The data created/returned by the
831	* <tt>handle()</tt> method.
832	* @return Boolean <tt>TRUE</tt>.
833	* @public
834	* @see handle()
835	*/
836	function render($aFormat, &$aRenderer, &$aData) {
837	if (DOKU_LEXER_UNMATCHED != $aData[0]) {
838	return TRUE;
839	} // if
840	if ('xhtml' == $aFormat) {
841	if ($tdiv = (($aData[4]) \|\| ($aData[5]))) {
842	$this->_fixJS($aRenderer); // check for old DokuWiki versions
843	$aRenderer->doc .= '<div class="code">';
844	if ($aData[4]) {
845	//XXX Note that "_headerToLink()" is supposed to be a
846	// _private_ method of the renderer class; so this code
847	// will fail once DokuWiki is rewritten in PHP5 which
848	// implements encapsulation of private methods and
849	// properties:
850	$aRenderer->doc .= '<p class="codehead' . $aData[6]
851	. '"><a name="' . $aRenderer->_headerToLink($aData[4])
852	. '">' . $this->_entities($aData[4]) . '</a></p>';
853	$aData[4] = $aData[6] = FALSE; // free mem
854	} // if
855	} // if
856	if ($aData[2]) { // lang was given
857	if ('console' == $aData[2]) {
858	$this->_rawMarkup($this->_entities($aData[1]),
859	$aData[3], $aRenderer->doc, $aData[2]);
860	} else if ('diff' == $aData[2]) {
861	$this->_entities($aData[1]);
862	$aRenderer->doc .= '<pre class="code diff">';
863	$this->_addDiff($aData[1], $aData[3], $aRenderer->doc);
864	$aRenderer->doc .= '</pre>';
865	} else {
866	$isSH = ('bash' == $aData[2]);
867	$geshi = new GeSHi($aData[1], $aData[2], GESHI_LANG_ROOT);
868	if ($geshi->error()) {
869	// Language not supported by "GeSHi"
870	$geshi = NULL; // release memory
871	$this->_rawMarkup($this->_entities($aData[1]),
872	$aData[3], $aRenderer->doc, 'code');
873	} else {
874	$aData[1] = FALSE; // free mem
875	$geshi->enable_classes();
876	$geshi->set_encoding('utf-8');
877	$geshi->set_header_type(GESHI_HEADER_PRE);
878	$geshi->set_overall_class('code ' . $aData[2]);
879	global $conf;
880	if ($conf['target']['extern']) {
881	$geshi->set_link_target($conf['target']['extern']);
882	} // if
883	if ($aData[3]) { // line numbers requested
884	// Separate PRE tag from parsed data:
885	$aData[1] = explode('>', $geshi->parse_code(), 2);
886	// [1][0] = leading "<pre"
887	// [1][1] = remaining markup up to trailing "</pre"
888	$geshi = NULL; // release memory
889
890	// Add the open tag to the document:
891	$aRenderer->doc .= $aData[1][0] . '>';
892
893	// Separate trailing PRE tag:
894	$aData[1] = explode('</pre>', $aData[1][1], 2);
895	// [1][0] = GeSHi markup
896	// [1][1] = trailing "</pre"
897
898	if ($isSH) {
899	$aData[1][1] = '';
900	$this->_fixGeSHi_Bash($aData[1][0],
901	$aData[1][1]);
902	} else {
903	// Set reference to fixed markup to sync with
904	// the "bash" execution path (above):
905	$aData[1][1] =& $aData[1][0];
906	} // if
907
908	// Split the parsed data into a list of lines:
909	$aData[2] = explode("\n", $aData[1][1]);
910	$aData[1] = FALSE; // free mem
911
912	// Add the numbered lines to the document:
913	$this->_addLines($aData[2], $aData[3],
914	$aRenderer->doc);
915
916	// Close the preformatted section markup:
917	$aRenderer->doc .= '</pre>';
918	} else { // w/o line numbering
919	if ($isSH) {
920	// Separate trailing PRE tag which
921	// sometimes is "forgotten" by GeSHi:
922	$aData[2] = explode('</pre>',
923	$geshi->parse_code(), 2);
924	// [1][0] = GeSHi markup
925	// [1][1] = trailing "</pre" (if any)
926	$this->_fixGeSHi_Bash($aData[2][0],
927	$aRenderer->doc);
928	$aRenderer->doc .= '</pre>';
929	} else {
930	$aRenderer->doc .= $geshi->parse_code();
931	} // if
932	$geshi = NULL; // release memory
933	} // if
934	} // if
935	} // if
936	} else {
937	$this->_rawMarkup($this->_entities($aData[1]),
938	$aData[3], $aRenderer->doc, 'code');
939	} // if
940	if ($tdiv) {
941	if ($aData[5]) {
942	//XXX See "_headerToLink()" note above.
943	$aRenderer->doc .= '<p class="codefoot'
944	. $aData[6] . '"><a name="'
945	. $aRenderer->_headerToLink($aData[5]) . '">'
946	. $this->_entities($aData[5]) . '</a></p>';
947	} // if
948	$aRenderer->doc .= '</div>';
949	} // if
950	} else if ('odt' == $aFormat) {
951	$inLI = array();
952	if (preg_match('\|^<text:p text:style-name="[^"]+">\s</text:p>\s(.*)$\|si',
953	$aRenderer->doc, $inLI)) {
954	// remove leading whitespace
955	$aRenderer->doc = $inLI[1];
956	} // if
957	// The "renderer_plugin_odt" doesn't clean (close)
958	// its own tags before calling this plugin.
959	// To work around that bug we have to check some
960	// private properties of the renderer instance.
961	$inLI = FALSE;
962	if (is_a($aRenderer, 'renderer_plugin_odt')) {
963	if ($inLI = ($aRenderer->in_list_item)) {
964	// If we're in a list item, we've to close the paragraph:
965	$aRenderer->doc .= '</text:p>';
966	} // if
967	if ($aRenderer->in_paragraph) {
968	$aRenderer->doc .= '</text:p>';
969	$aRenderer->in_paragraph = FALSE;
970	} // if
971	} // if
972
973	// Init (open) our text section:
974	$aRenderer->doc .= "\n"
975	. '<text:section text:style-name="Code_5f_Section" text:name="CodeSnippet'
976	. ++$this->_odtSect . '">';
977
978	if ($tdiv = (($aData[4]) \|\| ($aData[5]))) {
979	// Check whether we need a top caption ("header"):
980	if ($aData[4]) {
981	$aRenderer->doc .=
982	'<text:p text:style-name="Code_5f_Title">'
983	. "<text:line-break/>\n"
984	. $aData[4] . "</text:p>\n";
985	$aData[4] = $aData[6] = FALSE; // free mem
986	} // if
987	} // if
988	// The following code resembles the "xhtml" processing
989	// above except that we're not using "pre" tags here
990	// but ODT/XML markup.
991	$aData[0] = ''; // tmp. container of processed data
992	if ($aData[2]) { // lang was given
993	if ('console' == $aData[2]) {
994	$this->_rawMarkup($this->_entities($aData[1]),
995	$aData[3], $aData[0], $aData[2], FALSE);
996	} else if ('diff' == $aData[2]) {
997	$this->_addDiff($this->_entities($aData[1]),
998	$aData[3], $aData[0]);
999	} else {
1000	$isSH = ('bash' == $aData[2]);
1001	$geshi = new GeSHi($aData[1], $aData[2], GESHI_LANG_ROOT);
1002	if ($geshi->error()) {
1003	// Language not supported by "GeSHi"
1004	$geshi = NULL; // release memory
1005	$this->_rawMarkup($this->_entities($aData[1]),
1006	$aData[3], $aData[0], '', FALSE);
1007	} else {
1008	$aData[1] = FALSE; // free mem
1009	$geshi->enable_classes();
1010	$geshi->set_encoding('utf-8');
1011	$geshi->set_header_type(GESHI_HEADER_PRE);
1012	$geshi->set_overall_class('code ' . $aData[2]);
1013	global $conf;
1014	if ($conf['target']['extern']) {
1015	$geshi->set_link_target($conf['target']['extern']);
1016	} // if
1017	// Separate PRE tag from parsed data:
1018	$aData[1] = explode('>', $geshi->parse_code(), 2);
1019	// [1][0] = leading "<pre"
1020	// [1][1] = remaining markup up to trailing "</pre"
1021	$geshi = NULL; // release memory
1022
1023	// Separate trailing PRE tag:
1024	$aData[1] = explode('</pre>', $aData[1][1], 2);
1025	// [1][0] = GeSHi markup
1026	// [1][1] = trailing "</pre"
1027	$aData[1] = $aData[1][0];
1028
1029	if ($isSH) { // work around GeSHI bug
1030	$aData[2] = '';
1031	$this->_fixGeSHi_Bash($aData[1], $aData[2]);
1032	} else {
1033	$aData[2] = $aData[1];
1034	} // if
1035	$aData[1] = FALSE; // release memory
1036
1037	if ($aData[3]) { // line numbers requested
1038	// Split the parsed data into a list of lines:
1039	$aData[1] = explode("\n", $aData[2]);
1040	$aData[2] = FALSE; // release memory
1041
1042	// Add the numbered lines to the document:
1043	$this->_addLines($aData[1], $aData[3], $aData[0]);
1044	} else { // w/o line numbers
1045	$aData[0] = $aData[2];
1046	$aData[2] = FALSE; // release memory
1047	} // if
1048	} // if
1049	} // if
1050	} else {
1051	$this->_rawMarkup($this->_entities($aData[1]),
1052	$aData[3], $aData[0], '', FALSE);
1053	} // if
1054
1055	if ('console' == $aData[2]) {
1056	$aRenderer->doc .=
1057	'<text:p text:style-name="Code_5f_Console">';
1058	} else {
1059	$aRenderer->doc .=
1060	'<text:p text:style-name="Code_5f_Standard">';
1061	} // if
1062	// Replace the HTML "span" tags (for highlighting) by
1063	// the appropriate ODT/XML markup.
1064	// For unknown reasons we need an additional space
1065	// in front of the very first line.
1066	$aData[0] = '<text:s/>'
1067	. preg_replace_callback('\|(<span( class="([^"])"[^>])?>)\|',
1068	array('syntax_plugin_code', '_replaceSpan'),
1069	// OOo (v2.3) crashes on "&nbsp;"
1070	str_replace(' ', chr(194) . chr(160),
1071	str_replace('</span>', '</text:span>',
1072	strip_tags($aData[0], '<span>'))));
1073	// Now append our markup to the renderer's document;
1074	// TABs, LFs and SPACEs are replaced by their respective
1075	// ODT/XML equivalents:
1076	$aRenderer->doc .= preg_replace_callback('\|( {2,})\|',
1077	array('syntax_plugin_code', '_preserveSpaces'),
1078	str_replace("\n", "<text:line-break/>\n", $aData[0]));
1079	$aData[0] = FALSE; // release memory
1080
1081	// Check whether we need a bottom caption ("footer"):
1082	if ($tdiv && ($aData[5])) {
1083	$aRenderer->doc .=
1084	'</text:p><text:p text:style-name="Code_5f_Title">'
1085	. $aData[5];
1086	} // if
1087	// Close all our open tags:
1088	$aRenderer->doc .= "</text:p></text:section>\n";
1089
1090	if ($inLI) {
1091	// Workaround (see above): (re-)open a paragraph:
1092	$aRenderer->doc .= '<text:p>';
1093	} // if
1094	} else { // unsupported output format
1095	$aData[0] = $aData[4] = $aData[5] = FALSE; // avoid recursion
1096	// Pass anything else back to the renderer instance
1097	// (which will - hopefully - know how to handle it):
1098	$aRenderer->code($aData[1], $aData[2]);
1099	} // if
1100	$aData = array(FALSE); // don't process this text again
1101	return TRUE;
1102	} // render()
1103
1104	//@}
1105	} // class syntax_plugin_code
1106	} // if
1107	?>

Note: See TracBrowser for help on using the repository browser.

Download in other formats: