Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: documentation/trunk/packages/dokuwiki-2011-05-25a/lib/plugins/code/syntax.php@ 25027

Last change on this file since 25027 was 25027, checked in by jmt12, 12 years ago
Adding the packages directory, and within it a configured version of dokuwiki all ready to run
File size: 34.7 KB

Line
1	<?php
2	if (! class_exists('syntax_plugin_code')) {
3	if (! defined('DOKU_PLUGIN')) {
4	if (! defined('DOKU_INC')) {
5	define('DOKU_INC',
6	realpath(dirname(__FILE__) . '/../../') . '/');
7	} // if
8	define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');
9	} // if
10	// Include parent class:
11	require_once(DOKU_PLUGIN . 'syntax.php');
12	// We're dealing with "GeSHi" here, hence include it:
13	require_once(DOKU_INC . 'inc/geshi.php');
14
15	/**
16	* <tt>syntax_plugin_code.php </tt>- A PHP4 class that implements the
17	* <tt>DokuWiki</tt> plugin for <tt>highlighting</tt> code fragments.
18	*
19	* <p>
20	* Usage:<br>
21	* <tt><code [language startno \|[fh] text \|[hs]]>...</code></tt>
22	* </p><pre>
23	* Copyright (C) 2006, 2008 M.Watermann, D-10247 Berlin, FRG
24	* All rights reserved
25	* EMail : <[email protected]>
26	* </pre><div class="disclaimer">
27	* This program is free software; you can redistribute it and/or modify
28	* it under the terms of the GNU General Public License as published by
29	* the Free Software Foundation; either
30	* <a href="http://www.gnu.org/licenses/gpl.html">version 3</a> of the
31	* License, or (at your option) any later version.<br>
32	* This software is distributed in the hope that it will be useful,
33	* but WITHOUT ANY WARRANTY; without even the implied warranty of
34	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35	* General Public License for more details.
36	* </div>
37	* @author <a href="mailto:[email protected]">Matthias Watermann</a>
38	* @version <tt>$Id: syntax_plugin_code.php,v 1.29 2008/07/22 09:22:47 matthias Exp $</tt>
39	* @since created 24-Dec-2006
40	*/
41	class syntax_plugin_code extends DokuWiki_Syntax_Plugin {
42
43	/**
44	* @privatesection
45	*/
46	//@{
47
/**
 * State flag/markup holder for the CSS/JS fallback used with older
 * DokuWiki installations.
 *
 * <p>
 * Starts out as <tt>FALSE</tt>; <tt>_fixJS()</tt> sets it to
 * <tt>TRUE</tt> once it has run so that later calls return early.
 * </p>
 * @private
 * @see _fixJS()
 */
var $_JSmarkup = false;
55
/**
 * Indention "text" used by <tt>_addLines()</tt>.
 *
 * <p>
 * Entry <tt>k</tt> holds <tt>k</tt> blank characters and is put in front
 * of a line number that is <tt>k</tt> digits shorter than the widest one.
 * </p><p>
 * The blanks alternate between a plain space and a UTF-8 <em>NonBreakable
 * Space</em>. The NBSPs are written as explicit byte escapes
 * (<tt>\xC2\xA0</tt>) instead of raw characters so the table survives
 * editors and tools that re-encode the file.
 * NOTE(review): the alternating layout is reconstructed from a mangled
 * copy of this table - confirm against the upstream plugin.
 * </p>
 * @private
 * @see _addLines()
 */
var $_lead = array('',
    ' ',
    " \xC2\xA0",
    " \xC2\xA0 ",
    " \xC2\xA0 \xC2\xA0",
    " \xC2\xA0 \xC2\xA0 ",
    " \xC2\xA0 \xC2\xA0 \xC2\xA0",
    " \xC2\xA0 \xC2\xA0 \xC2\xA0 ");
67
68
/**
 * Running number of the code sections emitted during ODT export.
 *
 * <p>
 * <tt>render()</tt> pre-increments this counter to build the
 * <tt>text:name</tt> attribute ("CodeSnippet<em>N</em>") of each section.
 * </p>
 * @private
 * @see render()
 * @since created 08-Jun-2008
 */
var $_odtSect = 0;
77
/**
 * Append the given DIFF text to the document with highlight markup.
 *
 * <p>
 * Header lines, added lines and deleted lines are wrapped in
 * <tt>SPAN</tt> tags using the CSS classes <tt>diff_blockheader</tt>,
 * <tt>diff_addedline</tt> and <tt>diff_deletedline</tt>.
 * Every pattern expects the line it matches to be preceded by a LF,
 * hence the text has to start with a LF for the first line to be
 * recognised.
 * If a regular expression fails (PCRE error) the text is appended
 * without markup instead of being lost.
 * </p>
 * @param $aText String The DIFF text to markup (entities already
 * replaced by the caller).
 * @param $aFormat String The DIFF format used: "u" (unified),
 * "c" (context), "n" (RCS) or "s" (simple); any other value appends
 * the text unchanged.
 * @param $aDoc String Reference to the current renderer's
 * <tt>doc</tt> property.
 * @return Boolean <tt>TRUE</tt>.
 * @private
 * @see render()
 */
function _addDiff(&$aText, &$aFormat, &$aDoc) {
    // Since we're inside a PRE block we need the leading LFs:
    $ADD = "\n" . '<span class="diff_addedline">';
    $DEL = "\n" . '<span class="diff_deletedline">';
    $HEAD = "\n" . '<span class="diff_blockheader">';
    $CLOSE = '</span>';
    // Common headers for all formats;
    // the RegEx needs at least ")#" appended!
    $DiffHead = '#\n((?:diff\s[^\n]*)|(?:Index:\s[^\n]*)|(?:={60,})'
        . '|(?:RCS file:\s[^\n]*)|(?:retrieving revision [0-9][^\n]*)';
    switch ($aFormat) {
        case 'u':    // unified output
            $out = preg_replace(
                array($DiffHead . '|(?:@@[^\n]*))#',
                    '|\n(\+[^\n]*)|',
                    '|\n(\-[^\n]*)|'),
                array($HEAD . '\1' . $CLOSE,
                    $ADD . '\1' . $CLOSE,
                    $DEL . '\1' . $CLOSE),
                $aText);
            $aDoc .= (NULL === $out) ? $aText : $out;
            return TRUE;
        case 'c':    // context output
            $out = preg_replace($DiffHead . ')#',
                $HEAD . '\1' . $CLOSE, $aText);
            if (NULL === $out) {
                $aDoc .= $aText;
                return TRUE;
            }
            // Split at the "*****" hunk separators (kept as own entries):
            $sections = preg_split('|(\n\*{5,})|', $out,
                -1, PREG_SPLIT_DELIM_CAPTURE);
            // The part before the first hunk holds the file name lines:
            $sections[0] = preg_replace(
                array('|\n(\-{3}[^\n]*)|',
                    '|\n(\*{3}[^\n]*)|'),
                array($ADD . '\1' . $CLOSE,
                    $DEL . '\1' . $CLOSE),
                $sections[0]);
            $c = count($sections);
            for ($i = 1; $c > $i; ++$i) {
                $hits = array();
                if (preg_match('|^\n(\*{5,})|', $sections[$i], $hits)) {
                    // hunk separator
                    $sections[$i] = $HEAD . $hits[1] . $CLOSE;
                } else if (preg_match('|^\n(\x2A{3}\s[^\n]*)(.*)|s',
                        $sections[$i], $hits)) {
                    // $hits[1] == head of OLD code, $hits[2] == the rest
                    $parts = preg_split('|\n(\-{3}\s[^\n]*)|',
                        $hits[2], -1, PREG_SPLIT_DELIM_CAPTURE);
                    // $parts[0] == OLD code
                    $parts[0] = preg_replace('|\n([!\-][^\n]*)|',
                        $DEL . '\1' . $CLOSE, $parts[0]);
                    if (isset($parts[1])) {
                        // $parts[1] == head of NEW code
                        $parts[1] = $ADD . $parts[1] . $CLOSE;
                    }
                    if (isset($parts[2])) {
                        // $parts[2] == NEW code
                        $parts[2] = preg_replace(
                            array('|\n([!\x2B][^\n]*)|',
                                '|\n(\x2A{3}[^\n]*)|'),
                            array($ADD . '\1' . $CLOSE,
                                $DEL . '\1' . $CLOSE),
                            $parts[2]);
                    }
                    if (isset($parts[3])) {
                        // TRUE when handling multi-file patches
                        $parts[3] = preg_replace('|^(\x2D{3}[^\n]*)|',
                            $ADD . '\1' . $CLOSE, $parts[3]);
                    }
                    $sections[$i] = $DEL . $hits[1] . $CLOSE
                        . implode('', $parts);
                }
                // ELSE: leave $sections[$i] as is
            }
            $aDoc .= implode('', $sections);
            return TRUE;
        case 'n':    // RCS output
            // Only added lines are there so we highlight just the
            // diff indicators while leaving the text alone.
            $out = preg_replace(
                array($DiffHead . ')#',
                    '|\n(d[0-9]+\s+[0-9]+)|',
                    '|\n(a[0-9]+\s+[0-9]+)|'),
                array($HEAD . '\1' . $CLOSE,
                    $DEL . '\1' . $CLOSE,
                    $ADD . '\1' . $CLOSE),
                $aText);
            $aDoc .= (NULL === $out) ? $aText : $out;
            return TRUE;
        case 's':    // simple output
            // "\x26#60;" and "\x26#62;" are the numeric entities of
            // "<" and ">" as they appear in the entity-replaced text.
            $out = preg_replace(
                array($DiffHead
                    . '|((?:[0-9a-z]+(?:,[0-9a-z]+)*)(?:[^\n]*)))#',
                    '|\n(\x26#60;[^\n]*)|',
                    '|\n(\x26#62;[^\n]*)|'),
                array($HEAD . '\1' . $CLOSE,
                    $DEL . '\1' . $CLOSE,
                    $ADD . '\1' . $CLOSE),
                $aText);
            $aDoc .= (NULL === $out) ? $aText : $out;
            return TRUE;
        default:    // unknown diff format
            $aDoc .= $aText;    // just append any unrecognized text
            return TRUE;
    }
} // _addDiff()
186
/**
 * Add the lines of the given <tt>$aList</tt> to the specified
 * <tt>$aDoc</tt> beginning with the given <tt>$aStart</tt> linenumber.
 *
 * <p>
 * Each line is prefixed by <tt>&lt;span class="lno"&gt;N:&lt;/span&gt;</tt>.
 * Shorter numbers are padded on the left (see <tt>$_lead</tt>) so that
 * all numbers end in the same column of a monospaced font.
 * Blank lines get the number only.
 * </p>
 * @param $aList Array [IN] the list of lines as prepared by
 * <tt>render()</tt>, [OUT] <tt>FALSE</tt>.
 * @param $aStart Integer The first linenumber to use.
 * @param $aDoc String Reference to the current renderer's
 * <tt>doc</tt> property.
 * @private
 * @see render()
 */
function _addLines(&$aList, $aStart, &$aDoc) {
    // Since we're dealing with monospaced fonts here the width of each
    // character (space, NBSP, digit) is the same. Hence the length of
    // a digits string gives us its width i.e. the number of digits.
    // The last line is numbered "start + count - 1" (not "start + count").
    $last = $aStart + count($aList) - 1;
    if ($last < $aStart) {
        $last = $aStart;    // empty list
    }
    $g = strlen("$last");    // width of greatest number
    // "foreach" instead of the "each()" function removed with PHP 8:
    foreach ($aList as $l) {
        $pad = $g - strlen("$aStart");
        if (isset($this->_lead[$pad])) {
            $lead = $this->_lead[$pad];
        } else if (0 < $pad) {
            // Wider than the prepared table: build the indention
            // on the fly (alternating space and UTF-8 NBSP).
            $lead = str_repeat(" \xC2\xA0", (int)($pad / 2))
                . (($pad % 2) ? ' ' : '');
        } else {
            $lead = '';
        }
        $l = (string)$l;
        // Compare explicitly: a line holding just "0" is _not_ blank.
        // NOTE(review): "&nbsp;" and a lone NBSP are treated as blank
        // too, assuming the highlighter emits them for empty lines.
        $blank = (('' === $l) || (' ' === $l)
            || ('&nbsp;' === $l) || ("\xC2\xA0" === $l));
        $aDoc .= '<span class="lno">' . $lead
            . "$aStart:</span>" . (($blank) ? "\n" : " $l\n");
        ++$aStart;    // increment line number
    }
    $aList = FALSE;    // release memory
} // _addLines()
216
/**
 * Internal convenience method to replace HTML special characters.
 *
 * <p>
 * The characters <tt>&amp;</tt>, <tt>&lt;</tt> and <tt>&gt;</tt> are
 * replaced by their <em>numeric</em> entities; the patterns of
 * <tt>_addDiff()</tt> rely on exactly this form.
 * Hidden "id" markers (<tt>&lt;!-- id:... --&gt;</tt>) are turned back
 * into real HTML comments afterwards.
 * </p><p>
 * The method both modifies the argument in place and returns it by
 * reference, so the result can be passed on to by-reference parameters.
 * </p>
 * @param $aString String [IN] The text to handle;
 * [OUT] the modified text (i.e. the method's result).
 * @return String The string with HTML special chars replaced.
 * @private
 * @since created 05-Feb-2007
 */
function &_entities(&$aString) {
    // "&" has to be first or the entities added below get mangled:
    $aString = str_replace(array('&', '<', '>'),
        array('&#38;', '&#60;', '&#62;'), $aString);
    // [jmt12] Restore the hidden ids to normal HTML comments
    $restored = preg_replace('/&#60;!-- id:(.*?) --&#62;/',
        '<!-- id:\1 -->', $aString);
    if (NULL !== $restored) {
        // keep the escaped text if the RegEx engine failed
        $aString = $restored;
    }
    return $aString;
} // _entities()
233
/**
 * Try to fix some markup errors of the GeSHi SHELL highlighting.
 *
 * <p>
 * The GeSHi highlighting for type "sh" (i.e. "bash") is flawed at
 * least up to version 1.0.7.20 (2007-07-01): comments, embedded
 * strings and keywords are marked up wrongly.
 * </p><p>
 * This helper works line by line: highlight markup nested inside a
 * comment (CSS class "re3") is removed, and a line that doesn't start
 * with a <tt>SPAN</tt> tag has all its tags stripped.
 * This is no final resolution: wrong markup that GeSHi spreads over
 * several lines (e.g. triggered by a solitary quote character within
 * a comment) can't be repaired completely.
 * </p><p>
 * With GeSHi versions newer than 1.0.7.20 the markup is appended
 * unmodified.
 * </p>
 * @param $aMarkup String [IN] The highlight markup as returned by GeSHi;
 * [OUT] <tt>FALSE</tt>.
 * @param $aDoc String Reference to the current renderer's
 * <tt>doc</tt> property.
 * @private
 * @since created 04-Aug-2007
 * @see render()
 */
function _fixGeSHi_Bash(&$aMarkup, &$aDoc) {
    // "version_compare()" copes with three-part version numbers as well:
    if (defined('GESHI_VERSION')
        && version_compare(GESHI_VERSION, '1.0.7.20', '>')) {
        // GeSHi v1.0.7.21 has the comments bug fixed
        $aDoc .= $aMarkup;
        $aMarkup = FALSE;    // release memory
        return;
    }
    $lines = explode("\n", $aMarkup);
    $aMarkup = FALSE;    // release memory
    foreach ($lines as $i => $l) {
        $hits = array();
        // GeSHi "bash" module marks up comments with CSS class "re3":
        if (preg_match('|^((.*)<span class="re3">)(.*)$|i', $l, $hits)) {
            // $hits[1] == everything up to (and incl.) the comment tag
            // $hits[2] == text before the comment tag
            // $hits[3] == the comment incl. nested markup
            if ('#!/bin/' == substr($hits[3], 0, 7)) {
                // "shebang" line: no comment markup at all
                $lines[$i] = $hits[2] . strip_tags($hits[3]);
            } else {
                $lines[$i] = $hits[1] . strip_tags($hits[3]) . '</span>';
            }
        } else if (! preg_match('|^\s*<span|i', $l)) {
            // If a line doesn't start with a highlighted keyword
            // all tags are removed since they're most probably
            // "leftovers" from the GeSHI string/comment bug.
            $lines[$i] = strip_tags($l);
        }
    }
    $aDoc .= implode("\n", $lines);
} // _fixGeSHi_Bash()
294
/**
 * Add markup to load the plugin's CSS/JavaScript with older DokuWiki
 * versions.
 *
 * <p>
 * Nothing is done if <tt>lib/exe/js.php</tt> exists below
 * <tt>DOKU_INC</tt> or if the method already ran.
 * Otherwise the plugin's <tt>style.css</tt> (whitespace-reduced, with
 * relative <tt>url(...)</tt> paths prefixed by the plugin's web
 * directory) and a <tt>SCRIPT</tt> tag for <tt>script.js</tt> are
 * prepended to the renderer's document - each only if the respective
 * file exists.
 * </p>
 * @param $aRenderer Object The renderer used.
 * @private
 * @since created 19-Feb-2007
 * @see render()
 */
function _fixJS(&$aRenderer) {
    //XXX This test will break if the DokuWiki file gets renamed:
    if (@file_exists(DOKU_INC . 'lib/exe/js.php')) {
        // Assuming a fairly recent DokuWiki installation
        // handling the plugin files on its own there's
        // nothing to do here ...
        return;
    }
    if ($this->_JSmarkup) {
        // Markup already added (or not needed)
        return;
    }
    $localdir = realpath(dirname(__FILE__)) . '/';
    $webdir = DOKU_BASE . 'lib/plugins/code/';
    $css = '';
    if (file_exists($localdir . 'style.css')) {
        // The file is included (not just read) with its output buffered:
        ob_start();
        @include($localdir . 'style.css');
        $raw = ob_get_contents();
        ob_end_clean();
        // Remove comments/whitespace from CSS and expand IMG paths;
        // patterns without a replacement entry are replaced by "":
        $css = preg_replace(
            array('|\s*/\x2A.*?\x2A/\s*|s', '|\s*([:;\{\},+!])\s*|',
                '|(?:url\x28\s*)([^/])|', '|^\s*|', '|\s*$|'),
            array(' ', '\1', 'url(' . $webdir . '\1'),
            $raw);
        $css = ($css)
            ? '<style type="text/css">' . $css . '</style>'
            : '';
    }
    $js = (file_exists($localdir . 'script.js'))
        ? '<script type="text/javascript" src="'
            . $webdir . 'script.js"></script>'
        : '';
    if ($this->_JSmarkup = $css . $js) {
        // Prepend the markup and drop empty paragraphs:
        $aRenderer->doc = $this->_JSmarkup
            . preg_replace('|\s*<p>\s*</p>\s*|', '', $aRenderer->doc);
    }
    //ELSE: Neither CSS nor JS files found.
    // Set member field to skip tests with next call:
    $this->_JSmarkup = TRUE;
} // _fixJS()
343
/**
 * RegEx callback to markup a run of spaces in ODT mode.
 *
 * @param $aList Array A list of RegEx matches; index <tt>[1]</tt>
 * holds the spaces to replace.
 * @return String A <tt>text:s</tt> tag carrying the byte length of
 * <tt>$aList[1]</tt>, or a single space if that match is empty.
 * @private
 * @static
 * @since created 07-Jun-2008
 * @see render()
 */
function _preserveSpaces($aList) {
    $count = strlen($aList[1]);
    if (! $count) {
        return ' ';
    }
    return '<text:s text:c="' . $count . '"/>';
} // _preserveSpaces()
358
/**
 * Append unhighlighted text to the document, optionally numbered and
 * optionally wrapped in a <tt>PRE</tt> tag.
 *
 * @param $aText String [IN] the text lines as prepared by
 * <tt>handle()</tt>, [OUT] <tt>FALSE</tt>.
 * @param $aStart Integer The first linenumber to use;
 * if <tt>0</tt> (zero) no linenumbers are used.
 * @param $aDoc String Reference to the current renderer's
 * <tt>doc</tt> property.
 * @param $aClass String The CSS class name for the <tt>PRE</tt> tag.
 * @param $addTags Boolean If <tt>FALSE</tt> the surrounding
 * <tt>PRE</tt> tags are omitted (used in "ODT" mode).
 * @private
 * @since created 03-Feb-2007
 * @see render()
 * @see _addLines()
 */
function _rawMarkup(&$aText, $aStart, &$aDoc, $aClass, $addTags = TRUE) {
    if ($addTags) {
        $aDoc .= '<pre class="' . $aClass . '">' . "\n";
    }
    if (! $aStart) {
        // No numbering requested: take the text as it is.
        $aDoc .= $aText;
    } else {
        // Numbering works on a list of single lines:
        $aText = explode("\n", $aText);
        $this->_addLines($aText, $aStart, $aDoc);
    }
    if ($addTags) {
        $aDoc .= '</pre>';
    }
    $aText = FALSE;    // release memory
} // _rawMarkup()
393
/**
 * RegEx callback to replace (X)HTML <tt>SPAN</tt> tags in ODT mode.
 *
 * @param $aList Array A list of RegEx matches; index <tt>[3]</tt>
 * holds the CSS class name (if any).
 * @return String An opening <tt>text:span</tt> tag; if a class name
 * was matched it refers to the style <tt>Code_5f_<em>class</em></tt>
 * with each underscore of the name encoded as <tt>_5f_</tt>.
 * @private
 * @static
 * @since created 07-Jun-2008
 * @see render()
 */
function _replaceSpan($aList) {
    if (! $aList[3]) {
        return '<text:span>';
    }
    $style = str_replace('_', '_5f_', $aList[3]);
    return '<text:span text:style-name="Code_5f_' . $style . '">';
} // _replaceSpan()
409
410	//@}
411	/**
412	* @publicsection
413	*/
414	//@{
415
/**
 * Tell the parser whether the plugin accepts syntax mode
 * <tt>$aMode</tt> within its own markup.
 *
 * <p>
 * The argument is ignored: no mode at all is accepted.
 * </p>
 * @param $aMode String The requested syntaxmode.
 * @return Boolean Always <tt>FALSE</tt> (no nested markup allowed).
 * @public
 * @see getAllowedTypes()
 */
function accepts($aMode) {
    $nestingAllowed = false;
    return $nestingAllowed;
} // accepts()
428
/**
 * Connect lookup pattern to lexer.
 *
 * <p>
 * The entry pattern matches the literal <tt>&lt;code</tt> only if it is
 * followed by the rest of the opening tag, a line break, some text and
 * a closing <tt>&lt;/code&gt;</tt> tag at the start of a line.
 * </p>
 * @param $aMode String The desired rendermode.
 * @public
 * @see postConnect()
 * @see render()
 */
function connectTo($aMode) {
    // look-ahead to minimize the chance of false matches:
    $this->Lexer->addEntryPattern(
        '\x3Ccode(?=[^>]*\x3E\r?\n.*\n\x3C\x2Fcode\x3E)',
        $aMode, 'plugin_code');
} // connectTo()
442
/**
 * Get the list of mode types that may be nested within the
 * plugin's own markup.
 *
 * @return Array An empty list (nothing may be nested).
 * @public
 * @see accepts()
 * @static
 */
function getAllowedTypes() {
    $nestedTypes = array();
    return $nestedTypes;
} // getAllowedTypes()
455
/**
 * Get an associative array with plugin info.
 *
 * <p>
 * The returned array holds the following fields:
 * </p><dl>
 * <dt>author</dt><dd>Author of the plugin</dd>
 * <dt>email</dt><dd>Email address to contact the author</dd>
 * <dt>date</dt><dd>Last modified date of the plugin in
 * <tt>YYYY-MM-DD</tt> format</dd>
 * <dt>name</dt><dd>Name of the plugin</dd>
 * <dt>desc</dt><dd>Short description of the plugin (Text only)</dd>
 * <dt>url</dt><dd>Website with more information on the plugin
 * (eg. syntax description)</dd>
 * </dl>
 * @return Array Information about this plugin class.
 * @public
 * @static
 */
function getInfo() {
    $c = 'code';    // hack to hide "desc" field from GeShi
    return array(
        'author' => 'Matthias Watermann',
        // NOTE(review): the address below looks like an obfuscation
        // placeholder, not a real address - restore it from upstream.
        'email' => '[email protected]',
        'date' => '2008-07-22',
        'name' => 'Code Syntax Plugin',
        // Plain "|" separators (no backslashes) as used in the markup:
        'desc' => 'Syntax highlighting with line numbering <'
            . $c . ' lang 1 |[fh] text |[hs]> ... </' . $c . '>',
        'url' => 'http://wiki.splitbrain.org/plugin:code2');
} // getInfo()
486
/**
 * Define how this plugin is handled regarding paragraphs.
 *
 * <p>
 * This setting matters for correct XHTML nesting.
 * The possible values are:
 * </p><dl>
 * <dt>normal</dt><dd>The plugin can be used inside paragraphs.</dd>
 * <dt>block</dt><dd>Open paragraphs need to be closed before
 * plugin output.</dd>
 * <dt>stack</dt><dd>Special case: Plugin wraps other paragraphs.</dd>
 * </dl>
 * @return String Always <tt>"block"</tt>.
 * @public
 * @static
 */
function getPType() {
    $paragraphType = 'block';
    return $paragraphType;
} // getPType()
506
/**
 * Get the sort order of this plugin among the syntax modes.
 *
 * @return Integer Always <tt>194</tt>, i.e. a value below the
 * <tt>200</tt> of "Doku_Parser_Mode_code".
 * @public
 * @static
 */
function getSort() {
    // class "Doku_Parser_Mode_code" returns 200
    $sortOrder = 194;
    return $sortOrder;
} // getSort()
518
/**
 * Get the type of syntax this plugin defines.
 *
 * @return String Always <tt>"protected"</tt>.
 * @public
 * @static
 */
function getType() {
    $syntaxType = 'protected';
    return $syntaxType;
} // getType()
529
/**
 * Handler to prepare matched data for the rendering process.
 *
 * <p>
 * Only matches in state <tt>DOKU_LEXER_UNMATCHED</tt> (the text between
 * the opening and closing tag) are processed; for any other state just
 * <tt>array($aState)</tt> is returned.
 * </p><p>
 * The match is split at the first <tt>&gt;</tt> into the tag arguments
 * and the text to highlight. If the text is of the form
 * <tt>extern&gt; URL</tt> the resource is fetched ("ftp", "http" and
 * "https" only) and used as text; on failure an error message becomes
 * the text. The text is then normalised (line ends, leading/trailing
 * blank lines, trailing whitespace, TABs) and the tag arguments are
 * parsed for language, first line number, title and CSS flag.
 * </p>
 * @param $aMatch String The text matched by the patterns.
 * @param $aState Integer The lexer state for the match.
 * @param $aPos Integer The character position of the matched text
 * (unused).
 * @param $aHandler Object Reference to the Doku_Handler object (unused).
 * @return Array Index <tt>[0]</tt> holds the current <tt>$aState</tt>,
 * index <tt>[1]</tt> the embedded text to highlight,
 * index <tt>[2]</tt> the language/dialect (or <tt>FALSE</tt>),
 * index <tt>[3]</tt> the first line number (or <tt>0</tt>); for
 * language "diff" the format letter ("u", "c", "n", "s" or "?"),
 * index <tt>[4]</tt> the top title (or <tt>FALSE</tt>),
 * index <tt>[5]</tt> the bottom title (or <tt>FALSE</tt>),
 * index <tt>[6]</tt> hiding CSS flag (or <tt>""</tt>).
 * @public
 * @see render()
 * @static
 */
function handle($aMatch, $aState, $aPos, &$aHandler) {
    if (DOKU_LEXER_UNMATCHED != $aState) {
        return array($aState);    // nothing to do for "render()"
    }
    $aMatch = explode('>', $aMatch, 2);
    // $aMatch[0] : lang etc.
    // $aMatch[1] : text to highlight
    if (! isset($aMatch[1])) {
        $aMatch[1] = '';    // no ">" at all: there's no text
    }
    $n = explode('>', trim($aMatch[1]));
    $l = 'extern';    // external resource requested?
    // Check whether there's an external file to fetch:
    if ($l == $n[0]) {
        $n[1] = (isset($n[1])) ? trim($n[1]) : '';
        if ($n[1]) {
            $url = @parse_url($n[1]);
            if (is_array($url) && (! empty($url['scheme']))) {
                $n[0] = $url['scheme'];
                // Don't accept unsecure schemes like
                // "file", "javascript", "mailto" etc.
                // NOTE(review): the wiki server fetches whatever URL an
                // editor writes down here (incl. internal hosts) and
                // without any size limit - consider restricting this.
                switch (strtolower($n[0])) {
                    case 'ftp':
                    case 'http':
                    case 'https':
                        //XXX This might fail due to global PHP setup:
                        if ($handle = @fopen($n[1], 'rb')) {
                            $aMatch[1] = '';
                            while (! @feof($handle)) {
                                //XXX This might fail due to
                                // memory constraints:
                                $chunk = @fread($handle, 0x8000);
                                if (FALSE === $chunk) {
                                    // read error: avoid looping forever
                                    break;
                                }
                                $aMatch[1] .= $chunk;
                            }
                            @fclose($handle);
                        } else {
                            $aMatch = array($l,
                                'Failed to retrieve: ' . $n[1]);
                        }
                        break;
                    default:
                        $aMatch = array($l,
                            'Unsupported URL scheme: ' . $n[0]);
                        break;
                }
            } else {
                $aMatch = array($l, 'Invalid URL: ' . $n[1]);
            }
        } else {
            $aMatch = array($l, 'Missing URL: ' . $aMatch[1]);
        }
    }
    // Strip leading/trailing/EoL whitespace,
    // replace TABs by four spaces, NBSP entities by (UTF-8) NBSP:
    // NOTE(review): the entity names were reconstructed - confirm
    // against the upstream plugin.
    $aMatch[1] = preg_replace(
        array('#(?>\r\n)|\r#', '|^\n\n*|',
            '|[\t ]+\n|', '|\s*\n$|'),
        array("\n", '', "\n", ''),
        str_replace(array('&nbsp;', '&#160;'), "\xC2\xA0",
            str_replace("\t", '    ', $aMatch[1])));

    $css = '';            // default: no initial CSS content hiding
    $l = FALSE;            // default: no language
    $n = 0;                // default: no line numbers
    $ht = $ft = FALSE;    // default: no (head/foot) title
    $hits = array();    // RegEx matches from the tag attributes
    /*
    The free form of the RegEx to parse the arguments here is:
    /^
        # "eat" leading whitespace:
        \s*
        (?=\S)    # Look ahead: do not match empty lines. This is
                # needed since all other expressions are optional.
        # Make sure, nothing is given away once it matched:
        (?>
            # We need a separate branch for "diff" because it may be
            # followed by a _letter_ (not digit) indicating the format.
            (?>
                (diff)
                # match 1
                (?>\s+([cnrsu]?))?
                # match 2
            )
        |
            # Branch for standard language highlighting
            (?>
                # extract language:
                ([a-z][^\x7C\s]*)
                # match 3
                (?>
                    # extract starting line number:
                    \s+(\d\d*)
                    # match 4
                )?
            )
        |
            # Branch for line numbering only
            (\d\d*)
            # match 5
        |
            \s*    # dummy needed to match "title only" markup (below)
        )
        # "eat" anything else up to the text delimiter:
        [^\x7C]*
        (?>
            \x7C
            # extract the position flag:
            ([bfht])?\s*
            # match 6
            # extract the header,footer line:
            ([^\x7C]+)
            # match 7
            (?>
                # see whether there is a class flag:
                \x7C\s*
                (h|s)?.*
                # match 8
            )?
        )?
    # Anchored to make sure everything gets matched:
    $/xiu

    Since compiling and applying a free form RegEx slows down the
    overall matching process I've folded it all to a standard RegEx.
    Benchmarking during development gave me
    free form: 20480 loops, 552960 hits, 102400 fails, 12.994689 secs
    standard: 20480 loops, 552960 hits, 102400 fails, 8.357169 secs
    */
    if (preg_match('/^\s*(?=\S)(?>(?>(diff)(?>\s+([cnrsu]?))?)|'
        . '(?>([a-z][^\x7C\s]*)(?>\s+(\d\d*))?)|(\d\d*)|\s*)[^\x7C]*'
        . '(?>\x7C([bfht])?\s*([^\x7C]+)(?>\x7C\s*(h|s)?.*)?)?$/iu',
        $aMatch[0], $hits)) {
        unset($hits[0]);    // free mem
        // $hits[1] = "diff"
        // $hits[2] = type (of [1])
        // $hits[3] = LANG
        // $hits[4] = NUM (of [3])
        // $hits[5] = NUM (alone)
        // $hits[6] = Top/Bottom flag (of [7])
        // $hits[7] = TITLE
        // $hits[8] = s/h CSS flag
        if (isset($hits[3]) && ($hits[3])) {
            $l = strtolower($hits[3]);
            if (isset($hits[4]) && ($hits[4])) {
                $n = (int)$hits[4];
            }
            $hits[3] = $hits[4] = FALSE;
        } else if (isset($hits[1]) && ($hits[1])) {
            $l = strtolower($hits[1]);
            // The appended "?" (unknown format) guarantees that
            // there is a first character to pick:
            $hits[2] = (isset($hits[2]))
                ? strtolower($hits[2]) . '?'
                : '?';
            $n = substr($hits[2], 0, 1);
            $hits[1] = $hits[2] = FALSE;
        } else if (isset($hits[5]) && ($hits[5])) {
            $n = (int)$hits[5];
        }
        if (isset($hits[7]) && ($hits[7])) {
            // The appended "f" (footer) is the default position:
            $hits[6] = (isset($hits[6]))
                ? strtolower($hits[6]) . 'f'
                : 'f';
            switch (substr($hits[6], 0, 1)) {
                case 'h':
                case 't':
                    $ht = trim($hits[7]);
                    break;
                default:
                    $ft = trim($hits[7]);
                    break;
            }
            if (isset($hits[8])) {
                // The appended "s" (show) is the default flag:
                $hits[8] = strtolower($hits[8]) . 's';
                if ('h' == substr($hits[8], 0, 1)) {
                    // This class is handled by JavaScript (there
                    // _must_not_ be any CSS rules for this):
                    $css = ' HideOnInit';
                }
            }
            $hits[6] = $hits[7] = $hits[8] = FALSE;
        }
        // ELSE: no arguments given to CODE tag
    }
    switch ($l) {
        case 'console':
            // nothing additional to setup here
            break;
        case 'diff':
            if ("\n" != substr($aMatch[1], 0, 1)) {
                // A leading LF is needed to recognize and handle
                // the very first line with all the REs used.
                $aMatch[1] = "\n" . $aMatch[1];
            }
            switch ($n) {
                case 'u':    // DIFF cmdline switch for "unified"
                case 'c':    // DIFF cmdline switch for "context"
                case 'n':    // DIFF cmdline switch for "RCS"
                case 's':
                    // We believe the format hint ...
                    // (or should we be more suspicious?)
                    break;
                case 'r':    // Mnemonic for "RCS"
                    $n = 'n';
                    break;
                default:    // try to figure out the format actually used
                    if (preg_match(
                        '|\n(?:\x2A{5,}\n\x2A{3}\s[1-9]+.*?\x2A{4}\n.+?)+|s',
                        $aMatch[1])) {
                        $n = 'c';
                    } else if (preg_match(
                        '|\n@@\s\-[0-9]+,[0-9]+[ \+,0-9]+?@@\n.+\n|s',
                        $aMatch[1])) {
                        $n = 'u';
                    } else if (preg_match(
                        '|\n[ad][0-9]+\s+[0-9]+\r?\n|', $aMatch[1])) {
                        // We've to check this _before_ "simple" since
                        // the REs are quite similar (but this one is
                        // slightly more specific).
                        $n = 'n';
                    } else if (preg_match(
                        '|\n(?:[0-9a-z]+(?:,[0-9a-z]+)*)(?:[^\n]*\n.*?)+|',
                        $aMatch[1])) {
                        $n = 's';
                    } else {
                        $n = '?';
                    }
                    break;
            }
            break;
        case 'htm':        // convenience shortcut
        case 'html':    // dito
            $l = 'html4strict';
            break;
        case 'js':        // shortcut
            $l = 'javascript';
            break;
        case 'sh':        // shortcut
            $l = 'bash';
            break;
        default:
            if (! $l) {
                // no language: simple PRE markup will get generated
                $l = FALSE;
            }
            break;
    }
    return array(DOKU_LEXER_UNMATCHED,
        $aMatch[1], $l, $n, $ht, $ft, $css);
} // handle()
798
/**
 * Add the exit pattern (the closing tag) to the lexer.
 *
 * <p>
 * The closing tag is only recognised directly after a line break.
 * </p>
 * @public
 * @see connectTo()
 */
function postConnect() {
    // look-before to minimize the chance of false matches:
    $exitPattern = '(?<=\n)\x3C\x2Fcode\x3E';
    $this->Lexer->addExitPattern($exitPattern, 'plugin_code');
} // postConnect()
809
810	/**
811	* Handle the actual output (markup) creation.
812	*
813	* <p>
814	* The method checks the given <tt>$aFormat</tt> to decide how to
815	* handle the specified <tt>$aData</tt>.
816	* The standard case (i.e. <tt>"xhtml"</tt>) is handled completely
817	* by this implementation, preparing linenumbers and/or head/foot
818	* lines are requested.
819	* For the <tt>"odt"</tt> format all plugin features (incl. linenumbers
820	* and header/footer lines) are supported by generating the appropriate
821	* ODT/XML markup.
822	* All other formats are passed back to the given <tt>$aRenderer</tt>
823	* instance for further handling.
824	* </p><p>
825	* <tt>$aRenderer</tt> contains a reference to the renderer object
826	* which is currently in charge of the rendering.
827	* The contents of the given <tt>$aData</tt> is the return value
828	* of the <tt>handle()</tt> method.
829	* </p>
830	* @param $aFormat String The output format to generate.
831	* @param $aRenderer Object A reference to the renderer object.
832	* @param $aData Array The data created/returned by the
833	* <tt>handle()</tt> method.
834	* @return Boolean <tt>TRUE</tt>.
835	* @public
836	* @see handle()
837	*/
838	function render($aFormat, &$aRenderer, &$aData) {
839	if (DOKU_LEXER_UNMATCHED != $aData[0]) {
840	return TRUE;
841	} // if
842	if ('xhtml' == $aFormat) {
843	if ($tdiv = (($aData[4]) \|\| ($aData[5]))) {
844	$this->_fixJS($aRenderer); // check for old DokuWiki versions
845	$aRenderer->doc .= '<div class="code">';
846	if ($aData[4]) {
847	//XXX Note that "_headerToLink()" is supposed to be a
848	// _private_ method of the renderer class; so this code
849	// will fail once DokuWiki is rewritten in PHP5 which
850	// implements encapsulation of private methods and
851	// properties:
852	$aRenderer->doc .= '<p class="codehead' . $aData[6]
853	. '"><a name="' . $aRenderer->_headerToLink($aData[4])
854	. '">' . $this->_entities($aData[4]) . '</a></p>';
855	$aData[4] = $aData[6] = FALSE; // free mem
856	} // if
857	} // if
858	if ($aData[2]) { // lang was given
859	if ('console' == $aData[2]) {
860	$this->_rawMarkup($this->_entities($aData[1]),
861	$aData[3], $aRenderer->doc, $aData[2]);
862	} else if ('diff' == $aData[2]) {
863	$this->_entities($aData[1]);
864	$aRenderer->doc .= '<pre class="code diff">';
865	$this->_addDiff($aData[1], $aData[3], $aRenderer->doc);
866	$aRenderer->doc .= '</pre>';
867	} else {
868	$isSH = ('bash' == $aData[2]);
869	$geshi = new GeSHi($aData[1], $aData[2], GESHI_LANG_ROOT);
870	if ($geshi->error()) {
871	// Language not supported by "GeSHi"
872	$geshi = NULL; // release memory
873	$this->_rawMarkup($this->_entities($aData[1]),
874	$aData[3], $aRenderer->doc, 'code');
875	} else {
876	$aData[1] = FALSE; // free mem
877	$geshi->enable_classes();
878	$geshi->set_encoding('utf-8');
879	$geshi->set_header_type(GESHI_HEADER_PRE);
880	$geshi->set_overall_class('code ' . $aData[2]);
881	global $conf;
882	if ($conf['target']['extern']) {
883	$geshi->set_link_target($conf['target']['extern']);
884	} // if
885	if ($aData[3]) { // line numbers requested
886	// Separate PRE tag from parsed data:
887	$aData[1] = explode('>', $geshi->parse_code(), 2);
888	// [1][0] = leading "<pre"
889	// [1][1] = remaining markup up to trailing "</pre"
890	$geshi = NULL; // release memory
891
892	// Add the open tag to the document:
893	$aRenderer->doc .= $aData[1][0] . '>';
894
895	// Separate trailing PRE tag:
896	$aData[1] = explode('</pre>', $aData[1][1], 2);
897	// [1][0] = GeSHi markup
898	// [1][1] = trailing "</pre"
899
900	if ($isSH) {
901	$aData[1][1] = '';
902	$this->_fixGeSHi_Bash($aData[1][0],
903	$aData[1][1]);
904	} else {
905	// Set reference to fixed markup to sync with
906	// the "bash" execution path (above):
907	$aData[1][1] =& $aData[1][0];
908	} // if
909
910	// Split the parsed data into a list of lines:
911	$aData[2] = explode("\n", $aData[1][1]);
912	$aData[1] = FALSE; // free mem
913
914	// Add the numbered lines to the document:
915	$this->_addLines($aData[2], $aData[3],
916	$aRenderer->doc);
917
918	// Close the preformatted section markup:
919	$aRenderer->doc .= '</pre>';
920	} else { // w/o line numbering
921	if ($isSH) {
922	// Separate trailing PRE tag which
923	// sometimes is "forgotten" by GeSHi:
924	$aData[2] = explode('</pre>',
925	$geshi->parse_code(), 2);
926	// [1][0] = GeSHi markup
927	// [1][1] = trailing "</pre" (if any)
928	$this->_fixGeSHi_Bash($aData[2][0],
929	$aRenderer->doc);
930	$aRenderer->doc .= '</pre>';
931	} else {
932	$aRenderer->doc .= $geshi->parse_code();
933	} // if
934	$geshi = NULL; // release memory
935	} // if
936	} // if
937	} // if
938	} else {
939	$this->_rawMarkup($this->_entities($aData[1]),
940	$aData[3], $aRenderer->doc, 'code');
941	} // if
942	if ($tdiv) {
943	if ($aData[5]) {
944	//XXX See "_headerToLink()" note above.
945	$aRenderer->doc .= '<p class="codefoot'
946	. $aData[6] . '"><a name="'
947	. $aRenderer->_headerToLink($aData[5]) . '">'
948	. $this->_entities($aData[5]) . '</a></p>';
949	} // if
950	$aRenderer->doc .= '</div>';
951	} // if
952	} else if ('odt' == $aFormat) {
953	$inLI = array();
954	if (preg_match('\|^<text:p text:style-name="[^"]+">\s</text:p>\s(.*)$\|si',
955	$aRenderer->doc, $inLI)) {
956	// remove leading whitespace
957	$aRenderer->doc = $inLI[1];
958	} // if
959	// The "renderer_plugin_odt" doesn't clean (close)
960	// its own tags before calling this plugin.
961	// To work around that bug we have to check some
962	// private properties of the renderer instance.
963	$inLI = FALSE;
964	if (is_a($aRenderer, 'renderer_plugin_odt')) {
965	if ($inLI = ($aRenderer->in_list_item)) {
966	// If we're in a list item, we've to close the paragraph:
967	$aRenderer->doc .= '</text:p>';
968	} // if
969	if ($aRenderer->in_paragraph) {
970	$aRenderer->doc .= '</text:p>';
971	$aRenderer->in_paragraph = FALSE;
972	} // if
973	} // if
974
975	// Init (open) our text section:
976	$aRenderer->doc .= "\n"
977	. '<text:section text:style-name="Code_5f_Section" text:name="CodeSnippet'
978	. ++$this->_odtSect . '">';
979
980	if ($tdiv = (($aData[4]) \|\| ($aData[5]))) {
981	// Check whether we need a top caption ("header"):
982	if ($aData[4]) {
983	$aRenderer->doc .=
984	'<text:p text:style-name="Code_5f_Title">'
985	. "<text:line-break/>\n"
986	. $aData[4] . "</text:p>\n";
987	$aData[4] = $aData[6] = FALSE; // free mem
988	} // if
989	} // if
990	// The following code resembles the "xhtml" processing
991	// above except that we're not using "pre" tags here
992	// but ODT/XML markup.
993	$aData[0] = ''; // tmp. container of processed data
994	if ($aData[2]) { // lang was given
995	if ('console' == $aData[2]) {
996	$this->_rawMarkup($this->_entities($aData[1]),
997	$aData[3], $aData[0], $aData[2], FALSE);
998	} else if ('diff' == $aData[2]) {
999	$this->_addDiff($this->_entities($aData[1]),
1000	$aData[3], $aData[0]);
1001	} else {
1002	$isSH = ('bash' == $aData[2]);
1003	$geshi = new GeSHi($aData[1], $aData[2], GESHI_LANG_ROOT);
1004	if ($geshi->error()) {
1005	// Language not supported by "GeSHi"
1006	$geshi = NULL; // release memory
1007	$this->_rawMarkup($this->_entities($aData[1]),
1008	$aData[3], $aData[0], '', FALSE);
1009	} else {
1010	$aData[1] = FALSE; // free mem
1011	$geshi->enable_classes();
1012	$geshi->set_encoding('utf-8');
1013	$geshi->set_header_type(GESHI_HEADER_PRE);
1014	$geshi->set_overall_class('code ' . $aData[2]);
1015	global $conf;
1016	if ($conf['target']['extern']) {
1017	$geshi->set_link_target($conf['target']['extern']);
1018	} // if
1019	// Separate PRE tag from parsed data:
1020	$aData[1] = explode('>', $geshi->parse_code(), 2);
1021	// [1][0] = leading "<pre"
1022	// [1][1] = remaining markup up to trailing "</pre"
1023	$geshi = NULL; // release memory
1024
1025	// Separate trailing PRE tag:
1026	$aData[1] = explode('</pre>', $aData[1][1], 2);
1027	// [1][0] = GeSHi markup
1028	// [1][1] = trailing "</pre"
1029	$aData[1] = $aData[1][0];
1030
1031	if ($isSH) { // work around GeSHI bug
1032	$aData[2] = '';
1033	$this->_fixGeSHi_Bash($aData[1], $aData[2]);
1034	} else {
1035	$aData[2] = $aData[1];
1036	} // if
1037	$aData[1] = FALSE; // release memory
1038
1039	if ($aData[3]) { // line numbers requested
1040	// Split the parsed data into a list of lines:
1041	$aData[1] = explode("\n", $aData[2]);
1042	$aData[2] = FALSE; // release memory
1043
1044	// Add the numbered lines to the document:
1045	$this->_addLines($aData[1], $aData[3], $aData[0]);
1046	} else { // w/o line numbers
1047	$aData[0] = $aData[2];
1048	$aData[2] = FALSE; // release memory
1049	} // if
1050	} // if
1051	} // if
1052	} else {
1053	$this->_rawMarkup($this->_entities($aData[1]),
1054	$aData[3], $aData[0], '', FALSE);
1055	} // if
1056
1057	if ('console' == $aData[2]) {
1058	$aRenderer->doc .=
1059	'<text:p text:style-name="Code_5f_Console">';
1060	} else {
1061	$aRenderer->doc .=
1062	'<text:p text:style-name="Code_5f_Standard">';
1063	} // if
1064	// Replace the HTML "span" tags (for highlighting) by
1065	// the appropriate ODT/XML markup.
1066	// For unknown reasons we need an additional space
1067	// in front of the very first line.
1068	$aData[0] = '<text:s/>'
1069	. preg_replace_callback('\|(<span( class="([^"])"[^>])?>)\|',
1070	array('syntax_plugin_code', '_replaceSpan'),
1071	// OOo (v2.3) crashes on "&nbsp;"
1072	str_replace(' ', chr(194) . chr(160),
1073	str_replace('</span>', '</text:span>',
1074	strip_tags($aData[0], '<span>'))));
1075	// Now append our markup to the renderer's document;
1076	// TABs, LFs and SPACEs are replaced by their respective
1077	// ODT/XML equivalents:
1078	$aRenderer->doc .= preg_replace_callback('\|( {2,})\|',
1079	array('syntax_plugin_code', '_preserveSpaces'),
1080	str_replace("\n", "<text:line-break/>\n", $aData[0]));
1081	$aData[0] = FALSE; // release memory
1082
1083	// Check whether we need a bottom caption ("footer"):
1084	if ($tdiv && ($aData[5])) {
1085	$aRenderer->doc .=
1086	'</text:p><text:p text:style-name="Code_5f_Title">'
1087	. $aData[5];
1088	} // if
1089	// Close all our open tags:
1090	$aRenderer->doc .= "</text:p></text:section>\n";
1091
1092	if ($inLI) {
1093	// Workaround (see above): (re-)open a paragraph:
1094	$aRenderer->doc .= '<text:p>';
1095	} // if
1096	} else { // unsupported output format
1097	$aData[0] = $aData[4] = $aData[5] = FALSE; // avoid recursion
1098	// Pass anything else back to the renderer instance
1099	// (which will - hopefully - know how to handle it):
1100	$aRenderer->code($aData[1], $aData[2]);
1101	} // if
1102	$aData = array(FALSE); // don't process this text again
1103	return TRUE;
1104	} // render()
1105
1106	//@}
1107	} // class syntax_plugin_code
1108	} // if
1109	?>

Note: See TracBrowser for help on using the repository browser.

Download in other formats: