Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: documentation/trunk/packages/dokuwiki-2011-05-25a/inc/geshi.php@ 25027

Last change on this file since 25027 was 25027, checked in by jmt12, 12 years ago
Adding the packages directory, and within it a configured version of dokuwiki all ready to run
File size: 195.9 KB

Line
1	<?php
2	/**
3	* GeSHi - Generic Syntax Highlighter
4	*
5	* The GeSHi class for Generic Syntax Highlighting. Please refer to the
6	* documentation at http://qbnz.com/highlighter/documentation.php for more
7	* information about how to use this class.
8	*
9	* For changes, release notes, TODOs etc, see the relevant files in the docs/
10	* directory.
11	*
12	* This file is part of GeSHi.
13	*
14	* GeSHi is free software; you can redistribute it and/or modify
15	* it under the terms of the GNU General Public License as published by
16	* the Free Software Foundation; either version 2 of the License, or
17	* (at your option) any later version.
18	*
19	* GeSHi is distributed in the hope that it will be useful,
20	* but WITHOUT ANY WARRANTY; without even the implied warranty of
21	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22	* GNU General Public License for more details.
23	*
24	* You should have received a copy of the GNU General Public License
25	* along with GeSHi; if not, write to the Free Software
26	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27	*
28	* @package geshi
29	* @subpackage core
30	* @author Nigel McNie <[email protected]>, Benny Baumann <[email protected]>
31	* @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
32	* @license http://gnu.org/copyleft/gpl.html GNU GPL
33	*
34	*/
35
36	//
37	// GeSHi Constants
38	// You should use these constant names in your programs instead of
39	// their values - you never know when a value may change in a future
40	// version
41	//
42
43	/** The version of this GeSHi file */
44	define('GESHI_VERSION', '1.0.8.8');
45
46	// Define the root directory for the GeSHi code tree, unless the including script already did
47	if (!defined('GESHI_ROOT')) {
48	/** The root directory for GeSHi: the directory of this file, with a trailing separator */
49	define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
50	}
51	/** The language file directory for GeSHi: the "geshi" directory below GESHI_ROOT
52	@access private */
53	define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
54
55	// Define if GeSHi should be paranoid about security (off unless the including script defined it)
56	if (!defined('GESHI_SECURITY_PARANOID')) {
57	/** Tells GeSHi to be paranoid about security settings: set_language_path() then also rejects paths containing "/." or ".." */
58	define('GESHI_SECURITY_PARANOID', false);
59	}
60
61	// Line numbers - use with enable_line_numbers()
62	/** Use no line numbers when building the result */
63	define('GESHI_NO_LINE_NUMBERS', 0);
64	/** Use normal line numbers when building the result */
65	define('GESHI_NORMAL_LINE_NUMBERS', 1);
66	/** Use fancy line numbers (every nth line is "fancy") when building the result */
67	define('GESHI_FANCY_LINE_NUMBERS', 2);
68
69	// Container HTML type - use with set_header_type()
70	/** Use nothing to surround the source */
71	define('GESHI_HEADER_NONE', 0);
72	/** Use a "div" to surround the source */
73	define('GESHI_HEADER_DIV', 1);
74	/** Use a "pre" to surround the source */
75	define('GESHI_HEADER_PRE', 2);
76	/** Use a "pre" to wrap lines when line numbers are enabled or to wrap the whole code. */
77	define('GESHI_HEADER_PRE_VALID', 3);
78	/**
79	* Use a "table" to surround the source:
80	*
81	* <table>
82	* <thead><tr><td colspan="2">$header</td></tr></thead>
83	* <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
84	* <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
85	* </table>
86	*
87	* this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
88	* https://bugzilla.mozilla.org/show_bug.cgi?id=365805
89	* @note when line numbers are disabled this is essentially the same as GESHI_HEADER_PRE
90	*/
91	define('GESHI_HEADER_PRE_TABLE', 4);
92
93	// Capitalisation constants
94	/** Leave keywords found as the case that they are */
95	define('GESHI_CAPS_NO_CHANGE', 0);
96	/** Uppercase keywords found */
97	define('GESHI_CAPS_UPPER', 1);
98	/** Lowercase keywords found */
99	define('GESHI_CAPS_LOWER', 2);
100
101	// Link style constants
102	/** Links in the source in the :link state */
103	define('GESHI_LINK', 0);
104	/** Links in the source in the :hover state */
105	define('GESHI_HOVER', 1);
106	/** Links in the source in the :active state */
107	define('GESHI_ACTIVE', 2);
108	/** Links in the source in the :visited state */
109	define('GESHI_VISITED', 3);
110
111	// Important string starter/finisher
112	// Note that if you change these, they should be written as-is, i.e. do not
113	// write them as if they had been run through htmlentities()
114	/** The starter for important parts of the source */
115	define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
116	/** The ender for important parts of the source */
117	define('GESHI_END_IMPORTANT', '<END GeSHi>');
118
119	/**#@+
120	* @access private
121	*/
122	// When strict mode applies for a language
123	/** Strict mode never applies (this is the most common) */
124	define('GESHI_NEVER', 0);
125	/** Strict mode might apply, and can be enabled or
126	disabled by {@link GeSHi->enable_strict_mode()} */
127	define('GESHI_MAYBE', 1);
128	/** Strict mode always applies */
129	define('GESHI_ALWAYS', 2);
130
131	// Advanced regexp handling constants: keys of a regexp definition array in language files
132	/** The key of the regex array defining what to search for */
133	define('GESHI_SEARCH', 0);
134	/** The key of the regex array defining what bracket group in a
135	matched search to use as a replacement */
136	define('GESHI_REPLACE', 1);
137	/** The key of the regex array defining any modifiers to the regular expression */
138	define('GESHI_MODIFIERS', 2);
139	/** The key of the regex array defining what bracket group in a
140	matched search to put before the replacement */
141	define('GESHI_BEFORE', 3);
142	/** The key of the regex array defining what bracket group in a
143	matched search to put after the replacement */
144	define('GESHI_AFTER', 4);
145	/** The key of the regex array defining a custom keyword to use
146	for this regexp's HTML tag class */
147	define('GESHI_CLASS', 5);
148
149	/** Used in language files to mark comments */
150	define('GESHI_COMMENTS', 0);
151
/**
 * Used to work around missing PHP features.
 *
 * True when the running PHP version is 4.3.3 or older, i.e. when
 * version_compare() does not report PHP_VERSION as newer than 4.3.3.
 */
define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1));

/*
 * Make sure we can call stripos() on PHP versions that lack it.
 *
 * Both fallbacks search case-insensitively (the "i" modifier) and return the
 * position of the first match relative to the start of the original
 * $haystack, or false when $needle does not occur.
 */
if (!function_exists('stripos')) {
    // the offset param of preg_match is not supported below PHP 4.3.3
    if (GESHI_PHP_PRE_433) {
        /**
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            // Number of leading bytes cut off before matching; it has to be
            // added back so the result refers to the original haystack.
            $skipped = 0;
            if (!is_null($offset)) {
                $skipped = ($offset < 0) ? max(0, strlen($haystack) + $offset) : (int) $offset;
                $haystack = substr($haystack, $skipped);
            }
            if (preg_match('/' . preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE)) {
                return $match[0][1] + $skipped;
            }
            return false;
        }
    }
    else {
        /**
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            // With an explicit offset preg_match() still reports positions
            // relative to the start of the subject, so no correction is needed.
            if (preg_match('/' . preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE, (int) $offset)) {
                return $match[0][1];
            }
            return false;
        }
    }
}
184
185	/** some old PHP / PCRE versions only support a limited number of subpatterns in
186	regular expressions. Set this to false if your PCRE lib is up to date
187	@see GeSHi->optimize_regexp_list()
188	**/
189	define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
190	/** it's also important not to generate too long regular expressions
191	be generous here... but keep in mind, that when reaching this limit we
192	still have to close open patterns. 12k should do just fine on a 16k limit.
193	@see GeSHi->optimize_regexp_list()
194	**/
195	define('GESHI_MAX_PCRE_LENGTH', 12288);
196
197	//Number format specification
198	/** Basic number format for integers */
199	define('GESHI_NUMBER_INT_BASIC', 1); //Default integers \d+
200	/** Enhanced number format for integers like seen in C */
201	define('GESHI_NUMBER_INT_CSTYLE', 2); //Default C-Style \d+[lL]?
202	/** Number format to highlight binary numbers with a suffix "b" */
203	define('GESHI_NUMBER_BIN_SUFFIX', 16); //[01]+[bB]
204	/** Number format to highlight binary numbers with a prefix % */
205	define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32); //%[01]+
206	/** Number format to highlight binary numbers with a prefix 0b (C) */
207	define('GESHI_NUMBER_BIN_PREFIX_0B', 64); //0b[01]+
208	/** Number format to highlight octal numbers with a leading zero */
209	define('GESHI_NUMBER_OCT_PREFIX', 256); //0[0-7]+
210	/** Number format to highlight octal numbers with a prefix 0o (logtalk) */
211	define('GESHI_NUMBER_OCT_PREFIX_0O', 512); //0o[0-7]+
212	/** Number format to highlight octal numbers with a suffix of o */
213	define('GESHI_NUMBER_OCT_SUFFIX', 1024); //[0-7]+[oO]
214	/** Number format to highlight hex numbers with a prefix 0x */
215	define('GESHI_NUMBER_HEX_PREFIX', 4096); //0x[0-9a-fA-F]+
216	/** Number format to highlight hex numbers with a suffix of h */
217	define('GESHI_NUMBER_HEX_SUFFIX', 8192); //[0-9][0-9a-fA-F]*h
218	/** Number format to highlight floating-point numbers without support for scientific notation */
219	define('GESHI_NUMBER_FLT_NONSCI', 65536); //\d+\.\d+
220	/** Number format to highlight floating-point numbers with a suffix of f, without support for scientific notation */
221	define('GESHI_NUMBER_FLT_NONSCI_F', 131072); //\d+(\.\d+)?f
222	/** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
223	define('GESHI_NUMBER_FLT_SCI_SHORT', 262144); //\.\d+e\d+
224	/** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
225	define('GESHI_NUMBER_FLT_SCI_ZERO', 524288); //\d+(\.\d+)?e\d+
226	//Custom formats are passed by RX array
227
228	// Error detection - use these to analyse faults
229	/** No source code to highlight was specified
230	* @deprecated
231	*/
232	define('GESHI_ERROR_NO_INPUT', 1);
233	/** The language specified does not exist */
234	define('GESHI_ERROR_NO_SUCH_LANG', 2);
235	/** GeSHi could not open a file for reading (generally a language file) */
236	define('GESHI_ERROR_FILE_NOT_READABLE', 3);
237	/** The header type passed to {@link GeSHi->set_header_type()} was invalid */
238	define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
239	/** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
240	define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
241	/**#@-*/
242
243
244	/**
245	* The GeSHi Class.
246	*
247	* Please refer to the documentation for GeSHi 1.0.X that is available
248	* at http://qbnz.com/highlighter/documentation.php for more information
249	* about how to use this class.
250	*
251	* @package geshi
252	* @author Nigel McNie <[email protected]>, Benny Baumann <[email protected]>
253	* @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
254	*/
255	class GeSHi {
256	/**#@+
257	* @access private
258	*/
259	/**
260	* The source code to highlight
261	* @var string
262	*/
263	var $source = '';
264
265	/**
266	* The language to use when highlighting
267	* @var string
268	*/
269	var $language = '';
270
271	/**
272	* The data for the language used
273	* @var array
274	*/
275	var $language_data = array();
276
277	/**
278	* The path to the language files
279	* @var string
280	*/
281	var $language_path = GESHI_LANG_ROOT;
282
283	/**
284	* The code of the last error (one of the GESHI_ERROR_* constants), or false if there is none
285	* @var int|false
286	* @todo check err reporting works
287	*/
288	var $error = false;
289
290	/**
291	* Possible error messages, keyed by error code
292	* @var array
293	*/
294	var $error_messages = array(
295	GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
296	GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
297	GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
298	GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
299	);
300
301	/**
302	* Whether highlighting is strict or not
303	* @var boolean
304	*/
305	var $strict_mode = false;
306
307	/**
308	* Whether to use CSS classes in output
309	* @var boolean
310	*/
311	var $use_classes = false;
312
313	/**
314	* The type of header to use. Can be one of the following
315	* values (set_header_type() also accepts GESHI_HEADER_PRE_VALID and GESHI_HEADER_PRE_TABLE):
316	*
317	* - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
318	* - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
319	* - GESHI_HEADER_NONE: No header is outputted.
320	*
321	* @var int
322	*/
323	var $header_type = GESHI_HEADER_PRE;
324
325	/**
326	* Array of permissions for which lexics should be highlighted
327	* @var array
328	*/
329	var $lexic_permissions = array(
330	'KEYWORDS' => array(),
331	'COMMENTS' => array('MULTI' => true),
332	'REGEXPS' => array(),
333	'ESCAPE_CHAR' => true,
334	'BRACKETS' => true,
335	'SYMBOLS' => false,
336	'STRINGS' => true,
337	'NUMBERS' => true,
338	'METHODS' => true,
339	'SCRIPT' => true
340	);
341
342	/**
343	* The time it took to parse the code
344	* @var double
345	*/
346	var $time = 0;
347
348	/**
349	* The content of the header block
350	* @var string
351	*/
352	var $header_content = '';
353
354	/**
355	* The content of the footer block
356	* @var string
357	*/
358	var $footer_content = '';
359
360	/**
361	* The style of the header block
362	* @var string
363	*/
364	var $header_content_style = '';
365
366	/**
367	* The style of the footer block
368	* @var string
369	*/
370	var $footer_content_style = '';
371
372	/**
373	* Tells if a block around the highlighted source should be forced
374	* if not using line numbering
375	* @var boolean
376	*/
377	var $force_code_block = false;
378
379	/**
380	* The styles for hyperlinks in the code
381	* @var array
382	*/
383	var $link_styles = array();
384
385	/**
386	* Whether important blocks should be recognised or not
387	* @var boolean
388	* @deprecated
389	* @todo REMOVE THIS FUNCTIONALITY!
390	*/
391	var $enable_important_blocks = false;
392
393	/**
394	* Styles for important parts of the code
395	* @var string
396	* @deprecated
397	* @todo As above - rethink the whole idea of important blocks as it is buggy and
398	* will be hard to implement in 1.2
399	*/
400	var $important_styles = 'font-weight: bold; color: red;'; // default: bold red text
401
402	/**
403	* Whether CSS IDs should be added to the code
404	* @var boolean
405	*/
406	var $add_ids = false;
407
408	/**
409	* Lines that should be highlighted extra (emptied again by set_source())
410	* @var array
411	*/
412	var $highlight_extra_lines = array();
413
414	/**
415	* Styles of lines that should be highlighted extra
416	* @var array
417	*/
418	var $highlight_extra_lines_styles = array();
419
420	/**
421	* Styles of extra-highlighted lines
422	* @var string
423	*/
424	var $highlight_extra_lines_style = 'background-color: #ffc;';
425
426	/**
427	* The line ending
428	* If null, nl2br() will be used on the result string.
429	* Otherwise, all instances of \n will be replaced with $line_ending
430	* @var string|null
431	*/
432	var $line_ending = null;
433
434	/**
435	* Number at which line numbers should start
436	* @var int
437	*/
438	var $line_numbers_start = 1;
439
440	/**
441	* The overall style for this code block
442	* @var string
443	*/
444	var $overall_style = 'font-family:monospace;';
445
446	/**
447	* The style for the actual code
448	* @var string
449	*/
450	var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
451
452	/**
453	* The overall class for this code block
454	* @var string
455	*/
456	var $overall_class = '';
457
458	/**
459	* The overall ID for this code block
460	* @var string
461	*/
462	var $overall_id = '';
463
464	/**
465	* Line number styles
466	* @var string
467	*/
468	var $line_style1 = 'font-weight: normal; vertical-align:top;';
469
470	/**
471	* Line number styles for fancy lines
472	* @var string
473	*/
474	var $line_style2 = 'font-weight: bold; vertical-align:top;';
475
476	/**
477	* Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
478	* @var string
479	*/
480	var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
481
482	/**
483	* Flag for how line numbers are displayed (one of the GESHI_*_LINE_NUMBERS constants)
484	* @var int
485	*/
486	var $line_numbers = GESHI_NO_LINE_NUMBERS;
487
488	/**
489	* Flag to decide if multi line spans are allowed. Set it to false to make sure
490	* each tag is closed before and reopened after each linefeed.
491	* @var boolean
492	*/
493	var $allow_multiline_span = true;
494
495	/**
496	* The "nth" value for fancy line highlighting
497	* @var int
498	*/
499	var $line_nth_row = 0;
500
501	/**
502	* The size of tab stops
503	* @var int
504	*/
505	var $tab_width = 8;
506
507	/**
508	* Should we use language-defined tab stop widths?
509	* @var boolean
510	*/
511	var $use_language_tab_width = false;
512
513	/**
514	* Default target for keyword links
515	* @var string
516	*/
517	var $link_target = '';
518
519	/**
520	* The encoding to use for entity encoding
521	* NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
522	* @var string
523	*/
524	var $encoding = 'utf-8';
525
526	/**
527	* Should keywords be linked?
528	* @var boolean
529	*/
530	var $keyword_links = true;
531
532	/**
533	* Currently loaded language file
534	* @var string
535	* @since 1.0.7.22
536	*/
537	var $loaded_language = '';
538
539	/**
540	* Whether the caches needed for parsing are built or not
541	*
542	* @var bool
543	* @since 1.0.8
544	*/
545	var $parse_cache_built = false;
546
547	/**
548	* Work around for Suhosin Patch with disabled /e modifier
549	*
550	* Note from Suhosin's author in config file:
551	* <blockquote>
552	* The /e modifier inside <code>preg_replace()</code> allows code execution.
553	* Often it is the cause for remote code execution exploits. It is wise to
554	* deactivate this feature and test where in the application it is used.
555	* The developer using the /e modifier should be made aware that he should
556	* use <code>preg_replace_callback()</code> instead
557	* </blockquote>
558	*
559	* @var int
560	* @since 1.0.8
561	*/
562	var $_kw_replace_group = 0;
563	var $_rx_key = 0;
564
565	/**
566	* some "callback parameters" for handle_multiline_regexps
567	*
568	* @since 1.0.8
569	* @access private
570	* @var string (the last member, $_hmr_key, starts out as the integer 0)
571	*/
572	var $_hmr_before = '';
573	var $_hmr_replace = '';
574	var $_hmr_after = '';
575	var $_hmr_key = 0;
576
577	/**#@-*/
578
/**
 * Creates a new GeSHi object, with source and language
 *
 * This is declared as __construct() so that it is actually run as the
 * constructor on PHP 8, where a method that merely shares the class name is
 * no longer treated as one.
 *
 * @param string The source code to highlight
 * @param string The language to highlight the source with
 * @param string The path to the language file directory. <b>This
 *               is deprecated!</b> I've backported the auto path
 *               detection from the 1.1.X dev branch, so now it
 *               should be automatically set correctly. If you have
 *               renamed the language directory however, you will
 *               still need to set the path using this parameter or
 *               {@link GeSHi->set_language_path()}
 * @since 1.0.0
 */
function __construct($source = '', $language = '', $path = '') {
    // empty() on its own would also throw away the legitimate source "0"
    if (!empty($source) || '0' === $source || 0 === $source) {
        $this->set_source($source);
    }
    if (!empty($language)) {
        $this->set_language($language);
    }
    // An empty path leaves the default language path untouched
    $this->set_language_path($path);
}

/**
 * PHP 4 style constructor, kept so that existing code calling GeSHi()
 * keeps working. It simply forwards to {@link GeSHi->__construct()}.
 *
 * @param string The source code to highlight
 * @param string The language to highlight the source with
 * @param string The path to the language file directory (deprecated)
 * @since 1.0.0
 */
function GeSHi($source = '', $language = '', $path = '') {
    $this->__construct($source, $language, $path);
}
602
/**
 * Returns the HTML-formatted message for the error recorded by the last
 * GeSHi operation, or false if no error has occurred
 *
 * The placeholders {LANGUAGE} and {PATH} in the message template are
 * replaced with the current language name and language path.
 *
 * @return string|false An error message if there has been an error, else false
 * @since 1.0.0
 */
function error() {
    if (!$this->error) {
        return false;
    }

    //Put some template variables for debugging here ...
    $placeholders = array(
        '{LANGUAGE}' => $this->language,
        '{PATH}' => $this->language_path
    );
    $template = $this->error_messages[$this->error];
    $msg = str_replace(array_keys($placeholders), array_values($placeholders), $template);

    return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
}
626
/**
 * Gets a human-readable language name (thanks to Simon Patterson
 * for the idea :))
 *
 * When the current error is GESHI_ERROR_NO_SUCH_LANG the name is suffixed
 * with " (Unknown Language)". If no language data carrying a LANG_NAME
 * entry is available, an empty name is used rather than reading a missing
 * array key.
 *
 * @return string The name for the current language
 * @since 1.0.2
 */
function get_language_name() {
    $name = isset($this->language_data['LANG_NAME']) ? $this->language_data['LANG_NAME'] : '';

    if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
        return $name . ' (Unknown Language)';
    }
    return $name;
}
640
/**
 * Stores the source code to highlight and forgets any lines that were
 * marked for extra highlighting
 *
 * @param string The source code to highlight
 * @since 1.0.0
 */
function set_source($source) {
    $this->highlight_extra_lines = array();
    $this->source = $source;
}
651
/**
 * Selects the language to highlight with and loads its language file
 *
 * The name is stripped of everything except letters, digits, "-" and "_"
 * and lowercased before it is turned into a file name below the current
 * language path. Nothing happens if that file is the one already loaded.
 * If the file is not readable, the error is set to GESHI_ERROR_NO_SUCH_LANG.
 *
 * @note since 1.0.8 this function won't reset language-settings by default anymore!
 *       if you need this set $force_reset = true
 *
 * @param string The name of the language to use
 * @param boolean Whether to forget the currently loaded language file first,
 *                so that the language is loaded again
 * @since 1.0.0
 */
function set_language($language, $force_reset = false) {
    if ($force_reset) {
        $this->loaded_language = false;
    }

    //Clean up the language name to prevent malicious code injection
    $language = strtolower(preg_replace('#[^a-zA-Z0-9\-_]#', '', $language));

    //Retrieve the full filename
    $file_name = $this->language_path . $language . '.php';

    if ($file_name != $this->loaded_language) {
        $this->language = $language;
        $this->error = false;
        $this->strict_mode = GESHI_NEVER;

        if (is_readable($file_name)) {
            // Load the language for parsing
            $this->load_language($file_name);
        } else {
            $this->error = GESHI_ERROR_NO_SUCH_LANG;
        }
    }
}
692
/**
 * Sets the path to the directory containing the language files. Note
 * that this path is relative to the directory of the script that included
 * geshi.php, NOT geshi.php itself.
 *
 * The path is silently ignored when it
 * - contains a colon, except for a single drive-letter prefix ("C:") when
 *   the directory separator is a backslash,
 * - contains a character rejected by the whitelist pattern below, or
 * - contains "/." or ".." while GESHI_SECURITY_PARANOID is enabled.
 *
 * An accepted, non-empty path gets a trailing "/" if it has none, and the
 * current language is set again so that the new path takes effect.
 *
 * @param string The path to the language directory
 * @since 1.0.0
 * @deprecated The path to the language files should now be automatically
 *             detected, so this method should no longer be needed. The
 *             1.1.X branch handles manual setting of the path differently
 *             so this method will disappear in 1.2.0.
 */
function set_language_path($path) {
    // strpos() returns 0 for a leading colon, so test against false
    // explicitly instead of relying on truthiness
    if (false !== strpos($path, ':')) {
        //Security Fix to prevent external directories using fopen wrappers.
        if (DIRECTORY_SEPARATOR == "\\") {
            // Only one colon is allowed: the one after the drive letter
            if (!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
                return;
            }
        } else {
            return;
        }
    }
    if (preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
        //Security Fix to prevent external directories using fopen wrappers.
        return;
    }
    if (GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
        //Security Fix to prevent external directories using fopen wrappers.
        return;
    }
    if (GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
        //Security Fix to prevent external directories using fopen wrappers.
        return;
    }
    if ($path) {
        $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
        $this->set_language($this->language); // otherwise set_language_path has no effect
    }
}
733
/**
 * Chooses the kind of container the highlighted code is wrapped in.
 *
 * If GESHI_HEADER_DIV is used, the code is surrounded in a "div". This
 * means more source code but more control over tab width and line-wrapping.
 * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
 * control. Default is GESHI_HEADER_PRE.
 *
 * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
 * should be outputted.
 *
 * Any value other than one of the GESHI_HEADER_* constants leaves the
 * header type unchanged and sets the error to GESHI_ERROR_INVALID_HEADER_TYPE.
 *
 * @param int The type of header to be used
 * @since 1.0.0
 */
function set_header_type($type) {
    $valid_types = array(
        GESHI_HEADER_NONE,
        GESHI_HEADER_DIV,
        GESHI_HEADER_PRE,
        GESHI_HEADER_PRE_VALID,
        GESHI_HEADER_PRE_TABLE
    );

    if (in_array($type, $valid_types)) {
        //Set that new header type
        $this->header_type = $type;
    } else {
        $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
    }
}
759
/**
 * Sets the style applied to the whole outputted code block. The style
 * should be a string of valid stylesheet declarations
 *
 * @param string The overall style for the outputted code block
 * @param boolean If true, the style is appended to the current overall
 *                style instead of replacing it
 * @since 1.0.0
 */
function set_overall_style($style, $preserve_defaults = false) {
    $this->overall_style = ($preserve_defaults) ? $this->overall_style . $style : $style;
}
776
/**
 * Stores the class name for the whole block of code, so that a
 * stylesheet can address this object's output through it
 *
 * @param string The class name to use for this block of code
 * @since 1.0.0
 */
function set_overall_class($class) {
    $this->overall_class = $class;
}
788
/**
 * Stores the id for the whole block of code, so that a stylesheet
 * can address this object's output through it
 *
 * @param string The ID to use for this block of code
 * @since 1.0.0
 */
function set_overall_id($id) {
    $this->overall_id = $id;
}
799
/**
 * Switches the use of CSS classes for highlighting on or off. Classes are
 * off by default; calling this method without arguments turns them on
 *
 * @param boolean Whether to turn classes on or not
 * @since 1.0.0
 */
function enable_classes($flag = true) {
    $this->use_classes = (bool) $flag;
}
810
/**
 * Sets the style for the actual code. This should be a string
 * containing valid stylesheet declarations. If $preserve_defaults is
 * true, the new declarations are appended to the current code style;
 * otherwise they replace it
 *
 * Note: Use this method to override any style changes you made to
 * the line numbers if you are using line numbers, else the line of
 * code will have the same style as the line number! Consult the
 * GeSHi documentation for more information about this.
 *
 * @param string The style to use for actual code
 * @param boolean Whether to merge the current styles with the new styles
 * @since 1.0.2
 */
function set_code_style($style, $preserve_defaults = false) {
    $this->code_style = ($preserve_defaults) ? $this->code_style . $style : $style;
}
833
/**
 * Sets the styles for the line numbers.
 *
 * Can be called as set_line_style($style1, $style2, $preserve_defaults)
 * or, in its short form, as set_line_style($style1, $preserve_defaults);
 * in the short form the "fancy" style is treated as an empty string.
 *
 * @param string The style for the line numbers that are "normal"
 * @param string|boolean If a string, this is the style of the line
 *        numbers that are "fancy", otherwise if boolean then this
 *        defines whether the normal styles should be merged with the
 *        new normal styles or not
 * @param boolean If set, is the flag for whether to merge the "fancy"
 *        styles with the current styles or not
 * @since 1.0.2
 */
function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
    // A boolean in second position is really the $preserve_defaults flag
    if (is_bool($style2)) {
        $preserve_defaults = $style2;
        $style2 = '';
    }

    if ($preserve_defaults) {
        // Append to the styles already in place
        $this->line_style1 .= $style1;
        $this->line_style2 .= $style2;
        return;
    }

    $this->line_style1 = $style1;
    $this->line_style2 = $style2;
}
862
/**
 * Sets whether line numbers should be displayed.
 *
 * Valid values for the first parameter are:
 *
 * - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
 * - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
 * - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
 *
 * For fancy line numbers, the second parameter is used to signal which lines
 * are to be fancy. For example, if the value of this parameter is 5 then every
 * 5th line will be fancy.
 *
 * Any other value for the first parameter sets the error to
 * GESHI_ERROR_INVALID_LINE_NUMBER_TYPE and leaves the current line number
 * settings untouched, in the same way set_header_type() rejects an invalid
 * header type.
 *
 * @param int How line numbers should be displayed
 * @param int Defines which lines are fancy
 * @since 1.0.0
 */
function enable_line_numbers($flag, $nth_row = 5) {
    if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
        && GESHI_FANCY_LINE_NUMBERS != $flag) {
        $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
        // Do not store a value that has just been reported as invalid
        return;
    }
    $this->line_numbers = $flag;
    $this->line_nth_row = $nth_row;
}
888
/**
 * Sets whether spans and other HTML markup generated by GeSHi may
 * span over multiple lines or not. Defaults to true to reduce overhead.
 * Set it to false if you want to manipulate the output or manually display
 * the code in an ordered list.
 *
 * @param boolean Whether multiline spans are allowed or not
 * @since 1.0.7.22
 */
function enable_multiline_span($flag) {
    $allowed = (bool) $flag;
    $this->allow_multiline_span = $allowed;
}
901
/**
 * Tells whether multiline spans are currently allowed, see
 * GeSHi->enable_multiline_span().
 *
 * @see enable_multiline_span
 * @return bool
 */
function get_multiline_span() {
    $allowed = $this->allow_multiline_span;
    return $allowed;
}
911
/**
 * Sets the style for a keyword group. If $preserve_defaults is
 * true, the new style is appended to the style the group already has;
 * a group without a style yet simply receives the new style.
 *
 * The group's highlighting permission is switched on if it has not been
 * set before.
 *
 * @param int The key of the keyword group to change the styles of
 * @param string The style to make the keywords
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 * @since 1.0.0
 */
function set_keyword_group_style($key, $style, $preserve_defaults = false) {
    //Set the style for this keyword group; only append when there is
    //something to append to, so no missing array key is read
    if ($preserve_defaults && isset($this->language_data['STYLES']['KEYWORDS'][$key])) {
        $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
    }

    //Update the lexic permissions
    if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
        $this->lexic_permissions['KEYWORDS'][$key] = true;
    }
}
936
/**
 * Switches highlighting for one keyword group on or off
 *
 * @param int The key of the keyword group to turn on or off
 * @param boolean Whether to turn highlighting for that group on or off
 * @since 1.0.0
 */
function set_keyword_group_highlighting($key, $flag = true) {
    $this->lexic_permissions['KEYWORDS'][$key] = (bool) $flag;
}
947
/**
 * Sets the styles for comment groups. If $preserve_defaults is
 * true, the new style is appended to the style the group already has;
 * a group without a style yet simply receives the new style.
 *
 * @param int The key of the comment group to change the styles of
 * @param string The style to make the comments
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 * @since 1.0.0
 */
function set_comments_style($key, $style, $preserve_defaults = false) {
    // Only append when there is something to append to, so no missing
    // array key is read
    if ($preserve_defaults && isset($this->language_data['STYLES']['COMMENTS'][$key])) {
        $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['COMMENTS'][$key] = $style;
    }
}
966
/**
 * Switches highlighting for one comment group on or off
 *
 * @param int The key of the comment group to turn on or off
 * @param boolean Whether to turn highlighting for that group on or off
 * @since 1.0.0
 */
function set_comments_highlighting($key, $flag = true) {
    $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
}
977
978	/**
979	* Sets the styles for escaped characters. If $preserve_defaults is
980	* true, then styles are merged with the default styles, with the
981	* user defined styles having priority
982	*
983	* @param string The style to make the escape characters
984	* @param boolean Whether to merge the new styles with the old or just
985	* to overwrite them
986	* @since 1.0.0
987	*/
function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
    // $group selects which escape character style entry is changed (0 by default).
    // Only append when that entry already exists: ".=" on a missing index
    // raises an "undefined index" notice (a warning on PHP 8).
    if ($preserve_defaults && isset($this->language_data['STYLES']['ESCAPE_CHAR'][$group])) {
        $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
    }
}
995
996	/**
997	* Turns highlighting on/off for escaped characters
998	*
999	* @param boolean Whether to turn highlighting for escape characters on or off
1000	* @since 1.0.0
1001	*/
function set_escape_characters_highlighting($flag = true) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $enabled = (bool) $flag;
    $this->lexic_permissions['ESCAPE_CHAR'] = $enabled;
}
1005
1006	/**
1007	* Sets the styles for brackets. If $preserve_defaults is
1008	* true, then styles are merged with the default styles, with the
1009	* user defined styles having priority
1010	*
1011	* This method is DEPRECATED: use set_symbols_style instead.
1012	* This method will be removed in 1.2.X
1013	*
1014	* @param string The style to make the brackets
1015	* @param boolean Whether to merge the new styles with the old or just
1016	* to overwrite them
1017	* @since 1.0.0
1018	* @deprecated In favour of set_symbols_style
1019	*/
function set_brackets_style($style, $preserve_defaults = false) {
    // Brackets only ever use style entry 0.
    // Only append when that entry already exists: ".=" on a missing index
    // raises an "undefined index" notice (a warning on PHP 8).
    if ($preserve_defaults && isset($this->language_data['STYLES']['BRACKETS'][0])) {
        $this->language_data['STYLES']['BRACKETS'][0] .= $style;
    } else {
        $this->language_data['STYLES']['BRACKETS'][0] = $style;
    }
}
1027
1028	/**
1029	* Turns highlighting on/off for brackets
1030	*
1031	* This method is DEPRECATED: use set_symbols_highlighting instead.
1032	* This method will be removed in 1.2.X
1033	*
1034	* @param boolean Whether to turn highlighting for brackets on or off
1035	* @since 1.0.0
1036	* @deprecated In favour of set_symbols_highlighting
1037	*/
function set_brackets_highlighting($flag) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $enabled = (bool) $flag;
    $this->lexic_permissions['BRACKETS'] = $enabled;
}
1041
1042	/**
1043	* Sets the styles for symbols. If $preserve_defaults is
1044	* true, then styles are merged with the default styles, with the
1045	* user defined styles having priority
1046	*
1047	* @param string The style to make the symbols
1048	* @param boolean Whether to merge the new styles with the old or just
1049	* to overwrite them
1050	* @param int Tells the group of symbols for which style should be set.
1051	* @since 1.0.1
1052	*/
function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
    // Update the style of the requested symbol group. Only append when a
    // style already exists for it: ".=" on a missing index raises an
    // "undefined index" notice (a warning on PHP 8).
    if ($preserve_defaults && isset($this->language_data['STYLES']['SYMBOLS'][$group])) {
        $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
    }

    // For backward compatibility: symbol group 0 also drives the legacy
    // bracket style, so mirror the change there.
    if (0 == $group) {
        $this->set_brackets_style($style, $preserve_defaults);
    }
}
1066
1067	/**
1068	* Turns highlighting on/off for symbols
1069	*
1070	* @param boolean Whether to turn highlighting for symbols on or off
1071	* @since 1.0.0
1072	*/
function set_symbols_highlighting($flag) {
    // Record the symbol permission as a strict boolean
    $enabled = (bool) $flag;
    $this->lexic_permissions['SYMBOLS'] = $enabled;

    // Backward compatibility: the legacy bracket switch follows the symbols
    $this->set_brackets_highlighting($flag);
}
1080
1081	/**
1082	* Sets the styles for strings. If $preserve_defaults is
1083	* true, then styles are merged with the default styles, with the
1084	* user defined styles having priority
1085	*
1086	* @param string The style to make the strings
1087	* @param boolean Whether to merge the new styles with the old or just
1088	* to overwrite them
1089	* @param int Tells the group of strings for which style should be set.
1090	* @since 1.0.0
1091	*/
function set_strings_style($style, $preserve_defaults = false, $group = 0) {
    // Only append when a style already exists for this string group:
    // ".=" on a missing index raises an "undefined index" notice
    // (a warning on PHP 8), so fall back to a plain assignment instead.
    if ($preserve_defaults && isset($this->language_data['STYLES']['STRINGS'][$group])) {
        $this->language_data['STYLES']['STRINGS'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['STRINGS'][$group] = $style;
    }
}
1099
1100	/**
1101	* Turns highlighting on/off for strings
1102	*
1103	* @param boolean Whether to turn highlighting for strings on or off
1104	* @since 1.0.0
1105	*/
function set_strings_highlighting($flag) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $enabled = (bool) $flag;
    $this->lexic_permissions['STRINGS'] = $enabled;
}
1109
1110	/**
1111	* Sets the styles for strict code blocks. If $preserve_defaults is
1112	* true, then styles are merged with the default styles, with the
1113	* user defined styles having priority
1114	*
1115	* @param string The style to make the script blocks
1116	* @param boolean Whether to merge the new styles with the old or just
1117	* to overwrite them
1118	* @param int Tells the group of script blocks for which style should be set.
1119	* @since 1.0.8.4
1120	*/
function set_script_style($style, $preserve_defaults = false, $group = 0) {
    // Update the style of the requested script block group. Only append
    // when a style already exists for it: ".=" on a missing index raises an
    // "undefined index" notice (a warning on PHP 8).
    if ($preserve_defaults && isset($this->language_data['STYLES']['SCRIPT'][$group])) {
        $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['SCRIPT'][$group] = $style;
    }
}
1129
1130	/**
1131	* Sets the styles for numbers. If $preserve_defaults is
1132	* true, then styles are merged with the default styles, with the
1133	* user defined styles having priority
1134	*
1135	* @param string The style to make the numbers
1136	* @param boolean Whether to merge the new styles with the old or just
1137	* to overwrite them
1138	* @param int Tells the group of numbers for which style should be set.
1139	* @since 1.0.0
1140	*/
function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
    // Only append when a style already exists for this number group:
    // ".=" on a missing index raises an "undefined index" notice
    // (a warning on PHP 8), so fall back to a plain assignment instead.
    if ($preserve_defaults && isset($this->language_data['STYLES']['NUMBERS'][$group])) {
        $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['NUMBERS'][$group] = $style;
    }
}
1148
1149	/**
1150	* Turns highlighting on/off for numbers
1151	*
1152	* @param boolean Whether to turn highlighting for numbers on or off
1153	* @since 1.0.0
1154	*/
function set_numbers_highlighting($flag) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $enabled = (bool) $flag;
    $this->lexic_permissions['NUMBERS'] = $enabled;
}
1158
1159	/**
1160	* Sets the styles for methods. $key is a number that references the
1161	* appropriate "object splitter" - see the language file for the language
1162	* you are highlighting to get this number. If $preserve_defaults is
1163	* true, then styles are merged with the default styles, with the
1164	* user defined styles having priority
1165	*
1166	* @param int The key of the object splitter to change the styles of
1167	* @param string The style to make the methods
1168	* @param boolean Whether to merge the new styles with the old or just
1169	* to overwrite them
1170	* @since 1.0.0
1171	*/
function set_methods_style($key, $style, $preserve_defaults = false) {
    // Only append when a style already exists for this object splitter:
    // ".=" on a missing index raises an "undefined index" notice
    // (a warning on PHP 8), so fall back to a plain assignment instead.
    if ($preserve_defaults && isset($this->language_data['STYLES']['METHODS'][$key])) {
        $this->language_data['STYLES']['METHODS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['METHODS'][$key] = $style;
    }
}
1179
1180	/**
1181	* Turns highlighting on/off for methods
1182	*
1183	* @param boolean Whether to turn highlighting for methods on or off
1184	* @since 1.0.0
1185	*/
function set_methods_highlighting($flag) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $enabled = (bool) $flag;
    $this->lexic_permissions['METHODS'] = $enabled;
}
1189
1190	/**
1191	* Sets the styles for regexps. If $preserve_defaults is
1192	* true, then styles are merged with the default styles, with the
1193	* user defined styles having priority
1194	*
1195	* @param string The style to make the regular expression matches
1196	* @param boolean Whether to merge the new styles with the old or just
1197	* to overwrite them
1198	* @since 1.0.0
1199	*/
function set_regexps_style($key, $style, $preserve_defaults = false) {
    // $key selects the regular expression group whose style is changed.
    // Only append when a style already exists for it: ".=" on a missing
    // index raises an "undefined index" notice (a warning on PHP 8).
    if ($preserve_defaults && isset($this->language_data['STYLES']['REGEXPS'][$key])) {
        $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['REGEXPS'][$key] = $style;
    }
}
1207
1208	/**
1209	* Turns highlighting on/off for regexps
1210	*
1211	* @param int The key of the regular expression group to turn on or off
1212	* @param boolean Whether to turn highlighting for the regular expression group on or off
1213	* @since 1.0.0
1214	*/
function set_regexps_highlighting($key, $flag) {
    // Store a strict boolean for the given regular expression group
    $enabled = (bool) $flag;
    $this->lexic_permissions['REGEXPS'][$key] = $enabled;
}
1218
1219	/**
1220	* Sets whether a set of keywords are checked for in a case sensitive manner
1221	*
1222	* @param int The key of the keyword group to change the case sensitivity of
1223	* @param boolean Whether to check in a case sensitive manner or not
1224	* @since 1.0.0
1225	*/
function set_case_sensitivity($key, $case) {
    // Store a strict boolean for the given keyword group
    $sensitive = (bool) $case;
    $this->language_data['CASE_SENSITIVE'][$key] = $sensitive;
}
1229
1230	/**
1231	* Sets the case that keywords should use when found. Use the constants:
1232	*
1233	* - GESHI_CAPS_NO_CHANGE: leave keywords as-is
1234	* - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
1235	* - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
1236	*
1237	* @param int A constant specifying what to do with matched keywords
1238	* @since 1.0.1
1239	*/
function set_case_keywords($case) {
    // Anything that does not match one of the three known modes is ignored
    $allowed = array(GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER);
    if (!in_array($case, $allowed)) {
        return;
    }

    $this->language_data['CASE_KEYWORDS'] = $case;
}
1246
1247	/**
1248	* Sets how many spaces a tab is substituted for
1249	*
1250	* Widths below one are replaced by the default width of 8
1251	*
1252	* @param int The tab width
1253	* @since 1.0.0
1254	*/
function set_tab_width($width) {
    $requested = intval($width);

    // Anything smaller than one column is invalid: use the default of 8
    $this->tab_width = ($requested < 1) ? 8 : $requested;
}
1264
1265	/**
1266	* Sets whether or not to use the tab-stop width specified by the language
1267	*
1268	* @param boolean Whether to use language-specific tab-stop widths
1269	* @since 1.0.7.20
1270	*/
function set_use_language_tab_width($use) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $this->use_language_tab_width = $use ? true : false;
}
1274
1275	/**
1276	* Returns the tab width to use, based on the current language and user
1277	* preference
1278	*
1279	* @return int Tab width
1280	* @since 1.0.7.20
1281	*/
function get_real_tab_width() {
    // The language's own width wins only when the user opted in AND the
    // language data actually provides one
    if ($this->use_language_tab_width && isset($this->language_data['TAB_WIDTH'])) {
        return $this->language_data['TAB_WIDTH'];
    }

    return $this->tab_width;
}
1290
1291	/**
1292	* Enables/disables strict highlighting. Default is off, calling this
1293	* method without parameters will turn it on. See documentation
1294	* for more details on strict mode and where to use it.
1295	*
1296	* @param boolean Whether to enable strict mode or not
1297	* @since 1.0.0
1298	*/
function enable_strict_mode($mode = true) {
    // The setting is only adjustable when the language marks strict mode
    // as GESHI_MAYBE; otherwise the call is a no-op
    if ($this->language_data['STRICT_MODE_APPLIES'] != GESHI_MAYBE) {
        return;
    }

    if ($mode) {
        $this->strict_mode = GESHI_ALWAYS;
    } else {
        $this->strict_mode = GESHI_NEVER;
    }
}
1304
1305	/**
1306	* Disables all highlighting
1307	*
1308	* @since 1.0.0
1309	* @todo Rewrite with array traversal
1310	* @deprecated In favour of enable_highlighting
1311	*/
function disable_highlighting() {
    // Thin wrapper: delegate to enable_highlighting() with the flag off
    $enabled = false;
    $this->enable_highlighting($enabled);
}
1315
1316	/**
1317	* Enables all highlighting
1318	*
1319	* The optional flag parameter was added in version 1.0.7.21 and can be used
1320	* to enable (true) or disable (false) all highlighting.
1321	*
1322	* @since 1.0.0
1323	* @param boolean A flag specifying whether to enable or disable all highlighting
1324	* @todo Rewrite with array traversal
1325	*/
function enable_highlighting($flag = true) {
    $state = (bool) $flag;

    foreach ($this->lexic_permissions as $type => $current) {
        if (!is_array($current)) {
            // Single switch for the whole lexic type
            $this->lexic_permissions[$type] = $state;
            continue;
        }

        // One switch per group: flip each of them
        foreach (array_keys($current) as $group) {
            $this->lexic_permissions[$type][$group] = $state;
        }
    }

    // Context blocks
    $this->enable_important_blocks = $state;
}
1341
1342	/**
1343	* Given a file extension, this method returns either a valid geshi language
1344	* name, or the empty string if it couldn't be found
1345	*
1346	* @param string The extension to get a language name for
1347	* @param array A lookup array to use instead of the default one
1348	* @since 1.0.5
1349	* @todo Re-think about how this method works (maybe make it private and/or make it
1350	* a extension->lang lookup?)
1351	* @todo static?
1352	*/
function get_language_name_from_extension( $extension, $lookup = array() ) {
    // Fall back to the built-in table when no usable lookup was supplied
    if ( !is_array($lookup) || empty($lookup)) {
        $lookup = array(
            'abap' => array('abap'),
            'actionscript' => array('as'),
            'ada' => array('a', 'ada', 'adb', 'ads'),
            'apache' => array('conf'),
            'asm' => array('ash', 'asm', 'inc'),
            'asp' => array('asp'),
            'bash' => array('sh'),
            'bf' => array('bf'),
            'c' => array('c', 'h'),
            'c_mac' => array('c', 'h'),
            'caddcl' => array(),
            'cadlisp' => array(),
            'cdfg' => array('cdfg'),
            'cobol' => array('cbl'),
            'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
            'csharp' => array('cs'),
            'css' => array('css'),
            'd' => array('d'),
            'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
            'diff' => array('diff', 'patch'),
            'dos' => array('bat', 'cmd'),
            'gdb' => array('kcrash', 'crash', 'bt'),
            'gettext' => array('po', 'pot'),
            'gml' => array('gml'),
            'gnuplot' => array('plt'),
            'groovy' => array('groovy'),
            'haskell' => array('hs'),
            'html4strict' => array('html', 'htm'),
            'ini' => array('ini', 'desktop'),
            'java' => array('java'),
            'javascript' => array('js'),
            'klonec' => array('kl1'),
            'klonecpp' => array('klx'),
            'latex' => array('tex'),
            'lisp' => array('lisp'),
            'lua' => array('lua'),
            'matlab' => array('m'),
            'mpasm' => array(),
            'mysql' => array('sql'),
            'nsis' => array(),
            'objc' => array(),
            'oobas' => array(),
            'oracle8' => array(),
            'oracle10' => array(),
            'pascal' => array('pas'),
            'perl' => array('pl', 'pm'),
            'php' => array('php', 'php5', 'phtml', 'phps'),
            'povray' => array('pov'),
            'providex' => array('pvc', 'pvx'),
            'prolog' => array('pl'),
            'python' => array('py'),
            'qbasic' => array('bi'),
            'reg' => array('reg'),
            'ruby' => array('rb'),
            'sas' => array('sas'),
            'scala' => array('scala'),
            'scheme' => array('scm'),
            'scilab' => array('sci'),
            'smalltalk' => array('st'),
            'smarty' => array(),
            'tcl' => array('tcl'),
            'vb' => array('bas'),
            'vbnet' => array(),
            'visualfoxpro' => array(),
            'whitespace' => array('ws'),
            'xml' => array('xml', 'svg', 'xrc'),
            'z80' => array('z80', 'asm', 'inc')
        );
    }

    // Compare extensions as exact strings. A loose in_array() lets
    // numeric-looking strings match each other ('1e1' == '10') and, on
    // PHP < 8, lets 0 match any non-numeric extension.
    $extension = (string) $extension;

    // The first language listing the extension wins, so table order matters
    // for extensions shared between languages (e.g. 'h', 'pas', 'pl').
    foreach ($lookup as $lang => $extensions) {
        // (array) tolerates a custom lookup that maps a language to a single
        // extension string instead of a list
        foreach ((array) $extensions as $candidate) {
            if ((string) $candidate === $extension) {
                return $lang;
            }
        }
    }

    // Unknown extension
    return '';
}
1433
1434	/**
1435	* Given a file name, this method loads its contents in, and attempts
1436	* to set the language automatically. An optional lookup table can be
1437	* passed for looking up the language name. If not specified a default
1438	* table is used
1439	*
1440	* The language table is in the form
1441	* <pre>array(
1442	* 'lang_name' => array('extension', 'extension', ...),
1443	* 'lang_name' ...
1444	* );</pre>
1445	*
1446	* @param string The filename to load the source from
1447	* @param array A lookup array to use instead of the default one
1448	* @todo Complete rethink of this and above method
1449	* @since 1.0.5
1450	*/
function load_from_file($file_name, $lookup = array()) {
    // is_readable() is also true for directories, so insist on a regular file
    if (is_file($file_name) && is_readable($file_name)) {
        $contents = file_get_contents($file_name);

        // The read can still fail after the checks above; never pass the
        // boolean false on as source code
        if ($contents !== false) {
            $this->set_source($contents);

            // pathinfo() only inspects the last path component, so a dot in a
            // directory name ("conf.d/hosts") is not mistaken for an extension
            $extension = pathinfo($file_name, PATHINFO_EXTENSION);
            $this->set_language($this->get_language_name_from_extension($extension, $lookup));
            return;
        }
    }

    $this->error = GESHI_ERROR_FILE_NOT_READABLE;
}
1459
1460	/**
1461	* Adds a keyword to a keyword group for highlighting
1462	*
1463	* @param int The key of the keyword group to add the keyword to
1464	* @param string The word to add to the keyword group
1465	* @since 1.0.0
1466	*/
function add_keyword($key, $word) {
    // A group that does not exist yet starts out empty. Passing an unset
    // value to in_array() is a warning on older PHP and a TypeError on PHP 8.
    if (!isset($this->language_data['KEYWORDS'][$key])) {
        $this->language_data['KEYWORDS'][$key] = array();
    }

    // Strict comparison: numeric-looking keywords such as '0e1' and '0e2'
    // must not be treated as the same word
    if (in_array($word, $this->language_data['KEYWORDS'][$key], true)) {
        return;
    }

    $this->language_data['KEYWORDS'][$key][] = $word;

    //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
    // Only possible when this group actually has cached patterns; without
    // that check count() would run on an unset value and the word would be
    // written to a bogus index -1.
    if ($this->parse_cache_built && !empty($this->language_data['CACHED_KEYWORD_LISTS'][$key])) {
        $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
        $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
    }
}
1478
1479	/**
1480	* Removes a keyword from a keyword group
1481	*
1482	* @param int The key of the keyword group to remove the keyword from
1483	* @param string The word to remove from the keyword group
1484	* @param bool Whether to automatically recompile the optimized regexp list or not.
1485	* Note: if you set this to false and @see GeSHi->parse_code() was already called once,
1486	* for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
1487	* or the removed keyword will stay in cache and still be highlighted! On the other hand
1488	* it might be too expensive to recompile the regexp list for every removal if you want to
1489	* remove a lot of keywords.
1490	* @since 1.0.0
1491	*/
function remove_keyword($key, $word, $recompile = true) {
    // Nothing to remove from a group that does not exist. Passing an unset
    // value to array_search() is a warning on older PHP and a TypeError on PHP 8.
    if (!isset($this->language_data['KEYWORDS'][$key])) {
        return;
    }

    // Strict search so numeric-looking keywords are not confused with each other
    $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key], true);
    if ($key_to_remove === false) {
        return;
    }

    unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);

    //NEW in 1.0.8, optionally recompile keyword group
    if ($recompile && $this->parse_cache_built) {
        $this->optimize_keyword_group($key);
    }
}
1503
1504	/**
1505	* Creates a new keyword group
1506	*
1507	* @param int The key of the keyword group to create
1508	* @param string The styles for the keyword group
1509	* @param boolean Whether the keyword group is case sensitive or not
1510	* @param array The words to use for the keyword group
1511	* @since 1.0.0
1512	*/
function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
    $words = (array) $words;
    if (empty($words)) {
        // empty word lists mess up highlighting
        return false;
    }

    //Add the new keyword group internally
    $this->language_data['KEYWORDS'][$key] = $words;
    $this->lexic_permissions['KEYWORDS'][$key] = true;
    $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
    $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;

    //NEW in 1.0.8, cache keyword regexp
    if ($this->parse_cache_built) {
        $this->optimize_keyword_group($key);
    }

    // Report success explicitly. The rejection path above returns false, but
    // success used to fall through and return null, so a caller could not
    // tell the two outcomes apart with a simple truth test.
    return true;
}
1531
1532	/**
1533	* Removes a keyword group
1534	*
1535	* @param int The key of the keyword group to remove
1536	* @since 1.0.0
1537	*/
function remove_keyword_group ($key) {
    // Drop every trace of the group in one go: the word list, its
    // permission flag, its case setting, its style and (NEW in 1.0.8)
    // its cached regexp list
    unset(
        $this->language_data['KEYWORDS'][$key],
        $this->lexic_permissions['KEYWORDS'][$key],
        $this->language_data['CASE_SENSITIVE'][$key],
        $this->language_data['STYLES']['KEYWORDS'][$key],
        $this->language_data['CACHED_KEYWORD_LISTS'][$key]
    );
}
1548
1549	/**
1550	* compile optimized regexp list for keyword group
1551	*
1552	* @param int The key of the keyword group to compile & optimize
1553	* @since 1.0.8
1554	*/
function optimize_keyword_group($key) {
    // Rebuild the cached pattern list for this group from its keywords
    $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
        $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);

    // Resolve SPACE_AS_WHITESPACE: a setting on the group itself overrides
    // the one that applies to all keyword groups; the default is off.
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
        $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
    } elseif (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
        $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
    } else {
        $space_as_whitespace = false;
    }

    if (!$space_as_whitespace) {
        return;
    }

    // Let a literal space in a keyword match any run of whitespace
    foreach ($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $index => $pattern) {
        $this->language_data['CACHED_KEYWORD_LISTS'][$key][$index] = str_replace(" ", "\\s+", $pattern);
    }
}
1578
1579	/**
1580	* Sets the content of the header block
1581	*
1582	* @param string The content of the header block
1583	* @since 1.0.2
1584	*/
function set_header_content($content) {
    // Stored verbatim; no validation or escaping happens here
    $this->header_content = $content;
}
1588
1589	/**
1590	* Sets the content of the footer block
1591	*
1592	* @param string The content of the footer block
1593	* @since 1.0.2
1594	*/
function set_footer_content($content) {
    // Stored verbatim; no validation or escaping happens here
    $this->footer_content = $content;
}
1598
1599	/**
1600	* Sets the style for the header content
1601	*
1602	* @param string The style for the header content
1603	* @since 1.0.2
1604	*/
function set_header_content_style($style) {
    // Stored verbatim; the style string is not checked here
    $this->header_content_style = $style;
}
1608
1609	/**
1610	* Sets the style for the footer content
1611	*
1612	* @param string The style for the footer content
1613	* @since 1.0.2
1614	*/
function set_footer_content_style($style) {
    // Stored verbatim; the style string is not checked here
    $this->footer_content_style = $style;
}
1618
1619	/**
1620	* Sets whether to force a surrounding block around
1621	* the highlighted code or not
1622	*
1623	* @param boolean Tells whether to enable or disable this feature
1624	* @since 1.0.7.20
1625	*/
function enable_inner_code_block($flag) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $this->force_code_block = $flag ? true : false;
}
1629
1630	/**
1631	* Sets the base URL to be used for keywords
1632	*
1633	* @param int The key of the keyword group to set the URL for
1634	* @param string The URL to set for the group. If {FNAME} is in
1635	* the url somewhere, it is replaced by the keyword
1636	* that the URL is being made for
1637	* @since 1.0.2
1638	*/
function set_url_for_keyword_group($group, $url) {
    // One URL template per keyword group, stored as given
    $this->language_data['URLS'][$group] = $url;
}
1642
1643	/**
1644	* Sets styles for links in code
1645	*
1646	* @param int A constant that specifies what state the style is being
1647	* set for - e.g. :hover or :visited
1648	* @param string The styles to use for that state
1649	* @since 1.0.2
1650	*/
function set_link_styles($type, $styles) {
    // One style string per link state, keyed by the state constant
    $this->link_styles[$type] = $styles;
}
1654
1655	/**
1656	* Sets the target for links in code
1657	*
1658	* @param string The target for links in the code, e.g. _blank
1659	* @since 1.0.3
1660	*/
function set_link_target($target) {
    // An empty target clears the attribute; otherwise store the complete
    // attribute fragment, leading space included
    $this->link_target = $target ? ' target="' . $target . '"' : '';
}
1668
1669	/**
1670	* Sets styles for important parts of the code
1671	*
1672	* @param string The styles to use on important parts of the code
1673	* @since 1.0.2
1674	*/
function set_important_styles($styles) {
    // Stored verbatim; the style string is not checked here
    $this->important_styles = $styles;
}
1678
1679	/**
1680	* Sets whether context-important blocks are highlighted
1681	*
1682	* @param boolean Tells whether to enable or disable highlighting of important blocks
1683	* @todo REMOVE THIS SHIZ FROM GESHI!
1684	* @deprecated
1685	* @since 1.0.2
1686	*/
function enable_important_blocks($flag) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $enabled = (bool) $flag;
    $this->enable_important_blocks = $enabled;
}
1690
1691	/**
1692	* Whether CSS IDs should be added to each line
1693	*
1694	* @param boolean If true, IDs will be added to each line.
1695	* @since 1.0.2
1696	*/
function enable_ids($flag = true) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $enabled = (bool) $flag;
    $this->add_ids = $enabled;
}
1700
1701	/**
1702	* Specifies which lines to highlight extra
1703	*
1704	* The extra style parameter was added in 1.0.7.21.
1705	*
1706	* @param mixed An array of line numbers to highlight, or just a line
1707	* number on its own.
1708	* @param string A string specifying the style to use for this line.
1709	* If null is specified, the default style is used.
1710	* If false is specified, the line will be removed from
1711	* special highlighting
1712	* @since 1.0.2
1713	* @todo Some data replication here that could be cut down on
1714	*/
function highlight_lines_extra($lines, $style = null) {
    // A list of lines is handled one entry at a time through recursion
    if (is_array($lines)) {
        foreach ($lines as $entry) {
            $this->highlight_lines_extra($entry, $style);
        }
        return;
    }

    // Mark the single line as specially highlighted
    $line_no = intval($lines);
    $this->highlight_extra_lines[$line_no] = $line_no;

    if ($style === false) {
        // false: take the line out of special highlighting altogether
        unset($this->highlight_extra_lines[$line_no]);
        unset($this->highlight_extra_lines_styles[$line_no]);
        return;
    }

    if ($style === null) {
        // null: drop any per-line style so the default one applies
        unset($this->highlight_extra_lines_styles[$line_no]);
        return;
    }

    // Anything else is the custom style for this line
    $this->highlight_extra_lines_styles[$line_no] = $style;
}
1737
1738	/**
1739	* Sets the style for extra-highlighted lines
1740	*
1741	* @param string The style for extra-highlighted lines
1742	* @since 1.0.2
1743	*/
function set_highlight_lines_extra_style($styles) {
    // Stored verbatim; the style string is not checked here
    $this->highlight_extra_lines_style = $styles;
}
1747
1748	/**
1749	* Sets the line-ending
1750	*
1751	* @param string The new line-ending
1752	* @since 1.0.2
1753	*/
function set_line_ending($line_ending) {
    // Always stored as a string
    $this->line_ending = strval($line_ending);
}
1757
1758	/**
1759	* Sets what number line numbers should start at. Should
1760	* be a positive integer, and will be converted to one.
1761	*
1762	* <b>Warning:</b> Using this method will add the "start"
1763	* attribute to the <ol> that is used for line numbering.
1764	* This is <b>not</b> valid XHTML strict, so if that's what you
1765	* care about then don't use this method. Firefox is getting
1766	* support for the CSS method of doing this in 1.1 and Opera
1767	* has support for the CSS method, but (of course) IE doesn't
1768	* so it's not worth doing it the CSS way yet.
1769	*
1770	* @param int The number to start line numbers at
1771	* @since 1.0.2
1772	*/
function start_line_numbers_at($number) {
    // Coerce to an integer first, then drop the sign
    $start = (int) $number;
    $this->line_numbers_start = abs($start);
}
1776
1777	/**
1778	* Sets the encoding used for htmlspecialchars(), for international
1779	* support.
1780	*
1781	* NOTE: This is not needed for now because htmlspecialchars() is not
1782	* being used (it has a security hole in PHP4 that has not been patched).
1783	* Maybe in a future version it may make a return for speed reasons, but
1784	* I doubt it.
1785	*
1786	* @param string The encoding to use for the source
1787	* @since 1.0.3
1788	*/
function set_encoding($encoding) {
    // An empty value leaves the current encoding untouched
    if (!$encoding) {
        return;
    }

    $this->encoding = strtolower($encoding);
}
1794
1795	/**
1796	* Turns linking of keywords on or off.
1797	*
1798	* @param boolean If true, links will be added to keywords
1799	* @since 1.0.2
1800	*/
function enable_keyword_links($enable = true) {
    // Store a strict boolean, whatever truthy/falsy value was passed in
    $this->keyword_links = $enable ? true : false;
}
1804
1805	/**
1806	* Setup caches needed for styling. This is automatically called in
1807	* parse_code() and get_stylesheet() when appropriate. This function helps
1808	* stylesheet generators as they rely on some style information being
1809	* preprocessed
1810	*
1811	* @since 1.0.8
1812	* @access private
1813	*/
function build_style_cache() {
    // The cache is only needed while number highlighting is enabled
    if (!$this->lexic_permissions['NUMBERS']) {
        return;
    }

    // Languages that say nothing about numbers start from "no flags"
    if (!isset($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS'] = 0;
    }

    // An array definition is taken over as the cache without changes
    if (is_array($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
        return;
    }

    $this->language_data['NUMBERS_CACHE'] = array();

    // No flags at all: fall back to basic integers plus non-scientific floats
    if (!$this->language_data['NUMBERS']) {
        $this->language_data['NUMBERS'] = GESHI_NUMBER_INT_BASIC | GESHI_NUMBER_FLT_NONSCI;
    }

    // Walk the flag word from the lowest bit upwards until no bits are left
    $bit = 0;
    $remaining = $this->language_data['NUMBERS'];
    while ($remaining > 0) {
        $mask = 1 << $bit;

        // A style keyed by the flag value is moved to the bit position
        if (isset($this->language_data['STYLES']['NUMBERS'][$mask])) {
            $this->language_data['STYLES']['NUMBERS'][$bit] =
                $this->language_data['STYLES']['NUMBERS'][$mask];
            unset($this->language_data['STYLES']['NUMBERS'][$mask]);
        }

        // Only flags that are actually set take part in highlighting
        if ($remaining & 1) {
            if (isset($this->language_data['STYLES']['NUMBERS'][$bit])) {
                // The format has its own style group
                $this->language_data['NUMBERS_CACHE'][$bit] = $mask;
            } else {
                // No dedicated style: fold the format into group 0
                if (!isset($this->language_data['NUMBERS_CACHE'][0])) {
                    $this->language_data['NUMBERS_CACHE'][0] = 0;
                }
                $this->language_data['NUMBERS_CACHE'][0] |= $mask;
            }
        }

        ++$bit;
        $remaining >>= 1;
    }
}
1857
1858	/**
1859	* Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
1860	* This function makes stylesheet generators much faster as they do not need these caches.
1861	*
1862	* @since 1.0.8
1863	* @access private
1864	*/
1865	function build_parse_cache() {
1866	// cache symbol regexp
1867	//As this is a costy operation, we avoid doing it for multiple groups ...
1868	//Instead we perform it for all symbols at once.
1869	//
1870	//For this to work, we need to reorganize the data arrays.
1871	if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
1872	$this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;
1873
1874	$this->language_data['SYMBOL_DATA'] = array();
1875	$symbol_preg_multi = array(); // multi char symbols
1876	$symbol_preg_single = array(); // single char symbols
1877	foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
1878	if (is_array($symbols)) {
1879	foreach ($symbols as $sym) {
1880	$sym = $this->hsc($sym);
1881	if (!isset($this->language_data['SYMBOL_DATA'][$sym])) {
1882	$this->language_data['SYMBOL_DATA'][$sym] = $key;
1883	if (isset($sym[1])) { // multiple chars
1884	$symbol_preg_multi[] = preg_quote($sym, '/');
1885	} else { // single char
1886	if ($sym == '-') {
1887	// don't trigger range out of order error
1888	$symbol_preg_single[] = '\-';
1889	} else {
1890	$symbol_preg_single[] = preg_quote($sym, '/');
1891	}
1892	}
1893	}
1894	}
1895	} else {
1896	$symbols = $this->hsc($symbols);
1897	if (!isset($this->language_data['SYMBOL_DATA'][$symbols])) {
1898	$this->language_data['SYMBOL_DATA'][$symbols] = 0;
1899	if (isset($symbols[1])) { // multiple chars
1900	$symbol_preg_multi[] = preg_quote($symbols, '/');
1901	} else if ($symbols == '-') {
1902	// don't trigger range out of order error
1903	$symbol_preg_single[] = '\-';
1904	} else { // single char
1905	$symbol_preg_single[] = preg_quote($symbols, '/');
1906	}
1907	}
1908	}
1909	}
1910
1911	//Now we have an array with each possible symbol as the key and the style as the actual data.
1912	//This way we can set the correct style just the moment we highlight ...
1913	//
1914	//Now we need to rewrite our array to get a search string that
1915	$symbol_preg = array();
1916	if (!empty($symbol_preg_multi)) {
1917	rsort($symbol_preg_multi);
1918	$symbol_preg[] = implode('\|', $symbol_preg_multi);
1919	}
1920	if (!empty($symbol_preg_single)) {
1921	rsort($symbol_preg_single);
1922	$symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
1923	}
1924	$this->language_data['SYMBOL_SEARCH'] = implode("\|", $symbol_preg);
1925	}
1926
1927	// cache optimized regexp for keyword matching
1928	// remove old cache
1929	$this->language_data['CACHED_KEYWORD_LISTS'] = array();
1930	foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
1931	if (!isset($this->lexic_permissions['KEYWORDS'][$key]) \|\|
1932	$this->lexic_permissions['KEYWORDS'][$key]) {
1933	$this->optimize_keyword_group($key);
1934	}
1935	}
1936
1937	// brackets
1938	if ($this->lexic_permissions['BRACKETS']) {
1939	$this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');
1940	if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
1941	$this->language_data['CACHE_BRACKET_REPLACE'] = array(
1942	'<\| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">[\|>',
1943	'<\| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">]\|>',
1944	'<\| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">(\|>',
1945	'<\| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">)\|>',
1946	'<\| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">{\|>',
1947	'<\| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">}\|>',
1948	);
1949	}
1950	else {
1951	$this->language_data['CACHE_BRACKET_REPLACE'] = array(
1952	'<\| class="br0">[\|>',
1953	'<\| class="br0">]\|>',
1954	'<\| class="br0">(\|>',
1955	'<\| class="br0">)\|>',
1956	'<\| class="br0">{\|>',
1957	'<\| class="br0">}\|>',
1958	);
1959	}
1960	}
1961
1962	//Build the parse cache needed to highlight numbers appropriate
1963	if($this->lexic_permissions['NUMBERS']) {
1964	//Check if the style rearrangements have been processed ...
1965	//This also does some preprocessing to check which style groups are useable ...
1966	if(!isset($this->language_data['NUMBERS_CACHE'])) {
1967	$this->build_style_cache();
1968	}
1969
1970	//Number format specification
1971	//All this formats are matched case-insensitively!
1972	static $numbers_format = array(
1973	GESHI_NUMBER_INT_BASIC =>
1974	'(?:(?<![0-9a-z_\.%])\|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?\|0)(?![0-9a-z]\|\.(?:[eE][+\-]?)?\d)',
1975	GESHI_NUMBER_INT_CSTYLE =>
1976	'(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?\|0)l(?![0-9a-z]\|\.(?:[eE][+\-]?)?\d)',
1977	GESHI_NUMBER_BIN_SUFFIX =>
1978	'(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]\|\.(?:[eE][+\-]?)?\d)',
1979	GESHI_NUMBER_BIN_PREFIX_PERCENT =>
1980	'(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]\|\.(?:[eE][+\-]?)?\d)',
1981	GESHI_NUMBER_BIN_PREFIX_0B =>
1982	'(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]\|\.(?:[eE][+\-]?)?\d)',
1983	GESHI_NUMBER_OCT_PREFIX =>
1984	'(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]\|\.(?:[eE][+\-]?)?\d)',
1985	GESHI_NUMBER_OCT_PREFIX_0O =>
1986	'(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]\|\.(?:[eE][+\-]?)?\d)',
1987	GESHI_NUMBER_OCT_SUFFIX =>
1988	'(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]\|\.(?:[eE][+\-]?)?\d)',
1989	GESHI_NUMBER_HEX_PREFIX =>
1990	'(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]\|\.(?:[eE][+\-]?)?\d)',
1991	GESHI_NUMBER_HEX_SUFFIX =>
1992	'(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]\|\.(?:[eE][+\-]?)?\d)',
1993	GESHI_NUMBER_FLT_NONSCI =>
1994	'(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]\|\.(?:[eE][+\-]?)?\d)',
1995	GESHI_NUMBER_FLT_NONSCI_F =>
1996	'(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?\|\.\d+?)f(?![0-9a-z]\|\.(?:[eE][+\-]?)?\d)',
1997	GESHI_NUMBER_FLT_SCI_SHORT =>
1998	'(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]\|\.(?:[eE][+\-]?)?\d)',
1999	GESHI_NUMBER_FLT_SCI_ZERO =>
2000	'(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?\|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]\|\.(?:[eE][+\-]?)?\d)'
2001	);
2002
2003	//At this step we have an associative array with flag groups for a
2004	//specific style or an string denoting a regexp given its index.
2005	$this->language_data['NUMBERS_RXCACHE'] = array();
2006	foreach($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
2007	if(is_string($rxdata)) {
2008	$regexp = $rxdata;
2009	} else {
2010	//This is a bitfield of number flags to highlight:
2011	//Build an array, implode them together and make this the actual RX
2012	$rxuse = array();
2013	for($i = 1; $i <= $rxdata; $i<<=1) {
2014	if($rxdata & $i) {
2015	$rxuse[] = $numbers_format[$i];
2016	}
2017	}
2018	$regexp = implode("\|", $rxuse);
2019	}
2020
2021	$this->language_data['NUMBERS_RXCACHE'][$key] =
2022	"/(?<!<\\|\/)(?<!<\\|!REG3XP)(?<!<\\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>\|(?>[^\<]))+>)(?![^<]*>)(?!\\|>)(?!\/>)/i"; //
2023	}
2024	}
2025
2026	$this->parse_cache_built = true;
2027	}
2028
2029	/**
2030	* Returns the code in $this->source, highlighted and surrounded by the
2031	* necessary HTML.
2032	*
2033	* This should only be called ONCE, because it's SLOW! If you want to highlight
2034	* the same source multiple times, you're better off doing a whole lot of
2035	* str_replaces to replace the <span>s
2036	*
2037	* @since 1.0.0
2038	*/
2039	function parse_code () {
2040	// Start the timer
2041	$start_time = microtime();
2042
2043	// Replace all newlines to a common form.
2044	$code = str_replace("\r\n", "\n", $this->source);
2045	$code = str_replace("\r", "\n", $code);
2046
2047	// Firstly, if there is an error, we won't highlight
2048	if ($this->error) {
2049	//Escape the source for output
2050	$result = $this->hsc($this->source);
2051
2052	//This fix is related to SF#1923020, but has to be applied regardless of
2053	//actually highlighting symbols.
2054	$result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '\|'), $result);
2055
2056	// Timing is irrelevant
2057	$this->set_time($start_time, $start_time);
2058	$this->finalise($result);
2059	return $result;
2060	}
2061
2062	// make sure the parse cache is up2date
2063	if (!$this->parse_cache_built) {
2064	$this->build_parse_cache();
2065	}
2066
2067	// Initialise various stuff
2068	$length = strlen($code);
2069	$COMMENT_MATCHED = false;
2070	$stuff_to_parse = '';
2071	$endresult = '';
2072
2073	// "Important" selections are handled like multiline comments
2074	// @todo GET RID OF THIS SHIZ
2075	if ($this->enable_important_blocks) {
2076	$this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2077	}
2078
2079	if ($this->strict_mode) {
2080	// Break the source into bits. Each bit will be a portion of the code
2081	// within script delimiters - for example, HTML between < and >
2082	$k = 0;
2083	$parts = array();
2084	$matches = array();
2085	$next_match_pointer = null;
2086	// we use a copy to unset delimiters on demand (when they are not found)
2087	$delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2088	$i = 0;
2089	while ($i < $length) {
2090	$next_match_pos = $length + 1; // never true
2091	foreach ($delim_copy as $dk => $delimiters) {
2092	if(is_array($delimiters)) {
2093	foreach ($delimiters as $open => $close) {
2094	// make sure the cache is setup properly
2095	if (!isset($matches[$dk][$open])) {
2096	$matches[$dk][$open] = array(
2097	'next_match' => -1,
2098	'dk' => $dk,
2099
2100	'open' => $open, // needed for grouping of adjacent code blocks (see below)
2101	'open_strlen' => strlen($open),
2102
2103	'close' => $close,
2104	'close_strlen' => strlen($close),
2105	);
2106	}
2107	// Get the next little bit for this opening string
2108	if ($matches[$dk][$open]['next_match'] < $i) {
2109	// only find the next pos if it was not already cached
2110	$open_pos = strpos($code, $open, $i);
2111	if ($open_pos === false) {
2112	// no match for this delimiter ever
2113	unset($delim_copy[$dk][$open]);
2114	continue;
2115	}
2116	$matches[$dk][$open]['next_match'] = $open_pos;
2117	}
2118	if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2119	//So we got a new match, update the close_pos
2120	$matches[$dk][$open]['close_pos'] =
2121	strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2122
2123	$next_match_pointer =& $matches[$dk][$open];
2124	$next_match_pos = $matches[$dk][$open]['next_match'];
2125	}
2126	}
2127	} else {
2128	//So we should match an RegExp as Strict Block ...
2129	/**
2130	* The value in $delimiters is expected to be an RegExp
2131	* containing exactly 2 matching groups:
2132	* - Group 1 is the opener
2133	* - Group 2 is the closer
2134	*/
2135	if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work.
2136	preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2137	//We got a match ...
2138	if(isset($matches_rx['start']) && isset($matches_rx['end']))
2139	{
2140	$matches[$dk] = array(
2141	'next_match' => $matches_rx['start'][1],
2142	'dk' => $dk,
2143
2144	'close_strlen' => strlen($matches_rx['end'][0]),
2145	'close_pos' => $matches_rx['end'][1],
2146	);
2147	} else {
2148	$matches[$dk] = array(
2149	'next_match' => $matches_rx[1][1],
2150	'dk' => $dk,
2151
2152	'close_strlen' => strlen($matches_rx[2][0]),
2153	'close_pos' => $matches_rx[2][1],
2154	);
2155	}
2156	} else {
2157	// no match for this delimiter ever
2158	unset($delim_copy[$dk]);
2159	continue;
2160	}
2161
2162	if ($matches[$dk]['next_match'] <= $next_match_pos) {
2163	$next_match_pointer =& $matches[$dk];
2164	$next_match_pos = $matches[$dk]['next_match'];
2165	}
2166	}
2167	}
2168
2169	// non-highlightable text
2170	$parts[$k] = array(
2171	1 => substr($code, $i, $next_match_pos - $i)
2172	);
2173	++$k;
2174
2175	if ($next_match_pos > $length) {
2176	// out of bounds means no next match was found
2177	break;
2178	}
2179
2180	// highlightable code
2181	$parts[$k][0] = $next_match_pointer['dk'];
2182
2183	//Only combine for non-rx script blocks
2184	if(is_array($delim_copy[$next_match_pointer['dk']])) {
2185	// group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2186	$i = $next_match_pos + $next_match_pointer['open_strlen'];
2187	while (true) {
2188	$close_pos = strpos($code, $next_match_pointer['close'], $i);
2189	if ($close_pos == false) {
2190	break;
2191	}
2192	$i = $close_pos + $next_match_pointer['close_strlen'];
2193	if ($i == $length) {
2194	break;
2195	}
2196	if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 \|\|
2197	substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2198	// merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2199	foreach ($matches as $submatches) {
2200	foreach ($submatches as $match) {
2201	if ($match['next_match'] == $i) {
2202	// a different block already matches here!
2203	break 3;
2204	}
2205	}
2206	}
2207	} else {
2208	break;
2209	}
2210	}
2211	} else {
2212	$close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2213	$i = $close_pos;
2214	}
2215
2216	if ($close_pos === false) {
2217	// no closing delimiter found!
2218	$parts[$k][1] = substr($code, $next_match_pos);
2219	++$k;
2220	break;
2221	} else {
2222	$parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2223	++$k;
2224	}
2225	}
2226	unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2227	$num_parts = $k;
2228
2229	if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2230	// when we have only one part, we don't have anything to highlight at all.
2231	// if we have a "maybe" strict language, this should be handled as highlightable code
2232	$parts = array(
2233	0 => array(
2234	0 => '',
2235	1 => ''
2236	),
2237	1 => array(
2238	0 => null,
2239	1 => $parts[0][1]
2240	)
2241	);
2242	$num_parts = 2;
2243	}
2244
2245	} else {
2246	// Not strict mode - simply dump the source into
2247	// the array at index 1 (the first highlightable block)
2248	$parts = array(
2249	0 => array(
2250	0 => '',
2251	1 => ''
2252	),
2253	1 => array(
2254	0 => null,
2255	1 => $code
2256	)
2257	);
2258	$num_parts = 2;
2259	}
2260
2261	//Unset variables we won't need any longer
2262	unset($code);
2263
2264	//Preload some repeatedly used values regarding hardquotes ...
2265	$hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2266	$hq_strlen = strlen($hq);
2267
2268	//Preload if line numbers are to be generated afterwards
2269	//Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2270	$check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS \|\|
2271	!empty($this->highlight_extra_lines) \|\| !$this->allow_multiline_span;
2272
2273	//preload the escape char for faster checking ...
2274	$escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2275
2276	// this is used for single-line comments
2277	$sc_disallowed_before = "";
2278	$sc_disallowed_after = "";
2279
2280	if (isset($this->language_data['PARSER_CONTROL'])) {
2281	if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2282	if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2283	$sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2284	}
2285	if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2286	$sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2287	}
2288	}
2289	}
2290
2291	//Fix for SF#1932083: Multichar Quotemarks unsupported
2292	$is_string_starter = array();
2293	if ($this->lexic_permissions['STRINGS']) {
2294	foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2295	if (!isset($is_string_starter[$quotemark[0]])) {
2296	$is_string_starter[$quotemark[0]] = (string)$quotemark;
2297	} else if (is_string($is_string_starter[$quotemark[0]])) {
2298	$is_string_starter[$quotemark[0]] = array(
2299	$is_string_starter[$quotemark[0]],
2300	$quotemark);
2301	} else {
2302	$is_string_starter[$quotemark[0]][] = $quotemark;
2303	}
2304	}
2305	}
2306
2307	// Now we go through each part. We know that even-indexed parts are
2308	// code that shouldn't be highlighted, and odd-indexed parts should
2309	// be highlighted
2310	for ($key = 0; $key < $num_parts; ++$key) {
2311	$STRICTATTRS = '';
2312
2313	// If this block should be highlighted...
2314	if (!($key & 1)) {
2315	// Else not a block to highlight
2316	$endresult .= $this->hsc($parts[$key][1]);
2317	unset($parts[$key]);
2318	continue;
2319	}
2320
2321	$result = '';
2322	$part = $parts[$key][1];
2323
2324	$highlight_part = true;
2325	if ($this->strict_mode && !is_null($parts[$key][0])) {
2326	// get the class key for this block of code
2327	$script_key = $parts[$key][0];
2328	$highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2329	if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2330	$this->lexic_permissions['SCRIPT']) {
2331	// Add a span element around the source to
2332	// highlight the overall source block
2333	if (!$this->use_classes &&
2334	$this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2335	$attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2336	} else {
2337	$attributes = ' class="sc' . $script_key . '"';
2338	}
2339	$result .= "<span$attributes>";
2340	$STRICTATTRS = $attributes;
2341	}
2342	}
2343
2344	if ($highlight_part) {
2345	// Now, highlight the code in this block. This code
2346	// is really the engine of GeSHi (along with the method
2347	// parse_non_string_part).
2348
2349	// cache comment regexps incrementally
2350	$next_comment_regexp_key = '';
2351	$next_comment_regexp_pos = -1;
2352	$next_comment_multi_pos = -1;
2353	$next_comment_single_pos = -1;
2354	$comment_regexp_cache_per_key = array();
2355	$comment_multi_cache_per_key = array();
2356	$comment_single_cache_per_key = array();
2357	$next_open_comment_multi = '';
2358	$next_comment_single_key = '';
2359	$escape_regexp_cache_per_key = array();
2360	$next_escape_regexp_key = '';
2361	$next_escape_regexp_pos = -1;
2362
2363	$length = strlen($part);
2364	for ($i = 0; $i < $length; ++$i) {
2365	// Get the next char
2366	$char = $part[$i];
2367	$char_len = 1;
2368
2369	// update regexp comment cache if needed
2370	if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2371	$next_comment_regexp_pos = $length;
2372	foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2373	$match_i = false;
2374	if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2375	($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i \|\|
2376	$comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2377	// we have already matched something
2378	if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2379	// this comment is never matched
2380	continue;
2381	}
2382	$match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2383	} else if (
2384	//This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2385	(GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) \|\|
2386	(!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
2387	) {
2388	$match_i = $match[0][1];
2389	if (GESHI_PHP_PRE_433) {
2390	$match_i += $i;
2391	}
2392
2393	$comment_regexp_cache_per_key[$comment_key] = array(
2394	'key' => $comment_key,
2395	'length' => strlen($match[0][0]),
2396	'pos' => $match_i
2397	);
2398	} else {
2399	$comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2400	continue;
2401	}
2402
2403	if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2404	$next_comment_regexp_pos = $match_i;
2405	$next_comment_regexp_key = $comment_key;
2406	if ($match_i === $i) {
2407	break;
2408	}
2409	}
2410	}
2411	}
2412
2413	$string_started = false;
2414
2415	if (isset($is_string_starter[$char])) {
2416	// Possibly the start of a new string ...
2417
2418	//Check which starter it was ...
2419	//Fix for SF#1932083: Multichar Quotemarks unsupported
2420	if (is_array($is_string_starter[$char])) {
2421	$char_new = '';
2422	foreach ($is_string_starter[$char] as $testchar) {
2423	if ($testchar === substr($part, $i, strlen($testchar)) &&
2424	strlen($testchar) > strlen($char_new)) {
2425	$char_new = $testchar;
2426	$string_started = true;
2427	}
2428	}
2429	if ($string_started) {
2430	$char = $char_new;
2431	}
2432	} else {
2433	$testchar = $is_string_starter[$char];
2434	if ($testchar === substr($part, $i, strlen($testchar))) {
2435	$char = $testchar;
2436	$string_started = true;
2437	}
2438	}
2439	$char_len = strlen($char);
2440	}
2441
2442	if ($string_started && ($i != $next_comment_regexp_pos)) {
2443	// Hand out the correct style information for this string
2444	$string_key = array_search($char, $this->language_data['QUOTEMARKS']);
2445	if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) \|\|
2446	!isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
2447	$string_key = 0;
2448	}
2449
2450	// parse the stuff before this
2451	$result .= $this->parse_non_string_part($stuff_to_parse);
2452	$stuff_to_parse = '';
2453
2454	if (!$this->use_classes) {
2455	$string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
2456	} else {
2457	$string_attributes = ' class="st'.$string_key.'"';
2458	}
2459
2460	// now handle the string
2461	$string = "<span$string_attributes>" . GeSHi::hsc($char);
2462	$start = $i + $char_len;
2463	$string_open = true;
2464
2465	if(empty($this->language_data['ESCAPE_REGEXP'])) {
2466	$next_escape_regexp_pos = $length;
2467	}
2468
2469	do {
2470	//Get the regular ending pos ...
2471	$close_pos = strpos($part, $char, $start);
2472	if(false === $close_pos) {
2473	$close_pos = $length;
2474	}
2475
2476	if($this->lexic_permissions['ESCAPE_CHAR']) {
2477	// update escape regexp cache if needed
2478	if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2479	$next_escape_regexp_pos = $length;
2480	foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2481	$match_i = false;
2482	if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2483	($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start \|\|
2484	$escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2485	// we have already matched something
2486	if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
2487	// this comment is never matched
2488	continue;
2489	}
2490	$match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2491	} else if (
2492	//This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2493	(GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) \|\|
2494	(!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start))
2495	) {
2496	$match_i = $match[0][1];
2497	if (GESHI_PHP_PRE_433) {
2498	$match_i += $start;
2499	}
2500
2501	$escape_regexp_cache_per_key[$escape_key] = array(
2502	'key' => $escape_key,
2503	'length' => strlen($match[0][0]),
2504	'pos' => $match_i
2505	);
2506	} else {
2507	$escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2508	continue;
2509	}
2510
2511	if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2512	$next_escape_regexp_pos = $match_i;
2513	$next_escape_regexp_key = $escape_key;
2514	if ($match_i === $start) {
2515	break;
2516	}
2517	}
2518	}
2519	}
2520
2521	//Find the next simple escape position
2522	if('' != $this->language_data['ESCAPE_CHAR']) {
2523	$simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
2524	if(false === $simple_escape) {
2525	$simple_escape = $length;
2526	}
2527	} else {
2528	$simple_escape = $length;
2529	}
2530	} else {
2531	$next_escape_regexp_pos = $length;
2532	$simple_escape = $length;
2533	}
2534
2535	if($simple_escape < $next_escape_regexp_pos &&
2536	$simple_escape < $length &&
2537	$simple_escape < $close_pos) {
2538	//The nexxt escape sequence is a simple one ...
2539	$es_pos = $simple_escape;
2540
2541	//Add the stuff not in the string yet ...
2542	$string .= $this->hsc(substr($part, $start, $es_pos - $start));
2543
2544	//Get the style for this escaped char ...
2545	if (!$this->use_classes) {
2546	$escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
2547	} else {
2548	$escape_char_attributes = ' class="es0"';
2549	}
2550
2551	//Add the style for the escape char ...
2552	$string .= "<span$escape_char_attributes>" .
2553	GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
2554
2555	//Get the byte AFTER the ESCAPE_CHAR we just found
2556	$es_char = $part[$es_pos + 1];
2557	if ($es_char == "\n") {
2558	// don't put a newline around newlines
2559	$string .= "</span>\n";
2560	$start = $es_pos + 2;
2561	} else if (ord($es_char) >= 128) {
2562	//This is an non-ASCII char (UTF8 or single byte)
2563	//This code tries to work around SF#2037598 ...
2564	if(function_exists('mb_substr')) {
2565	$es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
2566	$string .= $es_char_m . '</span>';
2567	} else if (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) {
2568	if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2569	"\|\xE0[\xA0-\xBF][\x80-\xBF]".
2570	"\|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2571	"\|\xED[\x80-\x9F][\x80-\xBF]".
2572	"\|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2573	"\|[\xF1-\xF3][\x80-\xBF]{3}".
2574	"\|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2575	$part, $es_char_m, null, $es_pos + 1)) {
2576	$es_char_m = $es_char_m[0];
2577	} else {
2578	$es_char_m = $es_char;
2579	}
2580	$string .= $this->hsc($es_char_m) . '</span>';
2581	} else {
2582	$es_char_m = $this->hsc($es_char);
2583	}
2584	$start = $es_pos + strlen($es_char_m) + 1;
2585	} else {
2586	$string .= $this->hsc($es_char) . '</span>';
2587	$start = $es_pos + 2;
2588	}
2589	} else if ($next_escape_regexp_pos < $length &&
2590	$next_escape_regexp_pos < $close_pos) {
2591	$es_pos = $next_escape_regexp_pos;
2592	//Add the stuff not in the string yet ...
2593	$string .= $this->hsc(substr($part, $start, $es_pos - $start));
2594
2595	//Get the key and length of this match ...
2596	$escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2597	$escape_str = substr($part, $es_pos, $escape['length']);
2598	$escape_key = $escape['key'];
2599
2600	//Get the style for this escaped char ...
2601	if (!$this->use_classes) {
2602	$escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2603	} else {
2604	$escape_char_attributes = ' class="es' . $escape_key . '"';
2605	}
2606
2607	//Add the style for the escape char ...
2608	$string .= "<span$escape_char_attributes>" .
2609	$this->hsc($escape_str) . '</span>';
2610
2611	$start = $es_pos + $escape['length'];
2612	} else {
2613	//Copy the remainder of the string ...
2614	$string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
2615	$start = $close_pos + $char_len;
2616	$string_open = false;
2617	}
2618	} while($string_open);
2619
2620	if ($check_linenumbers) {
2621	// Are line numbers used? If, we should end the string before
2622	// the newline and begin it again (so when <li>s are put in the source
2623	// remains XHTML compliant)
2624	// note to self: This opens up possibility of config files specifying
2625	// that languages can/cannot have multiline strings???
2626	$string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2627	}
2628
2629	$result .= $string;
2630	$string = '';
2631	$i = $start - 1;
2632	continue;
2633	} else if ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
2634	substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
2635	// The start of a hard quoted string
2636	if (!$this->use_classes) {
2637	$string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
2638	$escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2639	} else {
2640	$string_attributes = ' class="st_h"';
2641	$escape_char_attributes = ' class="es_h"';
2642	}
2643	// parse the stuff before this
2644	$result .= $this->parse_non_string_part($stuff_to_parse);
2645	$stuff_to_parse = '';
2646
2647	// now handle the string
2648	$string = '';
2649
2650	// look for closing quote
2651	$start = $i + $hq_strlen;
2652	while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
2653	$start = $close_pos + 1;
2654	if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] &&
2655	(($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape
2656	// make sure this quote is not escaped
2657	foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2658	if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
2659	// check wether this quote is escaped or if it is something like '\\'
2660	$escape_char_pos = $close_pos - 1;
2661	while ($escape_char_pos > 0
2662	&& $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
2663	--$escape_char_pos;
2664	}
2665	if (($close_pos - $escape_char_pos) & 1) {
2666	// uneven number of escape chars => this quote is escaped
2667	continue 2;
2668	}
2669	}
2670	}
2671	}
2672
2673	// found closing quote
2674	break;
2675	}
2676
2677	//Found the closing delimiter?
2678	if (!$close_pos) {
2679	// span till the end of this $part when no closing delimiter is found
2680	$close_pos = $length;
2681	}
2682
2683	//Get the actual string
2684	$string = substr($part, $i, $close_pos - $i + 1);
2685	$i = $close_pos;
2686
2687	// handle escape chars and encode html chars
2688	// (special because when we have escape chars within our string they may not be escaped)
2689	if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
2690	$start = 0;
2691	$new_string = '';
2692	while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
2693	// hmtl escape stuff before
2694	$new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2695	// check if this is a hard escape
2696	foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2697	if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2698	// indeed, this is a hardescape
2699	$new_string .= "<span$escape_char_attributes>" .
2700	$this->hsc($hardescape) . '</span>';
2701	$start = $es_pos + strlen($hardescape);
2702	continue 2;
2703	}
2704	}
2705	// not a hard escape, but a normal escape
2706	// they come in pairs of two
2707	$c = 0;
2708	while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
2709	&& $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
2710	&& $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
2711	$c += 2;
2712	}
2713	if ($c) {
2714	$new_string .= "<span$escape_char_attributes>" .
2715	str_repeat($escaped_escape_char, $c) .
2716	'</span>';
2717	$start = $es_pos + $c;
2718	} else {
2719	// this is just a single lonely escape char...
2720	$new_string .= $escaped_escape_char;
2721	$start = $es_pos + 1;
2722	}
2723	}
2724	$string = $new_string . $this->hsc(substr($string, $start));
2725	} else {
2726	$string = $this->hsc($string);
2727	}
2728
2729	if ($check_linenumbers) {
2730	// Are line numbers used? If, we should end the string before
2731	// the newline and begin it again (so when <li>s are put in the source
2732	// remains XHTML compliant)
2733	// note to self: This opens up possibility of config files specifying
2734	// that languages can/cannot have multiline strings???
2735	$string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2736	}
2737
2738	$result .= "<span$string_attributes>" . $string . '</span>';
2739	$string = '';
2740	continue;
2741	} else {
2742	//Have a look for regexp comments
2743	if ($i == $next_comment_regexp_pos) {
2744	$COMMENT_MATCHED = true;
2745	$comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2746	$test_str = $this->hsc(substr($part, $i, $comment['length']));
2747
2748	//@todo If remove important do remove here
2749	if ($this->lexic_permissions['COMMENTS']['MULTI']) {
2750	if (!$this->use_classes) {
2751	$attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
2752	} else {
2753	$attributes = ' class="co' . $comment['key'] . '"';
2754	}
2755
2756	$test_str = "<span$attributes>" . $test_str . "</span>";
2757
2758	// Short-cut through all the multiline code
2759	if ($check_linenumbers) {
2760	// strreplace to put close span and open span around multiline newlines
2761	$test_str = str_replace(
2762	"\n", "</span>\n<span$attributes>",
2763	str_replace("\n ", "\n ", $test_str)
2764	);
2765	}
2766	}
2767
2768	$i += $comment['length'] - 1;
2769
2770	// parse the rest
2771	$result .= $this->parse_non_string_part($stuff_to_parse);
2772	$stuff_to_parse = '';
2773	}
2774
2775	// If we haven't matched a regexp comment, try multi-line comments
2776	if (!$COMMENT_MATCHED) {
2777	// Is this a multiline comment?
2778	if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2779	$next_comment_multi_pos = $length;
2780	foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
2781	$match_i = false;
2782	if (isset($comment_multi_cache_per_key[$open]) &&
2783	($comment_multi_cache_per_key[$open] >= $i \|\|
2784	$comment_multi_cache_per_key[$open] === false)) {
2785	// we have already matched something
2786	if ($comment_multi_cache_per_key[$open] === false) {
2787	// this comment is never matched
2788	continue;
2789	}
2790	$match_i = $comment_multi_cache_per_key[$open];
2791	} else if (($match_i = stripos($part, $open, $i)) !== false) {
2792	$comment_multi_cache_per_key[$open] = $match_i;
2793	} else {
2794	$comment_multi_cache_per_key[$open] = false;
2795	continue;
2796	}
2797	if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2798	$next_comment_multi_pos = $match_i;
2799	$next_open_comment_multi = $open;
2800	if ($match_i === $i) {
2801	break;
2802	}
2803	}
2804	}
2805	}
2806	if ($i == $next_comment_multi_pos) {
2807	$open = $next_open_comment_multi;
2808	$close = $this->language_data['COMMENT_MULTI'][$open];
2809	$open_strlen = strlen($open);
2810	$close_strlen = strlen($close);
2811	$COMMENT_MATCHED = true;
2812	$test_str_match = $open;
2813	//@todo If remove important do remove here
2814	if ($this->lexic_permissions['COMMENTS']['MULTI'] \|\|
2815	$open == GESHI_START_IMPORTANT) {
2816	if ($open != GESHI_START_IMPORTANT) {
2817	if (!$this->use_classes) {
2818	$attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
2819	} else {
2820	$attributes = ' class="coMULTI"';
2821	}
2822	$test_str = "<span$attributes>" . $this->hsc($open);
2823	} else {
2824	if (!$this->use_classes) {
2825	$attributes = ' style="' . $this->important_styles . '"';
2826	} else {
2827	$attributes = ' class="imp"';
2828	}
2829
2830	// We don't include the start of the comment if it's an
2831	// "important" part
2832	$test_str = "<span$attributes>";
2833	}
2834	} else {
2835	$test_str = $this->hsc($open);
2836	}
2837
2838	$close_pos = strpos( $part, $close, $i + $open_strlen );
2839
2840	if ($close_pos === false) {
2841	$close_pos = $length;
2842	}
2843
2844	// Short-cut through all the multiline code
2845	$rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
2846	if (($this->lexic_permissions['COMMENTS']['MULTI'] \|\|
2847	$test_str_match == GESHI_START_IMPORTANT) &&
2848	$check_linenumbers) {
2849
2850	// strreplace to put close span and open span around multiline newlines
2851	$test_str .= str_replace(
2852	"\n", "</span>\n<span$attributes>",
2853	str_replace("\n ", "\n ", $rest_of_comment)
2854	);
2855	} else {
2856	$test_str .= $rest_of_comment;
2857	}
2858
2859	if ($this->lexic_permissions['COMMENTS']['MULTI'] \|\|
2860	$test_str_match == GESHI_START_IMPORTANT) {
2861	$test_str .= '</span>';
2862	}
2863
2864	$i = $close_pos + $close_strlen - 1;
2865
2866	// parse the rest
2867	$result .= $this->parse_non_string_part($stuff_to_parse);
2868	$stuff_to_parse = '';
2869	}
2870	}
2871
2872	// If we haven't matched a multiline comment, try single-line comments
2873	if (!$COMMENT_MATCHED) {
2874	// cache potential single line comment occurances
2875	if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
2876	$next_comment_single_pos = $length;
2877	foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
2878	$match_i = false;
2879	if (isset($comment_single_cache_per_key[$comment_key]) &&
2880	($comment_single_cache_per_key[$comment_key] >= $i \|\|
2881	$comment_single_cache_per_key[$comment_key] === false)) {
2882	// we have already matched something
2883	if ($comment_single_cache_per_key[$comment_key] === false) {
2884	// this comment is never matched
2885	continue;
2886	}
2887	$match_i = $comment_single_cache_per_key[$comment_key];
2888	} else if (
2889	// case sensitive comments
2890	($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
2891	($match_i = stripos($part, $comment_mark, $i)) !== false) \|\|
2892	// non case sensitive
2893	(!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
2894	(($match_i = strpos($part, $comment_mark, $i)) !== false))) {
2895	$comment_single_cache_per_key[$comment_key] = $match_i;
2896	} else {
2897	$comment_single_cache_per_key[$comment_key] = false;
2898	continue;
2899	}
2900	if ($match_i !== false && $match_i < $next_comment_single_pos) {
2901	$next_comment_single_pos = $match_i;
2902	$next_comment_single_key = $comment_key;
2903	if ($match_i === $i) {
2904	break;
2905	}
2906	}
2907	}
2908	}
2909	if ($next_comment_single_pos == $i) {
2910	$comment_key = $next_comment_single_key;
2911	$comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
2912	$com_len = strlen($comment_mark);
2913
2914	// This check will find special variables like $# in bash
2915	// or compiler directives of Delphi beginning {$
2916	if ((empty($sc_disallowed_before) \|\| ($i == 0) \|\|
2917	(false === strpos($sc_disallowed_before, $part[$i-1]))) &&
2918	(empty($sc_disallowed_after) \|\| ($length <= $i + $com_len) \|\|
2919	(false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
2920	{
2921	// this is a valid comment
2922	$COMMENT_MATCHED = true;
2923	if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
2924	if (!$this->use_classes) {
2925	$attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
2926	} else {
2927	$attributes = ' class="co' . $comment_key . '"';
2928	}
2929	$test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
2930	} else {
2931	$test_str = $this->hsc($comment_mark);
2932	}
2933
2934	//Check if this comment is the last in the source
2935	$close_pos = strpos($part, "\n", $i);
2936	$oops = false;
2937	if ($close_pos === false) {
2938	$close_pos = $length;
2939	$oops = true;
2940	}
2941	$test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
2942	if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
2943	$test_str .= "</span>";
2944	}
2945
2946	// Take into account that the comment might be the last in the source
2947	if (!$oops) {
2948	$test_str .= "\n";
2949	}
2950
2951	$i = $close_pos;
2952
2953	// parse the rest
2954	$result .= $this->parse_non_string_part($stuff_to_parse);
2955	$stuff_to_parse = '';
2956	}
2957	}
2958	}
2959	}
2960
2961	// Where are we adding this char?
2962	if (!$COMMENT_MATCHED) {
2963	$stuff_to_parse .= $char;
2964	} else {
2965	$result .= $test_str;
2966	unset($test_str);
2967	$COMMENT_MATCHED = false;
2968	}
2969	}
2970	// Parse the last bit
2971	$result .= $this->parse_non_string_part($stuff_to_parse);
2972	$stuff_to_parse = '';
2973	} else {
2974	$result .= $this->hsc($part);
2975	}
2976	// Close the <span> that surrounds the block
2977	if ($STRICTATTRS != '') {
2978	$result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
2979	$result .= '</span>';
2980	}
2981
2982	$endresult .= $result;
2983	unset($part, $parts[$key], $result);
2984	}
2985
2986	//This fix is related to SF#1923020, but has to be applied regardless of
2987	//actually highlighting symbols.
2988	/** NOTE: memorypeak #3 */
2989	$endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '\|'), $endresult);
2990
2991	// // Parse the last stuff (redundant?)
2992	// $result .= $this->parse_non_string_part($stuff_to_parse);
2993
2994	// Lop off the very first and last spaces
2995	// $result = substr($result, 1, -1);
2996
2997	// We're finished: stop timing
2998	$this->set_time($start_time, microtime());
2999
3000	$this->finalise($endresult);
3001	return $endresult;
3002	}
3003
/**
 * Swaps out spaces and tabs for HTML indentation. Not needed if
 * the code is in a pre block...
 *
 * Tabs are expanded to the next tab stop, where the column is counted on
 * the visible text only: characters inside HTML tags are not counted and a
 * complete HTML entity counts as one column. Leading spaces and runs of
 * spaces are then protected with &nbsp; so a browser does not collapse them.
 * When no line numbers are used (and the header is not the pre-table one)
 * newlines are turned into <br /> or the configured line ending.
 *
 * @param string The source to indent (reference!) - modified in place
 * @since 1.0.0
 * @access private
 */
function indent(&$result) {
    /// Replace tabs with the correct number of spaces
    if (false !== strpos($result, "\t")) {
        $lines = explode("\n", $result);
        $result = null;//Save memory while we process the lines individually
        $tab_width = $this->get_real_tab_width();
        // Template for tab replacements: one non-breaking space entity
        // (6 characters) followed by $tab_width plain spaces. The substr()
        // offsets 6 and +5 below rely on the entity being exactly 6 chars.
        $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);

        for ($key = 0, $n = count($lines); $key < $n; $key++) {
            $line = $lines[$key];
            if (false === strpos($line, "\t")) {
                continue;
            }

            $pos = 0; // visible column reached so far
            $length = strlen($line);
            $lines[$key] = ''; // reduce memory

            $IN_TAG = false;
            for ($i = 0; $i < $length; ++$i) {
                $char = $line[$i];
                // Simple engine to work out whether we're in a tag.
                // If we are we modify $pos. This is so we ignore HTML
                // in the line and only workout the tab replacement
                // via the actual content of the string
                // This test could be improved to include strings in the
                // html so that < or > would be allowed in user's styles
                // (e.g. quotes: '<' '>'; or similar)
                if ($IN_TAG) {
                    if ('>' == $char) {
                        $IN_TAG = false;
                    }
                    $lines[$key] .= $char;
                } else if ('<' == $char) {
                    $IN_TAG = true;
                    $lines[$key] .= '<';
                } else if ('&' == $char) {
                    // Start of an entity: look for its terminating ';'.
                    // The subtraction pre-compensates for the remaining
                    // entity characters, each of which bumps $pos in the
                    // default branch, so the entity nets one column.
                    $substr = substr($line, $i + 3, 5);
                    $posi = strpos($substr, ';');
                    if (false === $posi) {
                        ++$pos;
                    } else {
                        $pos -= $posi+2;
                    }
                    $lines[$key] .= $char;
                } else if ("\t" == $char) {
                    $str = '';
                    // OPTIMISE - move $strs out. Make an array:
                    // $tabs = array(
                    //  1 => '&nbsp;',
                    //  2 => '&nbsp; ',
                    //  3 => '&nbsp; &nbsp;' etc etc
                    // to use instead of building a string every time
                    $tab_end_width = $tab_width - ($pos % $tab_width); // columns up to the next tab stop
                    if (($pos & 1) || 1 == $tab_end_width) {
                        // plain spaces only (skip the 6-character entity)
                        $str .= substr($tab_string, 6, $tab_end_width);
                    } else {
                        // entity plus ($tab_end_width - 1) plain spaces
                        $str .= substr($tab_string, 0, $tab_end_width+5);
                    }
                    $lines[$key] .= $str;
                    $pos += $tab_end_width;

                    // No more tabs on this line: copy the remainder verbatim
                    if (false === strpos($line, "\t", $i + 1)) {
                        $lines[$key] .= substr($line, $i + 1);
                        break;
                    }
                } else if (0 == $pos && ' ' == $char) {
                    // A space in the first column must not be collapsed
                    $lines[$key] .= '&nbsp;';
                    ++$pos;
                } else {
                    $lines[$key] .= $char;
                    ++$pos;
                }
            }
        }
        $result = implode("\n", $lines);
        unset($lines);//We don't need the lines separated beyond this --- free them!
    }
    // Other whitespace
    // BenBE: Fix to reduce the number of replacements to be done
    $result = preg_replace('/^ /m', '&nbsp;', $result);
    $result = str_replace('  ', ' &nbsp;', $result);

    if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
        if ($this->line_ending === null) {
            $result = nl2br($result);
        } else {
            $result = str_replace("\n", $this->line_ending, $result);
        }
    }
}
3103
/**
 * Applies the capitalisation mode configured for the current language
 * (its CASE_KEYWORDS setting) to the given text.
 *
 * @param string The keyword to change the case of
 * @return string The keyword upper-cased, lower-cased or untouched,
 *                depending on the configured mode
 * @since 1.0.0
 * @access private
 */
function change_case($instr) {
    $caps_mode = $this->language_data['CASE_KEYWORDS'];

    if ($caps_mode == GESHI_CAPS_UPPER) {
        return strtoupper($instr);
    }

    if ($caps_mode == GESHI_CAPS_LOWER) {
        return strtolower($instr);
    }

    // Any other mode leaves the text as it was written
    return $instr;
}
3122
/**
 * Handles replacements of keywords to include markup and links if requested
 *
 * Used as a preg_replace_callback() callback for the keyword group stored
 * in $this->_kw_replace_group. The keyword is wrapped in the intermediate
 * "<|/group/>...|>" placeholder markup; when keyword links are enabled and
 * the group has a non-empty URL, an "<|UR1|" link placeholder is put in
 * front and a closing </a> behind.
 *
 * @param array The match array; element 0 is the keyword as it was found
 * @return The HTML for the match found
 * @since 1.0.8
 * @access private
 *
 * @todo Get rid of ender in keyword links
 */
function handle_keyword_replace($match) {
    $k = $this->_kw_replace_group;
    $keyword = $match[0];

    $before = '';
    $after = '';

    if ($this->keyword_links) {
        // Keyword links have been enabled

        if (isset($this->language_data['URLS'][$k]) &&
            $this->language_data['URLS'][$k] != '') {
            // There is a base group for this keyword

            // Fall back to the text as it was matched. Previously a failed
            // lookup below left $word holding the last keyword of the list
            // (or undefined for an empty list), yielding a wrong link target.
            $word = $keyword;

            // For case insensitive groups whose URL uses {FNAME}, take the
            // spelling from the language file to get the correct case
            if (!$this->language_data['CASE_SENSITIVE'][$k] &&
                strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
                foreach ($this->language_data['KEYWORDS'][$k] as $candidate) {
                    if (strcasecmp($candidate, $keyword) == 0) {
                        $word = $candidate;
                        break;
                    }
                }
            }

            // Dots in the URL are masked as <DOT> here and turned back into
            // "." by parse_non_string_part() once highlighting is done
            $before = '<|UR1|"' .
                str_replace(
                    array(
                        '{FNAME}',
                        '{FNAMEL}',
                        '{FNAMEU}',
                        '.'),
                    array(
                        str_replace('+', '%20', urlencode($this->hsc($word))),
                        str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
                        str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
                        '<DOT>'),
                    $this->language_data['URLS'][$k]
                ) . '">';
            $after = '</a>';
        }
    }

    return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
}
3181
/**
 * Produces the inline style for a regexp match whose style is defined by a
 * callback function in the language data.
 *
 * The style callback registered for the regexp key in $this->_rx_key is
 * invoked with the first captured group; its return value becomes the
 * content of the style attribute.
 *
 * @note this is a callback, don't use it directly
 *
 * @param array the matches array
 * @return The highlighted string
 * @since 1.0.8
 * @access private
 */
function handle_regexps_callback($matches) {
    $captured = $matches[1];
    $style_function = $this->language_data['STYLES']['REGEXPS'][$this->_rx_key];
    $style = call_user_func($style_function, $captured);

    return ' style="' . $style . '"' . $captured . '|>';
}
3196
/**
 * Wraps a REGEXPS match in its placeholder markup, closing and re-opening
 * the placeholder around every newline inside the match.
 * Set the _hmr_* vars before calling this.
 *
 * When $this->_hmr_replace is set, back-references of the form \N in the
 * before, after and replace templates are substituted with the matching
 * entries of the matches array; otherwise the whole match is used as is.
 *
 * @note this is a callback, don't use it directly
 *
 * @param array the matches array
 * @return string
 * @since 1.0.8
 * @access private
 */
function handle_multiline_regexps($matches) {
    $prefix = $this->_hmr_before;
    $suffix = $this->_hmr_after;
    $content = $matches[0];

    if ($this->_hmr_replace) {
        // One "\N" search token per entry of the matches array
        $tokens = array();
        foreach ($matches as $index => $ignored) {
            $tokens[] = '\\' . $index;
        }

        $prefix = str_replace($tokens, $matches, $prefix);
        $suffix = str_replace($tokens, $matches, $suffix);
        $content = str_replace($tokens, $matches, $this->_hmr_replace);
    }

    $opener = '<|!REG3XP' . $this->_hmr_key . '!>';

    return $prefix
        . $opener
        . str_replace("\n", "|>\n" . $opener, $content)
        . '|>'
        . $suffix;
}
3230
/**
 * Takes a string that has no strings or comments in it, and highlights
 * stuff like keywords, numbers and methods.
 *
 * The text is first HTML-escaped. Highlighting is then done in two phases:
 * matches are wrapped in intermediate placeholders ("<|...>" to open,
 * "|>" to close, "<|UR1|" for links, "<DOT>" for dots in URLs), and only at
 * the very end are the placeholders turned into real <span>/<a> markup, so
 * that styles and class names are never highlighted themselves.
 *
 * A PCRE call that fails (preg_replace() and preg_replace_callback() return
 * null on error) leaves the text unchanged for that rule instead of
 * discarding the whole fragment.
 *
 * @param string The string to parse for keyword, numbers etc.
 * @return string The highlighted HTML for the given text
 * @since 1.0.0
 * @access private
 * @todo BUGGY! Why? Why not build string and return?
 */
function parse_non_string_part($stuff_to_parse) {
    // A single space is prepended here and stripped again by the substr()
    // at the end of this function
    $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);

    // Highlight keywords
    $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#;>|^&";
    $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
    if ($this->lexic_permissions['STRINGS']) {
        $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
        $disallowed_before .= $quotemarks;
        $disallowed_after .= $quotemarks;
    }
    $disallowed_before .= "])";
    $disallowed_after .= "])";

    $parser_control_pergroup = false;
    if (isset($this->language_data['PARSER_CONTROL'])) {
        if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
            $x = 0; // check whether per-keyword-group parser_control is enabled
            if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
                $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
                ++$x;
            }
            if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
                $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
                ++$x;
            }
            // Anything besides the two global entries is a per-group setting
            $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
        }
    }

    foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
        if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
            $this->lexic_permissions['KEYWORDS'][$k]) {

            $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
            $modifiers = $case_sensitive ? '' : 'i';

            // NEW in 1.0.8 - per-keyword-group parser control
            $disallowed_before_local = $disallowed_before;
            $disallowed_after_local = $disallowed_after;
            if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
                    $disallowed_before_local =
                        $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
                }

                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
                    $disallowed_after_local =
                        $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
                }
            }

            // Read by handle_keyword_replace() to know which group matched
            $this->_kw_replace_group = $k;

            //NEW in 1.0.8, the cached regexp list
            // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
            for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set < $set_length; ++$set) {
                $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
                // Might make a more unique string for putting the number in soon
                // Basically, we don't put the styles in yet because then the styles themselves will
                // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
                $stuff_to_parse = $this->_pcre_result_or_subject(
                    preg_replace_callback(
                        "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php))$disallowed_after_local/$modifiers",
                        array($this, 'handle_keyword_replace'),
                        $stuff_to_parse
                    ),
                    $stuff_to_parse
                );
            }
        }
    }

    // Regular expressions
    foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
        if ($this->lexic_permissions['REGEXPS'][$key]) {
            if (is_array($regexp)) {
                if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                    // produce valid HTML when we match multiple lines
                    $this->_hmr_replace = $regexp[GESHI_REPLACE];
                    $this->_hmr_before = $regexp[GESHI_BEFORE];
                    $this->_hmr_key = $key;
                    $this->_hmr_after = $regexp[GESHI_AFTER];
                    $stuff_to_parse = $this->_pcre_result_or_subject(
                        preg_replace_callback(
                            "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
                            array($this, 'handle_multiline_regexps'),
                            $stuff_to_parse),
                        $stuff_to_parse
                    );
                    $this->_hmr_replace = false;
                    $this->_hmr_before = '';
                    $this->_hmr_after = '';
                } else {
                    $stuff_to_parse = $this->_pcre_result_or_subject(
                        preg_replace(
                            '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
                            $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
                            $stuff_to_parse),
                        $stuff_to_parse
                    );
                }
            } else {
                if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                    // produce valid HTML when we match multiple lines
                    $this->_hmr_key = $key;
                    $stuff_to_parse = $this->_pcre_result_or_subject(
                        preg_replace_callback( "/(" . $regexp . ")/",
                            array($this, 'handle_multiline_regexps'), $stuff_to_parse),
                        $stuff_to_parse
                    );
                    $this->_hmr_key = '';
                } else {
                    $stuff_to_parse = $this->_pcre_result_or_subject(
                        preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse),
                        $stuff_to_parse
                    );
                }
            }
        }
    }

    // Highlight numbers. As of 1.0.8 we support different types of numbers
    $numbers_found = false;
    if ($this->lexic_permissions['NUMBERS'] && preg_match('#\d#', $stuff_to_parse )) {
        $numbers_found = true;

        //For each of the formats ...
        foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
            //Check if it should be highlighted ...
            $stuff_to_parse = $this->_pcre_result_or_subject(
                preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse),
                $stuff_to_parse
            );
        }
    }

    //
    // Now that's all done, replace /[number]/ with the correct styles
    //
    foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
        if (!$this->use_classes) {
            $attributes = ' style="' .
                (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
                $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
        } else {
            $attributes = ' class="kw' . $k . '"';
        }
        $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
    }

    if ($numbers_found) {
        // Put number styles in
        foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
            //Commented out for now, as this needs some review ...
            // if ($numbers_permissions & $id) {
                //Get the appropriate style ...
                //Checking for unset styles is done by the style cache builder ...
                if (!$this->use_classes) {
                    $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
                } else {
                    $attributes = ' class="nu'.$id.'"';
                }

                //Set in the correct styles ...
                $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
            // }
        }
    }

    // Highlight methods and fields in objects
    if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
        $oolang_spaces = "[\s]*";
        $oolang_before = "";
        $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
        if (isset($this->language_data['PARSER_CONTROL'])) {
            if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
                if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
                    $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
                }
                if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
                    $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
                }
                if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
                    $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
                }
            }
        }

        foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
            // Cheap substring test before running the regular expression
            if (false !== strpos($stuff_to_parse, $splitter)) {
                if (!$this->use_classes) {
                    $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
                } else {
                    $attributes = ' class="me' . $key . '"';
                }
                $stuff_to_parse = $this->_pcre_result_or_subject(
                    preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse),
                    $stuff_to_parse
                );
            }
        }
    }

    //
    // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
    // You try it, and see what happens ;)
    // TODO: Fix lexic permissions not converting entities if shouldn't
    // be highlighting regardless
    //
    if ($this->lexic_permissions['BRACKETS']) {
        $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
                          $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
    }


    //FIX for symbol highlighting ...
    if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
        //Get all matches and throw away those witin a block that is already highlighted... (i.e. matched by a regexp)
        $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
        // Offsets reported above refer to the text before any replacement;
        // this accumulates how far earlier replacements have shifted it
        $global_offset = 0;
        for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
            $symbol_match = $pot_symbols[$s_id][0][0];
            if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
                // already highlighted blocks _must_ include either < or >
                // so if this conditional applies, we have to skip this match
                // BenBE: UNLESS the block contains <SEMI> or <PIPE>
                if(strpos($symbol_match, '<SEMI>') === false &&
                    strpos($symbol_match, '<PIPE>') === false) {
                    continue;
                }
            }

            // if we reach this point, we have a valid match which needs to be highlighted

            $symbol_length = strlen($symbol_match);
            $symbol_offset = $pot_symbols[$s_id][0][1];
            unset($pot_symbols[$s_id]);
            $symbol_end = $symbol_length + $symbol_offset;
            $symbol_hl = "";

            // if we have multiple styles, we have to handle them properly
            if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
                $old_sym = -1;
                // Split the current stuff to replace into its atomic symbols ...
                preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
                foreach ($sym_match_syms[0] as $sym_ms) {
                    //Check if consequtive symbols belong to the same group to save output ...
                    if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
                        && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
                        if (-1 != $old_sym) {
                            $symbol_hl .= "|>";
                        }
                        $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
                        if (!$this->use_classes) {
                            $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
                        } else {
                            $symbol_hl .= '<| class="sy' . $old_sym . '">';
                        }
                    }
                    $symbol_hl .= $sym_ms;
                }
                unset($sym_match_syms);

                //Close remaining tags and insert the replacement at the right position ...
                //Take caution if symbol_hl is empty to avoid doubled closing spans.
                if (-1 != $old_sym) {
                    $symbol_hl .= "|>";
                }
            } else {
                if (!$this->use_classes) {
                    $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
                } else {
                    $symbol_hl = '<| class="sy0">';
                }
                $symbol_hl .= $symbol_match . '|>';
            }

            $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);

            // since we replace old text with something of different size,
            // we'll have to keep track of the differences
            $global_offset += strlen($symbol_hl) - $symbol_length;
        }
    }
    //FIX for symbol highlighting ...

    // Add class/style for regexps
    foreach (array_keys($this->language_data['REGEXPS']) as $key) {
        if ($this->lexic_permissions['REGEXPS'][$key]) {
            if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
                // Read by handle_regexps_callback() to find the style function
                $this->_rx_key = $key;
                $stuff_to_parse = $this->_pcre_result_or_subject(
                    preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
                        array($this, 'handle_regexps_callback'),
                        $stuff_to_parse),
                    $stuff_to_parse
                );
            } else {
                if (!$this->use_classes) {
                    $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
                } else {
                    if (is_array($this->language_data['REGEXPS'][$key]) &&
                        array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
                        $attributes = ' class="' .
                            $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
                    } else {
                       $attributes = ' class="re' . $key . '"';
                    }
                }
                $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
            }
        }
    }

    // Replace <DOT> with . for urls
    $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
    // Replace <|UR1| with <a href= for urls also
    if (isset($this->link_styles[GESHI_LINK])) {
        if ($this->use_classes) {
            $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
        } else {
            $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
        }
    } else {
        $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
    }

    //
    // NOW we add the span thingy ;)
    //

    $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
    $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
    // Drop the space that was prepended at the top of this function
    return substr($stuff_to_parse, 1);
}

/**
 * Returns the outcome of a preg_replace() / preg_replace_callback() call,
 * or the text that was passed in when PCRE signalled an error by
 * returning null.
 *
 * @param mixed The value returned by the PCRE function
 * @param string The subject that was handed to the PCRE function
 * @return string The replaced text, or the untouched subject on failure
 * @access private
 */
function _pcre_result_or_subject($result, $subject) {
    return (null === $result) ? $subject : $result;
}
3552
/**
 * Stores the time taken to parse the code in $this->time.
 *
 * Each argument is split at the space into two parts, as in the string
 * that microtime() returns ("fraction seconds"); the stored value is the
 * sum of the end parts minus both start parts.
 *
 * @param microtime The time when parsing started
 * @param microtime The time when parsing ended
 * @since 1.0.2
 * @access private
 */
function set_time($start_time, $end_time) {
    list($start_fraction, $start_seconds) = explode(' ', $start_time);
    list($end_fraction, $end_seconds) = explode(' ', $end_time);

    $this->time = $end_fraction + $end_seconds - $start_fraction - $start_seconds;
}
3566
/**
 * Returns the parsing duration last recorded by set_time().
 *
 * @return double The time taken to parse the code
 * @since 1.0.2
 */
function get_time() {
    $elapsed = $this->time;

    return $elapsed;
}
3576
/**
 * Merges arrays recursively, overwriting values of the first array with values of later arrays
 *
 * Accepts any number of arrays. Keys present in a later array replace the
 * value gathered so far, except when the new value is an array and the key
 * is already set: those two are merged by a recursive call.
 *
 * @return mixed The merged array, or false (after an E_USER_WARNING) when
 *               one of the arguments is not an array
 * @since 1.0.8
 * @access private
 */
function merge_arrays() {
    $arrays = func_get_args();

    // check arguments
    // comment out if more performance is necessary (in this case the foreach loop will trigger a warning if the argument is not an array)
    foreach ($arrays as $position => $candidate) {
        if (is_array($candidate)) {
            continue;
        }
        // also array_merge_recursive returns nothing in this case
        trigger_error('Argument #' . ($position + 1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
        return false;
    }

    // the first array is in the output set in every case
    $ret = $arrays[0];

    // merge $ret with the remaining arrays, in the order they were given
    foreach (array_slice($arrays, 1) as $overlay) {
        foreach ($overlay as $key => $value) {
            // if $ret[$key] is not an array you try to merge a scalar value with an array - the result is not defined (incompatible arrays)
            // in this case the call will trigger an E_USER_WARNING and the $ret[$key] will be false.
            $ret[$key] = (is_array($value) && isset($ret[$key]))
                ? $this->merge_arrays($ret[$key], $value)
                : $value;
        }
    }

    return $ret;
}
3615
/**
 * Gets language information and stores it for later use
 *
 * Does nothing when the given file is the one already loaded. Otherwise the
 * language file is require'd (it is expected to fill $language_data), the
 * lexic permissions are initialised from it, ENABLE_FLAGS from the
 * PARSER_CONTROL section are applied, and an optional "<name>.style.php"
 * file next to the language file may override the styles.
 *
 * @param string The filename of the language file you want to load
 * @since 1.0.0
 * @access private
 * @todo Needs to load keys for lexic permissions for keywords, regexps etc
 */
function load_language($file_name) {
    // this file is already loaded!
    if ($file_name == $this->loaded_language) {
        return;
    }

    //Prepare some stuff before actually loading the language file
    $this->loaded_language = $file_name;
    $this->parse_cache_built = false;
    $this->enable_highlighting();
    $language_data = array();

    //Load the language file
    require $file_name;

    // Perhaps some checking might be added here later to check that
    // $language data is a valid thing but maybe not
    $this->language_data = $language_data;

    // Set strict mode if should be set
    $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];

    // Keyword groups are highlighted by default unless the group is empty
    foreach ($this->language_data['KEYWORDS'] as $key => $words) {
        $this->lexic_permissions['KEYWORDS'][$key] = !empty($words);
    }

    // Single line comments and regexps are always highlighted by default
    foreach ($this->language_data['COMMENT_SINGLE'] as $key => $unused) {
        $this->lexic_permissions['COMMENTS'][$key] = true;
    }
    foreach ($this->language_data['REGEXPS'] as $key => $unused) {
        $this->lexic_permissions['REGEXPS'][$key] = true;
    }

    // for BenBE and future code reviews:
    // we can use empty here since we only check for existance and emptiness of an array
    // if it is not an array at all but rather false or null this will work as intended as well
    // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice
    if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
        foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
            // everything except GESHI_NEVER enables the lexic
            $perm = $value !== GESHI_NEVER;

            if ($flag == 'ALL') {
                $this->enable_highlighting($perm);
            } else if (!isset($this->lexic_permissions[$flag])) {
                // unknown lexic permission - nothing to do
            } else if (is_array($this->lexic_permissions[$flag])) {
                foreach (array_keys($this->lexic_permissions[$flag]) as $key) {
                    $this->lexic_permissions[$flag][$key] = $perm;
                }
            } else {
                $this->lexic_permissions[$flag] = $perm;
            }
        }
        unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
    }

    //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
    //You need to set one for HARDESCAPES only in this case.
    if (!isset($this->language_data['HARDCHAR'])) {
        $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
    }

    //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
    $style_filename = substr($file_name, 0, -4) . '.style.php';
    if (!is_readable($style_filename)) {
        return;
    }

    //Clear any style_data that could have been set before ...
    unset($style_data);

    //Read the Style Information from the style file
    include $style_filename;

    //Apply the new styles to our current language styles
    if (isset($style_data) && is_array($style_data)) {
        $this->language_data['STYLES'] =
            $this->merge_arrays($this->language_data['STYLES'], $style_data);
    }
}
3714
/**
 * Takes the parsed code and various options, and creates the HTML
 * surrounding it to make it look nice.
 *
 * The highlighted code is split into lines and re-assembled in one of two
 * layouts:
 *  - an ordered list with one <li> per line, when line numbers are enabled
 *    and the header type is not GESHI_HEADER_PRE_TABLE;
 *  - plain lines joined by newlines otherwise, preceded by a table cell
 *    holding the line numbers when GESHI_HEADER_PRE_TABLE is used with
 *    line numbers.
 * The result is wrapped in header() / footer() and written back into
 * $parsed_code.
 *
 * @param string The code already parsed (reference!)
 * @since 1.0.0
 * @access private
 */
function finalise(&$parsed_code) {
    // Remove end parts of important declarations
    // This is BUGGY!! My fault for bad code: fix coming in 1.2
    // @todo Remove this crap
    if ($this->enable_important_blocks &&
        (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
        $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
    }

    // Add HTML whitespace stuff if we're using the <div> header
    if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
        $this->indent($parsed_code);
    }

    // purge some unnecessary stuff
    /** NOTE: memorypeak #1 */
    $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);

    // If we are using IDs for line numbers, there needs to be an overall
    // ID set to prevent collisions.
    if ($this->add_ids && !$this->overall_id) {
        $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
    }

    // Get code into lines
    /** NOTE: memorypeak #2 */
    $code = explode("\n", $parsed_code);
    $parsed_code = $this->header();

    // If we're using line numbers, we insert <li>s and appropriate
    // markup to style them (otherwise we don't need to do anything)
    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
        // If we're using the <pre> header, we shouldn't add newlines because
        // the <pre> will line-break them (and the <li>s already do this for us)
        $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';

        // Foreach line... ($i is incremented inside the body so that it is
        // the 1-based line number for the id / extra-style handling)
        for ($i = 0, $n = count($code); $i < $n;) {
            //Reset the attributes for a new line ...
            $attrs = array();

            // Make lines have at least one (non-collapsing) space in them if
            // they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }

            // If this is a "special line"...
            if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                // Set the attributes to style the line
                if ($this->use_classes) {
                    $attrs['class'][] = 'li2';
                    $def_attr = ' class="de2"';
                } else {
                    $attrs['style'][] = $this->line_style2;
                    // This style "covers up" the special styles set for special lines
                    // so that styles applied to special lines don't apply to the actual
                    // code on that line
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            } else {
                if ($this->use_classes) {
                    $attrs['class'][] = 'li1';
                    $def_attr = ' class="de1"';
                } else {
                    $attrs['style'][] = $this->line_style1;
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            }

            //Check which type of tag to insert for this line
            if ($this->header_type == GESHI_HEADER_PRE_VALID) {
                $start = "<pre$def_attr>";
                $end = '</pre>';
            } else {
                // Span or div?
                $start = "<div$def_attr>";
                $end = '</div>';
            }

            ++$i;

            // Are we supposed to use ids? If so, add them
            if ($this->add_ids) {
                $attrs['id'][] = "$this->overall_id-$i";
            }

            //Is this some line with extra styles???
            if (in_array($i, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$i])) {
                        $attrs['class'][] = "lx$i";
                    } else {
                        $attrs['class'][] = "ln-xtra";
                    }
                } else {
                    array_push($attrs['style'], $this->get_line_style($i));
                }
            }

            // Add in the line surrounded by appropriate list HTML
            $attr_string = '';
            foreach ($attrs as $key => $attr) {
                $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
            }

            $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
            // free each line as soon as it has been emitted to keep memory low
            unset($code[$i - 1]);
        }
    } else {
        $n = count($code);
        if ($this->use_classes) {
            $attributes = ' class="de1"';
        } else {
            $attributes = ' style="'. $this->code_style .'"';
        }
        if ($this->header_type == GESHI_HEADER_PRE_VALID) {
            $parsed_code .= '<pre'. $attributes .'>';
        } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                if ($this->use_classes) {
                    $attrs = ' class="ln"';
                } else {
                    $attrs = ' style="'. $this->table_linenumber_style .'"';
                }
                $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
                // get linenumbers
                // we don't merge it with the for below, since it should be better for
                // memory consumption this way
                // @todo: but... actually it would still be somewhat nice to merge the two loops
                //        the mem peaks are at different positions
                for ($i = 0; $i < $n; ++$i) {
                    list($open_html, $close) = $this->_finalise_open_line_spans($i);
                    $parsed_code .= $open_html;
                    $parsed_code .= $this->line_numbers_start + $i;
                    if ($close) {
                        $parsed_code .= str_repeat('</span>', $close);
                    } else {
                        // The original guarded this with ($i != $n), which is
                        // always true inside the loop, so the newline is
                        // emitted after every unwrapped number.
                        $parsed_code .= "\n";
                    }
                }
                $parsed_code .= '</pre></td><td'.$attributes.'>';
            }
            $parsed_code .= '<pre'. $attributes .'>';
        }
        // No line numbers, but still need to handle highlighting lines extra.
        // Have to use block-level spans so the full width of the code is highlighted
        for ($i = 0; $i < $n; ++$i) {
            // Make lines have at least one (non-collapsing) space in them if
            // they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }

            list($open_html, $close) = $this->_finalise_open_line_spans($i);
            $parsed_code .= $open_html;
            $parsed_code .= $code[$i];

            if ($close) {
                // block-level spans already break the line, no newline needed
                $parsed_code .= str_repeat('</span>', $close);
            } elseif ($i + 1 < $n) {
                $parsed_code .= "\n";
            }
            unset($code[$i]);
        }

        if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
            $parsed_code .= '</pre>';
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            $parsed_code .= '</td>';
        }
    }

    $parsed_code .= $this->footer();
}

/**
 * Builds the opening <span> tags that wrap one line in the layouts of
 * finalise() that do not use <li> elements.
 *
 * Extra-line styles are looked up by the 1-based line number ($i + 1), the
 * same key that is tested against $this->highlight_extra_lines and that
 * get_stylesheet() uses for the ".lxN" rules.
 *
 * @param int $i Zero-based index of the line
 * @return array array(string $html, int $open) - the opening markup and the
 *               number of </span> tags needed to close it
 * @access private
 */
function _finalise_open_line_spans($i) {
    $html = '';
    $open = 0;

    // fancy lines
    if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
        $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
        // Set the attributes to style the line
        if ($this->use_classes) {
            $html .= '<span class="xtra li2"><span class="de2">';
        } else {
            // This style "covers up" the special styles set for special lines
            // so that styles applied to special lines don't apply to the actual
            // code on that line
            $html .= '<span style="display:block;' . $this->line_style2 . '">'
                . '<span style="' . $this->code_style . '">';
        }
        $open += 2;
    }

    //Is this some line with extra styles???
    $line = $i + 1;
    if (in_array($line, $this->highlight_extra_lines)) {
        if ($this->use_classes) {
            if (isset($this->highlight_extra_lines_styles[$line])) {
                $html .= "<span class=\"xtra lx$line\">";
            } else {
                $html .= "<span class=\"xtra ln-xtra\">";
            }
        } else {
            $html .= "<span style=\"display:block;" . $this->get_line_style($line) . "\">";
        }
        ++$open;
    }

    return array($html, $open);
}
3963
/**
 * Builds the opening markup of the code block: the outer container with
 * its class/id/style attributes, the optional header content and, when
 * line numbers are enabled, the opening list (or table row) element.
 *
 * @return string The header for the code block
 * @since 1.0.0
 * @access private
 */
function header() {
    /**
     * @todo Document behaviour change - class is outputted regardless of whether
     * we're using classes or not. Same with style
     */
    // Attributes of the outer container
    $css_classes = $this->_genCSSName($this->language);
    if ($this->overall_class != '') {
        $css_classes .= " " . $this->_genCSSName($this->overall_class);
    }
    $container_attrs = ' class="' . $css_classes . '"';
    if ($this->overall_id != '') {
        $container_attrs .= " id=\"{$this->overall_id}\"";
    }
    if ($this->overall_style != '' && !$this->use_classes) {
        $container_attrs .= ' style="' . $this->overall_style . '"';
    }

    // Attributes of the <ol>, only needed when numbering does not start at 1
    $list_attrs = '';
    if ($this->line_numbers_start != 1) {
        $list_attrs .= ' start="' . $this->line_numbers_start . '"';
    }

    $numbered = ($this->line_numbers != GESHI_NO_LINE_NUMBERS);

    // User supplied header content, wrapped in its own element
    $head_html = $this->header_content;
    if ($head_html) {
        if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) {
            $head_html = str_replace("\n", '', $head_html);
        }
        $head_html = $this->replace_keywords($head_html);

        $head_attr = $this->use_classes
            ? ' class="head"'
            : " style=\"{$this->header_content_style}\"";

        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $numbered) {
            $head_html = "<thead><tr><td colspan=\"2\" $head_attr>$head_html</td></tr></thead>";
        } else {
            $head_html = "<div$head_attr>$head_html</div>";
        }
    }

    $inner_open = $this->force_code_block ? '<div>' : '';

    // No outer container at all
    if (GESHI_HEADER_NONE == $this->header_type) {
        if ($numbered) {
            return "$head_html<ol$container_attrs$list_attrs>";
        }
        return $head_html . $inner_open;
    }

    if ($numbered) {
        if ($this->header_type == GESHI_HEADER_PRE) {
            return "<pre$container_attrs>$head_html<ol$list_attrs>";
        }
        if ($this->header_type == GESHI_HEADER_DIV ||
            $this->header_type == GESHI_HEADER_PRE_VALID) {
            return "<div$container_attrs>$head_html<ol$list_attrs>";
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE) {
            return "<table$container_attrs>$head_html<tbody><tr class=\"li1\">";
        }
        // any other header type yields no value, as before
        return;
    }

    // Without line numbers everything but the <pre> header uses a <div>
    if ($this->header_type == GESHI_HEADER_PRE) {
        return "<pre$container_attrs>$head_html" . $inner_open;
    }
    return "<div$container_attrs>$head_html" . $inner_open;
}
4043
/**
 * Returns the footer for the code block: the optional footer content
 * followed by the closing tags matching what header() opened for the
 * current header type / line number combination.
 *
 * @return string The footer for the code block
 * @since 1.0.0
 * @access private
 */
function footer() {
    $footer = $this->footer_content;
    if ($footer) {
        if ($this->header_type == GESHI_HEADER_PRE) {
            $footer = str_replace("\n", '', $footer);
        }
        $footer = $this->replace_keywords($footer);

        if ($this->use_classes) {
            $attr = ' class="foot"';
        } else {
            $attr = " style=\"{$this->footer_content_style}\"";
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            // Apply the footer class/style to the table cell as well, the
            // same way header() does for its <thead> cell; previously $attr
            // was computed but dropped in this layout.
            $footer = "<tfoot><tr><td colspan=\"2\"$attr>$footer</td></tr></tfoot>";
        } else {
            $footer = "<div$attr>$footer</div>";
        }
    }

    if (GESHI_HEADER_NONE == $this->header_type) {
        return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
    }

    if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</div>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    }
    elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</tr></tbody>$footer</table>";
        }
        // without line numbers header() opened a plain <div>
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    }
    else {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</pre>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</pre>";
    }
}
4097
/**
 * Substitutes the placeholders <TIME>/{TIME}, <LANGUAGE>/{LANGUAGE},
 * <VERSION>/{VERSION} and <SPEED>/{SPEED} in header or footer content
 * with the corresponding values of the current highlighting run.
 *
 * @param string The header or footer content to do replacement on
 * @return string The header or footer with replaced keywords
 * @since 1.0.2
 * @access private
 */
function replace_keywords($instr) {
    $elapsed = $this->get_time();
    $time_text = number_format($elapsed, 3);

    $lang_name = $this->language_data['LANG_NAME'];

    // Throughput of the run; undefined when no time has been measured
    if ($elapsed <= 0) {
        $rate = 'N/A';
    } else {
        $bytes_per_second = strlen($this->source) / $elapsed;
        $rate = ($bytes_per_second >= 1024)
            ? sprintf("%.2f KB/s", $bytes_per_second / 1024.0)
            : sprintf("%.0f B/s", $bytes_per_second);
    }

    // Each value is listed twice: once for the <...> and once for the
    // {...} spelling of its placeholder.
    $placeholders = array(
        '<TIME>', '{TIME}',
        '<LANGUAGE>', '{LANGUAGE}',
        '<VERSION>', '{VERSION}',
        '<SPEED>', '{SPEED}'
    );
    $values = array(
        $time_text, $time_text,
        $lang_name, $lang_name,
        GESHI_VERSION, GESHI_VERSION,
        $rate, $rate
    );

    return str_replace($placeholders, $values, $instr);
}
4138
/**
 * Secure replacement for PHP built-in function htmlspecialchars().
 *
 * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
 * for this replacement function.
 *
 * The INTERFACE for this function is almost the same as that for
 * htmlspecialchars(), with the same default for quote style; however, there
 * is no 'charset' parameter. The reason for this is as follows:
 *
 * The PHP docs say:
 *      "The third argument charset defines character set used in conversion."
 *
 * I suspect PHP's htmlspecialchars() is working at the byte-value level and
 * thus _needs_ to know (or assume) a character set because the special
 * characters to be replaced could exist at different code points in
 * different character sets. (If indeed htmlspecialchars() works at
 * byte-value level that goes some way towards explaining why the
 * vulnerability would exist in this function, too, and not only in
 * htmlentities() which certainly is working at byte-value level.)
 *
 * This replacement function however works at character level and should
 * therefore be "immune" to character set differences - so no charset
 * parameter is needed or provided. If a third parameter is passed, it will
 * be silently ignored.
 *
 * In the OUTPUT there is a minor difference in that we use '&#39;' instead
 * of PHP's '&#039;' for a single quote: this provides compatibility with
 *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
 * (see comment by mikiwoz at yahoo dot co dot uk on
 * http://php.net/htmlspecialchars); it also matches the entity definition
 * for XML 1.0
 * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
 * Like PHP we use a numeric character reference instead of '&apos;' for the
 * single quote. For the other special characters we use the named entity
 * references, as PHP is doing.
 *
 * In addition to the HTML special characters, ';' and '|' are replaced by
 * the internal markers '<SEMI>' and '<PIPE>'.
 *
 * The translation table is kept in a static variable; each call works on
 * its own copy so that a call with ENT_NOQUOTES or ENT_QUOTES does not
 * change the result of later calls that use a different quote style.
 *
 * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
 *
 * @license     http://www.gnu.org/copyleft/lgpl.html
 *              GNU Lesser General Public License
 * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
 *              Wikka Development Team}
 *
 * @access      private
 * @param       string  $string string to be converted
 * @param       integer $quote_style
 *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
 *                      - ENT_NOQUOTES: escapes only &, < and >
 *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
 * @return      string  converted string
 * @since       1.0.7.18
 */
function hsc($string, $quote_style = ENT_COMPAT) {
    // init (shared, never modified)
    static $aBaseSpecchar = array(
        '&' => '&amp;',
        '"' => '&quot;',
        '<' => '&lt;',
        '>' => '&gt;',

        //This fix is related to SF#1923020, but has to be applied
        //regardless of actually highlighting symbols.

        //Circumvent a bug with symbol highlighting
        //This is required as ; would produce undesirable side-effects if it
        //was not to be processed as an entity.
        ';' => '<SEMI>', // Force ; to be processed as entity
        '|' => '<PIPE>' // Force | to be processed as entity
        );                      // ENT_COMPAT set

    // Work on a per-call copy: modifying the static table itself would make
    // the adjustment below stick for every subsequent call.
    $aTransSpecchar = $aBaseSpecchar;

    switch ($quote_style) {
        case ENT_NOQUOTES: // don't convert double quotes
            unset($aTransSpecchar['"']);
            break;
        case ENT_QUOTES: // convert single quotes as well
            $aTransSpecchar["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
            break;
    }

    // return translated string
    return strtr($string, $aTransSpecchar);
}
4222
/**
 * Turns a name into a usable CSS class/id name by prefixing it with an
 * underscore when its first character is numeric. An empty name is
 * returned unchanged.
 *
 * @param string $name The raw name
 * @return string The name, prefixed with '_' if it starts with a digit
 * @access private
 */
function _genCSSName($name){
    // isset() guards the offset access so an empty name does not trigger an
    // "uninitialized string offset" diagnostic.
    return ((isset($name[0]) && is_numeric($name[0])) ? '_' : '') . $name;
}
4226
/**
 * Builds a CSS stylesheet for the highlighted code. In economy mode only
 * the rules that matter for the current configuration of this code block
 * are emitted instead of the complete set.
 *
 * @param boolean Whether to use economy mode or not
 * @return string A stylesheet built on the data for the current language
 * @since 1.0.0
 */
function get_stylesheet($economy_mode = true) {
    // After an error the language data is most likely not populated, so
    // no stylesheet can be produced.
    if ($this->error) {
        return '';
    }

    // Make sure the style rearrangements have been processed
    if (!isset($this->language_data['NUMBERS_CACHE'])) {
        $this->build_style_cache();
    }

    // Work out the selector prefix: the overall id wins, otherwise the
    // language class (plus the overall class, if any).
    if ($this->overall_id) {
        $selector = '#' . $this->_genCSSName($this->overall_id);
    } else {
        $selector = '.' . $this->_genCSSName($this->language);
        if ($this->overall_class) {
            $selector .= '.' . $this->_genCSSName($this->overall_class);
        }
    }
    $selector .= ' ';

    // Leading comment of the stylesheet
    $credits = " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n"
        . " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n";
    if ($economy_mode) {
        $css = "/**\n" . $credits . " */\n";
    } else {
        $css = "/**\n"
            . " * GeSHi Dynamically Generated Stylesheet\n"
            . " * --------------------------------------\n"
            . " * Dynamically generated stylesheet for {$this->language}\n"
            . " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n"
            . $credits
            . " * --------------------------------------\n"
            . " */\n";
    }

    // Rules tied to line numbering are always written in full mode and
    // only when line numbers are in use in economy mode.
    $want_line_rules = !$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS;

    // Default style of the code inside each line
    if ($want_line_rules) {
        $css .= $selector . '.de1, ' . $selector . '.de2 {' . $this->code_style . "}\n";
    }

    // Overall style (empty styles are meaningless, regardless of economy mode)
    if ($this->overall_style != '') {
        $css .= $selector . ' {' . $this->overall_style . "}\n";
    }

    // Link styles: either the style is empty and no rule is needed, or
    // the rule for the matching pseudo-class is written.
    foreach ($this->link_styles as $key => $style) {
        if ($style == '') {
            continue;
        }
        switch ($key) {
            case GESHI_LINK:
                $pseudo = 'link';
                break;
            case GESHI_HOVER:
                $pseudo = 'hover';
                break;
            case GESHI_ACTIVE:
                $pseudo = 'active';
                break;
            case GESHI_VISITED:
                $pseudo = 'visited';
                break;
            default:
                $pseudo = null;
        }
        if ($pseudo !== null) {
            $css .= $selector . 'a:' . $pseudo . ' {' . $style . "}\n";
        }
    }

    // Header, footer and "important" blocks
    if ($this->header_content_style != '') {
        $css .= $selector . '.head {' . $this->header_content_style . "}\n";
    }
    if ($this->footer_content_style != '') {
        $css .= $selector . '.foot {' . $this->footer_content_style . "}\n";
    }
    if ($this->important_styles != '') {
        $css .= $selector . '.imp {' . $this->important_styles . "}\n";
    }

    // Line number styles
    if ($want_line_rules && $this->line_style1 != '') {
        $css .= $selector . 'li, ' . $selector . '.li1 {' . $this->line_style1 . "}\n";
    }
    if ($want_line_rules && $this->table_linenumber_style != '') {
        $css .= $selector . '.ln {' . $this->table_linenumber_style . "}\n";
    }
    if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
        $css .= $selector . '.li2 {' . $this->line_style2 . "}\n";
    }

    // Keyword groups
    foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if (!$economy_mode || !empty($this->lexic_permissions['KEYWORDS'][$group])) {
            $css .= $selector . '.kw' . $group . ' {' . $styles . "}\n";
        }
    }

    // Comment groups (also kept in economy mode when a comment regexp
    // exists for the group)
    foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if (!$economy_mode
            || !empty($this->lexic_permissions['COMMENTS'][$group])
            || (!empty($this->language_data['COMMENT_REGEXP'])
                && !empty($this->language_data['COMMENT_REGEXP'][$group]))) {
            $css .= $selector . '.co' . $group . ' {' . $styles . "}\n";
        }
    }

    // Lexics governed by a single on/off permission:
    // array(lexic name, class prefix, whether a 'HARD' group maps to '_h')
    $switched_lexics = array(
        array('ESCAPE_CHAR', 'es', true),  // since 1.0.8: hardescapes
        array('BRACKETS',    'br', false),
        array('SYMBOLS',     'sy', false),
        array('STRINGS',     'st', true),  // since 1.0.8: hardquotes
        array('NUMBERS',     'nu', false),
        array('METHODS',     'me', false)
    );
    foreach ($switched_lexics as $spec) {
        list($lexic, $prefix, $maps_hard) = $spec;
        foreach ($this->language_data['STYLES'][$lexic] as $group => $styles) {
            if ($styles != '' && (!$economy_mode || $this->lexic_permissions[$lexic])) {
                $suffix = ($maps_hard && $group === 'HARD') ? '_h' : $group;
                $css .= $selector . '.' . $prefix . $suffix . ' {' . $styles . "}\n";
            }
        }
    }

    // Script styles (empty styles are meaningless, regardless of economy mode)
    foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
        if ($styles != '') {
            $css .= $selector . '.sc' . $group . ' {' . $styles . "}\n";
        }
    }

    // Regular expression groups; a regexp may name its own CSS class
    foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if ($economy_mode && empty($this->lexic_permissions['REGEXPS'][$group])) {
            continue;
        }
        $regexp = $this->language_data['REGEXPS'][$group];
        if (is_array($regexp) && array_key_exists(GESHI_CLASS, $regexp)) {
            $css .= $selector . '.' . $regexp[GESHI_CLASS] . ' {' . $styles . "}\n";
        } else {
            $css .= $selector . '.re' . $group . ' {' . $styles . "}\n";
        }
    }

    // Styles for lines being highlighted extra
    if (!$economy_mode || (count($this->highlight_extra_lines) != count($this->highlight_extra_lines_styles))) {
        $css .= $selector . '.ln-xtra, ' . $selector . 'li.ln-xtra, ' . $selector . 'div.ln-xtra {'
            . $this->highlight_extra_lines_style . "}\n";
    }
    $css .= $selector . "span.xtra { display:block; }\n";
    foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
        $css .= $selector . '.lx' . $lineid . ', '
            . $selector . 'li.lx' . $lineid . ', '
            . $selector . 'div.lx' . $lineid . ' {' . $linestyle . "}\n";
    }

    return $css;
}
4432
/**
 * Gets the style that is used for the specified line: the style assigned
 * to that line in particular if there is one, otherwise the general style
 * for extra-highlighted lines.
 *
 * @param int The line number information is requested for
 * @return string The style for that line
 * @access private
 * @since 1.0.7.21
 */
function get_line_style($line) {
    if (isset($this->highlight_extra_lines_styles[$line])) {
        return $this->highlight_extra_lines_styles[$line];
    }

    // no "extra" style assigned to this particular line
    return $this->highlight_extra_lines_style;
}
4451
/**
 * This function creates an optimized regular expression list
 * of an array of strings.
 *
 * Example:
 * <code>$list = array('faa', 'foo', 'foobar');
 *      => string 'f(aa|oo(bar)?)'</code>
 *
 * The sorted entries are merged into a nested token array keyed by common
 * prefixes, which is converted to a pattern by
 * _optimize_regexp_list_tokens_to_string(). The result is split over
 * several patterns whenever GESHI_MAX_PCRE_LENGTH or
 * GESHI_MAX_PCRE_SUBPATTERNS would be exceeded.
 *
 * @param $list array of (unquoted) strings
 * @param $regexp_delimiter your regular expression delimiter, @see preg_quote()
 * @return array list of strings, each one usable inside a regular expression
 * @author Milian Wolff <[email protected]>
 * @since 1.0.8
 * @access private
 */
function optimize_regexp_list($list, $regexp_delimiter = '/') {
    // characters a key must not start with after being split (see below)
    $regex_chars = array('.', '\\', '+', '*', '?', '[', '^', ']', '$',
        '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
    sort($list);
    $regexp_list = array('');
    $num_subpatterns = 0;
    $list_key = 0;

    // the tokens which we will use to generate the regexp list
    $tokens = array();
    // the chain of keys (one per nesting level) the previous entry was stored under
    $prev_keys = array();
    // go through all entries of the list and generate the token list
    $cur_len = 0;
    for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
        if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
            // seems like the length of this pcre is growing exorbitantly
            $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
            $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
            $tokens = array();
            $cur_len = 0;
        }
        $level = 0;
        $entry = preg_quote((string) $list[$i], $regexp_delimiter);
        // $pointer walks down the token tree; it must stay a reference
        $pointer = &$tokens;
        // properly assign the new entry to the correct position in the token array
        // possibly generate smaller common denominator keys
        while (true) {
            // get the common denominator
            if (isset($prev_keys[$level])) {
                if ($prev_keys[$level] == $entry) {
                    // this is a duplicate entry, skip it
                    continue 2;
                }
                $char = 0;
                while (isset($entry[$char]) && isset($prev_keys[$level][$char])
                        && $entry[$char] == $prev_keys[$level][$char]) {
                    ++$char;
                }
                if ($char > 0) {
                    // this entry has at least some chars in common with the current key
                    if ($char == strlen($prev_keys[$level])) {
                        // current key is totally matched, i.e. this entry has just some bits appended
                        $pointer = &$pointer[$prev_keys[$level]];
                    } else {
                        // only part of the keys match
                        $new_key_part1 = substr($prev_keys[$level], 0, $char);
                        $new_key_part2 = substr($prev_keys[$level], $char);

                        if (in_array($new_key_part1[0], $regex_chars)
                            || in_array($new_key_part2[0], $regex_chars)) {
                            // this is bad, a regex char as first character
                            // store the entry unsplit; the next pass of the
                            // loop then sees it as a duplicate and moves on
                            $pointer[$entry] = array('' => true);
                            array_splice($prev_keys, $level, count($prev_keys), $entry);
                            $cur_len += strlen($entry);
                            continue;
                        } else {
                            // relocate previous tokens
                            $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
                            unset($pointer[$prev_keys[$level]]);
                            $pointer = &$pointer[$new_key_part1];
                            // recreate key index
                            array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
                            $cur_len += strlen($new_key_part2);
                        }
                    }
                    ++$level;
                    $entry = substr($entry, $char);
                    continue;
                }
                // else: fall through, i.e. no common denominator was found
            }
            if ($level == 0 && !empty($tokens)) {
                // we can dump current tokens into the string and throw them away afterwards
                $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
                $new_subpatterns = substr_count($new_entry, '(?:');
                if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                    $regexp_list[++$list_key] = $new_entry;
                    $num_subpatterns = $new_subpatterns;
                } else {
                    // compare against '' explicitly: empty() also treats the
                    // valid pattern "0" as empty and would drop the separator
                    if ($regexp_list[$list_key] !== '') {
                        $new_entry = '|' . $new_entry;
                    }
                    $regexp_list[$list_key] .= $new_entry;
                    $num_subpatterns += $new_subpatterns;
                }
                $tokens = array();
                $cur_len = 0;
            }
            // no further common denominator found
            $pointer[$entry] = array('' => true);
            array_splice($prev_keys, $level, count($prev_keys), $entry);

            $cur_len += strlen($entry);
            break;
        }
        unset($list[$i]);
    }
    // make sure the last tokens get converted as well
    $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
    if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
        // start a new pattern unless the current slot is still unused
        if ($regexp_list[$list_key] !== '') {
            ++$list_key;
        }
        $regexp_list[$list_key] = $new_entry;
    } else {
        if ($regexp_list[$list_key] !== '') {
            $new_entry = '|' . $new_entry;
        }
        $regexp_list[$list_key] .= $new_entry;
    }
    return $regexp_list;
}
/**
 * Creates the regexp alternation string for a token tree.
 * You should not call this function directly, @see $this->optimize_regexp_list().
 *
 * Every key of $tokens is a pattern fragment and every value is an array of
 * sub-tokens, i.e. possible continuations of that fragment. An empty-string
 * key ('') inside the sub-token array marks the fragment itself as a complete
 * entry; its continuations are then emitted as an optional group:
 *
 *   array('for' => array('' => true, 'each' => array('' => true)))  =>  for(?:each)?
 *
 * @param array &$tokens  token tree to convert; it is only read here
 * @param bool  $recursed true when called for a sub-tree, in which case the
 *                        clean-up passes (which only run once, on the
 *                        complete top-level string) are skipped
 * @return string the alternatives joined by "|", without a trailing pipe;
 *                an empty string when $tokens is empty
 * @author Milian Wolff <[email protected]>
 * @since 1.0.8
 * @access private
 */
function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
    $list = '';
    foreach ($tokens as $token => $sub_tokens) {
        $list .= $token;
        // the '' marker says that $token on its own is a valid entry; it must
        // not be rendered as an alternative, so remove it from the local copy
        $close_entry = isset($sub_tokens['']);
        unset($sub_tokens['']);
        if (!empty($sub_tokens)) {
            $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
            if ($close_entry) {
                // make sub_tokens optional
                $list .= '?';
            }
        }
        $list .= '|';
    }

    if ($list === '') {
        // nothing to convert; substr('', 0, -1) below would yield false
        // instead of a string on PHP versions before 8.0
        return '';
    }

    if (!$recursed) {
        // do some optimizations
        // NOTE: an additional pass that factored out common trailing strings
        // used to live here but was disabled because it was buggy.

        // (?:p)? => p?
        $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);

        // (?:a|b|c|d|...)? => [abcd...]?
        // TODO: a|bb|c => [ac]|bb
        // An anonymous function replaces the former create_function() call,
        // which is deprecated since PHP 7.2 and removed in PHP 8.0
        // (closures require PHP 5.3 or newer).
        $list = preg_replace_callback(
            '#\(\?\:((?:.\|)+.)\)#',
            function ($matches) {
                return '[' . str_replace('|', '', $matches[1]) . ']';
            },
            $list
        );
    }

    // return $list without trailing pipe
    return substr($list, 0, -1);
}
4624	} // End Class GeSHi
4625
4626
if (!function_exists('geshi_highlight')) {
    /**
     * One-call convenience wrapper around the GeSHi class, used in the same
     * manner as PHP's highlight_string().
     *
     * @param string  $string   The code to highlight
     * @param string  $language The language to highlight the code in
     * @param string  $path     The path to the language files. Can be left out:
     *                          as from version 1.0.7 the path should be
     *                          detected automatically
     * @param boolean $return   Whether to return the result instead of echoing it
     * @return string|boolean   If $return is true, the highlighted code wrapped
     *                          in <code> tags. Otherwise the code is echoed and
     *                          the result is false when $geshi->error() reports
     *                          something, true when it does not
     * @since 1.0.2
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $highlighter = new GeSHi($string, $language, $path);
        $highlighter->set_header_type(GESHI_HEADER_NONE);

        // parse exactly once; both output modes use the same markup
        $markup = '<code>' . $highlighter->parse_code() . '</code>';

        if ($return) {
            return $markup;
        }

        echo $markup;

        // a truthy error() result means the run failed
        return !$highlighter->error();
    }
}
4655
4656	?>

Note: See TracBrowser for help on using the repository browser.

Download in other formats: