source: documentation/trunk/packages/dokuwiki-2011-05-25a/inc/geshi.php@ 25027

Last change on this file since 25027 was 25027, checked in by jmt12, 12 years ago

Adding the packages directory, and within it a configured version of dokuwiki all ready to run

File size: 195.9 KB
Line 
1<?php
2/**
3 * GeSHi - Generic Syntax Highlighter
4 *
5 * The GeSHi class for Generic Syntax Highlighting. Please refer to the
6 * documentation at http://qbnz.com/highlighter/documentation.php for more
7 * information about how to use this class.
8 *
9 * For changes, release notes, TODOs etc, see the relevant files in the docs/
10 * directory.
11 *
12 * This file is part of GeSHi.
13 *
14 * GeSHi is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
18 *
19 * GeSHi is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with GeSHi; if not, write to the Free Software
26 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 *
28 * @package geshi
29 * @subpackage core
30 * @author Nigel McNie <[email protected]>, Benny Baumann <[email protected]>
31 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
32 * @license http://gnu.org/copyleft/gpl.html GNU GPL
33 *
34 */
35
36//
37// GeSHi Constants
38// You should use these constant names in your programs instead of
39// their values - you never know when a value may change in a future
40// version
41//
42
43/** The version of this GeSHi file */
44define('GESHI_VERSION', '1.0.8.8');
45
46// Define the root directory for the GeSHi code tree
47if (!defined('GESHI_ROOT')) {
48 /** The root directory for GeSHi */
49 define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
50}
51/** The language file directory for GeSHi
52 @access private */
53define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
54
55// Define if GeSHi should be paranoid about security
56if (!defined('GESHI_SECURITY_PARANOID')) {
57 /** Tells GeSHi to be paranoid about security settings */
58 define('GESHI_SECURITY_PARANOID', false);
59}
60
61// Line numbers - use with enable_line_numbers()
62/** Use no line numbers when building the result */
63define('GESHI_NO_LINE_NUMBERS', 0);
64/** Use normal line numbers when building the result */
65define('GESHI_NORMAL_LINE_NUMBERS', 1);
66/** Use fancy line numbers when building the result */
67define('GESHI_FANCY_LINE_NUMBERS', 2);
68
69// Container HTML type
70/** Use nothing to surround the source */
71define('GESHI_HEADER_NONE', 0);
72/** Use a "div" to surround the source */
73define('GESHI_HEADER_DIV', 1);
74/** Use a "pre" to surround the source */
75define('GESHI_HEADER_PRE', 2);
76/** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
77define('GESHI_HEADER_PRE_VALID', 3);
78/**
79 * Use a "table" to surround the source:
80 *
81 * <table>
82 * <thead><tr><td colspan="2">$header</td></tr></thead>
83 * <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
84 * <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
85 * </table>
86 *
87 * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
88 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
89 * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
90 */
91define('GESHI_HEADER_PRE_TABLE', 4);
92
93// Capitalisation constants
94/** Leave keywords found as the case that they are */
95define('GESHI_CAPS_NO_CHANGE', 0);
96/** Uppercase keywords found */
97define('GESHI_CAPS_UPPER', 1);
98/** Lowercase keywords found */
99define('GESHI_CAPS_LOWER', 2);
100
101// Link style constants
102/** Links in the source in the :link state */
103define('GESHI_LINK', 0);
104/** Links in the source in the :hover state */
105define('GESHI_HOVER', 1);
106/** Links in the source in the :active state */
107define('GESHI_ACTIVE', 2);
108/** Links in the source in the :visited state */
109define('GESHI_VISITED', 3);
110
111// Important string starter/finisher
112// Note that if you change these, they should be as-is: i.e., don't
113// write them as if they had been run through htmlentities()
114/** The starter for important parts of the source */
115define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
116/** The ender for important parts of the source */
117define('GESHI_END_IMPORTANT', '<END GeSHi>');
118
119/**#@+
120 * @access private
121 */
122// When strict mode applies for a language
123/** Strict mode never applies (this is the most common) */
124define('GESHI_NEVER', 0);
125/** Strict mode *might* apply, and can be enabled or
126 disabled by {@link GeSHi->enable_strict_mode()} */
127define('GESHI_MAYBE', 1);
128/** Strict mode always applies */
129define('GESHI_ALWAYS', 2);
130
131// Advanced regexp handling constants, used in language files
132/** The key of the regex array defining what to search for */
133define('GESHI_SEARCH', 0);
134/** The key of the regex array defining what bracket group in a
135 matched search to use as a replacement */
136define('GESHI_REPLACE', 1);
137/** The key of the regex array defining any modifiers to the regular expression */
138define('GESHI_MODIFIERS', 2);
139/** The key of the regex array defining what bracket group in a
140 matched search to put before the replacement */
141define('GESHI_BEFORE', 3);
142/** The key of the regex array defining what bracket group in a
143 matched search to put after the replacement */
144define('GESHI_AFTER', 4);
145/** The key of the regex array defining a custom keyword to use
146 for this regexp's html tag class */
147define('GESHI_CLASS', 5);
148
149/** Used in language files to mark comments */
150define('GESHI_COMMENTS', 0);
151
152/** Used to work around missing PHP features **/
153define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1));
154
/**
 * Make sure stripos() can be called.
 *
 * stripos() only exists natively from PHP 5 on, so older runtimes get a
 * preg_match() based replacement. Two variants are needed because the offset
 * parameter of preg_match() is not available on the runtimes flagged by
 * GESHI_PHP_PRE_433.
 */
if (!function_exists('stripos')) {
    if (GESHI_PHP_PRE_433) {
        /**
         * Case-insensitive strpos() replacement for runtimes whose
         * preg_match() has no offset parameter.
         *
         * @param string   $haystack The string to search in
         * @param string   $needle   The string to search for
         * @param int|null $offset   Position to start searching from
         * @return int|false Position of the first match relative to the start
         *                   of $haystack, or false if there is no match
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            $start = is_null($offset) ? 0 : (int) $offset;
            if ($start < 0) {
                // A negative offset counts from the end of the haystack
                $start = max(0, strlen($haystack) + $start);
            }
            if ($start > 0) {
                $haystack = substr($haystack, $start);
            }
            // The "i" modifier is what makes this stripos() rather than strpos()
            if (preg_match('/' . preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE)) {
                // The match position is relative to the shortened haystack, so
                // shift it back to be relative to the original string
                return $match[0][1] + $start;
            }
            return false;
        }
    }
    else {
        /**
         * Case-insensitive strpos() replacement built on preg_match() with
         * its native offset parameter.
         *
         * @param string   $haystack The string to search in
         * @param string   $needle   The string to search for
         * @param int|null $offset   Position to start searching from
         * @return int|false Position of the first match, or false if there
         *                   is no match
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            // The "i" modifier is what makes this stripos() rather than strpos()
            if (preg_match('/' . preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE, (int) $offset)) {
                return $match[0][1];
            }
            return false;
        }
    }
}
184
185/** some old PHP / PCRE subpatterns only support up to xxx subpatterns in
186 regular expressions. Set this to false if your PCRE lib is up to date
187 @see GeSHi->optimize_regexp_list()
188 **/
189define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
190/** it's also important not to generate too long regular expressions
191 be generous here... but keep in mind, that when reaching this limit we
192 still have to close open patterns. 12k should do just fine on a 16k limit.
193 @see GeSHi->optimize_regexp_list()
194 **/
195define('GESHI_MAX_PCRE_LENGTH', 12288);
196
197//Number format specification
198/** Basic number format for integers */
199define('GESHI_NUMBER_INT_BASIC', 1); //Default integers \d+
200/** Enhanced number format for integers like seen in C */
201define('GESHI_NUMBER_INT_CSTYLE', 2); //Default C-Style \d+[lL]?
202/** Number format to highlight binary numbers with a suffix "b" */
203define('GESHI_NUMBER_BIN_SUFFIX', 16); //[01]+[bB]
204/** Number format to highlight binary numbers with a prefix % */
205define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32); //%[01]+
206/** Number format to highlight binary numbers with a prefix 0b (C) */
207define('GESHI_NUMBER_BIN_PREFIX_0B', 64); //0b[01]+
208/** Number format to highlight octal numbers with a leading zero */
209define('GESHI_NUMBER_OCT_PREFIX', 256); //0[0-7]+
210/** Number format to highlight octal numbers with a prefix 0o (logtalk) */
211define('GESHI_NUMBER_OCT_PREFIX_0O', 512); //0o[0-7]+
212/** Number format to highlight octal numbers with a suffix of o */
213define('GESHI_NUMBER_OCT_SUFFIX', 1024); //[0-7]+[oO]
214/** Number format to highlight hex numbers with a prefix 0x */
215define('GESHI_NUMBER_HEX_PREFIX', 4096); //0x[0-9a-fA-F]+
216/** Number format to highlight hex numbers with a suffix of h */
217define('GESHI_NUMBER_HEX_SUFFIX', 8192); //[0-9][0-9a-fA-F]*h
218/** Number format to highlight floating-point numbers without support for scientific notation */
219define('GESHI_NUMBER_FLT_NONSCI', 65536); //\d+\.\d+
220/** Number format to highlight floating-point numbers with a trailing "f" and without support for scientific notation */
221define('GESHI_NUMBER_FLT_NONSCI_F', 131072); //\d+(\.\d+)?f
222/** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
223define('GESHI_NUMBER_FLT_SCI_SHORT', 262144); //\.\d+e\d+
224/** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
225define('GESHI_NUMBER_FLT_SCI_ZERO', 524288); //\d+(\.\d+)?e\d+
226//Custom formats are passed by RX array
227
228// Error detection - use these to analyse faults
229/** No sourcecode to highlight was specified
230 * @deprecated
231 */
232define('GESHI_ERROR_NO_INPUT', 1);
233/** The language specified does not exist */
234define('GESHI_ERROR_NO_SUCH_LANG', 2);
235/** GeSHi could not open a file for reading (generally a language file) */
236define('GESHI_ERROR_FILE_NOT_READABLE', 3);
237/** The header type passed to {@link GeSHi->set_header_type()} was invalid */
238define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
239/** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
240define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
241/**#@-*/
242
243
244/**
245 * The GeSHi Class.
246 *
247 * Please refer to the documentation for GeSHi 1.0.X that is available
248 * at http://qbnz.com/highlighter/documentation.php for more information
249 * about how to use this class.
250 *
251 * @package geshi
252 * @author Nigel McNie <[email protected]>, Benny Baumann <[email protected]>
253 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
254 */
255class GeSHi {
256 /**#@+
257 * @access private
258 */
259 /**
260 * The source code to highlight
261 * @var string
262 */
263 var $source = '';
264
265 /**
266 * The language to use when highlighting
267 * @var string
268 */
269 var $language = '';
270
271 /**
272 * The data for the language used
273 * @var array
274 */
275 var $language_data = array();
276
277 /**
278 * The path to the language files
279 * @var string
280 */
281 var $language_path = GESHI_LANG_ROOT;
282
283 /**
284 * The code of the most recent error (one of the GESHI_ERROR_* constants), or false if there is none
285 * @var int|false
286 * @todo check err reporting works
287 */
288 var $error = false;
289
290 /**
291 * Possible error messages
292 * @var array
293 */
294 var $error_messages = array(
295 GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
296 GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
297 GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
298 GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
299 );
300
301 /**
302 * Whether highlighting is strict or not
303 * @var boolean
304 */
305 var $strict_mode = false;
306
307 /**
308 * Whether to use CSS classes in output
309 * @var boolean
310 */
311 var $use_classes = false;
312
313 /**
314 * The type of header to use. Can be one of the following
315 * values:
316 *
317 * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
318 * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
319 * - GESHI_HEADER_NONE: No header is outputted.
320 *
321 * @var int
322 */
323 var $header_type = GESHI_HEADER_PRE;
324
325 /**
326 * Array of permissions for which lexics should be highlighted
327 * @var array
328 */
329 var $lexic_permissions = array(
330 'KEYWORDS' => array(),
331 'COMMENTS' => array('MULTI' => true),
332 'REGEXPS' => array(),
333 'ESCAPE_CHAR' => true,
334 'BRACKETS' => true,
335 'SYMBOLS' => false,
336 'STRINGS' => true,
337 'NUMBERS' => true,
338 'METHODS' => true,
339 'SCRIPT' => true
340 );
341
342 /**
343 * The time it took to parse the code
344 * @var double
345 */
346 var $time = 0;
347
348 /**
349 * The content of the header block
350 * @var string
351 */
352 var $header_content = '';
353
354 /**
355 * The content of the footer block
356 * @var string
357 */
358 var $footer_content = '';
359
360 /**
361 * The style of the header block
362 * @var string
363 */
364 var $header_content_style = '';
365
366 /**
367 * The style of the footer block
368 * @var string
369 */
370 var $footer_content_style = '';
371
372 /**
373 * Tells if a block around the highlighted source should be forced
374 * if not using line numbering
375 * @var boolean
376 */
377 var $force_code_block = false;
378
379 /**
380 * The styles for hyperlinks in the code
381 * @var array
382 */
383 var $link_styles = array();
384
385 /**
386 * Whether important blocks should be recognised or not
387 * @var boolean
388 * @deprecated
389 * @todo REMOVE THIS FUNCTIONALITY!
390 */
391 var $enable_important_blocks = false;
392
393 /**
394 * Styles for important parts of the code
395 * @var string
396 * @deprecated
397 * @todo As above - rethink the whole idea of important blocks as it is buggy and
398 * will be hard to implement in 1.2
399 */
400 var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
401
402 /**
403 * Whether CSS IDs should be added to the code
404 * @var boolean
405 */
406 var $add_ids = false;
407
408 /**
409 * Lines that should be highlighted extra
410 * @var array
411 */
412 var $highlight_extra_lines = array();
413
414 /**
415 * Styles of lines that should be highlighted extra
416 * @var array
417 */
418 var $highlight_extra_lines_styles = array();
419
420 /**
421 * Styles of extra-highlighted lines
422 * @var string
423 */
424 var $highlight_extra_lines_style = 'background-color: #ffc;';
425
426 /**
427 * The line ending
428 * If null, nl2br() will be used on the result string.
429 * Otherwise, all instances of \n will be replaced with $line_ending
430 * @var string
431 */
432 var $line_ending = null;
433
434 /**
435 * Number at which line numbers should start at
436 * @var int
437 */
438 var $line_numbers_start = 1;
439
440 /**
441 * The overall style for this code block
442 * @var string
443 */
444 var $overall_style = 'font-family:monospace;';
445
446 /**
447 * The style for the actual code
448 * @var string
449 */
450 var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
451
452 /**
453 * The overall class for this code block
454 * @var string
455 */
456 var $overall_class = '';
457
458 /**
459 * The overall ID for this code block
460 * @var string
461 */
462 var $overall_id = '';
463
464 /**
465 * Line number styles
466 * @var string
467 */
468 var $line_style1 = 'font-weight: normal; vertical-align:top;';
469
470 /**
471 * Line number styles for fancy lines
472 * @var string
473 */
474 var $line_style2 = 'font-weight: bold; vertical-align:top;';
475
476 /**
477 * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
478 * @var string
479 */
480 var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
481
482 /**
483 * Flag for how line numbers are displayed (one of the GESHI_*_LINE_NUMBERS constants)
484 * @var int
485 */
486 var $line_numbers = GESHI_NO_LINE_NUMBERS;
487
488 /**
489 * Flag to decide if multi line spans are allowed. Set it to false to make sure
490 * each tag is closed before and reopened after each linefeed.
491 * @var boolean
492 */
493 var $allow_multiline_span = true;
494
495 /**
496 * The "nth" value for fancy line highlighting
497 * @var int
498 */
499 var $line_nth_row = 0;
500
501 /**
502 * The size of tab stops
503 * @var int
504 */
505 var $tab_width = 8;
506
507 /**
508 * Should we use language-defined tab stop widths?
509 * @var boolean
510 */
511 var $use_language_tab_width = false;
512
513 /**
514 * Default target for keyword links
515 * @var string
516 */
517 var $link_target = '';
518
519 /**
520 * The encoding to use for entity encoding
521 * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
522 * @var string
523 */
524 var $encoding = 'utf-8';
525
526 /**
527 * Should keywords be linked?
528 * @var boolean
529 */
530 var $keyword_links = true;
531
532 /**
533 * Currently loaded language file
534 * @var string
535 * @since 1.0.7.22
536 */
537 var $loaded_language = '';
538
539 /**
540 * Whether the caches needed for parsing are built or not
541 *
542 * @var bool
543 * @since 1.0.8
544 */
545 var $parse_cache_built = false;
546
547 /**
548 * Work around for Suhosin Patch with disabled /e modifier
549 *
550 * Note from suhosins author in config file:
551 * <blockquote>
552 * The /e modifier inside <code>preg_replace()</code> allows code execution.
553 * Often it is the cause for remote code execution exploits. It is wise to
554 * deactivate this feature and test where in the application it is used.
555 * The developer using the /e modifier should be made aware that he should
556 * use <code>preg_replace_callback()</code> instead
557 * </blockquote>
558 *
559 * @var int
560 * @since 1.0.8
561 */
562 var $_kw_replace_group = 0;
563 var $_rx_key = 0;
564
565 /**
566 * some "callback parameters" for handle_multiline_regexps
567 *
568 * @since 1.0.8
569 * @access private
570 * @var string
571 */
572 var $_hmr_before = '';
573 var $_hmr_replace = '';
574 var $_hmr_after = '';
575 var $_hmr_key = 0;
576
577 /**#@-*/
578
/**
 * Creates a new GeSHi object, optionally with source and language.
 *
 * This is the constructor proper. PHP 8 no longer treats a method named
 * after its class as a constructor, so the initialisation lives here and
 * the old-style GeSHi() method below simply delegates to it.
 *
 * @param string $source   The source code to highlight
 * @param string $language The language to highlight the source with
 * @param string $path     The path to the language file directory. <b>This
 *                         is deprecated!</b> The path is normally detected
 *                         automatically; it only needs to be passed (or set
 *                         via {@link GeSHi->set_language_path()}) when the
 *                         language directory has been renamed.
 * @since 1.0.0
 */
function __construct($source = '', $language = '', $path = '') {
    // empty('0') is true, so a plain empty() check would silently drop a
    // source consisting of the single character "0"
    if (!empty($source) || '0' === $source) {
        $this->set_source($source);
    }
    if (!empty($language)) {
        $this->set_language($language);
    }
    $this->set_language_path($path);
}

/**
 * PHP 4 style constructor, kept for backwards compatibility.
 *
 * Lets PHP 4 runtimes and subclasses calling parent::GeSHi() keep working;
 * all arguments are passed straight through to __construct().
 *
 * @param string $source   The source code to highlight
 * @param string $language The language to highlight the source with
 * @param string $path     The path to the language file directory (deprecated)
 * @since 1.0.0
 */
function GeSHi($source = '', $language = '', $path = '') {
    $this->__construct($source, $language, $path);
}
602
/**
 * Returns the formatted message for the most recent error, if there is one.
 *
 * The message template registered for the current error code has its
 * {LANGUAGE} and {PATH} placeholders replaced by the current language and
 * language path, and the result is wrapped in a small HTML fragment.
 *
 * @return string|false The HTML error message, or false if no error has occurred
 * @since 1.0.0
 */
function error() {
    if (!$this->error) {
        return false;
    }

    // Template variables that may appear in a message (useful for debugging)
    $placeholders = array('{LANGUAGE}', '{PATH}');
    $values = array($this->language, $this->language_path);

    $msg = str_replace($placeholders, $values, $this->error_messages[$this->error]);

    return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
}
626
/**
 * Gets a human-readable name for the current language (thanks to Simon
 * Patterson for the idea :))
 *
 * When the last operation failed with GESHI_ERROR_NO_SUCH_LANG the name is
 * suffixed with " (Unknown Language)".
 *
 * @return string The name for the current language
 * @since 1.0.2
 */
function get_language_name() {
    $name = $this->language_data['LANG_NAME'];
    if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
        $name .= ' (Unknown Language)';
    }
    return $name;
}
640
/**
 * Sets the source code to highlight.
 *
 * Also clears the list of extra-highlighted lines.
 *
 * @param string $source The source code to highlight
 * @since 1.0.0
 */
function set_source($source) {
    // Forget the extra-highlight line selection along with the old source
    $this->highlight_extra_lines = array();
    $this->source = $source;
}
651
/**
 * Sets the language for this object and loads its language file.
 *
 * @note since 1.0.8 this function won't reset language-settings by default
 *       anymore! If you need this, set $force_reset = true.
 *
 * @param string  $language    The name of the language to use
 * @param boolean $force_reset Whether to reload the language file even if it
 *                             is the one currently loaded
 * @since 1.0.0
 */
function set_language($language, $force_reset = false) {
    if ($force_reset) {
        // Forget what is loaded so the "already loaded" shortcut cannot fire
        $this->loaded_language = false;
    }

    // Reduce the name to lowercase letters, digits, "-" and "_" so that it
    // cannot be used to inject anything malicious into the file name
    $language = strtolower(preg_replace('#[^a-zA-Z0-9\-_]#', '', $language));

    // Build the full file name of the language file
    $file_name = $this->language_path . $language . '.php';
    if ($file_name == $this->loaded_language) {
        // Nothing to do, this language is already loaded
        return;
    }

    $this->language = $language;
    $this->error = false;
    $this->strict_mode = GESHI_NEVER;

    if (is_readable($file_name)) {
        // Load the language for parsing
        $this->load_language($file_name);
    } else {
        // The desired language file cannot be read
        $this->error = GESHI_ERROR_NO_SUCH_LANG;
    }
}
692
/**
 * Sets the path to the directory containing the language files. Note
 * that this path is relative to the directory of the script that included
 * geshi.php, NOT geshi.php itself.
 *
 * Paths that fail any of the security checks below are silently ignored and
 * leave the current language path untouched. An accepted, non-empty path is
 * stored with a trailing "/" and the current language is set again so that
 * the new path takes effect.
 *
 * @param string $path The path to the language directory
 * @since 1.0.0
 * @deprecated The path to the language files should now be automatically
 *             detected, so this method should no longer be needed. The
 *             1.1.X branch handles manual setting of the path differently
 *             so this method will disappear in 1.2.0.
 */
function set_language_path($path) {
    // Security fix to prevent external directories using fopen wrappers.
    // strpos() returns 0 for a colon at the very start of the path, so the
    // result must be compared against false rather than tested for truthiness.
    if (false !== strpos($path, ':')) {
        if (DIRECTORY_SEPARATOR != "\\") {
            // No legitimate use for a colon outside of Windows drive letters
            return;
        }
        // On Windows only a single colon, directly after a drive letter, is allowed
        if (!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
            return;
        }
    }

    // Reject any path containing characters outside the whitelist.
    // NOTE(review): inside this single-quoted literal "\\\s" reaches PCRE as
    // an escaped backslash followed by "s", so whitespace is rejected -
    // confirm whether that is intended.
    if (preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
        return;
    }

    // In paranoid mode hidden path components and parent references are refused
    if (GESHI_SECURITY_PARANOID
        && (false !== strpos($path, '/.') || false !== strpos($path, '..'))) {
        return;
    }

    if ($path) {
        $this->language_path = ('/' == substr($path, -1)) ? $path : $path . '/';
        $this->set_language($this->language); // otherwise set_language_path has no effect
    }
}
733
/**
 * Sets the type of header to be used.
 *
 * If GESHI_HEADER_DIV is used, the code is surrounded in a "div". This
 * means more source code but more control over tab width and line-wrapping.
 * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
 * control. Default is GESHI_HEADER_PRE.
 *
 * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
 * should be outputted. GESHI_HEADER_PRE_VALID and GESHI_HEADER_PRE_TABLE are
 * accepted as well.
 *
 * A value matching none of these leaves the header type unchanged and sets
 * the error to GESHI_ERROR_INVALID_HEADER_TYPE.
 *
 * @param int $type The type of header to be used
 * @since 1.0.0
 */
function set_header_type($type) {
    $valid_types = array(
        GESHI_HEADER_NONE,
        GESHI_HEADER_DIV,
        GESHI_HEADER_PRE,
        GESHI_HEADER_PRE_VALID,
        GESHI_HEADER_PRE_TABLE
    );

    if (in_array($type, $valid_types)) {
        // Known header type, remember it
        $this->header_type = $type;
    } else {
        $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
    }
}
759
/**
 * Sets the styles for the code block that is outputted when this object
 * is parsed. The style should be a string of valid stylesheet declarations.
 *
 * @param string  $style             The overall style for the outputted code block
 * @param boolean $preserve_defaults If true the style is appended to the
 *                                   current overall style, otherwise it
 *                                   replaces it
 * @since 1.0.0
 */
function set_overall_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->overall_style .= $style;
        return;
    }
    $this->overall_style = $style;
}
776
/**
 * Sets the overall class name for this block of code, which a stylesheet
 * can then use to style this object's output.
 *
 * @param string $class The class name to use for this block of code
 * @since 1.0.0
 */
function set_overall_class($class)
{
    $this->overall_class = $class;
}
788
/**
 * Sets the overall id for this block of code, which a stylesheet can then
 * use to style this object's output.
 *
 * @param string $id The ID to use for this block of code
 * @since 1.0.0
 */
function set_overall_id($id)
{
    $this->overall_id = $id;
}
799
/**
 * Sets whether CSS classes should be used to highlight the source. Calling
 * this method without arguments turns classes on.
 *
 * @param boolean $flag Whether to turn classes on or not
 * @since 1.0.0
 */
function enable_classes($flag = true) {
    // Normalise whatever was passed to a real boolean
    $this->use_classes = (bool) $flag;
}
810
/**
 * Sets the style for the actual code. This should be a string containing
 * valid stylesheet declarations.
 *
 * Note: Use this method to override any style changes you made to
 * the line numbers if you are using line numbers, else the line of
 * code will have the same style as the line number! Consult the
 * GeSHi documentation for more information about this.
 *
 * @param string  $style             The style to use for actual code
 * @param boolean $preserve_defaults If true the style is appended to the
 *                                   current code style, otherwise it
 *                                   replaces it
 * @since 1.0.2
 */
function set_code_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->code_style .= $style;
        return;
    }
    $this->code_style = $style;
}
833
/**
 * Sets the styles for the line numbers.
 *
 * The method can be called as (style1, style2, preserve_defaults) or, in
 * short form, as (style1, preserve_defaults); in the short form the "fancy"
 * style passed on is the empty string.
 *
 * @param string         $style1            The style for the line numbers that are "normal"
 * @param string|boolean $style2            If a string, the style of the line numbers
 *                                          that are "fancy"; if a boolean, it is taken
 *                                          as the $preserve_defaults flag instead
 * @param boolean        $preserve_defaults If true the styles are appended to the
 *                                          current ones, otherwise they replace them
 * @since 1.0.2
 */
function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
    // Two-argument form: the second argument is really the merge flag
    if (is_bool($style2)) {
        $preserve_defaults = $style2;
        $style2 = '';
    }

    if ($preserve_defaults) {
        // Merge: append to whatever is set at the moment
        $this->line_style1 .= $style1;
        $this->line_style2 .= $style2;
        return;
    }

    // No merge: overwrite both styles
    $this->line_style1 = $style1;
    $this->line_style2 = $style2;
}
862
/**
 * Sets whether line numbers should be displayed.
 *
 * Valid values for the first parameter are:
 *
 *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
 *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
 *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
 *
 * For fancy line numbers, the second parameter is used to signal which lines
 * are to be fancy. For example, if the value of this parameter is 5 then every
 * 5th line will be fancy.
 *
 * Any other value for the first parameter sets the error to
 * GESHI_ERROR_INVALID_LINE_NUMBER_TYPE and leaves the current line number
 * settings untouched.
 *
 * @param int $flag    How line numbers should be displayed
 * @param int $nth_row Defines which lines are fancy
 * @since 1.0.0
 */
function enable_line_numbers($flag, $nth_row = 5) {
    if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
        && GESHI_FANCY_LINE_NUMBERS != $flag) {
        $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
        // Do not store a value that has just been reported as invalid
        // (set_header_type() treats invalid input the same way)
        return;
    }
    $this->line_numbers = $flag;
    $this->line_nth_row = $nth_row;
}
888
/**
 * Sets whether spans and other HTML markup generated by GeSHi can span
 * over multiple lines or not. Defaults to true to reduce overhead.
 * Set it to false if you want to manipulate the output or manually display
 * the code in an ordered list.
 *
 * @param boolean $flag Whether multiline spans are allowed or not
 * @since 1.0.7.22
 */
function enable_multiline_span($flag) {
    // Normalise whatever was passed to a real boolean
    $this->allow_multiline_span = ($flag) ? true : false;
}
901
/**
 * Returns the current setting for multiline spans.
 *
 * @see enable_multiline_span
 * @return bool Whether multiline spans are currently allowed
 */
function get_multiline_span()
{
    return $this->allow_multiline_span;
}
911
/**
 * Sets the style for a keyword group.
 *
 * If the group has no highlighting permission entry yet, highlighting is
 * switched on for it; an existing entry is left as it is.
 *
 * @param int     $key               The key of the keyword group to change the styles of
 * @param string  $style             The style to make the keywords
 * @param boolean $preserve_defaults If true the style is appended to the
 *                                   group's current style, otherwise it
 *                                   replaces it
 * @since 1.0.0
 */
function set_keyword_group_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
    }

    // Make sure the lexic permissions know about this group
    if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
        $this->lexic_permissions['KEYWORDS'][$key] = true;
    }
}
936
/**
 * Turns highlighting on/off for a keyword group.
 *
 * @param int     $key  The key of the keyword group to turn on or off
 * @param boolean $flag Whether to turn highlighting for that group on or off
 * @since 1.0.0
 */
function set_keyword_group_highlighting($key, $flag = true) {
    // Normalise whatever was passed to a real boolean
    $this->lexic_permissions['KEYWORDS'][$key] = (bool) $flag;
}
947
/**
 * Sets the style for a comment group.
 *
 * @param int     $key               The key of the comment group to change the styles of
 * @param string  $style             The style to make the comments
 * @param boolean $preserve_defaults If true the style is appended to the
 *                                   group's current style, otherwise it
 *                                   replaces it
 * @since 1.0.0
 */
function set_comments_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
        return;
    }
    $this->language_data['STYLES']['COMMENTS'][$key] = $style;
}
966
/**
 * Turns highlighting on/off for a comment group.
 *
 * @param int     $key  The key of the comment group to turn on or off
 * @param boolean $flag Whether to turn highlighting for that group on or off
 * @since 1.0.0
 */
function set_comments_highlighting($key, $flag = true) {
    // Normalise whatever was passed to a real boolean
    $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
}
977
/**
 * Sets the style for a group of escaped characters.
 *
 * @param string  $style             The style to make the escape characters
 * @param boolean $preserve_defaults If true the style is appended to the
 *                                   group's current style, otherwise it
 *                                   replaces it
 * @param int     $group             The key of the escape character group to
 *                                   change the style of
 * @since 1.0.0
 */
function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
        return;
    }
    $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
}
995
/**
 * Switches highlighting of escaped characters on or off.
 *
 * @param boolean $flag Whether highlighting for escape characters is enabled
 * @since 1.0.0
 */
function set_escape_characters_highlighting($flag = true) {
    // Any truthy/falsy input is stored as a strict boolean
    $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
}
1005
/**
 * Sets the style for brackets (style group 0).
 *
 * When $preserve_defaults is true the given style is appended to the style
 * currently stored; otherwise the stored style is replaced.
 *
 * This method is DEPRECATED: use set_symbols_style instead.
 * This method will be removed in 1.2.X
 *
 * @param string  $style             The style to apply to the brackets
 * @param boolean $preserve_defaults Whether to append to the current style
 *                                   (true) or to overwrite it (false)
 * @since 1.0.0
 * @deprecated In favour of set_symbols_style
 */
function set_brackets_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['BRACKETS'][0] .= $style;
        return;
    }

    $this->language_data['STYLES']['BRACKETS'][0] = $style;
}
1027
/**
 * Switches highlighting of brackets on or off.
 *
 * This method is DEPRECATED: use set_symbols_highlighting instead.
 * This method will be removed in 1.2.X
 *
 * @param boolean $flag Whether highlighting for brackets is enabled
 * @since 1.0.0
 * @deprecated In favour of set_symbols_highlighting
 */
function set_brackets_highlighting($flag) {
    // Any truthy/falsy input is stored as a strict boolean
    $this->lexic_permissions['BRACKETS'] = (bool) $flag;
}
1041
/**
 * Sets the style for a group of symbols.
 *
 * When $preserve_defaults is true the given style is appended to the style
 * currently stored for the group; otherwise the stored style is replaced.
 * Changes to group 0 are also forwarded to set_brackets_style().
 *
 * @param string  $style             The style to apply to the symbols
 * @param boolean $preserve_defaults Whether to append to the current style
 *                                   (true) or to overwrite it (false)
 * @param int     $group             The symbol group to restyle
 * @since 1.0.1
 */
function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
    }

    // Backward compatibility: keep the bracket style in step with group 0
    if ($group == 0) {
        $this->set_brackets_style($style, $preserve_defaults);
    }
}
1066
/**
 * Switches highlighting of symbols on or off. The same flag is forwarded
 * to set_brackets_highlighting() for backward compatibility.
 *
 * @param boolean $flag Whether highlighting for symbols is enabled
 * @since 1.0.0
 */
function set_symbols_highlighting($flag) {
    // Any truthy/falsy input is stored as a strict boolean
    $this->lexic_permissions['SYMBOLS'] = (bool) $flag;

    // Backward compatibility: brackets follow the symbol setting
    $this->set_brackets_highlighting($flag);
}
1080
/**
 * Sets the style for a group of strings.
 *
 * When $preserve_defaults is true the given style is appended to the style
 * currently stored for the group; otherwise the stored style is replaced.
 *
 * @param string  $style             The style to apply to the strings
 * @param boolean $preserve_defaults Whether to append to the current style
 *                                   (true) or to overwrite it (false)
 * @param int     $group             The string group to restyle
 * @since 1.0.0
 */
function set_strings_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['STRINGS'][$group] .= $style;
        return;
    }

    $this->language_data['STYLES']['STRINGS'][$group] = $style;
}
1099
/**
 * Switches highlighting of strings on or off.
 *
 * @param boolean $flag Whether highlighting for strings is enabled
 * @since 1.0.0
 */
function set_strings_highlighting($flag) {
    // Any truthy/falsy input is stored as a strict boolean
    $this->lexic_permissions['STRINGS'] = (bool) $flag;
}
1109
/**
 * Sets the style for a group of strict code (script) blocks.
 *
 * When $preserve_defaults is true the given style is appended to the style
 * currently stored for the group; otherwise the stored style is replaced.
 *
 * @param string  $style             The style to apply to the script blocks
 * @param boolean $preserve_defaults Whether to append to the current style
 *                                   (true) or to overwrite it (false)
 * @param int     $group             The script block group to restyle
 * @since 1.0.8.4
 */
function set_script_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
        return;
    }

    $this->language_data['STYLES']['SCRIPT'][$group] = $style;
}
1129
/**
 * Sets the style for a group of numbers.
 *
 * When $preserve_defaults is true the given style is appended to the style
 * currently stored for the group; otherwise the stored style is replaced.
 *
 * @param string  $style             The style to apply to the numbers
 * @param boolean $preserve_defaults Whether to append to the current style
 *                                   (true) or to overwrite it (false)
 * @param int     $group             The number group to restyle
 * @since 1.0.0
 */
function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
        return;
    }

    $this->language_data['STYLES']['NUMBERS'][$group] = $style;
}
1148
/**
 * Switches highlighting of numbers on or off.
 *
 * @param boolean $flag Whether highlighting for numbers is enabled
 * @since 1.0.0
 */
function set_numbers_highlighting($flag) {
    // Any truthy/falsy input is stored as a strict boolean
    $this->lexic_permissions['NUMBERS'] = (bool) $flag;
}
1158
/**
 * Sets the style for methods. $key is a number that references the
 * appropriate "object splitter" - see the language file for the language
 * being highlighted to find this number.
 *
 * When $preserve_defaults is true the given style is appended to the style
 * currently stored for the key; otherwise the stored style is replaced.
 *
 * @param int     $key               The key of the object splitter to restyle
 * @param string  $style             The style to apply to the methods
 * @param boolean $preserve_defaults Whether to append to the current style
 *                                   (true) or to overwrite it (false)
 * @since 1.0.0
 */
function set_methods_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['METHODS'][$key] .= $style;
        return;
    }

    $this->language_data['STYLES']['METHODS'][$key] = $style;
}
1179
/**
 * Switches highlighting of methods on or off.
 *
 * @param boolean $flag Whether highlighting for methods is enabled
 * @since 1.0.0
 */
function set_methods_highlighting($flag) {
    // Any truthy/falsy input is stored as a strict boolean
    $this->lexic_permissions['METHODS'] = (bool) $flag;
}
1189
/**
 * Sets the style for one regular expression group.
 *
 * When $preserve_defaults is true the given style is appended to the style
 * currently stored for the group; otherwise the stored style is replaced.
 *
 * @param int     $key               The key of the regular expression group
 * @param string  $style             The style to apply to the matches
 * @param boolean $preserve_defaults Whether to append to the current style
 *                                   (true) or to overwrite it (false)
 * @since 1.0.0
 */
function set_regexps_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
        return;
    }

    $this->language_data['STYLES']['REGEXPS'][$key] = $style;
}
1207
/**
 * Switches highlighting of a regular expression group on or off.
 *
 * @param int     $key  The key of the regular expression group to switch
 * @param boolean $flag Whether highlighting for that group is enabled
 * @since 1.0.0
 */
function set_regexps_highlighting($key, $flag) {
    // Any truthy/falsy input is stored as a strict boolean
    $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
}
1218
/**
 * Sets whether a keyword group is matched in a case sensitive manner.
 *
 * @param int     $key  The key of the keyword group to change
 * @param boolean $case Whether matching for the group is case sensitive
 * @since 1.0.0
 */
function set_case_sensitivity($key, $case) {
    // Any truthy/falsy input is stored as a strict boolean
    $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
}
1229
/**
 * Sets the case that keywords should use when found. Use the constants:
 *
 *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
 *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
 *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
 *
 * A value that matches none of these constants leaves the current
 * setting untouched.
 *
 * @param int $case A constant specifying what to do with matched keywords
 * @since 1.0.1
 */
function set_case_keywords($case) {
    $allowed = array(GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER);
    if (!in_array($case, $allowed)) {
        return;
    }

    $this->language_data['CASE_KEYWORDS'] = $case;
}
1246
/**
 * Sets how many spaces a tab is substituted for.
 *
 * The value is converted to an integer; a result below 1 is replaced
 * by the default width of 8.
 *
 * @param int $width The tab width
 * @since 1.0.0
 */
function set_tab_width($width) {
    $requested = intval($width);

    // Fall back to the default when the width does not fit the constraints
    $this->tab_width = ($requested < 1) ? 8 : $requested;
}
1264
/**
 * Sets whether or not to use the tab-stop width specified by the language.
 *
 * @param boolean $use Whether to use language-specific tab-stop widths
 * @since 1.0.7.20
 */
function set_use_language_tab_width($use) {
    // Any truthy/falsy input is stored as a strict boolean
    $this->use_language_tab_width = $use ? true : false;
}
1274
/**
 * Returns the tab width to use, based on the current language and user
 * preference.
 *
 * The language's TAB_WIDTH is returned only when language tab widths are
 * enabled and the language data defines one; otherwise the configured
 * tab width is returned.
 *
 * @return int Tab width
 * @since 1.0.7.20
 */
function get_real_tab_width() {
    if ($this->use_language_tab_width && isset($this->language_data['TAB_WIDTH'])) {
        return $this->language_data['TAB_WIDTH'];
    }

    return $this->tab_width;
}
1290
/**
 * Enables/disables strict highlighting. Default is off, calling this
 * method without parameters will turn it on. See documentation
 * for more details on strict mode and where to use it.
 *
 * The setting is only changed when the language's STRICT_MODE_APPLIES
 * value equals GESHI_MAYBE; otherwise the call has no effect.
 *
 * @param boolean $mode Whether to enable strict mode or not
 * @since 1.0.0
 */
function enable_strict_mode($mode = true) {
    if (GESHI_MAYBE != $this->language_data['STRICT_MODE_APPLIES']) {
        return;
    }

    $this->strict_mode = $mode ? GESHI_ALWAYS : GESHI_NEVER;
}
1304
/**
 * Disables all highlighting. Shorthand for enable_highlighting(false).
 *
 * @since 1.0.0
 * @deprecated In favour of enable_highlighting
 */
function disable_highlighting() {
    // Delegate so the enable/disable logic lives in one place
    $this->enable_highlighting(false);
}
1315
/**
 * Enables all highlighting.
 *
 * The optional flag parameter was added in version 1.0.7.21 and can be used
 * to enable (true) or disable (false) all highlighting. Every entry of the
 * lexic permissions - including each member of the per-group arrays - and
 * the important-blocks switch are set to the flag.
 *
 * @since 1.0.0
 * @param boolean $flag A flag specifying whether to enable or disable all highlighting
 */
function enable_highlighting($flag = true) {
    $enabled = (bool) $flag;

    foreach (array_keys($this->lexic_permissions) as $type) {
        if (!is_array($this->lexic_permissions[$type])) {
            // Simple on/off permission
            $this->lexic_permissions[$type] = $enabled;
            continue;
        }

        // Per-group permissions: switch every group individually
        foreach (array_keys($this->lexic_permissions[$type]) as $group) {
            $this->lexic_permissions[$type][$group] = $enabled;
        }
    }

    // Context blocks
    $this->enable_important_blocks = $enabled;
}
1341
/**
 * Given a file extension, this method returns either a valid geshi language
 * name, or the empty string if it couldn't be found.
 *
 * The lookup table is searched in order and the first language listing the
 * extension wins. The built-in table is used when $lookup is empty or not
 * an array.
 *
 * @param string $extension The extension to get a language name for
 * @param array  $lookup    A lookup array to use instead of the default one
 * @return string The language name, or '' when no language matches
 * @since 1.0.5
 * @todo Re-think about how this method works (maybe make it private and/or make it
 *       a extension->lang lookup?)
 * @todo static?
 */
function get_language_name_from_extension( $extension, $lookup = array() ) {
    $has_custom_lookup = is_array($lookup) && !empty($lookup);

    if (!$has_custom_lookup) {
        // Built-in language => extensions table
        $lookup = array(
            'abap' => array('abap'),
            'actionscript' => array('as'),
            'ada' => array('a', 'ada', 'adb', 'ads'),
            'apache' => array('conf'),
            'asm' => array('ash', 'asm', 'inc'),
            'asp' => array('asp'),
            'bash' => array('sh'),
            'bf' => array('bf'),
            'c' => array('c', 'h'),
            'c_mac' => array('c', 'h'),
            'caddcl' => array(),
            'cadlisp' => array(),
            'cdfg' => array('cdfg'),
            'cobol' => array('cbl'),
            'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
            'csharp' => array('cs'),
            'css' => array('css'),
            'd' => array('d'),
            'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
            'diff' => array('diff', 'patch'),
            'dos' => array('bat', 'cmd'),
            'gdb' => array('kcrash', 'crash', 'bt'),
            'gettext' => array('po', 'pot'),
            'gml' => array('gml'),
            'gnuplot' => array('plt'),
            'groovy' => array('groovy'),
            'haskell' => array('hs'),
            'html4strict' => array('html', 'htm'),
            'ini' => array('ini', 'desktop'),
            'java' => array('java'),
            'javascript' => array('js'),
            'klonec' => array('kl1'),
            'klonecpp' => array('klx'),
            'latex' => array('tex'),
            'lisp' => array('lisp'),
            'lua' => array('lua'),
            'matlab' => array('m'),
            'mpasm' => array(),
            'mysql' => array('sql'),
            'nsis' => array(),
            'objc' => array(),
            'oobas' => array(),
            'oracle8' => array(),
            'oracle10' => array(),
            'pascal' => array('pas'),
            'perl' => array('pl', 'pm'),
            'php' => array('php', 'php5', 'phtml', 'phps'),
            'povray' => array('pov'),
            'providex' => array('pvc', 'pvx'),
            'prolog' => array('pl'),
            'python' => array('py'),
            'qbasic' => array('bi'),
            'reg' => array('reg'),
            'ruby' => array('rb'),
            'sas' => array('sas'),
            'scala' => array('scala'),
            'scheme' => array('scm'),
            'scilab' => array('sci'),
            'smalltalk' => array('st'),
            'smarty' => array(),
            'tcl' => array('tcl'),
            'vb' => array('bas'),
            'vbnet' => array(),
            'visualfoxpro' => array(),
            'whitespace' => array('ws'),
            'xml' => array('xml', 'svg', 'xrc'),
            'z80' => array('z80', 'asm', 'inc')
        );
    }

    // First language whose extension list contains the extension wins
    foreach ($lookup as $language => $known_extensions) {
        if (!in_array($extension, $known_extensions)) {
            continue;
        }
        return $language;
    }

    return '';
}
1433
/**
 * Given a file name, this method loads its contents in, and attempts
 * to set the language automatically. An optional lookup table can be
 * passed for looking up the language name. If not specified a default
 * table is used
 *
 * The language table is in the form
 * <pre>array(
 *   'lang_name' => array('extension', 'extension', ...),
 *   'lang_name' ...
 * );</pre>
 *
 * When the file is not readable, is a directory, or its contents cannot
 * be read, the error is set to GESHI_ERROR_FILE_NOT_READABLE and neither
 * the source nor the language is changed.
 *
 * @param string $file_name The filename to load the source from
 * @param array  $lookup    A lookup array to use instead of the default one
 * @todo Complete rethink of this and above method
 * @since 1.0.5
 */
function load_from_file($file_name, $lookup = array()) {
    // is_readable() is also true for directories, which cannot be loaded
    if (!is_readable($file_name) || is_dir($file_name)) {
        $this->error = GESHI_ERROR_FILE_NOT_READABLE;
        return;
    }

    // The file may still fail to load (e.g. it vanished after the check)
    $contents = file_get_contents($file_name);
    if (false === $contents) {
        $this->error = GESHI_ERROR_FILE_NOT_READABLE;
        return;
    }

    $this->set_source($contents);

    // pathinfo() only looks at the final path component, so a dot in a
    // directory name is not mistaken for the start of an extension
    $extension = pathinfo($file_name, PATHINFO_EXTENSION);
    $this->set_language($this->get_language_name_from_extension($extension, $lookup));
}
1459
/**
 * Adds a keyword to a keyword group for highlighting
 *
 * If the group does not exist yet, an empty word list is created for it
 * first. A word that is already in the group is not added again.
 *
 * @param int    $key  The key of the keyword group to add the keyword to
 * @param string $word The word to add to the keyword group
 * @since 1.0.0
 */
function add_keyword($key, $word) {
    // Start from an empty list when the group is unknown, instead of
    // searching an undefined value
    if (!isset($this->language_data['KEYWORDS'][$key]) ||
        !is_array($this->language_data['KEYWORDS'][$key])) {
        $this->language_data['KEYWORDS'][$key] = array();
    }

    if (in_array($word, $this->language_data['KEYWORDS'][$key])) {
        return;
    }
    $this->language_data['KEYWORDS'][$key][] = $word;

    if (!$this->parse_cache_built) {
        return;
    }

    // Without a cached pattern there is nothing to append to (e.g. no
    // list was cached for this group), so compile the group instead
    if (empty($this->language_data['CACHED_KEYWORD_LISTS'][$key]) ||
        !is_array($this->language_data['CACHED_KEYWORD_LISTS'][$key])) {
        $this->optimize_keyword_group($key);
        return;
    }

    //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
    $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
    $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
}
1478
/**
 * Removes a keyword from a keyword group
 *
 * @param int    $key       The key of the keyword group to remove the keyword from
 * @param string $word      The word to remove from the keyword group
 * @param bool   $recompile Whether to automatically recompile the optimized regexp list or not.
 *               Note: if you set this to false and @see GeSHi->parse_code() was already called once,
 *               for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
 *               or the removed keyword will stay in cache and still be highlighted! On the other hand
 *               it might be too expensive to recompile the regexp list for every removal if you want to
 *               remove a lot of keywords.
 * @since 1.0.0
 */
function remove_keyword($key, $word, $recompile = true) {
    $position = array_search($word, $this->language_data['KEYWORDS'][$key]);
    if (false === $position) {
        // Word is not part of the group: nothing to do
        return;
    }

    unset($this->language_data['KEYWORDS'][$key][$position]);

    //NEW in 1.0.8, optionally recompile keyword group
    if ($this->parse_cache_built && $recompile) {
        $this->optimize_keyword_group($key);
    }
}
1503
/**
 * Creates a new keyword group
 *
 * An empty word list is rejected because empty word lists mess up
 * highlighting; nothing is changed in that case.
 *
 * @param int     $key            The key of the keyword group to create
 * @param string  $styles         The styles for the keyword group
 * @param boolean $case_sensitive Whether the keyword group is case sensitive or not
 * @param array   $words          The words to use for the keyword group
 * @return boolean True when the group was created, false when the word
 *                 list was empty
 * @since 1.0.0
 */
function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
    $words = (array) $words;
    if (empty($words)) {
        // empty word lists mess up highlighting
        return false;
    }

    //Add the new keyword group internally
    $this->language_data['KEYWORDS'][$key] = $words;
    $this->lexic_permissions['KEYWORDS'][$key] = true;
    $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
    $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;

    //NEW in 1.0.8, cache keyword regexp
    if ($this->parse_cache_built) {
        $this->optimize_keyword_group($key);
    }

    // Report success explicitly so callers can tell it apart from the
    // "false" returned for an empty word list
    return true;
}
1531
/**
 * Removes a keyword group, together with its permission, case
 * sensitivity, style and cached keyword list entries.
 *
 * @param int $key The key of the keyword group to remove
 * @since 1.0.0
 */
function remove_keyword_group ($key) {
    //Remove the keyword group internally
    unset(
        $this->language_data['KEYWORDS'][$key],
        $this->lexic_permissions['KEYWORDS'][$key],
        $this->language_data['CASE_SENSITIVE'][$key],
        $this->language_data['STYLES']['KEYWORDS'][$key],
        //NEW in 1.0.8
        $this->language_data['CACHED_KEYWORD_LISTS'][$key]
    );
}
1548
/**
 * Compiles the optimized regexp list for a keyword group and stores it
 * in the keyword cache.
 *
 * When the SPACE_AS_WHITESPACE parser control is set - globally for
 * keywords or for this group, the group setting taking precedence -
 * every space in the compiled patterns is replaced by "\s+".
 *
 * @param int $key The key of the keyword group to compile & optimize
 * @since 1.0.8
 */
function optimize_keyword_group($key) {
    $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
        $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);

    // Global keyword setting first, then the per-group override
    $space_as_whitespace = false;
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
        $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
    }
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
        $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
    }

    if (!$space_as_whitespace) {
        return;
    }

    foreach ($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $index => $pattern) {
        $this->language_data['CACHED_KEYWORD_LISTS'][$key][$index] =
            str_replace(" ", "\\s+", $pattern);
    }
}
1578
/**
 * Stores the content of the header block.
 *
 * @param string $content The content of the header block
 * @since 1.0.2
 */
function set_header_content($content) {
    // Stored as given
    $this->header_content = $content;
}
1588
/**
 * Stores the content of the footer block.
 *
 * @param string $content The content of the footer block
 * @since 1.0.2
 */
function set_footer_content($content) {
    // Stored as given
    $this->footer_content = $content;
}
1598
/**
 * Stores the style for the header content.
 *
 * @param string $style The style for the header content
 * @since 1.0.2
 */
function set_header_content_style($style) {
    // Stored as given
    $this->header_content_style = $style;
}
1608
/**
 * Stores the style for the footer content.
 *
 * @param string $style The style for the footer content
 * @since 1.0.2
 */
function set_footer_content_style($style) {
    // Stored as given
    $this->footer_content_style = $style;
}
1618
/**
 * Sets whether to force a surrounding block around
 * the highlighted code or not.
 *
 * @param boolean $flag Tells whether to enable or disable this feature
 * @since 1.0.7.20
 */
function enable_inner_code_block($flag) {
    // Any truthy/falsy input is stored as a strict boolean
    $this->force_code_block = $flag ? true : false;
}
1629
/**
 * Sets the base URL to be used for the keywords of one group.
 *
 * @param int    $group The key of the keyword group to set the URL for
 * @param string $url   The URL to set for the group. If {FNAME} is in
 *                      the url somewhere, it is replaced by the keyword
 *                      that the URL is being made for
 * @since 1.0.2
 */
function set_url_for_keyword_group($group, $url) {
    // Stored as given
    $this->language_data['URLS'][$group] = $url;
}
1642
/**
 * Sets the styles for links in code, one link state at a time.
 *
 * @param int    $type   A constant that specifies what state the style is
 *                       being set for - e.g. :hover or :visited
 * @param string $styles The styles to use for that state
 * @since 1.0.2
 */
function set_link_styles($type, $styles) {
    // Stored as given, keyed by link state
    $this->link_styles[$type] = $styles;
}
1654
/**
 * Sets the target for links in code.
 *
 * The value is stored as a ready-made target attribute; an empty
 * (falsy) target stores an empty string instead.
 *
 * @param string $target The target for links in the code, e.g. _blank
 * @since 1.0.3
 */
function set_link_target($target) {
    $this->link_target = $target ? ' target="' . $target . '"' : '';
}
1668
/**
 * Stores the styles for important parts of the code.
 *
 * @param string $styles The styles to use on important parts of the code
 * @since 1.0.2
 */
function set_important_styles($styles) {
    // Stored as given
    $this->important_styles = $styles;
}
1678
/**
 * Sets whether context-important blocks are highlighted.
 *
 * @param boolean $flag Tells whether to enable or disable highlighting of important blocks
 * @todo REMOVE THIS SHIZ FROM GESHI!
 * @deprecated
 * @since 1.0.2
 */
function enable_important_blocks($flag) {
    // Any truthy/falsy input is stored as a strict boolean
    $this->enable_important_blocks = (bool) $flag;
}
1690
/**
 * Sets whether CSS IDs should be added to each line.
 *
 * @param boolean $flag If true, IDs will be added to each line.
 * @since 1.0.2
 */
function enable_ids($flag = true) {
    // Any truthy/falsy input is stored as a strict boolean
    $this->add_ids = (bool) $flag;
}
1700
/**
 * Specifies which lines to highlight extra
 *
 * The extra style parameter was added in 1.0.7.21.
 *
 * @param mixed  $lines An array of line numbers to highlight, or just a line
 *                      number on its own.
 * @param string $style A string specifying the style to use for this line.
 *                      If null is specified, the default style is used.
 *                      If false is specified, the line will be removed from
 *                      special highlighting
 * @since 1.0.2
 * @todo  Some data replication here that could be cut down on
 */
function highlight_lines_extra($lines, $style = null) {
    if (is_array($lines)) {
        //Handle the array one line at a time
        foreach ($lines as $single_line) {
            $this->highlight_lines_extra($single_line, $style);
        }
        return;
    }

    //Mark the line as being highlighted specially
    $line_number = intval($lines);
    $this->highlight_extra_lines[$line_number] = $line_number;

    if ($style !== null && $style !== false) {
        //A custom style was given for this line
        $this->highlight_extra_lines_styles[$line_number] = $style;
        return;
    }

    //null and false both drop any custom style for the line ...
    unset($this->highlight_extra_lines_styles[$line_number]);

    //... and false additionally removes the line from special highlighting
    if ($style === false) {
        unset($this->highlight_extra_lines[$line_number]);
    }
}
1737
/**
 * Stores the style for extra-highlighted lines.
 *
 * @param string $styles The style for extra-highlighted lines
 * @since 1.0.2
 */
function set_highlight_lines_extra_style($styles) {
    // Stored as given
    $this->highlight_extra_lines_style = $styles;
}
1747
/**
 * Sets the line-ending. The value is converted to a string before it
 * is stored.
 *
 * @param string $line_ending The new line-ending
 * @since 1.0.2
 */
function set_line_ending($line_ending) {
    $this->line_ending = strval($line_ending);
}
1757
/**
 * Sets what number line numbers should start at. The value is converted
 * to an integer and its absolute value is stored.
 *
 * <b>Warning:</b> Using this method will add the "start"
 * attribute to the &lt;ol&gt; that is used for line numbering.
 * This is <b>not</b> valid XHTML strict, so if that's what you
 * care about then don't use this method. Firefox is getting
 * support for the CSS method of doing this in 1.1 and Opera
 * has support for the CSS method, but (of course) IE doesn't
 * so it's not worth doing it the CSS way yet.
 *
 * @param int $number The number to start line numbers at
 * @since 1.0.2
 */
function start_line_numbers_at($number) {
    $start = intval($number);

    // Negative values are folded onto their positive counterpart
    $this->line_numbers_start = abs($start);
}
1776
/**
 * Sets the encoding used for htmlspecialchars(), for international
 * support. The encoding name is stored in lower case; an empty (falsy)
 * value leaves the current encoding untouched.
 *
 * NOTE: This is not needed for now because htmlspecialchars() is not
 * being used (it has a security hole in PHP4 that has not been patched).
 * Maybe in a future version it may make a return for speed reasons, but
 * I doubt it.
 *
 * @param string $encoding The encoding to use for the source
 * @since 1.0.3
 */
function set_encoding($encoding) {
    if (!$encoding) {
        return;
    }

    $this->encoding = strtolower($encoding);
}
1794
/**
 * Turns linking of keywords on or off.
 *
 * @param boolean $enable If true, links will be added to keywords
 * @since 1.0.2
 */
function enable_keyword_links($enable = true) {
    // Any truthy/falsy input is stored as a strict boolean
    $this->keyword_links = $enable ? true : false;
}
1804
/**
 * Setup caches needed for styling. This is automatically called in
 * parse_code() and get_stylesheet() when appropriate. This function helps
 * stylesheet generators as they rely on some style information being
 * preprocessed
 *
 * Only the number highlighting cache (NUMBERS_CACHE) is built here, and
 * only while number highlighting is enabled.
 *
 * @since 1.0.8
 * @access private
 */
function build_style_cache() {
    //Nothing to prepare while number highlighting is switched off
    if (!$this->lexic_permissions['NUMBERS']) {
        return;
    }

    //First check what way highlighting information for numbers are given
    if (!isset($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS'] = 0;
    }

    //An array is taken over as the cache without further processing
    if (is_array($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
        return;
    }

    $this->language_data['NUMBERS_CACHE'] = array();

    //No flags given: fall back to basic integers and non-scientific floats
    if (!$this->language_data['NUMBERS']) {
        $this->language_data['NUMBERS'] = GESHI_NUMBER_INT_BASIC | GESHI_NUMBER_FLT_NONSCI;
    }

    //Walk the flag value bit by bit, lowest bit first
    $remaining = $this->language_data['NUMBERS'];
    $index = 0;
    while ($remaining > 0) {
        $bit = 1 << $index;

        //Rearrange style indices if required: a style keyed by the flag
        //value is moved to the key of the bit position
        if (isset($this->language_data['STYLES']['NUMBERS'][$bit])) {
            $this->language_data['STYLES']['NUMBERS'][$index] =
                $this->language_data['STYLES']['NUMBERS'][$bit];
            unset($this->language_data['STYLES']['NUMBERS'][$bit]);
        }

        //Check if this bit is set for highlighting
        if ($remaining & 1) {
            if (isset($this->language_data['STYLES']['NUMBERS'][$index])) {
                //The bit has its own style group
                $this->language_data['NUMBERS_CACHE'][$index] = $bit;
            } else {
                //No own style: collect the bit in group 0
                if (!isset($this->language_data['NUMBERS_CACHE'][0])) {
                    $this->language_data['NUMBERS_CACHE'][0] = 0;
                }
                $this->language_data['NUMBERS_CACHE'][0] |= $bit;
            }
        }

        ++$index;
        $remaining >>= 1;
    }
}
1857
1858 /**
1859 * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
1860 * This function makes stylesheet generators much faster as they do not need these caches.
1861 *
1862 * @since 1.0.8
1863 * @access private
1864 */
1865 function build_parse_cache() {
1866 // cache symbol regexp
1867 //As this is a costy operation, we avoid doing it for multiple groups ...
1868 //Instead we perform it for all symbols at once.
1869 //
1870 //For this to work, we need to reorganize the data arrays.
1871 if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
1872 $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;
1873
1874 $this->language_data['SYMBOL_DATA'] = array();
1875 $symbol_preg_multi = array(); // multi char symbols
1876 $symbol_preg_single = array(); // single char symbols
1877 foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
1878 if (is_array($symbols)) {
1879 foreach ($symbols as $sym) {
1880 $sym = $this->hsc($sym);
1881 if (!isset($this->language_data['SYMBOL_DATA'][$sym])) {
1882 $this->language_data['SYMBOL_DATA'][$sym] = $key;
1883 if (isset($sym[1])) { // multiple chars
1884 $symbol_preg_multi[] = preg_quote($sym, '/');
1885 } else { // single char
1886 if ($sym == '-') {
1887 // don't trigger range out of order error
1888 $symbol_preg_single[] = '\-';
1889 } else {
1890 $symbol_preg_single[] = preg_quote($sym, '/');
1891 }
1892 }
1893 }
1894 }
1895 } else {
1896 $symbols = $this->hsc($symbols);
1897 if (!isset($this->language_data['SYMBOL_DATA'][$symbols])) {
1898 $this->language_data['SYMBOL_DATA'][$symbols] = 0;
1899 if (isset($symbols[1])) { // multiple chars
1900 $symbol_preg_multi[] = preg_quote($symbols, '/');
1901 } else if ($symbols == '-') {
1902 // don't trigger range out of order error
1903 $symbol_preg_single[] = '\-';
1904 } else { // single char
1905 $symbol_preg_single[] = preg_quote($symbols, '/');
1906 }
1907 }
1908 }
1909 }
1910
1911 //Now we have an array with each possible symbol as the key and the style as the actual data.
1912 //This way we can set the correct style just the moment we highlight ...
1913 //
1914 //Now we need to rewrite our array to get a search string that
1915 $symbol_preg = array();
1916 if (!empty($symbol_preg_multi)) {
1917 rsort($symbol_preg_multi);
1918 $symbol_preg[] = implode('|', $symbol_preg_multi);
1919 }
1920 if (!empty($symbol_preg_single)) {
1921 rsort($symbol_preg_single);
1922 $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
1923 }
1924 $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
1925 }
1926
1927 // cache optimized regexp for keyword matching
1928 // remove old cache
1929 $this->language_data['CACHED_KEYWORD_LISTS'] = array();
1930 foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
1931 if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
1932 $this->lexic_permissions['KEYWORDS'][$key]) {
1933 $this->optimize_keyword_group($key);
1934 }
1935 }
1936
1937 // brackets
1938 if ($this->lexic_permissions['BRACKETS']) {
1939 $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');
1940 if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
1941 $this->language_data['CACHE_BRACKET_REPLACE'] = array(
1942 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#91;|>',
1943 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#93;|>',
1944 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#40;|>',
1945 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#41;|>',
1946 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#123;|>',
1947 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#125;|>',
1948 );
1949 }
1950 else {
1951 $this->language_data['CACHE_BRACKET_REPLACE'] = array(
1952 '<| class="br0">&#91;|>',
1953 '<| class="br0">&#93;|>',
1954 '<| class="br0">&#40;|>',
1955 '<| class="br0">&#41;|>',
1956 '<| class="br0">&#123;|>',
1957 '<| class="br0">&#125;|>',
1958 );
1959 }
1960 }
1961
1962 //Build the parse cache needed to highlight numbers appropriate
1963 if($this->lexic_permissions['NUMBERS']) {
1964 //Check if the style rearrangements have been processed ...
1965 //This also does some preprocessing to check which style groups are useable ...
1966 if(!isset($this->language_data['NUMBERS_CACHE'])) {
1967 $this->build_style_cache();
1968 }
1969
1970 //Number format specification
1971 //All these formats are matched case-insensitively!
1972 static $numbers_format = array(
1973 GESHI_NUMBER_INT_BASIC =>
1974 '(?:(?<![0-9a-z_\.%])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1975 GESHI_NUMBER_INT_CSTYLE =>
1976 '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1977 GESHI_NUMBER_BIN_SUFFIX =>
1978 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1979 GESHI_NUMBER_BIN_PREFIX_PERCENT =>
1980 '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1981 GESHI_NUMBER_BIN_PREFIX_0B =>
1982 '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1983 GESHI_NUMBER_OCT_PREFIX =>
1984 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1985 GESHI_NUMBER_OCT_PREFIX_0O =>
1986 '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1987 GESHI_NUMBER_OCT_SUFFIX =>
1988 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1989 GESHI_NUMBER_HEX_PREFIX =>
1990 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1991 GESHI_NUMBER_HEX_SUFFIX =>
1992 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1993 GESHI_NUMBER_FLT_NONSCI =>
1994 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1995 GESHI_NUMBER_FLT_NONSCI_F =>
1996 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1997 GESHI_NUMBER_FLT_SCI_SHORT =>
1998 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1999 GESHI_NUMBER_FLT_SCI_ZERO =>
2000 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
2001 );
2002
2003 //At this step we have an associative array with flag groups for a
2004 //specific style or an string denoting a regexp given its index.
2005 $this->language_data['NUMBERS_RXCACHE'] = array();
2006 foreach($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
2007 if(is_string($rxdata)) {
2008 $regexp = $rxdata;
2009 } else {
2010 //This is a bitfield of number flags to highlight:
2011 //Build an array, implode them together and make this the actual RX
2012 $rxuse = array();
2013 for($i = 1; $i <= $rxdata; $i<<=1) {
2014 if($rxdata & $i) {
2015 $rxuse[] = $numbers_format[$i];
2016 }
2017 }
2018 $regexp = implode("|", $rxuse);
2019 }
2020
2021 $this->language_data['NUMBERS_RXCACHE'][$key] =
2022 "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^\<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i"; //
2023 }
2024 }
2025
2026 $this->parse_cache_built = true;
2027 }
2028
2029 /**
2030 * Returns the code in $this->source, highlighted and surrounded by the
2031 * necessary HTML.
2032 *
2033 * This should only be called ONCE, cos it's SLOW! If you want to highlight
2034 * the same source multiple times, you're better off doing a whole lot of
2035 * str_replaces to replace the &lt;span&gt;s
2036 *
2037 * @since 1.0.0
2038 */
2039 function parse_code () {
2040 // Start the timer
2041 $start_time = microtime();
2042
2043 // Replace all newlines to a common form.
2044 $code = str_replace("\r\n", "\n", $this->source);
2045 $code = str_replace("\r", "\n", $code);
2046
2047 // Firstly, if there is an error, we won't highlight
2048 if ($this->error) {
2049 //Escape the source for output
2050 $result = $this->hsc($this->source);
2051
2052 //This fix is related to SF#1923020, but has to be applied regardless of
2053 //actually highlighting symbols.
2054 $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
2055
2056 // Timing is irrelevant
2057 $this->set_time($start_time, $start_time);
2058 $this->finalise($result);
2059 return $result;
2060 }
2061
2062 // make sure the parse cache is up2date
2063 if (!$this->parse_cache_built) {
2064 $this->build_parse_cache();
2065 }
2066
2067 // Initialise various stuff
2068 $length = strlen($code);
2069 $COMMENT_MATCHED = false;
2070 $stuff_to_parse = '';
2071 $endresult = '';
2072
2073 // "Important" selections are handled like multiline comments
2074 // @todo GET RID OF THIS SHIZ
2075 if ($this->enable_important_blocks) {
2076 $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2077 }
2078
2079 if ($this->strict_mode) {
2080 // Break the source into bits. Each bit will be a portion of the code
2081 // within script delimiters - for example, HTML between < and >
2082 $k = 0;
2083 $parts = array();
2084 $matches = array();
2085 $next_match_pointer = null;
2086 // we use a copy to unset delimiters on demand (when they are not found)
2087 $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2088 $i = 0;
2089 while ($i < $length) {
2090 $next_match_pos = $length + 1; // never true
2091 foreach ($delim_copy as $dk => $delimiters) {
2092 if(is_array($delimiters)) {
2093 foreach ($delimiters as $open => $close) {
2094 // make sure the cache is setup properly
2095 if (!isset($matches[$dk][$open])) {
2096 $matches[$dk][$open] = array(
2097 'next_match' => -1,
2098 'dk' => $dk,
2099
2100 'open' => $open, // needed for grouping of adjacent code blocks (see below)
2101 'open_strlen' => strlen($open),
2102
2103 'close' => $close,
2104 'close_strlen' => strlen($close),
2105 );
2106 }
2107 // Get the next little bit for this opening string
2108 if ($matches[$dk][$open]['next_match'] < $i) {
2109 // only find the next pos if it was not already cached
2110 $open_pos = strpos($code, $open, $i);
2111 if ($open_pos === false) {
2112 // no match for this delimiter ever
2113 unset($delim_copy[$dk][$open]);
2114 continue;
2115 }
2116 $matches[$dk][$open]['next_match'] = $open_pos;
2117 }
2118 if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2119 //So we got a new match, update the close_pos
2120 $matches[$dk][$open]['close_pos'] =
2121 strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2122
2123 $next_match_pointer =& $matches[$dk][$open];
2124 $next_match_pos = $matches[$dk][$open]['next_match'];
2125 }
2126 }
2127 } else {
2128 //So we should match an RegExp as Strict Block ...
2129 /**
2130 * The value in $delimiters is expected to be an RegExp
2131 * containing exactly 2 matching groups:
2132 * - Group 1 is the opener
2133 * - Group 2 is the closer
2134 */
2135 if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work.
2136 preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2137 //We got a match ...
2138 if(isset($matches_rx['start']) && isset($matches_rx['end']))
2139 {
2140 $matches[$dk] = array(
2141 'next_match' => $matches_rx['start'][1],
2142 'dk' => $dk,
2143
2144 'close_strlen' => strlen($matches_rx['end'][0]),
2145 'close_pos' => $matches_rx['end'][1],
2146 );
2147 } else {
2148 $matches[$dk] = array(
2149 'next_match' => $matches_rx[1][1],
2150 'dk' => $dk,
2151
2152 'close_strlen' => strlen($matches_rx[2][0]),
2153 'close_pos' => $matches_rx[2][1],
2154 );
2155 }
2156 } else {
2157 // no match for this delimiter ever
2158 unset($delim_copy[$dk]);
2159 continue;
2160 }
2161
2162 if ($matches[$dk]['next_match'] <= $next_match_pos) {
2163 $next_match_pointer =& $matches[$dk];
2164 $next_match_pos = $matches[$dk]['next_match'];
2165 }
2166 }
2167 }
2168
2169 // non-highlightable text
2170 $parts[$k] = array(
2171 1 => substr($code, $i, $next_match_pos - $i)
2172 );
2173 ++$k;
2174
2175 if ($next_match_pos > $length) {
2176 // out of bounds means no next match was found
2177 break;
2178 }
2179
2180 // highlightable code
2181 $parts[$k][0] = $next_match_pointer['dk'];
2182
2183 //Only combine for non-rx script blocks
2184 if(is_array($delim_copy[$next_match_pointer['dk']])) {
2185 // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2186 $i = $next_match_pos + $next_match_pointer['open_strlen'];
2187 while (true) {
2188 $close_pos = strpos($code, $next_match_pointer['close'], $i);
2189 if ($close_pos == false) {
2190 break;
2191 }
2192 $i = $close_pos + $next_match_pointer['close_strlen'];
2193 if ($i == $length) {
2194 break;
2195 }
2196 if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2197 substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2198 // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2199 foreach ($matches as $submatches) {
2200 foreach ($submatches as $match) {
2201 if ($match['next_match'] == $i) {
2202 // a different block already matches here!
2203 break 3;
2204 }
2205 }
2206 }
2207 } else {
2208 break;
2209 }
2210 }
2211 } else {
2212 $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2213 $i = $close_pos;
2214 }
2215
2216 if ($close_pos === false) {
2217 // no closing delimiter found!
2218 $parts[$k][1] = substr($code, $next_match_pos);
2219 ++$k;
2220 break;
2221 } else {
2222 $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2223 ++$k;
2224 }
2225 }
2226 unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2227 $num_parts = $k;
2228
2229 if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2230 // when we have only one part, we don't have anything to highlight at all.
2231 // if we have a "maybe" strict language, this should be handled as highlightable code
2232 $parts = array(
2233 0 => array(
2234 0 => '',
2235 1 => ''
2236 ),
2237 1 => array(
2238 0 => null,
2239 1 => $parts[0][1]
2240 )
2241 );
2242 $num_parts = 2;
2243 }
2244
2245 } else {
2246 // Not strict mode - simply dump the source into
2247 // the array at index 1 (the first highlightable block)
2248 $parts = array(
2249 0 => array(
2250 0 => '',
2251 1 => ''
2252 ),
2253 1 => array(
2254 0 => null,
2255 1 => $code
2256 )
2257 );
2258 $num_parts = 2;
2259 }
2260
2261 //Unset variables we won't need any longer
2262 unset($code);
2263
2264 //Preload some repeatedly used values regarding hardquotes ...
2265 $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2266 $hq_strlen = strlen($hq);
2267
2268 //Preload if line numbers are to be generated afterwards
2269 //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2270 $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
2271 !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
2272
2273 //preload the escape char for faster checking ...
2274 $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2275
2276 // this is used for single-line comments
2277 $sc_disallowed_before = "";
2278 $sc_disallowed_after = "";
2279
2280 if (isset($this->language_data['PARSER_CONTROL'])) {
2281 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2282 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2283 $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2284 }
2285 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2286 $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2287 }
2288 }
2289 }
2290
2291 //Fix for SF#1932083: Multichar Quotemarks unsupported
2292 $is_string_starter = array();
2293 if ($this->lexic_permissions['STRINGS']) {
2294 foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2295 if (!isset($is_string_starter[$quotemark[0]])) {
2296 $is_string_starter[$quotemark[0]] = (string)$quotemark;
2297 } else if (is_string($is_string_starter[$quotemark[0]])) {
2298 $is_string_starter[$quotemark[0]] = array(
2299 $is_string_starter[$quotemark[0]],
2300 $quotemark);
2301 } else {
2302 $is_string_starter[$quotemark[0]][] = $quotemark;
2303 }
2304 }
2305 }
2306
2307 // Now we go through each part. We know that even-indexed parts are
2308 // code that shouldn't be highlighted, and odd-indexed parts should
2309 // be highlighted
2310 for ($key = 0; $key < $num_parts; ++$key) {
2311 $STRICTATTRS = '';
2312
2313 // If this block should be highlighted...
2314 if (!($key & 1)) {
2315 // Else not a block to highlight
2316 $endresult .= $this->hsc($parts[$key][1]);
2317 unset($parts[$key]);
2318 continue;
2319 }
2320
2321 $result = '';
2322 $part = $parts[$key][1];
2323
2324 $highlight_part = true;
2325 if ($this->strict_mode && !is_null($parts[$key][0])) {
2326 // get the class key for this block of code
2327 $script_key = $parts[$key][0];
2328 $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2329 if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2330 $this->lexic_permissions['SCRIPT']) {
2331 // Add a span element around the source to
2332 // highlight the overall source block
2333 if (!$this->use_classes &&
2334 $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2335 $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2336 } else {
2337 $attributes = ' class="sc' . $script_key . '"';
2338 }
2339 $result .= "<span$attributes>";
2340 $STRICTATTRS = $attributes;
2341 }
2342 }
2343
2344 if ($highlight_part) {
2345 // Now, highlight the code in this block. This code
2346 // is really the engine of GeSHi (along with the method
2347 // parse_non_string_part).
2348
2349 // cache comment regexps incrementally
2350 $next_comment_regexp_key = '';
2351 $next_comment_regexp_pos = -1;
2352 $next_comment_multi_pos = -1;
2353 $next_comment_single_pos = -1;
2354 $comment_regexp_cache_per_key = array();
2355 $comment_multi_cache_per_key = array();
2356 $comment_single_cache_per_key = array();
2357 $next_open_comment_multi = '';
2358 $next_comment_single_key = '';
2359 $escape_regexp_cache_per_key = array();
2360 $next_escape_regexp_key = '';
2361 $next_escape_regexp_pos = -1;
2362
2363 $length = strlen($part);
2364 for ($i = 0; $i < $length; ++$i) {
2365 // Get the next char
2366 $char = $part[$i];
2367 $char_len = 1;
2368
2369 // update regexp comment cache if needed
2370 if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2371 $next_comment_regexp_pos = $length;
2372 foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2373 $match_i = false;
2374 if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2375 ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2376 $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2377 // we have already matched something
2378 if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2379 // this comment is never matched
2380 continue;
2381 }
2382 $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2383 } else if (
2384 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2385 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
2386 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
2387 ) {
2388 $match_i = $match[0][1];
2389 if (GESHI_PHP_PRE_433) {
2390 $match_i += $i;
2391 }
2392
2393 $comment_regexp_cache_per_key[$comment_key] = array(
2394 'key' => $comment_key,
2395 'length' => strlen($match[0][0]),
2396 'pos' => $match_i
2397 );
2398 } else {
2399 $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2400 continue;
2401 }
2402
2403 if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2404 $next_comment_regexp_pos = $match_i;
2405 $next_comment_regexp_key = $comment_key;
2406 if ($match_i === $i) {
2407 break;
2408 }
2409 }
2410 }
2411 }
2412
2413 $string_started = false;
2414
2415 if (isset($is_string_starter[$char])) {
2416 // Possibly the start of a new string ...
2417
2418 //Check which starter it was ...
2419 //Fix for SF#1932083: Multichar Quotemarks unsupported
2420 if (is_array($is_string_starter[$char])) {
2421 $char_new = '';
2422 foreach ($is_string_starter[$char] as $testchar) {
2423 if ($testchar === substr($part, $i, strlen($testchar)) &&
2424 strlen($testchar) > strlen($char_new)) {
2425 $char_new = $testchar;
2426 $string_started = true;
2427 }
2428 }
2429 if ($string_started) {
2430 $char = $char_new;
2431 }
2432 } else {
2433 $testchar = $is_string_starter[$char];
2434 if ($testchar === substr($part, $i, strlen($testchar))) {
2435 $char = $testchar;
2436 $string_started = true;
2437 }
2438 }
2439 $char_len = strlen($char);
2440 }
2441
2442 if ($string_started && ($i != $next_comment_regexp_pos)) {
2443 // Hand out the correct style information for this string
2444 $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
2445 if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
2446 !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
2447 $string_key = 0;
2448 }
2449
2450 // parse the stuff before this
2451 $result .= $this->parse_non_string_part($stuff_to_parse);
2452 $stuff_to_parse = '';
2453
2454 if (!$this->use_classes) {
2455 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
2456 } else {
2457 $string_attributes = ' class="st'.$string_key.'"';
2458 }
2459
2460 // now handle the string
2461 $string = "<span$string_attributes>" . GeSHi::hsc($char);
2462 $start = $i + $char_len;
2463 $string_open = true;
2464
2465 if(empty($this->language_data['ESCAPE_REGEXP'])) {
2466 $next_escape_regexp_pos = $length;
2467 }
2468
2469 do {
2470 //Get the regular ending pos ...
2471 $close_pos = strpos($part, $char, $start);
2472 if(false === $close_pos) {
2473 $close_pos = $length;
2474 }
2475
2476 if($this->lexic_permissions['ESCAPE_CHAR']) {
2477 // update escape regexp cache if needed
2478 if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2479 $next_escape_regexp_pos = $length;
2480 foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2481 $match_i = false;
2482 if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2483 ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
2484 $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2485 // we have already matched something
2486 if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
2487 // this comment is never matched
2488 continue;
2489 }
2490 $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2491 } else if (
2492 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2493 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) ||
2494 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start))
2495 ) {
2496 $match_i = $match[0][1];
2497 if (GESHI_PHP_PRE_433) {
2498 $match_i += $start;
2499 }
2500
2501 $escape_regexp_cache_per_key[$escape_key] = array(
2502 'key' => $escape_key,
2503 'length' => strlen($match[0][0]),
2504 'pos' => $match_i
2505 );
2506 } else {
2507 $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2508 continue;
2509 }
2510
2511 if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2512 $next_escape_regexp_pos = $match_i;
2513 $next_escape_regexp_key = $escape_key;
2514 if ($match_i === $start) {
2515 break;
2516 }
2517 }
2518 }
2519 }
2520
2521 //Find the next simple escape position
2522 if('' != $this->language_data['ESCAPE_CHAR']) {
2523 $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
2524 if(false === $simple_escape) {
2525 $simple_escape = $length;
2526 }
2527 } else {
2528 $simple_escape = $length;
2529 }
2530 } else {
2531 $next_escape_regexp_pos = $length;
2532 $simple_escape = $length;
2533 }
2534
2535 if($simple_escape < $next_escape_regexp_pos &&
2536 $simple_escape < $length &&
2537 $simple_escape < $close_pos) {
2538 //The next escape sequence is a simple one ...
2539 $es_pos = $simple_escape;
2540
2541 //Add the stuff not in the string yet ...
2542 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2543
2544 //Get the style for this escaped char ...
2545 if (!$this->use_classes) {
2546 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
2547 } else {
2548 $escape_char_attributes = ' class="es0"';
2549 }
2550
2551 //Add the style for the escape char ...
2552 $string .= "<span$escape_char_attributes>" .
2553 GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
2554
2555 //Get the byte AFTER the ESCAPE_CHAR we just found
2556 $es_char = $part[$es_pos + 1];
2557 if ($es_char == "\n") {
2558 // don't put a newline around newlines
2559 $string .= "</span>\n";
2560 $start = $es_pos + 2;
2561 } else if (ord($es_char) >= 128) {
2562 //This is an non-ASCII char (UTF8 or single byte)
2563 //This code tries to work around SF#2037598 ...
2564 if(function_exists('mb_substr')) {
2565 $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
2566 $string .= $es_char_m . '</span>';
2567 } else if (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) {
2568 if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2569 "|\xE0[\xA0-\xBF][\x80-\xBF]".
2570 "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2571 "|\xED[\x80-\x9F][\x80-\xBF]".
2572 "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2573 "|[\xF1-\xF3][\x80-\xBF]{3}".
2574 "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2575 $part, $es_char_m, null, $es_pos + 1)) {
2576 $es_char_m = $es_char_m[0];
2577 } else {
2578 $es_char_m = $es_char;
2579 }
2580 $string .= $this->hsc($es_char_m) . '</span>';
2581 } else {
2582 $es_char_m = $this->hsc($es_char);
2583 }
2584 $start = $es_pos + strlen($es_char_m) + 1;
2585 } else {
2586 $string .= $this->hsc($es_char) . '</span>';
2587 $start = $es_pos + 2;
2588 }
2589 } else if ($next_escape_regexp_pos < $length &&
2590 $next_escape_regexp_pos < $close_pos) {
2591 $es_pos = $next_escape_regexp_pos;
2592 //Add the stuff not in the string yet ...
2593 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2594
2595 //Get the key and length of this match ...
2596 $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2597 $escape_str = substr($part, $es_pos, $escape['length']);
2598 $escape_key = $escape['key'];
2599
2600 //Get the style for this escaped char ...
2601 if (!$this->use_classes) {
2602 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2603 } else {
2604 $escape_char_attributes = ' class="es' . $escape_key . '"';
2605 }
2606
2607 //Add the style for the escape char ...
2608 $string .= "<span$escape_char_attributes>" .
2609 $this->hsc($escape_str) . '</span>';
2610
2611 $start = $es_pos + $escape['length'];
2612 } else {
2613 //Copy the remainder of the string ...
2614 $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
2615 $start = $close_pos + $char_len;
2616 $string_open = false;
2617 }
2618 } while($string_open);
2619
2620 if ($check_linenumbers) {
2621 // Are line numbers used? If, we should end the string before
2622 // the newline and begin it again (so when <li>s are put in the source
2623 // remains XHTML compliant)
2624 // note to self: This opens up possibility of config files specifying
2625 // that languages can/cannot have multiline strings???
2626 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2627 }
2628
2629 $result .= $string;
2630 $string = '';
2631 $i = $start - 1;
2632 continue;
2633 } else if ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
2634 substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
2635 // The start of a hard quoted string
2636 if (!$this->use_classes) {
2637 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
2638 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2639 } else {
2640 $string_attributes = ' class="st_h"';
2641 $escape_char_attributes = ' class="es_h"';
2642 }
2643 // parse the stuff before this
2644 $result .= $this->parse_non_string_part($stuff_to_parse);
2645 $stuff_to_parse = '';
2646
2647 // now handle the string
2648 $string = '';
2649
2650 // look for closing quote
2651 $start = $i + $hq_strlen;
2652 while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
2653 $start = $close_pos + 1;
2654 if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] &&
2655 (($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape
2656 // make sure this quote is not escaped
2657 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2658 if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
2659 // check whether this quote is escaped or if it is something like '\\'
2660 $escape_char_pos = $close_pos - 1;
2661 while ($escape_char_pos > 0
2662 && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
2663 --$escape_char_pos;
2664 }
2665 if (($close_pos - $escape_char_pos) & 1) {
2666 // uneven number of escape chars => this quote is escaped
2667 continue 2;
2668 }
2669 }
2670 }
2671 }
2672
2673 // found closing quote
2674 break;
2675 }
2676
2677 //Found the closing delimiter?
2678 if (!$close_pos) {
2679 // span till the end of this $part when no closing delimiter is found
2680 $close_pos = $length;
2681 }
2682
2683 //Get the actual string
2684 $string = substr($part, $i, $close_pos - $i + 1);
2685 $i = $close_pos;
2686
2687 // handle escape chars and encode html chars
2688 // (special because when we have escape chars within our string they may not be escaped)
2689 if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
2690 $start = 0;
2691 $new_string = '';
2692 while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
2693 // html escape stuff before
2694 $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2695 // check if this is a hard escape
2696 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2697 if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2698 // indeed, this is a hardescape
2699 $new_string .= "<span$escape_char_attributes>" .
2700 $this->hsc($hardescape) . '</span>';
2701 $start = $es_pos + strlen($hardescape);
2702 continue 2;
2703 }
2704 }
2705 // not a hard escape, but a normal escape
2706 // they come in pairs of two
2707 $c = 0;
2708 while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
2709 && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
2710 && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
2711 $c += 2;
2712 }
2713 if ($c) {
2714 $new_string .= "<span$escape_char_attributes>" .
2715 str_repeat($escaped_escape_char, $c) .
2716 '</span>';
2717 $start = $es_pos + $c;
2718 } else {
2719 // this is just a single lonely escape char...
2720 $new_string .= $escaped_escape_char;
2721 $start = $es_pos + 1;
2722 }
2723 }
2724 $string = $new_string . $this->hsc(substr($string, $start));
2725 } else {
2726 $string = $this->hsc($string);
2727 }
2728
2729 if ($check_linenumbers) {
2730 // Are line numbers used? If, we should end the string before
2731 // the newline and begin it again (so when <li>s are put in the source
2732 // remains XHTML compliant)
2733 // note to self: This opens up possibility of config files specifying
2734 // that languages can/cannot have multiline strings???
2735 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2736 }
2737
2738 $result .= "<span$string_attributes>" . $string . '</span>';
2739 $string = '';
2740 continue;
2741 } else {
2742 //Have a look for regexp comments
2743 if ($i == $next_comment_regexp_pos) {
2744 $COMMENT_MATCHED = true;
2745 $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2746 $test_str = $this->hsc(substr($part, $i, $comment['length']));
2747
2748 //@todo If remove important do remove here
2749 if ($this->lexic_permissions['COMMENTS']['MULTI']) {
2750 if (!$this->use_classes) {
2751 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
2752 } else {
2753 $attributes = ' class="co' . $comment['key'] . '"';
2754 }
2755
2756 $test_str = "<span$attributes>" . $test_str . "</span>";
2757
2758 // Short-cut through all the multiline code
2759 if ($check_linenumbers) {
2760 // strreplace to put close span and open span around multiline newlines
2761 $test_str = str_replace(
2762 "\n", "</span>\n<span$attributes>",
2763 str_replace("\n ", "\n&nbsp;", $test_str)
2764 );
2765 }
2766 }
2767
2768 $i += $comment['length'] - 1;
2769
2770 // parse the rest
2771 $result .= $this->parse_non_string_part($stuff_to_parse);
2772 $stuff_to_parse = '';
2773 }
2774
2775 // If we haven't matched a regexp comment, try multi-line comments
2776 if (!$COMMENT_MATCHED) {
2777 // Is this a multiline comment?
2778 if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2779 $next_comment_multi_pos = $length;
2780 foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
2781 $match_i = false;
2782 if (isset($comment_multi_cache_per_key[$open]) &&
2783 ($comment_multi_cache_per_key[$open] >= $i ||
2784 $comment_multi_cache_per_key[$open] === false)) {
2785 // we have already matched something
2786 if ($comment_multi_cache_per_key[$open] === false) {
2787 // this comment is never matched
2788 continue;
2789 }
2790 $match_i = $comment_multi_cache_per_key[$open];
2791 } else if (($match_i = stripos($part, $open, $i)) !== false) {
2792 $comment_multi_cache_per_key[$open] = $match_i;
2793 } else {
2794 $comment_multi_cache_per_key[$open] = false;
2795 continue;
2796 }
2797 if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2798 $next_comment_multi_pos = $match_i;
2799 $next_open_comment_multi = $open;
2800 if ($match_i === $i) {
2801 break;
2802 }
2803 }
2804 }
2805 }
2806 if ($i == $next_comment_multi_pos) {
2807 $open = $next_open_comment_multi;
2808 $close = $this->language_data['COMMENT_MULTI'][$open];
2809 $open_strlen = strlen($open);
2810 $close_strlen = strlen($close);
2811 $COMMENT_MATCHED = true;
2812 $test_str_match = $open;
2813 //@todo If remove important do remove here
2814 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2815 $open == GESHI_START_IMPORTANT) {
2816 if ($open != GESHI_START_IMPORTANT) {
2817 if (!$this->use_classes) {
2818 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
2819 } else {
2820 $attributes = ' class="coMULTI"';
2821 }
2822 $test_str = "<span$attributes>" . $this->hsc($open);
2823 } else {
2824 if (!$this->use_classes) {
2825 $attributes = ' style="' . $this->important_styles . '"';
2826 } else {
2827 $attributes = ' class="imp"';
2828 }
2829
2830 // We don't include the start of the comment if it's an
2831 // "important" part
2832 $test_str = "<span$attributes>";
2833 }
2834 } else {
2835 $test_str = $this->hsc($open);
2836 }
2837
2838 $close_pos = strpos( $part, $close, $i + $open_strlen );
2839
2840 if ($close_pos === false) {
2841 $close_pos = $length;
2842 }
2843
2844 // Short-cut through all the multiline code
2845 $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
2846 if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
2847 $test_str_match == GESHI_START_IMPORTANT) &&
2848 $check_linenumbers) {
2849
2850 // strreplace to put close span and open span around multiline newlines
2851 $test_str .= str_replace(
2852 "\n", "</span>\n<span$attributes>",
2853 str_replace("\n ", "\n&nbsp;", $rest_of_comment)
2854 );
2855 } else {
2856 $test_str .= $rest_of_comment;
2857 }
2858
2859 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2860 $test_str_match == GESHI_START_IMPORTANT) {
2861 $test_str .= '</span>';
2862 }
2863
2864 $i = $close_pos + $close_strlen - 1;
2865
2866 // parse the rest
2867 $result .= $this->parse_non_string_part($stuff_to_parse);
2868 $stuff_to_parse = '';
2869 }
2870 }
2871
2872 // If we haven't matched a multiline comment, try single-line comments
2873 if (!$COMMENT_MATCHED) {
2874 // cache potential single line comment occurances
2875 if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
2876 $next_comment_single_pos = $length;
2877 foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
2878 $match_i = false;
2879 if (isset($comment_single_cache_per_key[$comment_key]) &&
2880 ($comment_single_cache_per_key[$comment_key] >= $i ||
2881 $comment_single_cache_per_key[$comment_key] === false)) {
2882 // we have already matched something
2883 if ($comment_single_cache_per_key[$comment_key] === false) {
2884 // this comment is never matched
2885 continue;
2886 }
2887 $match_i = $comment_single_cache_per_key[$comment_key];
2888 } else if (
2889 // case sensitive comments
2890 ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
2891 ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
2892 // non case sensitive
2893 (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
2894 (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
2895 $comment_single_cache_per_key[$comment_key] = $match_i;
2896 } else {
2897 $comment_single_cache_per_key[$comment_key] = false;
2898 continue;
2899 }
2900 if ($match_i !== false && $match_i < $next_comment_single_pos) {
2901 $next_comment_single_pos = $match_i;
2902 $next_comment_single_key = $comment_key;
2903 if ($match_i === $i) {
2904 break;
2905 }
2906 }
2907 }
2908 }
2909 if ($next_comment_single_pos == $i) {
2910 $comment_key = $next_comment_single_key;
2911 $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
2912 $com_len = strlen($comment_mark);
2913
2914 // This check will find special variables like $# in bash
2915 // or compiler directives of Delphi beginning {$
2916 if ((empty($sc_disallowed_before) || ($i == 0) ||
2917 (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
2918 (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
2919 (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
2920 {
2921 // this is a valid comment
2922 $COMMENT_MATCHED = true;
2923 if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
2924 if (!$this->use_classes) {
2925 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
2926 } else {
2927 $attributes = ' class="co' . $comment_key . '"';
2928 }
2929 $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
2930 } else {
2931 $test_str = $this->hsc($comment_mark);
2932 }
2933
2934 //Check if this comment is the last in the source
2935 $close_pos = strpos($part, "\n", $i);
2936 $oops = false;
2937 if ($close_pos === false) {
2938 $close_pos = $length;
2939 $oops = true;
2940 }
2941 $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
2942 if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
2943 $test_str .= "</span>";
2944 }
2945
2946 // Take into account that the comment might be the last in the source
2947 if (!$oops) {
2948 $test_str .= "\n";
2949 }
2950
2951 $i = $close_pos;
2952
2953 // parse the rest
2954 $result .= $this->parse_non_string_part($stuff_to_parse);
2955 $stuff_to_parse = '';
2956 }
2957 }
2958 }
2959 }
2960
2961 // Where are we adding this char?
2962 if (!$COMMENT_MATCHED) {
2963 $stuff_to_parse .= $char;
2964 } else {
2965 $result .= $test_str;
2966 unset($test_str);
2967 $COMMENT_MATCHED = false;
2968 }
2969 }
2970 // Parse the last bit
2971 $result .= $this->parse_non_string_part($stuff_to_parse);
2972 $stuff_to_parse = '';
2973 } else {
2974 $result .= $this->hsc($part);
2975 }
2976 // Close the <span> that surrounds the block
2977 if ($STRICTATTRS != '') {
2978 $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
2979 $result .= '</span>';
2980 }
2981
2982 $endresult .= $result;
2983 unset($part, $parts[$key], $result);
2984 }
2985
2986 //This fix is related to SF#1923020, but has to be applied regardless of
2987 //actually highlighting symbols.
2988 /** NOTE: memorypeak #3 */
2989 $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
2990
2991// // Parse the last stuff (redundant?)
2992// $result .= $this->parse_non_string_part($stuff_to_parse);
2993
2994 // Lop off the very first and last spaces
2995// $result = substr($result, 1, -1);
2996
2997 // We're finished: stop timing
2998 $this->set_time($start_time, microtime());
2999
3000 $this->finalise($endresult);
3001 return $endresult;
3002 }
3003
/**
 * Swaps out spaces and tabs for HTML indentation. Not needed if
 * the code is in a pre block...
 *
 * The highlighted HTML is rewritten in place:
 *  - tabs are expanded up to the next tab stop (width taken from
 *    get_real_tab_width()); characters inside HTML tags do not advance
 *    the column counter;
 *  - a space at the very start of a line becomes &nbsp;;
 *  - every run of two spaces becomes " &nbsp;" so that the browser does
 *    not collapse it. Single spaces are left alone: they also occur
 *    inside the generated tags (e.g. between "<span" and its attribute)
 *    and must not be touched;
 *  - if no line numbers are used and the header type is not
 *    GESHI_HEADER_PRE_TABLE, newlines are passed through nl2br() or
 *    replaced by the configured line ending.
 *
 * @param string The source to indent (reference!)
 * @since 1.0.0
 * @access private
 */
function indent(&$result) {
    /// Replace tabs with the correct number of spaces
    if (false !== strpos($result, "\t")) {
        $lines = explode("\n", $result);
        $result = null;//Save memory while we process the lines individually
        $tab_width = $this->get_real_tab_width();
        // '&nbsp;' (6 bytes) followed by $tab_width plain spaces; the tab
        // replacement below is cut out of this template with substr()
        $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);

        for ($key = 0, $n = count($lines); $key < $n; $key++) {
            $line = $lines[$key];
            if (false === strpos($line, "\t")) {
                // Lines without tabs are kept untouched
                continue;
            }

            $pos = 0;
            $length = strlen($line);
            $lines[$key] = ''; // reduce memory

            $IN_TAG = false;
            for ($i = 0; $i < $length; ++$i) {
                $char = $line[$i];
                // Simple engine to work out whether we're in a tag.
                // If we are we modify $pos. This is so we ignore HTML
                // in the line and only workout the tab replacement
                // via the actual content of the string
                // This test could be improved to include strings in the
                // html so that < or > would be allowed in user's styles
                // (e.g. quotes: '<' '>'; or similar)
                if ($IN_TAG) {
                    if ('>' == $char) {
                        $IN_TAG = false;
                    }
                    $lines[$key] .= $char;
                } else if ('<' == $char) {
                    $IN_TAG = true;
                    $lines[$key] .= '<';
                } else if ('&' == $char) {
                    // Start of an HTML entity: look for the terminating ';'
                    // in the 5 bytes that start 3 bytes after the '&' and
                    // adjust the column counter accordingly.
                    // NOTE(review): this presumably tries to make the whole
                    // entity count as a single column - the offset
                    // arithmetic has not been verified.
                    $substr = substr($line, $i + 3, 5);
                    $posi = strpos($substr, ';');
                    if (false === $posi) {
                        ++$pos;
                    } else {
                        $pos -= $posi+2;
                    }
                    $lines[$key] .= $char;
                } else if ("\t" == $char) {
                    // OPTIMISE - move $strs out. Make an array:
                    // $tabs = array(
                    //  1 => '&nbsp;',
                    //  2 => '&nbsp; ',
                    //  3 => '&nbsp; &nbsp;' etc etc
                    // to use instead of building a string every time

                    // Number of columns up to the next tab stop
                    $tab_end_width = $tab_width - ($pos % $tab_width);
                    if (($pos & 1) || 1 == $tab_end_width) {
                        // $tab_end_width plain spaces
                        $str = substr($tab_string, 6, $tab_end_width);
                    } else {
                        // '&nbsp;' followed by ($tab_end_width - 1) spaces
                        $str = substr($tab_string, 0, $tab_end_width+5);
                    }
                    $lines[$key] .= $str;
                    $pos += $tab_end_width;

                    if (false === strpos($line, "\t", $i + 1)) {
                        // No further tabs: copy the rest of the line verbatim
                        $lines[$key] .= substr($line, $i + 1);
                        break;
                    }
                } else if (0 == $pos && ' ' == $char) {
                    $lines[$key] .= '&nbsp;';
                    ++$pos;
                } else {
                    $lines[$key] .= $char;
                    ++$pos;
                }
            }
        }
        $result = implode("\n", $lines);
        unset($lines);//We don't need the lines separated beyond this --- free them!
    }
    // Other whitespace
    // BenBE: Fix to reduce the number of replacements to be done
    $result = preg_replace('/^ /m', '&nbsp;', $result);
    // Only runs of two spaces need an entity. Replacing every single space
    // would also rewrite the space inside '<span class="...">' and similar
    // tags and thereby break the markup.
    $result = str_replace('  ', ' &nbsp;', $result);

    if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
        if ($this->line_ending === null) {
            $result = nl2br($result);
        } else {
            $result = str_replace("\n", $this->line_ending, $result);
        }
    }
}
3103
/**
 * Applies the case conversion configured for the current language
 * (language_data['CASE_KEYWORDS']) to a keyword.
 *
 * GESHI_CAPS_UPPER upper-cases the text, GESHI_CAPS_LOWER lower-cases it;
 * any other setting leaves the text as it is.
 *
 * @param string The keyword to change the case of
 * @return string The keyword with its case changed
 * @since 1.0.0
 * @access private
 */
function change_case($instr) {
    $case_mode = $this->language_data['CASE_KEYWORDS'];

    if ($case_mode == GESHI_CAPS_UPPER) {
        return strtoupper($instr);
    }
    if ($case_mode == GESHI_CAPS_LOWER) {
        return strtolower($instr);
    }

    // No conversion requested
    return $instr;
}
3122
/**
 * Handles replacements of keywords to include markup and links if requested
 *
 * Used as the preg_replace_callback() handler for the keyword patterns in
 * parse_non_string_part(). The keyword group currently being processed is
 * read from $this->_kw_replace_group. The result does not contain real
 * HTML yet: the keyword is wrapped in "<|/GROUP/>" ... "|>" placeholders
 * and, when keyword links are enabled and the group has a URL, preceded by
 * a '<|UR1|"URL">' placeholder and followed by '</a>'. Dots in the URL are
 * written as "<DOT>". parse_non_string_part() turns these placeholders into
 * the final markup.
 *
 * @param array The matches array; element 0 is the keyword as found in the source
 * @return string The placeholder markup for the match found
 * @since 1.0.8
 * @access private
 *
 * @todo Get rid of ender in keyword links
 */
function handle_keyword_replace($match) {
    $k = $this->_kw_replace_group;
    $keyword = $match[0];

    $before = '';
    $after = '';

    if ($this->keyword_links) {
        // Keyword links have been enabled

        if (isset($this->language_data['URLS'][$k]) &&
            $this->language_data['URLS'][$k] != '') {
            // There is a base group for this keyword

            // Old system: strtolower
            //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
            // New system: get keyword from language file to get correct case

            // Fall back to the text exactly as matched. Without this default
            // a failed lookup below would leave $word holding the last
            // keyword of the group (or nothing at all for an empty group)
            // and the link would point to the wrong target.
            $word = $keyword;
            if (!$this->language_data['CASE_SENSITIVE'][$k] &&
                strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
                foreach ($this->language_data['KEYWORDS'][$k] as $candidate) {
                    if (strcasecmp($candidate, $keyword) == 0) {
                        $word = $candidate;
                        break;
                    }
                }
            }

            // {FNAME}/{FNAMEL}/{FNAMEU} receive the keyword as is, lower-cased
            // and upper-cased respectively; '+' produced by urlencode() is
            // turned into '%20'. Every '.' of the resulting URL becomes
            // '<DOT>' until parse_non_string_part() restores it.
            $before = '<|UR1|"' .
                str_replace(
                    array(
                        '{FNAME}',
                        '{FNAMEL}',
                        '{FNAMEU}',
                        '.'),
                    array(
                        str_replace('+', '%20', urlencode($this->hsc($word))),
                        str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
                        str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
                        '<DOT>'),
                    $this->language_data['URLS'][$k]
                ) . '">';
            $after = '</a>';
        }
    }

    return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
}
3181
/**
 * Inserts a dynamically computed style for one REGEXPS match.
 *
 * The style entry of the regexp group selected by $this->_rx_key is
 * invoked as a callback with the matched text (capture group 1); its
 * return value becomes the content of the style attribute that is put in
 * front of the matched text.
 *
 * @note this is a callback, don't use it directly
 *
 * @param array the matches array
 * @return string The highlighted string
 * @since 1.0.8
 * @access private
 */
function handle_regexps_callback($matches) {
    // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
    $style_callback = $this->language_data['STYLES']['REGEXPS'][$this->_rx_key];
    $matched_text = $matches[1];
    $style = call_user_func($style_callback, $matched_text);

    return ' style="' . $style . '"' . $matched_text . '|>';
}
3196
/**
 * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
 *
 * The match is wrapped in "<|!REG3XP" . _hmr_key . "!>" ... "|>"
 * placeholders and the placeholder pair is closed and reopened around
 * every newline, so that no highlighted block spans more than one line.
 *
 * If _hmr_replace is set, the backreferences \0, \1, ... (one per entry of
 * the matches array) inside _hmr_before, _hmr_replace and _hmr_after are
 * substituted by the corresponding captured text; otherwise the complete
 * match is used as the text and _hmr_before / _hmr_after are emitted
 * unchanged.
 *
 * @note this is a callback, don't use it directly
 *
 * @param array the matches array
 * @return string
 * @since 1.0.8
 * @access private
 */
function handle_multiline_regexps($matches) {
    $before = $this->_hmr_before;
    $after = $this->_hmr_after;
    if ($this->_hmr_replace) {
        // Map every backreference to its captured text
        $pairs = array();
        foreach ($matches as $k => $captured) {
            $pairs['\\' . $k] = $captured;
        }

        // strtr() substitutes in one pass, trying the longest key first and
        // never re-scanning text it has inserted. Therefore "\10" is not
        // mistaken for "\1" followed by "0", and captured source text that
        // itself contains something like "\1" is left untouched.
        $before = strtr($before, $pairs);
        $after = strtr($after, $pairs);
        $replace = strtr($this->_hmr_replace, $pairs);
    } else {
        $replace = $matches[0];
    }
    return $before
        . '<|!REG3XP' . $this->_hmr_key .'!>'
        . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
        . '|>'
        . $after;
}
3230
3231 /**
3232 * Takes a string that has no strings or comments in it, and highlights
3233 * stuff like keywords, numbers and methods.
 *
 * The text is HTML-escaped with hsc() and prefixed with one space, which is
 * cut off again by the final substr(). The individual passes do not insert
 * real tags: matches are wrapped in "<|...>" / "|>" placeholders which are
 * turned into "<span...>" / "</span>" as the very last step, so that a later
 * pass cannot highlight markup or styles inserted by an earlier one.
 *
 * Order of the passes: keywords, REGEXPS, numbers, keyword and number styles,
 * object members, brackets, symbols, REGEXPS styles, keyword link fix-ups.
3234 *
3235 * @param string The string to parse for keyword, numbers etc.
 * @return string The highlighted HTML fragment
3236 * @since 1.0.0
3237 * @access private
3238 * @todo BUGGY! Why? Why not build string and return?
3239 */
3240 function parse_non_string_part($stuff_to_parse) {
3241 $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
3242
3243 // Highlight keywords
 // Default lookbehind/lookahead assertions listing the characters that must
 // not directly precede/follow a keyword; quotemarks are added if strings
 // are being highlighted
3244 $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#;>|^&";
3245 $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
3246 if ($this->lexic_permissions['STRINGS']) {
3247 $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
3248 $disallowed_before .= $quotemarks;
3249 $disallowed_after .= $quotemarks;
3250 }
3251 $disallowed_before .= "])";
3252 $disallowed_after .= "])";
3253
 // A language file may replace both assertions globally and/or per keyword group
3254 $parser_control_pergroup = false;
3255 if (isset($this->language_data['PARSER_CONTROL'])) {
3256 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
3257 $x = 0; // check whether per-keyword-group parser_control is enabled
3258 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
3259 $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
3260 ++$x;
3261 }
3262 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
3263 $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
3264 ++$x;
3265 }
 // Any entry besides the two global ones counted in $x is a per-group setting
3266 $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
3267 }
3268 }
3269
3270 foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
 // Groups without an explicit permission entry are highlighted as well
3271 if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
3272 $this->lexic_permissions['KEYWORDS'][$k]) {
3273
3274 $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
3275 $modifiers = $case_sensitive ? '' : 'i';
3276
3277 // NEW in 1.0.8 - per-keyword-group parser control
3278 $disallowed_before_local = $disallowed_before;
3279 $disallowed_after_local = $disallowed_after;
3280 if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
3281 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
3282 $disallowed_before_local =
3283 $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
3284 }
3285
3286 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
3287 $disallowed_after_local =
3288 $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
3289 }
3290 }
3291
 // handle_keyword_replace() reads the current group from this member
3292 $this->_kw_replace_group = $k;
3293
3294 //NEW in 1.0.8, the cached regexp list
3295 // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
3296 for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set < $set_length; ++$set) {
3297 $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
3298 // Might make a more unique string for putting the number in soon
3299 // Basically, we don't put the styles in yet because then the styles themselves will
3300 // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
3301 $stuff_to_parse = preg_replace_callback(
3302 "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php))$disallowed_after_local/$modifiers",
3303 array($this, 'handle_keyword_replace'),
3304 $stuff_to_parse
3305 );
3306 }
3307 }
3308 }
3309
3310 // Regular expressions
 // An entry is either a plain pattern string or an array indexed by the
 // GESHI_SEARCH / GESHI_REPLACE / GESHI_MODIFIERS / GESHI_BEFORE / GESHI_AFTER
 // constants. Matches are tagged with a "!REG3XP<key>!" marker; the actual
 // class/style is put in further below.
3311 foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
3312 if ($this->lexic_permissions['REGEXPS'][$key]) {
3313 if (is_array($regexp)) {
3314 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3315 // produce valid HTML when we match multiple lines
3316 $this->_hmr_replace = $regexp[GESHI_REPLACE];
3317 $this->_hmr_before = $regexp[GESHI_BEFORE];
3318 $this->_hmr_key = $key;
3319 $this->_hmr_after = $regexp[GESHI_AFTER];
3320 $stuff_to_parse = preg_replace_callback(
3321 "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
3322 array($this, 'handle_multiline_regexps'),
3323 $stuff_to_parse);
3324 $this->_hmr_replace = false;
3325 $this->_hmr_before = '';
3326 $this->_hmr_after = '';
3327 } else {
3328 $stuff_to_parse = preg_replace(
3329 '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
3330 $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
3331 $stuff_to_parse);
3332 }
3333 } else {
3334 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3335 // produce valid HTML when we match multiple lines
3336 $this->_hmr_key = $key;
3337 $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
3338 array($this, 'handle_multiline_regexps'), $stuff_to_parse);
3339 $this->_hmr_key = '';
3340 } else {
3341 $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
3342 }
3343 }
3344 }
3345 }
3346
3347 // Highlight numbers. As of 1.0.8 we support different types of numbers
 // The regexps are only run if the text contains at least one digit
3348 $numbers_found = false;
3349 if ($this->lexic_permissions['NUMBERS'] && preg_match('#\d#', $stuff_to_parse )) {
3350 $numbers_found = true;
3351
3352 //For each of the formats ...
3353 foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3354 //Check if it should be highlighted ...
3355 $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
3356 }
3357 }
3358
3359 //
3360 // Now that's all done, replace /[number]/ with the correct styles
3361 //
3362 foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3363 if (!$this->use_classes) {
3364 $attributes = ' style="' .
3365 (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
3366 $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
3367 } else {
3368 $attributes = ' class="kw' . $k . '"';
3369 }
3370 $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
3371 }
3372
3373 if ($numbers_found) {
3374 // Put number styles in
3375 foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3376 //Commented out for now, as this needs some review ...
3377 // if ($numbers_permissions & $id) {
3378 //Get the appropriate style ...
3379 //Checking for unset styles is done by the style cache builder ...
3380 if (!$this->use_classes) {
3381 $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
3382 } else {
3383 $attributes = ' class="nu'.$id.'"';
3384 }
3385
3386 //Set in the correct styles ...
3387 $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
3388 // }
3389 }
3390 }
3391
3392 // Highlight methods and fields in objects
 // The patterns for the text before the splitter, the whitespace after it
 // and the member name can be overridden via PARSER_CONTROL['OOLANG']
3393 if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
3394 $oolang_spaces = "[\s]*";
3395 $oolang_before = "";
3396 $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
3397 if (isset($this->language_data['PARSER_CONTROL'])) {
3398 if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
3399 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
3400 $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
3401 }
3402 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
3403 $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
3404 }
3405 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
3406 $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
3407 }
3408 }
3409 }
3410
3411 foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
 // Cheap substring test before running the regexp
3412 if (false !== strpos($stuff_to_parse, $splitter)) {
3413 if (!$this->use_classes) {
3414 $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
3415 } else {
3416 $attributes = ' class="me' . $key . '"';
3417 }
 // Only the member name (\4) is wrapped; the splitter itself stays unstyled
3418 $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
3419 }
3420 }
3421 }
3422
3423 //
3424 // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
3425 // You try it, and see what happens ;)
3426 // TODO: Fix lexic permissions not converting entities if shouldn't
3427 // be highlighting regardless
3428 //
3429 if ($this->lexic_permissions['BRACKETS']) {
3430 $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
3431 $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
3432 }
3433
3434
3435 //FIX for symbol highlighting ...
3436 if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
3437 //Get all matches and throw away those within a block that is already highlighted... (i.e. matched by a regexp)
3438 $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
 // Accumulated length difference caused by the replacements done so far;
 // needed because the offsets captured above refer to the unmodified string
3439 $global_offset = 0;
3440 for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
3441 $symbol_match = $pot_symbols[$s_id][0][0];
3442 if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
3443 // already highlighted blocks _must_ include either < or >
3444 // so if this conditional applies, we have to skip this match
3445 // BenBE: UNLESS the block contains <SEMI> or <PIPE>
3446 if(strpos($symbol_match, '<SEMI>') === false &&
3447 strpos($symbol_match, '<PIPE>') === false) {
3448 continue;
3449 }
3450 }
3451
3452 // if we reach this point, we have a valid match which needs to be highlighted
3453
3454 $symbol_length = strlen($symbol_match);
3455 $symbol_offset = $pot_symbols[$s_id][0][1];
3456 unset($pot_symbols[$s_id]);
3457 $symbol_end = $symbol_length + $symbol_offset;
3458 $symbol_hl = "";
3459
3460 // if we have multiple styles, we have to handle them properly
3461 if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
3462 $old_sym = -1;
3463 // Split the current stuff to replace into its atomic symbols ...
3464 preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
3465 foreach ($sym_match_syms[0] as $sym_ms) {
3466 //Check if consecutive symbols belong to the same group to save output ...
3467 if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
3468 && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
3469 if (-1 != $old_sym) {
3470 $symbol_hl .= "|>";
3471 }
3472 $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
3473 if (!$this->use_classes) {
3474 $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
3475 } else {
3476 $symbol_hl .= '<| class="sy' . $old_sym . '">';
3477 }
3478 }
3479 $symbol_hl .= $sym_ms;
3480 }
3481 unset($sym_match_syms);
3482
3483 //Close remaining tags and insert the replacement at the right position ...
3484 //Take caution if symbol_hl is empty to avoid doubled closing spans.
3485 if (-1 != $old_sym) {
3486 $symbol_hl .= "|>";
3487 }
3488 } else {
3489 if (!$this->use_classes) {
3490 $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
3491 } else {
3492 $symbol_hl = '<| class="sy0">';
3493 }
3494 $symbol_hl .= $symbol_match . '|>';
3495 }
3496
3497 $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
3498
3499 // since we replace old text with something of different size,
3500 // we'll have to keep track of the differences
3501 $global_offset += strlen($symbol_hl) - $symbol_length;
3502 }
3503 }
3504 //FIX for symbol highlighting ...
3505
3506 // Add class/style for regexps
 // A callable style entry computes the style per match through
 // handle_regexps_callback(); otherwise the marker is replaced by a fixed
 // style or class attribute
3507 foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3508 if ($this->lexic_permissions['REGEXPS'][$key]) {
3509 if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
3510 $this->_rx_key = $key;
3511 $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
3512 array($this, 'handle_regexps_callback'),
3513 $stuff_to_parse);
3514 } else {
3515 if (!$this->use_classes) {
3516 $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
3517 } else {
3518 if (is_array($this->language_data['REGEXPS'][$key]) &&
3519 array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
3520 $attributes = ' class="' .
3521 $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
3522 } else {
3523 $attributes = ' class="re' . $key . '"';
3524 }
3525 }
3526 $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
3527 }
3528 }
3529 }
3530
3531 // Replace <DOT> with . for urls
3532 $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
3533 // Replace <|UR1| with <a href= for urls also
3534 if (isset($this->link_styles[GESHI_LINK])) {
3535 if ($this->use_classes) {
3536 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3537 } else {
3538 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
3539 }
3540 } else {
3541 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3542 }
3543
3544 //
3545 // NOW we add the span thingy ;)
3546 //
3547
3548 $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
3549 $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
 // Drop the space that was put in front of the text at the very beginning
3550 return substr($stuff_to_parse, 1);
3551 }
3552
/**
 * Stores the time taken to parse the code in $this->time.
 *
 * Both arguments are split at the space character, i.e. they are expected
 * in the "fraction seconds" form; the difference of the two time stamps
 * is stored.
 *
 * @param microtime The time when parsing started
 * @param microtime The time when parsing ended
 * @since 1.0.2
 * @access private
 */
function set_time($start_time, $end_time) {
    list($start_fraction, $start_seconds) = explode(' ', $start_time);
    list($end_fraction, $end_seconds) = explode(' ', $end_time);

    $this->time = $end_fraction + $end_seconds - $start_fraction - $start_seconds;
}
3566
/**
 * Returns the parse duration that was last stored by set_time().
 *
 * @return double The time taken to parse the code
 * @since 1.0.2
 */
function get_time() {
    $elapsed = $this->time;
    return $elapsed;
}
3576
/**
 * Merges arrays recursively, overwriting values of the first array with values of later arrays
 *
 * Accepts any number of arrays. Where a later array holds an array under a
 * key that is already set in the result, both values are merged
 * recursively; in every other case the later value replaces the earlier
 * one. If any argument is not an array, an E_USER_WARNING is raised and
 * false is returned - for a nested merge this means the affected key ends
 * up being false.
 *
 * @return mixed The merged array, or false if an argument was not an array
 * @since 1.0.8
 * @access private
 */
function merge_arrays() {
    $arrays = func_get_args();

    // check arguments
    // comment out if more performance is necessary (in this case the foreach loop will trigger a warning if the argument is not an array)
    foreach ($arrays as $index => $candidate) {
        if (!is_array($candidate)) {
            // also array_merge_recursive returns nothing in this case
            trigger_error('Argument #' . ($index+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
            return false;
        }
    }

    // the first array is in the output set in every case
    $ret = $arrays[0];

    // layer each of the remaining arrays on top of the result so far
    foreach (array_slice($arrays, 1) as $overlay) {
        foreach ($overlay as $key => $value) {
            // if $ret[$key] is not an array you try to merge a scalar value with an array - the result is not defined (incompatible arrays)
            // in this case the call will trigger an E_USER_WARNING and the $ret[$key] will be false.
            $ret[$key] = (is_array($value) && isset($ret[$key]))
                ? $this->merge_arrays($ret[$key], $value)
                : $value;
        }
    }

    return $ret;
}
3615
/**
 * Gets language information and stores it for later use
 *
 * Does nothing if the given file is the one already loaded. Otherwise the
 * parse cache is invalidated, highlighting is enabled, the file is
 * require'd (it is expected to fill $language_data) and the lexic
 * permissions for keywords, single line comments and regexps are set up.
 * PARSER_CONTROL['ENABLE_FLAGS'] of the language file may then overrule
 * these permissions. Finally an optional "<name>.style.php" file next to
 * the language file may override the styles through $style_data.
 *
 * @param string The filename of the language file you want to load
 * @since 1.0.0
 * @access private
 * @todo Needs to load keys for lexic permissions for keywords, regexps etc
 */
function load_language($file_name) {
    // this file is already loaded!
    if ($file_name == $this->loaded_language) {
        return;
    }

    //Prepare some stuff before actually loading the language file
    $this->loaded_language = $file_name;
    $this->parse_cache_built = false;
    $this->enable_highlighting();

    //Load the language file; it fills $language_data in this scope
    $language_data = array();
    require $file_name;

    // Perhaps some checking might be added here later to check that
    // $language data is a valid thing but maybe not
    $this->language_data = $language_data;

    // Set strict mode if should be set
    $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];

    // Set permissions for all lexics to true
    // so they'll be highlighted by default
    // (keyword groups without any keywords are switched off)
    foreach (array_keys($this->language_data['KEYWORDS']) as $group) {
        $this->lexic_permissions['KEYWORDS'][$group] = !empty($this->language_data['KEYWORDS'][$group]);
    }
    foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $comment_id) {
        $this->lexic_permissions['COMMENTS'][$comment_id] = true;
    }
    foreach (array_keys($this->language_data['REGEXPS']) as $regexp_id) {
        $this->lexic_permissions['REGEXPS'][$regexp_id] = true;
    }

    // for BenBE and future code reviews:
    // we can use empty here since we only check for existence and emptiness of an array
    // if it is not an array at all but rather false or null this will work as intended as well
    // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice
    if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
        foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
            // it's either true or false and maybe is true as well
            $perm = $value !== GESHI_NEVER;

            if ($flag == 'ALL') {
                $this->enable_highlighting($perm);
            } elseif (isset($this->lexic_permissions[$flag])) {
                // unknown lexic permissions never get here
                if (is_array($this->lexic_permissions[$flag])) {
                    foreach (array_keys($this->lexic_permissions[$flag]) as $sub_key) {
                        $this->lexic_permissions[$flag][$sub_key] = $perm;
                    }
                } else {
                    $this->lexic_permissions[$flag] = $perm;
                }
            }
        }
        unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
    }

    //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
    //You need to set one for HARDESCAPES only in this case.
    if (!isset($this->language_data['HARDCHAR'])) {
        $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
    }

    //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
    $style_filename = substr($file_name, 0, -4) . '.style.php';
    if (!is_readable($style_filename)) {
        return;
    }

    //Clear any style_data that could have been set before ...
    unset($style_data);

    //Read the Style Information from the style file
    include $style_filename;

    //Apply the new styles to our current language styles
    if (isset($style_data) && is_array($style_data)) {
        $this->language_data['STYLES'] =
            $this->merge_arrays($this->language_data['STYLES'], $style_data);
    }
}
3714
/**
 * Takes the parsed code and various options, and creates the HTML
 * surrounding it to make it look nice.
 *
 * The result replaces $parsed_code in place: header(), then the code lines
 * (wrapped in <li> elements, or in <pre>/<td> markup for the table and
 * no-line-number layouts), then footer().
 *
 * @param string The code already parsed (reference!)
 * @since 1.0.0
 * @access private
 */
function finalise(&$parsed_code) {
    // Remove end parts of important declarations
    // This is BUGGY!! My fault for bad code: fix coming in 1.2
    // @todo Remove this crap
    if ($this->enable_important_blocks &&
        (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
        $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
    }

    // Add HTML whitespace stuff if we're using the <div> header
    if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
        $this->indent($parsed_code);
    }

    // purge some unnecessary stuff (spans that only wrap whitespace)
    /** NOTE: memorypeak #1 */
    $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);

    // If we are using IDs for line numbers, there needs to be an overall
    // ID set to prevent collisions.
    if ($this->add_ids && !$this->overall_id) {
        $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
    }

    // Get code into lines
    /** NOTE: memorypeak #2 */
    $code = explode("\n", $parsed_code);
    $parsed_code = $this->header();

    // If we're using line numbers, we insert <li>s and appropriate
    // markup to style them (otherwise we don't need to do anything)
    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
        // If we're using the <pre> header, we shouldn't add newlines because
        // the <pre> will line-break them (and the <li>s already do this for us)
        $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';

        // Foreach line... ($i is advanced inside the body, see ++$i below)
        for ($i = 0, $n = count($code); $i < $n;) {
            //Reset the attributes for a new line ...
            $attrs = array();

            // Make lines have at least one space in them if they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }

            // If this is a "special line"...
            if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                // Set the attributes to style the line
                if ($this->use_classes) {
                    $attrs['class'][] = 'li2';
                    $def_attr = ' class="de2"';
                } else {
                    $attrs['style'][] = $this->line_style2;
                    // This style "covers up" the special styles set for special lines
                    // so that styles applied to special lines don't apply to the actual
                    // code on that line
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            } else {
                if ($this->use_classes) {
                    $attrs['class'][] = 'li1';
                    $def_attr = ' class="de1"';
                } else {
                    $attrs['style'][] = $this->line_style1;
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            }

            //Check which type of tag to insert for this line
            if ($this->header_type == GESHI_HEADER_PRE_VALID) {
                $start = "<pre$def_attr>";
                $end = '</pre>';
            } else {
                // Span or div?
                $start = "<div$def_attr>";
                $end = '</div>';
            }

            // From here on $i is the 1-based line number of the current line
            ++$i;

            // Are we supposed to use ids? If so, add them
            if ($this->add_ids) {
                $attrs['id'][] = "$this->overall_id-$i";
            }

            //Is this some line with extra styles???
            if (in_array($i, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$i])) {
                        $attrs['class'][] = "lx$i";
                    } else {
                        $attrs['class'][] = "ln-xtra";
                    }
                } else {
                    array_push($attrs['style'], $this->get_line_style($i));
                }
            }

            // Add in the line surrounded by appropriate list HTML
            $attr_string = '';
            foreach ($attrs as $key => $attr) {
                $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
            }

            $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
            // Free each line as soon as it has been emitted
            unset($code[$i - 1]);
        }
    } else {
        $n = count($code);
        if ($this->use_classes) {
            $attributes = ' class="de1"';
        } else {
            $attributes = ' style="'. $this->code_style .'"';
        }
        if ($this->header_type == GESHI_HEADER_PRE_VALID) {
            $parsed_code .= '<pre'. $attributes .'>';
        } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                if ($this->use_classes) {
                    $attrs = ' class="ln"';
                } else {
                    $attrs = ' style="'. $this->table_linenumber_style .'"';
                }
                $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
                // get linenumbers
                // we don't merge it with the for below, since it should be better for
                // memory consumption this way
                // @todo: but... actually it would still be somewhat nice to merge the two loops
                //        the mem peaks are at different positions
                for ($i = 0; $i < $n; ++$i) {
                    $close = 0;
                    // 1-based line number; the extra-line lookups below must use
                    // it, exactly as the membership test and the <li> branch do
                    $line_no = $i + 1;
                    // fancy lines
                    if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                        $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                        // Set the attributes to style the line
                        if ($this->use_classes) {
                            $parsed_code .= '<span class="xtra li2"><span class="de2">';
                        } else {
                            // This style "covers up" the special styles set for special lines
                            // so that styles applied to special lines don't apply to the actual
                            // code on that line
                            $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
                                .'<span style="' . $this->code_style .'">';
                        }
                        $close += 2;
                    }
                    //Is this some line with extra styles???
                    if (in_array($line_no, $this->highlight_extra_lines)) {
                        if ($this->use_classes) {
                            if (isset($this->highlight_extra_lines_styles[$line_no])) {
                                $parsed_code .= "<span class=\"xtra lx$line_no\">";
                            } else {
                                $parsed_code .= "<span class=\"xtra ln-xtra\">";
                            }
                        } else {
                            $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($line_no) . "\">";
                        }
                        ++$close;
                    }
                    $parsed_code .= $this->line_numbers_start + $i;
                    if ($close) {
                        $parsed_code .= str_repeat('</span>', $close);
                    } elseif ($line_no < $n) {
                        // separate the numbers, but no trailing newline after the
                        // last one (same rule as the code column below)
                        $parsed_code .= "\n";
                    }
                }
                $parsed_code .= '</pre></td><td'.$attributes.'>';
            }
            $parsed_code .= '<pre'. $attributes .'>';
        }
        // No line numbers, but still need to handle highlighting lines extra.
        // Have to use divs so the full width of the code is highlighted
        $close = 0;
        for ($i = 0; $i < $n; ++$i) {
            // 1-based line number used for all extra-line lookups
            $line_no = $i + 1;
            // Make lines have at least one space in them if they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }
            // fancy lines
            if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                // Set the attributes to style the line
                if ($this->use_classes) {
                    $parsed_code .= '<span class="xtra li2"><span class="de2">';
                } else {
                    // This style "covers up" the special styles set for special lines
                    // so that styles applied to special lines don't apply to the actual
                    // code on that line
                    $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
                        .'<span style="' . $this->code_style .'">';
                }
                $close += 2;
            }
            //Is this some line with extra styles???
            if (in_array($line_no, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$line_no])) {
                        $parsed_code .= "<span class=\"xtra lx$line_no\">";
                    } else {
                        $parsed_code .= "<span class=\"xtra ln-xtra\">";
                    }
                } else {
                    $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($line_no) . "\">";
                }
                ++$close;
            }

            $parsed_code .= $code[$i];

            if ($close) {
                // block-level spans already break the line, so no newline here
                $parsed_code .= str_repeat('</span>', $close);
                $close = 0;
            }
            elseif ($line_no < $n) {
                $parsed_code .= "\n";
            }
            unset($code[$i]);
        }

        if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
            $parsed_code .= '</pre>';
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            $parsed_code .= '</td>';
        }
    }

    $parsed_code .= $this->footer();
}
3963
/**
 * Builds the opening markup of the code block: the container tag with its
 * class/id/style attributes, the optional header content and, when line
 * numbers are enabled, the opening list or table markup.
 *
 * @return string The header for the code block
 * @since 1.0.0
 * @access private
 */
function header() {
    /**
     * @todo Document behaviour change - class is outputted regardless of whether
     * we're using classes or not. Same with style
     */
    $css_classes = $this->_genCSSName($this->language);
    if ($this->overall_class != '') {
        $css_classes .= ' ' . $this->_genCSSName($this->overall_class);
    }
    $attributes = ' class="' . $css_classes . '"';

    if ($this->overall_id != '') {
        $attributes .= ' id="' . $this->overall_id . '"';
    }
    if ($this->overall_style != '' && !$this->use_classes) {
        $attributes .= ' style="' . $this->overall_style . '"';
    }

    // Only emit a start attribute on the list when numbering does not begin at 1
    $ol_attributes = ($this->line_numbers_start != 1)
        ? ' start="' . $this->line_numbers_start . '"'
        : '';

    // Prepare the user supplied header content, if there is any
    $header = $this->header_content;
    if ($header) {
        if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) {
            $header = str_replace("\n", '', $header);
        }
        $header = $this->replace_keywords($header);

        $attr = $this->use_classes
            ? ' class="head"'
            : ' style="' . $this->header_content_style . '"';

        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            $header = '<thead><tr><td colspan="2" ' . $attr . '>' . $header . '</td></tr></thead>';
        } else {
            $header = '<div' . $attr . '>' . $header . '</div>';
        }
    }

    $type = $this->header_type;
    $with_numbers = ($this->line_numbers != GESHI_NO_LINE_NUMBERS);
    $code_block = $this->force_code_block ? '<div>' : '';

    // No surrounding container at all
    if (GESHI_HEADER_NONE == $type) {
        if ($with_numbers) {
            return $header . '<ol' . $attributes . $ol_attributes . '>';
        }
        return $header . $code_block;
    }

    // Container without line numbers: <pre> for the PRE header, <div> otherwise
    if (!$with_numbers) {
        $tag = ($type == GESHI_HEADER_PRE) ? 'pre' : 'div';
        return '<' . $tag . $attributes . '>' . $header . $code_block;
    }

    // Container with line numbers
    if ($type == GESHI_HEADER_PRE) {
        return '<pre' . $attributes . '>' . $header . '<ol' . $ol_attributes . '>';
    }
    if ($type == GESHI_HEADER_DIV || $type == GESHI_HEADER_PRE_VALID) {
        return '<div' . $attributes . '>' . $header . '<ol' . $ol_attributes . '>';
    }
    if ($type == GESHI_HEADER_PRE_TABLE) {
        return '<table' . $attributes . '>' . $header . '<tbody><tr class="li1">';
    }
    // any other header type combined with line numbers yields no return value
}
4043
/**
 * Returns the footer for the code block: the optional footer content
 * followed by the closing tags matching what header() opened.
 *
 * @return string The footer for the code block
 * @since 1.0.0
 * @access private
 */
function footer() {
    $footer = $this->footer_content;
    if ($footer) {
        if ($this->header_type == GESHI_HEADER_PRE) {
            $footer = str_replace("\n", '', $footer);
        }
        $footer = $this->replace_keywords($footer);

        if ($this->use_classes) {
            $attr = ' class="foot"';
        } else {
            $attr = " style=\"{$this->footer_content_style}\"";
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            // Apply the footer class/style to the table cell as well, in the
            // same way header() does for its <thead> cell; otherwise the
            // footer styling is silently lost in table mode.
            $footer = "<tfoot><tr><td colspan=\"2\"$attr>$footer</td></tr></tfoot>";
        } else {
            $footer = "<div$attr>$footer</div>";
        }
    }

    if (GESHI_HEADER_NONE == $this->header_type) {
        return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
    }

    if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</div>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    }
    elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</tr></tbody>$footer</table>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    }
    else {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</pre>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</pre>";
    }
}
4097
/**
 * Substitutes the placeholders TIME, LANGUAGE, VERSION and SPEED in header
 * or footer content with the corresponding values. Each placeholder is
 * recognised in both its <NAME> and {NAME} spelling.
 *
 * @param string The header or footer content to do replacement on
 * @return string The header or footer with replaced keywords
 * @since 1.0.2
 * @access private
 */
function replace_keywords($instr) {
    $time = $this->get_time();

    // Placeholder name => replacement value, in substitution order
    $values = array();
    $values['TIME'] = number_format($time, 3);
    $values['LANGUAGE'] = $this->language_data['LANG_NAME'];
    $values['VERSION'] = GESHI_VERSION;

    if ($time <= 0) {
        $values['SPEED'] = 'N/A';
    } else {
        $bytes_per_second = strlen($this->source) / $time;
        $values['SPEED'] = ($bytes_per_second >= 1024)
            ? sprintf("%.2f KB/s", $bytes_per_second / 1024.0)
            : sprintf("%.0f B/s", $bytes_per_second);
    }

    // Expand every name into its two spellings
    $keywords = array();
    $replacements = array();
    foreach ($values as $name => $value) {
        $keywords[] = '<' . $name . '>';
        $replacements[] = $value;
        $keywords[] = '{' . $name . '}';
        $replacements[] = $value;
    }

    return str_replace($keywords, $replacements, $instr);
}
4138
/**
 * Secure replacement for PHP built-in function htmlspecialchars().
 *
 * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
 * for this replacement function.
 *
 * The INTERFACE for this function is almost the same as that for
 * htmlspecialchars(), with the same default for quote style; however, there
 * is no 'charset' parameter. The reason for this is as follows:
 *
 * The PHP docs say:
 *      "The third argument charset defines character set used in conversion."
 *
 * I suspect PHP's htmlspecialchars() is working at the byte-value level and
 * thus _needs_ to know (or assume) a character set because the special
 * characters to be replaced could exist at different code points in
 * different character sets. (If indeed htmlspecialchars() works at
 * byte-value level that goes some way towards explaining why the
 * vulnerability would exist in this function, too, and not only in
 * htmlentities() which certainly is working at byte-value level.)
 *
 * This replacement function however works at character level and should
 * therefore be "immune" to character set differences - so no charset
 * parameter is needed or provided. If a third parameter is passed, it will
 * be silently ignored.
 *
 * In the OUTPUT there is a minor difference in that we use '&#39;' instead
 * of PHP's '&#039;' for a single quote: this provides compatibility with
 *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
 * (see comment by mikiwoz at yahoo dot co dot uk on
 * http://php.net/htmlspecialchars); it also matches the entity definition
 * for XML 1.0
 * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
 * Like PHP we use a numeric character reference instead of '&apos;' for the
 * single quote. For the other special characters we use the named entity
 * references, as PHP is doing.
 *
 * In addition to the HTML special characters, ';' and '|' are always
 * translated to the internal markers '<SEMI>' and '<PIPE>'.
 *
 * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
 *
 * @license     http://www.gnu.org/copyleft/lgpl.html
 *              GNU Lesser General Public License
 * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
 *              Wikka Development Team}
 *
 * @access      private
 * @param       string  $string string to be converted
 * @param       integer $quote_style
 *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
 *                      - ENT_NOQUOTES: escapes only &, < and >
 *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
 * @return      string  converted string
 * @since       1.0.7.18
 */
function hsc($string, $quote_style = ENT_COMPAT) {
    // init
    static $aTransSpecchar = array(
        '&' => '&amp;',
        '"' => '&quot;',
        '<' => '&lt;',
        '>' => '&gt;',

        //This fix is related to SF#1923020, but has to be applied
        //regardless of actually highlighting symbols.

        //Circumvent a bug with symbol highlighting
        //This is required as ; would produce undesirable side-effects if it
        //was not to be processed as an entity.
        ';' => '<SEMI>', // Force ; to be processed as entity
        '|' => '<PIPE>' // Force | to be processed as entity
        );                      // ENT_COMPAT set

    // Work on a per-call copy: the table above is static, so modifying it
    // directly would leak the quote style of this call into every later call.
    $aTrans = $aTransSpecchar;

    switch ($quote_style) {
        case ENT_NOQUOTES: // don't convert double quotes
            unset($aTrans['"']);
            break;
        case ENT_QUOTES: // convert single quotes as well
            $aTrans["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
            break;
    }

    // return translated string
    return strtr($string, $aTrans);
}
4222
/**
 * Returns the given name prefixed with an underscore when its first
 * character is numeric, and unchanged otherwise.
 *
 * An empty name is returned as is (without raising a string offset notice).
 *
 * @param string The name to convert
 * @return string The name, possibly prefixed with '_'
 * @access private
 */
function _genCSSName($name){
    // isset() guards the offset access so that an empty name does not
    // trigger an "uninitialized string offset" notice/warning
    return ((isset($name[0]) && is_numeric($name[0])) ? '_' : '') . $name;
}
4226
/**
 * Builds a CSS stylesheet for the highlighted code from the styles of the
 * current language and the settings of this object. In economy mode only
 * the declarations that matter for this code block are emitted (e.g. no
 * line number rules when line numbers are off, no rules for disabled
 * lexic groups); otherwise the whole set is returned.
 *
 * @param boolean Whether to use economy mode or not
 * @return string A stylesheet built on the data for the current language
 *                (an empty string if the object is in an error state)
 * @since 1.0.0
 */
function get_stylesheet($economy_mode = true) {
    // If there's an error, chances are that the language file
    // won't have populated the language data file, so we can't
    // risk getting a stylesheet...
    if ($this->error) {
        return '';
    }

    //Make sure the style rearrangements have been processed ...
    if (!isset($this->language_data['NUMBERS_CACHE'])) {
        $this->build_style_cache();
    }

    // Selector prefix for every rule: the overall ID if there is one,
    // otherwise the language class (plus the overall class, if set)
    if ($this->overall_id) {
        $selector = '#' . $this->_genCSSName($this->overall_id);
    } else {
        $selector = '.' . $this->_genCSSName($this->language);
        if ($this->overall_class) {
            $selector .= '.' . $this->_genCSSName($this->overall_class);
        }
    }
    $selector .= ' ';

    // Header of the stylesheet
    $credits = " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
        " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n";
    if ($economy_mode) {
        $stylesheet = "/**\n" . $credits . " */\n";
    } else {
        $stylesheet = "/**\n" .
            " * GeSHi Dynamically Generated Stylesheet\n" .
            " * --------------------------------------\n" .
            " * Dynamically generated stylesheet for " . $this->language . "\n" .
            " * CSS class: " . $this->overall_class . ", CSS id: " . $this->overall_id . "\n" .
            $credits .
            " * --------------------------------------\n" .
            " */\n";
    }

    // Rules tied to line numbers are emitted when not in economy mode,
    // or when line numbers are actually in use
    $want_line_rules = !$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS;

    // Default styles for the code on each line
    if ($want_line_rules) {
        $stylesheet .= $selector . '.de1, ' . $selector . '.de2 {' . $this->code_style . "}\n";
    }

    // Overall styles (empty styles are meaningless, regardless of economy mode)
    if ($this->overall_style != '') {
        $stylesheet .= $selector . ' {' . $this->overall_style . "}\n";
    }

    // Link styles: one rule per non-empty, known link state
    foreach ($this->link_styles as $key => $style) {
        if ($style == '') {
            continue;
        }
        if ($key == GESHI_LINK) {
            $state = 'link';
        } elseif ($key == GESHI_HOVER) {
            $state = 'hover';
        } elseif ($key == GESHI_ACTIVE) {
            $state = 'active';
        } elseif ($key == GESHI_VISITED) {
            $state = 'visited';
        } else {
            continue;
        }
        $stylesheet .= $selector . 'a:' . $state . ' {' . $style . "}\n";
    }

    // Header and footer
    if ($this->header_content_style != '') {
        $stylesheet .= $selector . '.head {' . $this->header_content_style . "}\n";
    }
    if ($this->footer_content_style != '') {
        $stylesheet .= $selector . '.foot {' . $this->footer_content_style . "}\n";
    }

    // Styles for important stuff
    if ($this->important_styles != '') {
        $stylesheet .= $selector . '.imp {' . $this->important_styles . "}\n";
    }

    // Simple line number styles
    if ($want_line_rules && $this->line_style1 != '') {
        $stylesheet .= $selector . 'li, ' . $selector . '.li1 {' . $this->line_style1 . "}\n";
    }
    if ($want_line_rules && $this->table_linenumber_style != '') {
        $stylesheet .= $selector . '.ln {' . $this->table_linenumber_style . "}\n";
    }
    // Style for fancy line numbers, if one is set
    if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
        $stylesheet .= $selector . '.li2 {' . $this->line_style2 . "}\n";
    }

    // Keyword groups: in economy mode only groups whose permission is set and true
    foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if (!$economy_mode || !empty($this->lexic_permissions['KEYWORDS'][$group])) {
            $stylesheet .= $selector . '.kw' . $group . ' {' . $styles . "}\n";
        }
    }

    // Comment groups: in economy mode a group counts if it is permitted
    // or if a comment regexp is defined for it
    foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if (!$economy_mode ||
            !empty($this->lexic_permissions['COMMENTS'][$group]) ||
            (!empty($this->language_data['COMMENT_REGEXP']) &&
            !empty($this->language_data['COMMENT_REGEXP'][$group]))) {
            $stylesheet .= $selector . '.co' . $group . ' {' . $styles . "}\n";
        }
    }

    // Groups governed by a single permission flag: style key => class prefix
    $flag_groups = array(
        'ESCAPE_CHAR' => 'es',
        'BRACKETS'    => 'br',
        'SYMBOLS'     => 'sy',
        'STRINGS'     => 'st',
        'NUMBERS'     => 'nu',
        'METHODS'     => 'me'
    );
    // NEW: since 1.0.8 hardescapes and hardquotes use the '_h' class suffix
    $has_hard_group = array('ESCAPE_CHAR' => true, 'STRINGS' => true);
    foreach ($flag_groups as $type => $prefix) {
        foreach ($this->language_data['STYLES'][$type] as $group => $styles) {
            if ($styles == '' || ($economy_mode && !$this->lexic_permissions[$type])) {
                continue;
            }
            if (isset($has_hard_group[$type]) && $group === 'HARD') {
                $group = '_h';
            }
            $stylesheet .= $selector . '.' . $prefix . $group . ' {' . $styles . "}\n";
        }
    }

    // Script styles (economy mode is ignored, empty styles are meaningless)
    foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
        if ($styles != '') {
            $stylesheet .= $selector . '.sc' . $group . ' {' . $styles . "}\n";
        }
    }

    // Regexp groups: use the custom class of the regexp when it defines one
    foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if ($economy_mode && empty($this->lexic_permissions['REGEXPS'][$group])) {
            continue;
        }
        if (is_array($this->language_data['REGEXPS'][$group]) &&
            array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
            $css_class = $this->language_data['REGEXPS'][$group][GESHI_CLASS];
        } else {
            $css_class = 're' . $group;
        }
        $stylesheet .= $selector . '.' . $css_class . ' {' . $styles . "}\n";
    }

    // Styles for lines being highlighted extra
    if (!$economy_mode || (count($this->highlight_extra_lines) != count($this->highlight_extra_lines_styles))) {
        $stylesheet .= $selector . '.ln-xtra, ' . $selector . 'li.ln-xtra, ' . $selector . 'div.ln-xtra {'
            . $this->highlight_extra_lines_style . "}\n";
    }
    $stylesheet .= $selector . "span.xtra { display:block; }\n";
    foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
        $stylesheet .= $selector . '.lx' . $lineid . ', ' . $selector . 'li.lx' . $lineid . ', '
            . $selector . 'div.lx' . $lineid . ' {' . $linestyle . "}\n";
    }

    return $stylesheet;
}
4432
/**
 * Gets the style that is used for the specified line: the style assigned
 * to that line individually if there is one, otherwise the general style
 * for extra highlighted lines.
 *
 * @param int The line number information is requested for
 * @return string The style to use for that line
 * @access private
 * @since 1.0.7.21
 */
function get_line_style($line) {
    return isset($this->highlight_extra_lines_styles[$line])
        ? $this->highlight_extra_lines_styles[$line]
        : $this->highlight_extra_lines_style; // no "extra" style assigned
}
4451
/**
 * this functions creates an optimized regular expression list
 * of an array of strings.
 *
 * Example:
 * <code>$list = array('faa', 'foo', 'foobar');
 *          => string 'f(aa|oo(bar)?)'</code>
 *
 * The entries are sorted, quoted with preg_quote() and merged into a tree
 * of common prefixes, which is then converted to regexp strings. The result
 * is split over several strings when the GESHI_MAX_PCRE_LENGTH or
 * GESHI_MAX_PCRE_SUBPATTERNS limits are exceeded.
 *
 * @param $list array of (unquoted) strings
 * @param $regexp_delimiter your regular expression delimiter, @see preg_quote()
 * @return array list of strings for use in regular expressions
 * @author Milian Wolff <[email protected]>
 * @since 1.0.8
 * @access private
 */
function optimize_regexp_list($list, $regexp_delimiter = '/') {
    $regex_chars = array('.', '\\', '+', '*', '?', '[', '^', ']', '$',
        '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
    sort($list);
    $regexp_list = array('');
    $num_subpatterns = 0;
    $list_key = 0;

    // the tokens which we will use to generate the regexp list
    $tokens = array();
    $prev_keys = array();
    // go through all entries of the list and generate the token list
    $cur_len = 0;
    for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
        if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
            // seems like the length of this pcre is growing exorbitantly
            $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
            $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
            $tokens = array();
            $cur_len = 0;
        }
        $level = 0;
        $entry = preg_quote((string) $list[$i], $regexp_delimiter);
        $pointer = &$tokens;
        // properly assign the new entry to the correct position in the token array
        // possibly generate smaller common denominator keys
        while (true) {
            // get the common denominator
            if (isset($prev_keys[$level])) {
                if ($prev_keys[$level] == $entry) {
                    // this is a duplicate entry, skip it
                    continue 2;
                }
                $char = 0;
                while (isset($entry[$char]) && isset($prev_keys[$level][$char])
                        && $entry[$char] == $prev_keys[$level][$char]) {
                    ++$char;
                }
                if ($char > 0) {
                    // this entry has at least some chars in common with the current key
                    if ($char == strlen($prev_keys[$level])) {
                        // current key is totally matched, i.e. this entry has just some bits appended
                        $pointer = &$pointer[$prev_keys[$level]];
                    } else {
                        // only part of the keys match
                        $new_key_part1 = substr($prev_keys[$level], 0, $char);
                        $new_key_part2 = substr($prev_keys[$level], $char);

                        if (in_array($new_key_part1[0], $regex_chars)
                            || in_array($new_key_part2[0], $regex_chars)) {
                            // this is bad, a regex char as first character
                            $pointer[$entry] = array('' => true);
                            array_splice($prev_keys, $level, count($prev_keys), $entry);
                            $cur_len += strlen($entry);
                            // the next pass sees $entry as the previous key at
                            // this level and leaves through the duplicate check
                            continue;
                        } else {
                            // relocate previous tokens
                            $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
                            unset($pointer[$prev_keys[$level]]);
                            $pointer = &$pointer[$new_key_part1];
                            // recreate key index
                            array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
                            $cur_len += strlen($new_key_part2);
                        }
                    }
                    ++$level;
                    $entry = substr($entry, $char);
                    continue;
                }
                // else: fall trough, i.e. no common denominator was found
            }
            if ($level == 0 && !empty($tokens)) {
                // we can dump current tokens into the string and throw them away afterwards
                $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
                $new_subpatterns = substr_count($new_entry, '(?:');
                if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                    $regexp_list[++$list_key] = $new_entry;
                    $num_subpatterns = $new_subpatterns;
                } else {
                    // compare against '' explicitly: empty() would also treat
                    // the regexp "0" as empty and drop the '|' separator
                    if ($regexp_list[$list_key] !== '') {
                        $new_entry = '|' . $new_entry;
                    }
                    $regexp_list[$list_key] .= $new_entry;
                    $num_subpatterns += $new_subpatterns;
                }
                $tokens = array();
                $cur_len = 0;
            }
            // no further common denominator found
            $pointer[$entry] = array('' => true);
            array_splice($prev_keys, $level, count($prev_keys), $entry);

            $cur_len += strlen($entry);
            break;
        }
        unset($list[$i]);
    }
    // make sure the last tokens get converted as well
    $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
    if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
        // start a new list entry unless the current one is still unused
        // ('' check instead of empty(), so that a regexp "0" is not overwritten)
        if ($regexp_list[$list_key] !== '') {
            ++$list_key;
        }
        $regexp_list[$list_key] = $new_entry;
    } else {
        if ($regexp_list[$list_key] !== '') {
            $new_entry = '|' . $new_entry;
        }
        $regexp_list[$list_key] .= $new_entry;
    }
    return $regexp_list;
}
/**
 * Builds the regular expression string for a tree of tokens.
 * You should not call this function directly, @see $this->optimize_regexp_list().
 *
 * Every key of $tokens is a literal piece of a pattern; its value is an
 * array of the pieces that may follow it. An entry with the empty string
 * as key marks that the pattern may also end at this point. The pieces
 * of one level are joined with '|', nested levels are wrapped into a
 * non-capturing group '(?:...)', and the group is made optional ('?')
 * if the pattern may end in front of it.
 *
 * On the outermost call (when $recursed is false) two simplifications
 * are applied to the finished string:
 *   (?:p)?        => p?        (optional group around a single character)
 *   (?:a|b|c|...) => [abc...]  (alternation of single characters)
 *
 * @param array &$tokens  tree of tokens; it is only read, never modified
 * @param bool  $recursed whether this call is a recursion, which skips
 *                        the simplifications described above
 * @return string the regular expression fragment without a trailing pipe
 * @author Milian Wolff <[email protected]>
 * @since 1.0.8
 * @access private
 */
function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
    $list = '';
    foreach ($tokens as $token => $sub_tokens) {
        $list .= $token;
        // the '' key flags that the string built so far is a complete entry
        $close_entry = isset($sub_tokens['']);
        // $sub_tokens is the foreach copy, so $tokens itself stays untouched
        unset($sub_tokens['']);
        if (!empty($sub_tokens)) {
            $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
            if ($close_entry) {
                // make sub_tokens optional
                $list .= '?';
            }
        }
        $list .= '|';
    }
    if (!$recursed) {
        // do some optimizations
        // NOTE: merging common trailing strings used to be attempted here,
        // but that optimization was buggy and is therefore not performed.

        // (?:p)? => p?
        $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
        // (?:a|b|c|d|...) => [abcd...]
        // TODO: a|bb|c => [ac]|bb
        // A method callback is used instead of create_function(), which
        // is no longer available as of PHP 8.0.
        $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#',
            array($this, '_optimize_regexp_list_alternation_to_class'), $list);
    }
    // return $list without trailing pipe
    return substr($list, 0, -1);
}

/**
 * Callback for preg_replace_callback(): turns the captured alternation
 * of single characters (e.g. "a|b|c") into a character class ("[abc]").
 * You should not call this function directly,
 * @see $this->_optimize_regexp_list_tokens_to_string().
 *
 * @param array $matches match data; index 1 holds the alternation body
 * @return string the equivalent character class
 * @access private
 */
function _optimize_regexp_list_alternation_to_class($matches) {
    return '[' . str_replace('|', '', $matches[1]) . ']';
}
4624} // End Class GeSHi
4625
4626
if (!function_exists('geshi_highlight')) {
    /**
     * Shortcut for highlighting a piece of code in one call, modelled on
     * PHP's highlight_string().
     *
     * A GeSHi object is created for the given source and language, its
     * header type is set to GESHI_HEADER_NONE, and the parsed code is
     * wrapped in a <code> element. The result is either returned or echoed.
     *
     * @param string  The code to highlight
     * @param string  The language to highlight the code in
     * @param string  The path to the language files. May be left out
     *                (null); as from version 1.0.7 the path should be
     *                detected automatically
     * @param boolean Whether to return the result instead of echoing it
     * @return string|boolean The highlighted code if $return is true.
     *                Otherwise the code is echoed and the return value is
     *                false if the GeSHi object reports an error, true if not
     * @since 1.0.2
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $highlighter = new GeSHi($string, $language, $path);
        $highlighter->set_header_type(GESHI_HEADER_NONE);

        $markup = '<code>' . $highlighter->parse_code() . '</code>';

        if (!$return) {
            echo $markup;
            // report success as a plain boolean, checked after the output
            return !$highlighter->error();
        }

        return $markup;
    }
}
4655
4656?>
Note: See TracBrowser for help on using the repository browser.