source: trunk/gsdl/bin/script/expand_macros.pl@ 10510

Last change on this file since 10510 was 10510, checked in by kjdon, 19 years ago

a script that allows you to browse through macros and see what they expand to. Contributed by Jens Wille

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 19.4 KB
Line 
1#! /usr/bin/perl
2
3##################################################################################
4# #
5# expand_macros.pl -- recursively expand greenstone macros / 1080805 - 7140805 #
6# #
7# Copyright (C) 2005 Jens Wille <j_wille at gmx.net> #
8# #
9# This program is free software; you can redistribute it and/or #
10# modify it under the terms of the GNU General Public License #
11# as published by the Free Software Foundation; either version 2 #
12# of the License, or (at your option) any later version. #
13# #
14# This program is distributed in the hope that it will be useful, #
15# but WITHOUT ANY WARRANTY; without even the implied warranty of #
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
17# GNU General Public License for more details. #
18# #
19# You should have received a copy of the GNU General Public License #
20# along with this program; if not, write to the Free Software #
21# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #
22# #
23##################################################################################
24
25#
26# expand_macros.pl reads in specified greenstone macro files and prints the
27# definitions of requested macros and recursively the definitions of macros
28# used within these definitions.
29#
30# it also has an "interactive browse mode" where you can select which macro
31# (and from which package) to display next.
32#
33# you can even search for macros that use certain macros ("reverse" search)
34# - recursively!
35#
36
37use strict;
38use warnings;
39
40use Getopt::Long qw(GetOptions);
41
42use File::Basename qw(basename);
43use File::Spec::Functions qw(catdir catfile);
44
45use IO::Handle qw(autoflush);
46STDOUT->autoflush(1);
47
48
49### progname and version
50
51my $NAME = basename $0;
52my $VERSION = '0.12';
53
54
55### command line arguments
56
57# variable initialisation and default values
58my ($verbose, $depth, $short, $reverse, $browse, $paged, $pager)
59 = (0, '', 0, 0, 0, 0, 'less');
60
61my @macro_dirs = (catdir($ENV{'GSDLHOME'}, 'macros'));
62my @macros = ();
63
64# usage information and help text
65my $USG = <<HERE_USG;
66usage:
67 $NAME [generic-options] [-d <depth>] [-s] [-r] [<package>:]<macro> ...
68 $NAME [generic-options] {-b|-i} [-p]
69 $NAME [-h|-?|--help]
70
71 generic options are: [-v] [-e <directory>,...]
72HERE_USG
73
74my $HLP = <<HERE_HLP;
75$NAME: recursively expand greenstone macros (v$VERSION)
76
77$USG
78
79generic options:
80 -h, -?, --help display this help and exit
81 -v, --verbose output some extra information/warnings
82
83 {-e|--extra} <directory>,... paths to extra macro directories, comma-separated list (default directory: '$macro_dirs[0]')
84
85batch mode:
86 {-d|--depth} <depth> how deep to recurse through macros (default: '$depth', meaning 'infinite')
87
88 -s, --short short output, i.e. only macro names, no content
89 -ss really short, i.e. no content, no recursion (equivalent to '-s -d 0')
90
91 -r, --reverse "reverse" search, outputs the macros which _use_ the specified macro(s)
92
93 all non-option arguments will be treated as macro names (without surrounding '_')
94 you can restrict your query to a certain package by prepending the macro name with '<package-name>:'
95
96interactive browse mode:
97 -b, -i, --browse interactive browse mode, just try it ;-)
98
99 -p, --paged data output will be passed to a pager (default: '$pager')
100 --pager <pager> pass paged output to specified pager instead of above named default
101HERE_HLP
102
103# allow for bundling of options
104Getopt::Long::Configure ("bundling");
105
106# parse arguments
107GetOptions( 'help|h|?' => sub { print "$HLP\n"; exit 0 },
108 'verbose|v' => \$verbose,
109 'extra|e=s' => \@macro_dirs,
110 'depth|d=i' => \$depth,
111 'short|s+' => \$short,
112 'reverse|r' => \$reverse,
113 'browse|b|i' => \$browse,
114 'paged|p' => \$paged,
115 'pager=s' => \$pager,
116 '<>' => sub { push(@macros => @_) } )
117 or die "$USG\n";
118
119
120### some sanity checks
121
122# need one of our "actions": batch or browse
123# ("batch" requiring at least one macro name specified)
124die "$USG\n"
125 unless @macros || $browse;
126
127# need GSDLHOME for default macro directory
128# (does also allow to have the script in gsdl bin path)
129die "GSDLHOME not set! please change into the directory where greenstone has been installed and run/source the appropriate setup script.\n"
130 unless $ENV{'GSDLHOME'};
131
132# get the pager executable
133# no need to test if not in "paged" mode
134unless (! $paged || -x $pager) {
135 # get first pager executable in PATH
136 foreach my $path (split(':' => $ENV{'PATH'})) {
137 if (-x catfile($path, $pager)) {
138 $pager = catfile($path, $pager);
139 last;
140 }
141 }
142
143 # still no executable?
144 die "can't find pager '$pager'!\n"
145 unless -x $pager;
146}
147
148
149### action!
150
151# build hash of macro information
152my %db = build_db();
153die "macro db empty!\n"
154 unless %db;
155
156unless ($browse) {
157 # batch mode
158
159 my $n = 0;
160 foreach my $macro (@macros) {
161 unless ($reverse) {
162 # "normal" search
163
164 get_macro(\%db, $macro);
165 }
166 else {
167 # "reverse" search
168
169 print "*** $macro ***\n\n";
170
171 # get the macros that use the specified macro
172 my @refs = get_r_macros(\%db, $macro);
173 warn "no macro referencing '$macro'\n"
174 unless @refs;
175
176 # now recurse those macros
177 foreach my $m (@refs) {
178 get_macro(\%db, $m);
179 }
180 }
181
182 # print separator _between_ requested macros (i.e. everytime but the last time)
183 # (need to add extra newline for short display)
184 print(($short ? "\n" : ''), '-' x 80, "\n\n")
185 unless ++$n >= @macros;
186 }
187}
188else {
189 # interactive browse mode
190
191 # repeat until explicit exit
192 while (1) {
193 print "enter macro name (without package specification) [leave empty to quit]\n> ";
194 my $macro = <STDIN>;
195 die "\n"
196 unless defined $macro; # allow for exiting by hitting <ctrl>+d
197 chomp $macro;
198
199 exit 0
200 unless length $macro; # normal exit
201
202 # now get all packages for given macro, and begin recursion...
203 recurse_packages(\%db, $macro);
204 }
205}
206
207
208### that's it ;-)
209
210exit 0;
211
212
213### subroutines
214
215# <sub build_db>
216# build hash of macro information ("macro db")
217#
218# hash structure:
219# macro
220# -> package
221# -> 'file'
222# -> 'line'
223# -> 'content'
224#
225# usage:
226# %db = build_db()
227#
228# => db: returned hash ("macro db")
229#
230sub build_db {
231 my %db = ();
232 my @dm = ();
233 my ($n, $m) = (0, 0);
234
235 # get all macro files (*.dm) from specified directories
236 foreach my $dir (@macro_dirs) {
237 opendir(DIR, "$dir")
238 or die "can't read macro directory '$dir': $!\n";
239
240 push(@dm => map { $_ = catfile($dir, $_) } grep { /\.dm$/ } readdir(DIR));
241
242 closedir DIR;
243 }
244
245 # now parse each macro file and build hash
246 foreach my $dm (sort @dm) {
247 open(DM, "< $dm")
248 or die "can't open macro file '$dm': $!\n";
249
250 my ($name, $content, $curpkg, $contd) = ('', '', '', 0);
251 while (my $line = <DM>) {
252 chomp($line);
253 next unless length $line; # skip empty lines
254 next if $line =~ /^\s*$/; # skip "empty" lines
255 next if $line =~ /^\s*#/; # skip comments (i hope this doesn't
256 # affect cases we actually wanted to keep)
257
258 # is this sufficient???
259 if ($line =~ /^package\s+(\w+)/) {
260 # remember the current package we are in
261 $curpkg = $1;
262 }
263 # my understanding of greenstone macro names:
264 # - enclosed in underscores ('_')
265 # - starts with a letter ([:alpha:])
266 # - followed by alphanumeric characters ([:alnum:])
267 # (consequently, it doesn't start with a number and
268 # particularly isn't a macro parameter, i.e. something like _1_)
269 # - also we need to take care of escaped underscores ('\_')
270 #
271 # => does this fit???
272 elsif ($line =~ /(?<!\\)_([[:alpha:]][[:alnum:]]*)(?<!\\)_\s*\{\s*(.*)/) {
273 # start of macro definition
274 $n++;
275
276 $name = $1;
277 $content = $2 || '';
278
279 # is the macro definition already finished?
280 $contd = ($content =~ s/\s*(?<!\\)\}.*//) ? 0 : 1;
281
282 if (exists $db{$name}->{$curpkg}) {
283 # everytime a macro definition already exists, it's simply
284 # overwritten - but we can give a warning
285 # (this might also serve debugging purposes)
286 $m++;
287
288 warn <<HERE_WARN if $verbose;
289duplicate definition of macro '$curpkg:$name' at '$dm', line $.
290(previously defined at $db{$name}->{$curpkg}->{'file'}, line $db{$name}->{$curpkg}->{'line'})
291HERE_WARN
292 }
293
294 # store the information we got so far
295 $db{$name}->{$curpkg}->{'file'} = $dm;
296 $db{$name}->{$curpkg}->{'line'} = $.;
297 $db{$name}->{$curpkg}->{'content'} = [$content] if length $content;
298 }
299 elsif ($contd) {
300 # continuation of macro definition
301
302 # is the macro definition already finished?
303 $contd = ($line =~ s/\s*(?<!\\)\}.*//) ? 0 : 1;
304
305 # store additional content
306 push(@{$db{$name}->{$curpkg}->{'content'}} => $line);
307 }
308 else {
309 # something else...
310
311 ($name, $content) = ('', '');
312 }
313 }
314
315 close DM;
316 }
317
318 # print some statistics
319 print "$n total macro definitions, $m duplicates\n"
320 if $verbose;
321
322 # we stored all information there is so we can return it
323 return %db;
324}
325# </sub build_db>
326
327# <sub get_macro>
328# recursively print macro information
329#
330# usage:
331# get_macro($db, $macro[, $level])
332#
333# db: hash reference to macro db
334# macro: macro name (optionally including package specification)
335# level: recursion level (optional)
336#
337# => VOID CONTEXT
338#
339sub get_macro {
340 my ($db, $macro, $level) = @_;
341 $level ||= 0;
342
343 # indent output according to recursion level
344 my $indent = ' ' x $level;
345
346 # get all the packages which our macro is defined in
347 ($macro, my @packages) = get_packages($db, $macro, $indent);
348 return unless @packages;
349
350 # macro definitions may occur in several packages so we display them all
351 # (unless a certain package was explicitly specified)
352 foreach my $pkg (@packages) {
353 print "$indent* $pkg:$macro ($db->{$macro}->{$pkg}->{'file'}, line $db->{$macro}->{$pkg}->{'line'})\n";
354
355 next if $short > 1; # really short (no content, no recursion)
356
357 my $content = '';
358 # some macros are defined, but don't have any content
359 if (defined $db->{$macro}->{$pkg}->{'content'}) {
360 # for batch display we condense the output a little bit...
361 map { s/^\s*//; s/\s*$// } @{$db->{$macro}->{$pkg}->{'content'}};
362 # ...and put it on a single line
363 $content = join(' ' => @{$db->{$macro}->{$pkg}->{'content'}});
364 }
365 print "$indent { $content }\n\n"
366 unless $short;
367 # short display only, i.e. no content
368 # of the macro's definition
369
370 # only go (deeper) into referenced macros if we
371 # haven't reached the specified recursion level
372 if ($depth eq '' || $level < $depth) {
373 # get (referencing|referenced) macros...
374 my @refs = $reverse
375 ? get_r_macros($db, $macro)
376 : get_macros($content);
377
378 # ...and recurse above them (with increased recursion level)
379 foreach my $ref (@refs) {
380 get_macro($db, $ref, $level + 1);
381 }
382 }
383 }
384}
385# </sub get_macro>
386
387# <sub get_macros>
388# returns a list of macros extracted from a content string
389# or a boolean value if a macro name was specified
390#
391# usage:
392# @macros = get_macros($content)
393# $boolean = get_macros($content, $macro)
394#
395# content: content string
396# macro: macro name
397#
398# => macros: list of macros
399# => boolean: boolean value (true = 1 / false = empty list)
400#
401sub get_macros {
402 my ($content, $macro) = @_;
403 my @macro_list = ();
404 my %seen = ();
405
406 # get each macro reference in the string
407 # (for macro name considerations see above)
408 while ($content =~ /(?<!\\)_((?:[[:alpha:]]+:)?[[:alpha:]][[:alnum:]]*)(?<!\\)_/g) {
409 my $m = $1;
410
411 # we want to skip some macros that have no content anyway (defined
412 # from within the server) - unless we're doing a "reverse" search
413 next if $seen{$m}++ || ($m =~ /^(cgiarg.*|if|httpimg|gwcgi|(decoded)?compressedoptions)$/i
414 && ! $reverse);
415
416 if (defined $macro) {
417 # is this the macro we wanted? then the current
418 # macro uses it => return true
419 return 1 if $m =~ /^(?:[[:alpha:]]+:)?$macro$/;
420 }
421 else {
422 # add macro to our list
423 push(@macro_list => $m);
424 }
425 }
426
427 # return the list of used macros
428 # (this evaluates to false (empty list) if there are no further
429 # macro calls or if this macro doesn't use the sought-after macro)
430 return sort @macro_list;
431}
432# </sub get_macros>
433
434# <sub get_r_macros>
435# returns a list of macro names which reference ("use") the specified macro
436#
437# usage:
438# @macros = get_r_macros($db, $macro)
439#
440# db: hash reference to macro db
441# macro: macro name
442#
443# => macros: list of macros
444#
445sub get_r_macros {
446 my ($db, $macro) = @_;
447 my %refs = ();
448
449 # need to test each single macro's...
450 foreach my $m (sort keys %{$db}) {
451 # ...each single package
452 foreach my $p (sort keys %{$db->{$m}}) {
453 my $pm = "$p:$m"; # include package information in the macro name!
454
455 # does this macro have any content?
456 $refs{$pm}++ if defined $db->{$m}->{$p}->{'content'}
457 # then check if it uses our sought-after macro
458 && get_macros(join(' ' => @{$db->{$m}->{$p}->{'content'}}), $macro);
459 # need to stringify content!
460 }
461 }
462
463 # now we have all the macros which use our sought-after
464 return sort keys %refs;
465}
466# </sub get_r_macros>
467
468# <sub recurse_packages>
469# recurse all packages for a given macro
470#
471# usage:
472# recurse_packages($db, $macro)
473#
474# db: hash reference to macro db
475# macro: macro name (any package specification will be dropped)
476#
477# => VOID CONTEXT
478#
479sub recurse_packages {
480 my ($db, $macro) = @_;
481
482 # repeat until explicit break/exit
483 while (1) {
484 # get all the packages which our macro is defined in
485 ($macro, my @packages) = get_packages($db, $macro);
486 return unless @packages;
487
488 print "select package for macro '$macro' [leave empty to return]\n";
489 my $n = 0;
490 my $package = '';
491 # ask for user's selection...
492 do {
493 foreach my $pkg (@packages) {
494 printf " [%d]%s %s\n", ++$n, " " x (4 - length $n), $pkg;
495 }
496 print "> ";
497 $package = <STDIN>;
498 die "\n"
499 unless defined $package; # allow for exiting by hitting <ctrl>+d
500 chomp $package;
501 $n = 0;
502 # ...until we return...
503 } until ($package eq ''
504 # ...or a valid number is provided
505 || ($package =~ /^\d+$/ && $package > 0 && $package <= @packages));
506
507 return unless length $package; # return to previous stage
508
509 # set selected package
510 $package = $packages[$package - 1];
511
512 # some macros are defined, but don't have any content
513 my $content = defined $db->{$macro}->{$package}->{'content'}
514 ? join("\n" => @{$db->{$macro}->{$package}->{'content'}})
515 # now we want to retain the original structure
516 : '';
517
518 my $content_string = <<HERE_CONTENT;
519* $package:$macro ($db->{$macro}->{$package}->{'file'}, line $db->{$macro}->{$package}->{'line'})
520 { $content }
521HERE_CONTENT
522
523 if ($paged) {
524 # pass output to pager
525
526 open(LESS, "| $pager")
527 or die "can't open pipe to '$pager': $!";
528
529 print LESS "$content_string";
530
531 close LESS;
532 }
533 else {
534 # print to standard out...
535
536 print "\n$content_string\n";
537
538 # ...and wait for user reaction to continue
539 print "[press <enter> to continue]";
540 print "\n"
541 if <STDIN>;
542 }
543
544 # now on to the macros referenced within this one
545 recurse_macros($db, $content);
546 }
547}
548# </sub recurse_packages>
549
550# <sub get_packages>
551# returns list of packages for specified macro, also returns
552# modified macro name (without surrounding '_' and package specification)
553#
554# usage:
555# ($macro, @packages) = get_packages($db, $macro)
556#
557# db: hash reference to macro db
558# macro: macro name
559#
560# => macro: modified macro name
561# => packages: list of packages
562#
563sub get_packages {
564 my ($db, $macro, $indent) = @_;
565 $indent ||= '';
566
567 # remove surrounding '_'
568 $macro =~ s/^_//;
569 $macro =~ s/_$//;
570
571 # save original macro name (including package specification)
572 my $omacro = $macro;
573
574 my @packages = ();
575
576 # for macro name considerations see above
577 if ($macro =~ /^([[:alpha:]]+:)?[[:alpha:]][[:alnum:]]*$/) {
578 # valid macro name
579
580 # strip off package specification
581 my $package = ($macro =~ s/^([[:alpha:]]+)://) ? $1 : '';
582
583 if (exists $db->{$macro}) {
584 # valid/existing macro
585
586 if (length $package && ! $browse) {
587 # regard any package specification unless we're in browse mode
588
589 @packages = ($package)
590 if exists $db->{$macro}->{$package};
591 }
592 else {
593 # get all packages otherwise
594
595 @packages = sort keys %{$db->{$macro}};
596 }
597 }
598 }
599 else {
600 # invalid macro name
601
602 warn "invalid macro name '$macro'!\n";
603 return; # skip it
604 }
605
606 # no packages - no definition
607 unless (@packages) {
608 print "$indent- $omacro\n$indent no definition for macro!\n\n";
609 return; # skip it
610 }
611
612 # return modified macro name and packages found
613 return $macro, sort @packages;
614}
615# </sub get_packages>
616
617# <sub recurse_macros>
618# recurse all macros for a given content string
619#
620# usage:
621# recurse_macros($db, $content)
622#
623# db: hash reference to macro db
624# content: content string
625#
626# => VOID CONTEXT
627#
628sub recurse_macros {
629 my ($db, $content) = @_;
630
631 # repeat until explicit break/exit
632 while (1) {
633 # get all the macros referenced within the current one
634 @macros = get_macros($content);
635 return unless @macros;
636
637 print "select macro [leave empty to return]\n";
638 my $n = 0;
639 my $macro = '';
640 # ask for user's selection...
641 do {
642 foreach my $m (@macros) {
643 printf " [%d]%s %s\n", ++$n, " " x (4 - length $n), $m;
644 }
645 print "> ";
646 $macro = <STDIN>;
647 die "\n"
648 unless defined $macro; # allow for exiting by hitting <ctrl>+d
649 chomp $macro;
650 $n = 0;
651 # ...until we return...
652 } until ($macro eq ''
653 # ...or a valid number is provided
654 || ($macro =~ /^\d+$/ && $macro > 0 && $macro <= @macros));
655
656 return unless length $macro; # return to previous stage
657
658 # set selected macro
659 $macro = $macros[$macro - 1];
660
661 # now we want all the macro's packages again
662 recurse_packages($db, $macro);
663 }
664}
665# </sub recurse_macros>
Note: See TracBrowser for help on using the repository browser.