Changeset 14270 for gsdl/branches

Show
Ignore:
Timestamp:
25.07.2007 13:37:52 (13 years ago)
Author:
oranfry
Message:

merged selected changes to the gsdl trunk since r14217 into the 2.74 branch

Location:
gsdl/branches/gsdl-2.74
Files:
22 modified
1 copied

Legend:

Unmodified
Added
Removed
  • gsdl/branches/gsdl-2.74/bin/script/buildcol.pl

    r14197 r14270  
    7373 
    7474my $arguments = 
    75     [ { 'name' => "disable_OAI", 
    76     'desc' => "{buildcol.disable_OAI}", 
    77     'type' => "flag", 
    78     'reqd' => "no", 
    79     'modegli' => "2" }, 
    80       { 'name' => "remove_empty_classifications", 
     75    [ { 'name' => "remove_empty_classifications", 
    8176    'desc' => "{buildcol.remove_empty_classifications}", 
    8277    'type' => "flag", 
     
    205200    'type' => "flag", 
    206201    'reqd' => "no", 
    207     'hiddengli' => "yes" } 
     202    'hiddengli' => "yes" }, 
     203      { 'name' => "disable_OAI", 
     204          'desc' => "{buildcol.disable_OAI}", 
     205          'type' => "flag", 
     206          'reqd' => "no", 
     207          'modegli' => "2", 
     208      'hiddengli' => "yes" } 
    208209 
    209210#      { 'name' => "incremental_dlc", 
     
    353354    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib/plugins"); 
    354355 
    355     # read the configuration file (for gs2) 
    356     $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg"); 
     356    # Read in the collection configuration file. 
    357357    my ($collectcfg, $buildtype);  
    358  
    359     if (-e $configfilename) { 
    360       $collectcfg = &colcfg::read_collect_cfg ($configfilename); 
    361       $gs_mode = "gs2";   
    362     }  
    363     else { 
    364  
    365       # If it is gs3 
    366       $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collectionConfig.xml"); 
    367  
    368       if (!-e $configfilename) { 
    369     &gsprintf($out, "{common.cannot_find_cfg_file}\n", $configfilename) && die; 
    370       } 
    371       else { 
     358    ($configfilename, $gs_mode) = &colcfg::get_collect_cfg_name($out); 
     359    if ($gs_mode eq "gs2") { 
     360        $collectcfg = &colcfg::read_collect_cfg ($configfilename); 
     361    } elsif ($gs_mode eq "gs3") { 
    372362    $collectcfg = &colcfg::read_collection_cfg_xml ($configfilename); 
    373     $gs_mode = "gs3"; 
    374       }  
    375     } 
    376      
     363    } 
     364 
    377365    if ($verbosity !~ /\d+/) { 
    378366    if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) { 
     
    441429        $remove_empty_classifications = 1; 
    442430    } 
    443     }     
     431    } 
     432     
    444433     
    445434    if (defined $collectcfg->{'create_images'} && $collectcfg->{'create_images'} =~ /^true$/i) { 
     
    465454    $gli = 0 unless defined $gli; 
    466455 
     456    # If the disable_OAI flag is not present, the option $disable_OAI with the value of 0 will be passed to basebuilder.pm 
    467457    $disable_OAI = 0 unless defined $disable_OAI; 
    468  
     458     
    469459    # New argument to track whether build is incremental 
    470460    $incremental = 0 unless defined $incremental; 
     
    537527    # if a builder class has been created for this collection, use it 
    538528    # otherwise, use the mg or mgpp builder 
    539     if (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm") { 
     529    if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuilder.pm") { 
     530    $builderdir = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib"; 
     531    $buildertype = "custombuilder"; 
     532    } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuilder.pm") { 
     533    $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib"; 
     534    $buildertype = "custombuilder"; 
     535    } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm") { 
    540536    $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib"; 
    541537    $buildertype = "${collection}builder"; 
  • gsdl/branches/gsdl-2.74/bin/script/downloadfrom.pl

    r12903 r14270  
    4747        'desc' => "{downloadfrom.download_mode.Web}", 
    4848    'downloadname' => "WebDownload" }, 
     49      { 'name' => "MediaWiki", 
     50        'desc' => "{downloadfrom.download_mode.MediaWiki}", 
     51        'downloadname' => "MediaWikiDownload" }, 
    4952      { 'name' => "OAI", 
    5053        'desc' => "{downloadfrom.download_mode.OAI}", 
  • gsdl/branches/gsdl-2.74/bin/script/gti.pl

    r13948 r14270  
    3939 
    4040 
    41 my $anonymous_cvs_root = ":pserver:cvs_anon\@cvs.scms.waikato.ac.nz:2402/usr/local/global-cvs/gsdl-src"; 
     41#my $anonymous_cvs_root = ":pserver:cvs_anon\@cvs.scms.waikato.ac.nz:2402/usr/local/global-cvs/gsdl-src"; 
     42#my $anonymous_svn_root = "http://svn.greenstone.org/gsdl/trunk/"; 
    4243my $gsdl_root_directory = "$ENV{'GSDLHOME'}"; 
    4344my $gti_log_file = &util::filename_cat($gsdl_root_directory, "etc", "gti.log"); 
     
    7980    # 'target_file' => "gsdl-documentation/tutorials/xml-source/tutorial_{target_language_code}.xml" }, 
    8081 
    81       # Greenstone.org 
     82      # new Greenstone.org 
    8283      { 'key' => "greenorg", 
    83     'file_type' => "macrofile", 
    84     'source_file' => "greenorg/macros/english.dm", 
    85     'target_file' => "greenorg/macros/{iso_639_1_target_language_name}.dm" } 
     84        'file_type' => "resource_bundle", 
     85        'source_file' => "greenstoneorg/website/classes/Gsc.properties", 
     86        'target_file' => "greenstoneorg/website/classes/Gsc_{iso_639_1_target_language_name}.properties"  
     87    # 'file_type' => "macrofile", 
     88    # 'source_file' => "greenorg/macros/english.dm", 
     89    # 'target_file' => "greenorg/macros/{iso_639_1_target_language_name}.dm"  
     90      } 
    8691    ]; 
    8792 
     
    111116    } 
    112117    if ($gti_command =~ /^get-first-n-chunks-requiring-work$/i) { 
    113     print &get_first_n_chunks_requiring_work(@gti_command_arguments); 
     118    print &get_first_n_chunks_requiring_work(@gti_command_arguments);         
    114119    } 
    115120    if ($gti_command =~ /^get-language-status$/i) { 
    116     print &get_language_status(@gti_command_arguments); 
     121    print &get_language_status(@gti_command_arguments);        
    117122    } 
    118123    if ($gti_command =~ /^search-chunks$/i) { 
     
    248253    my @source_file_lines = &read_file_lines($source_file_path); 
    249254    my %source_file_key_to_line_mapping = &build_key_to_line_mapping(\@source_file_lines, $translation_file_type); 
    250  
     255     
    251256    my $target_file_path = &util::filename_cat($gsdl_root_directory, $target_file); 
    252257    my @target_file_lines = &read_file_lines($target_file_path); 
     
    299304    my $source_file_chunk_date = $source_file_key_to_last_update_date_mapping{$chunk_key}; 
    300305    my $source_file_chunk_text = &make_text_xml_safe($source_file_key_to_text_mapping{$chunk_key}); 
    301  
     306     
     307    if(!defined $source_file_chunk_date){ 
     308        $source_file_chunk_date = ""; 
     309    } 
     310     
    302311    $xml_response .= "    <Chunk key=\"" . &make_text_xml_safe($chunk_key) . "\">\n"; 
    303     $xml_response .= "      <SourceFileText date=\"$source_file_chunk_date\">$source_file_chunk_text</SourceFileText>\n"; 
     312    $xml_response .= "      <SourceFileText date=\"$source_file_chunk_date\">$source_file_chunk_text</SourceFileText>\n";    
    304313    $xml_response .= "      <TargetFileText></TargetFileText>\n"; 
    305314    $xml_response .= "    </Chunk>\n"; 
     
    325334    my $target_file_chunk_date = $target_file_key_to_last_update_date_mapping{$chunk_key}; 
    326335    my $target_file_chunk_text = &make_text_xml_safe($target_file_key_to_text_mapping{$chunk_key}); 
    327  
    328     $xml_response .= "    <Chunk key=\"" . &make_text_xml_safe($chunk_key) . "\">\n"; 
     336     
     337    if(!defined $source_file_chunk_date){ 
     338        $source_file_chunk_date = ""; 
     339    } 
     340 
     341    $xml_response .= "    <Chunk key=\"" . &make_text_xml_safe($chunk_key) . "\">\n";    
    329342    $xml_response .= "      <SourceFileText date=\"$source_file_chunk_date\">$source_file_chunk_text</SourceFileText>\n"; 
    330343    $xml_response .= "      <TargetFileText date=\"$target_file_chunk_date\">$target_file_chunk_text</TargetFileText>\n"; 
     
    636649    # The "2>/dev/null" is very important! If it is missing this will never return when run from the receptionist 
    637650    # unless ($translation_file_is_not_in_cvs) { 
    638     my $source_file_cvs_status = `cd $gsdl_root_directory; cvs -d $anonymous_cvs_root update $source_file 2>/dev/null`; 
     651    #my $source_file_cvs_status = `cd $gsdl_root_directory; cvs -d $anonymous_cvs_root update $source_file 2>/dev/null`; 
     652        my $source_file_cvs_status = `cd $gsdl_root_directory; svn status $source_file 2>/dev/null`; 
    639653    if ($source_file_cvs_status =~ /^C /) { 
    640654        &throw_fatal_error("Source file $source_file_path conflicts with the repository."); 
     
    753767    my $chunk_cvs_date = $key_to_cvs_date_mapping{$chunk_key}; 
    754768    $key_to_last_update_date_mapping{$chunk_key} = $chunk_cvs_date; 
    755  
     769                 
    756770    # If a comment date exists and it is after the CVS date, use that instead 
     771        # need to convert the comment date format to SVN format 
    757772    my $chunk_gti_comment = $key_to_gti_comment_mapping{$chunk_key}; 
    758773    if (defined($chunk_gti_comment) && $chunk_gti_comment =~ /(\d?\d-\D\D\D-\d\d\d\d)/) { 
    759         my $chunk_comment_date = $1; 
     774        my $chunk_comment_date = $1;             
    760775        if ((!defined($chunk_cvs_date) || &is_date_after($chunk_comment_date, $chunk_cvs_date))) { 
    761776        $key_to_last_update_date_mapping{$chunk_key} = $chunk_comment_date; 
     
    774789    # Use CVS to annotate each line of the file with the date it was last edited 
    775790    # The "2>/dev/null" is very important! If it is missing this will never return when run from the receptionist 
    776     my $cvs_annotated_file = `cd $gsdl_root_directory; cvs -d $anonymous_cvs_root annotate -F $filename 2>/dev/null`; 
     791    # my $cvs_annotated_file = `cd $gsdl_root_directory; cvs -d $anonymous_cvs_root annotate -F $filename 2>/dev/null`;     
     792    # my $cvs_annotated_file = `cd $gsdl_root_directory; export PATH=.:/research/lh92/programs/subversion/bin; svn annotate -v --force $filename`; 
     793    my $cvs_annotated_file = `cd $gsdl_root_directory; svn annotate -v $filename`; 
     794     
    777795    my @cvs_annotated_file_lines = split(/\n/, $cvs_annotated_file); 
    778796 
     
    780798    foreach my $cvs_annotated_file_line (@cvs_annotated_file_lines) { 
    781799    # Extract the date from the CVS annotation at the front 
    782     $cvs_annotated_file_line =~ s/^\S+\s+\(\S+\s+(\S+)\):\s//; 
    783     push(@cvs_annotated_file_lines_date, $1); 
    784     } 
    785  
     800        # cvs format : 07-Jun-02 
     801        # svn format : 2007-07-16 
     802    # $cvs_annotated_file_line =~ s/^\S+\s+\(\S+\s+(\S+)\):\s//; 
     803        $cvs_annotated_file_line =~ s/^\s+\S+\s+\S+\s(\S+)//;  
     804         
     805        push(@cvs_annotated_file_lines_date, $1); 
     806         
     807        # trim extra date information in svn annotation format 
     808        # 15:42:49 +1200 (Wed, 21 Jun 2006) 
     809        $cvs_annotated_file_line =~ s/^\s+\S+\s\S+\s\((.+?)\)\s//;  
     810    }     
     811     
    786812    # Build a key to line mapping for the CVS annotated file, for matching the chunk key to the CVS date 
    787813    my %key_to_line_mapping = &build_key_to_line_mapping(\@cvs_annotated_file_lines, $translation_file_type); 
    788  
     814         
    789815    my %key_to_cvs_date_mapping = (); 
    790816    foreach my $chunk_key (keys(%key_to_line_mapping)) { 
    791817    my $chunk_starting_line = (split(/-/, $key_to_line_mapping{$chunk_key}))[0]; 
    792818    my $chunk_finishing_line = (split(/-/, $key_to_line_mapping{$chunk_key}))[1]; 
    793  
     819                 
    794820    # Find the date this chunk was last edited, from the CVS annotation 
    795     my $chunk_date = $cvs_annotated_file_lines_date[$chunk_starting_line]; 
     821    my $chunk_date = $cvs_annotated_file_lines_date[$chunk_starting_line];         
    796822    for (my $l = ($chunk_starting_line + 1); $l <= $chunk_finishing_line; $l++) { 
    797823        if (&is_date_after($cvs_annotated_file_lines_date[$l], $chunk_date)) { 
    798824        # This part of the chunk has been updated more recently 
    799825        $chunk_date = $cvs_annotated_file_lines_date[$l]; 
     826         
    800827        } 
    801828    } 
     
    861888    my $source_chunk_last_update_date = $source_file_key_to_last_update_date_mapping->{$chunk_key}; 
    862889    my $target_chunk_last_update_date = $target_file_key_to_last_update_date_mapping->{$chunk_key}; 
    863     if (defined($target_chunk_last_update_date) && &is_date_after($source_chunk_last_update_date, $target_chunk_last_update_date)) { 
     890         
     891        # print "key: $chunk_key\nsource date : $source_chunk_last_update_date\ntarget date : $target_chunk_last_update_date\nafter? ". &is_date_after($source_chunk_last_update_date, $target_chunk_last_update_date) . "\n\n";         
     892                     
     893        if (defined($target_chunk_last_update_date) && &is_date_after($source_chunk_last_update_date, $target_chunk_last_update_date)) { 
    864894        # &log_message("Chunk with key $chunk_key needs updating."); 
    865895        push(@target_file_keys_requiring_updating, $chunk_key); 
     
    903933 
    904934# Returns 1 if $date1 is after $date2, 0 otherwise 
    905 sub is_date_after 
     935sub is_date_after_cvs 
    906936{ 
    907937    my ($date1, $date2) = @_; 
     
    909939          "Jul", 7, "Aug", 8, "Sep", 9, "Oct", 10, "Nov", 11, "Dec", 12); 
    910940 
     941    if(!defined $date1) { 
     942        return 1; 
     943    } 
     944 
    911945    my @date1parts = split(/-/, $date1); 
    912946    my @date2parts = split(/-/, $date2); 
     
    915949    my $year1 = $date1parts[2]; 
    916950    if ($year1 < 80) { 
    917     $year1 += 2000; 
     951        $year1 += 2000; 
    918952    } 
    919953    my $year2 = $date2parts[2]; 
    920954    if ($year2 < 80) { 
    921     $year2 += 2000; 
     955        $year2 += 2000; 
    922956    } 
    923957 
     
    939973    } 
    940974 
     975    return 0; 
     976} 
     977 
     978sub is_date_after 
     979{ 
     980    my ($date1, $date2) = @_; 
     981     
     982    if(!defined $date1) { 
     983      return 1; 
     984    } 
     985    if(!defined $date2) { 
     986      return 0; 
     987    } 
     988     
     989    # 16-Aug-2006 
     990    if($date1=~ /(\d+?)-(\S\S\S)-(\d\d\d\d)/){ 
     991       my %months = ("Jan", "01", "Feb", "02", "Mar", "03", "Apr",  "04", "May",  "05", "Jun",  "06", 
     992          "Jul", "07", "Aug", "08", "Sep", "09", "Oct", "10", "Nov", "11", "Dec", "12"); 
     993       $date1=$3 . "-" . $months{$2} . "-" . $1; 
     994       # print "** converted date1: $date1\n"; 
     995    } 
     996    if($date2=~ /(\d+?)-(\S\S\S)-(\d\d\d\d)/){ 
     997       my %months = ("Jan", "01", "Feb", "02", "Mar", "03", "Apr",  "04", "May",  "05", "Jun",  "06", 
     998          "Jul", "07", "Aug", "08", "Sep", "09", "Oct", "10", "Nov", "11", "Dec", "12"); 
     999       $date2=$3 . "-" . $months{$2} . "-" . $1; 
     1000       # print "** converted date2: $date2\n"; 
     1001    } 
     1002     
     1003     
     1004    # 2006-08-16 
     1005    my @date1parts = split(/-/, $date1); 
     1006    my @date2parts = split(/-/, $date2); 
     1007     
     1008    # Compare year 
     1009    if ($date1parts[0] > $date2parts[0]) { 
     1010    return 1; 
     1011    } 
     1012    elsif ($date1parts[0] == $date2parts[0]) { 
     1013    # Year is the same, so compare month 
     1014    if ($date1parts[1] > $date2parts[1]) { 
     1015        return 1; 
     1016    } 
     1017    elsif ($date1parts[1] == $date2parts[1]) { 
     1018        # Month is the same, so compare day 
     1019        if ($date1parts[2] > $date2parts[2]) { 
     1020        return 1; 
     1021        } 
     1022    } 
     1023    }     
     1024     
    9411025    return 0; 
    9421026} 
  • gsdl/branches/gsdl-2.74/bin/script/mkcol.pl

    r14032 r14270  
    7676    'reqd' => "no" }, 
    7777      { 'name' => "gs3mode", 
    78     'desc' => "", 
     78    'desc' => "mkcol.gs3mode", 
    7979    'type' => "flag", 
    8080    'reqd' => "no" }, 
  • gsdl/branches/gsdl-2.74/cgi-bin/gliserver.pl

    r14025 r14270  
    11#!perl -w 
    2  
    32# Need to specify the full path of Perl above 
    43 
    54 
    6 use gsdlCGI; 
    75use strict; 
     6 
     7 
     8# Set this to 1 to work around IIS 6 craziness 
     9my $iis6_mode = 0; 
     10 
     11 
     12# IIS 6: for some reason, IIS runs this script with the working directory set to the Greenstone 
     13#   directory rather than the cgi-bin directory, causing lots of stuff to fail 
     14if ($iis6_mode) 
     15{ 
     16    # Change into cgi-bin directory 
     17    chdir("cgi-bin"); 
     18} 
     19 
     20 
     21# We use require and an eval here (instead of "use") to catch any errors loading the module (for IIS) 
     22eval("require \"gsdlCGI.pm\""); 
     23if ($@) 
     24{ 
     25    print STDOUT "Content-type:text/plain\n\n"; 
     26    print STDOUT "ERROR: $@\n"; 
     27    exit 0; 
     28} 
    829 
    930 
     
    109130sub authenticate_user 
    110131{ 
    111      
    112132    my $gsdl_cgi = shift(@_); 
    113133    my $username = shift(@_); 
     
    244264    my $installation_status = ""; 
    245265 
     266    print STDOUT "Content-type:text/plain\n\n"; 
     267 
    246268    # Check that Java is installed and accessible 
    247269    my $java = $gsdl_cgi->get_java_path(); 
    248270    my $java_command = "$java -version 2>&1"; 
     271 
     272    # IIS 6: redirecting output from STDERR to STDOUT just doesn't work, so we have to let it go 
     273    #   directly out to the page 
     274    if ($iis6_mode) 
     275    { 
     276    $java_command = "java -version"; 
     277    } 
     278 
    249279    my $java_output = `$java_command`; 
    250280    my $java_status = $?; 
     
    265295 
    266296    if ($installation_ok) { 
    267     $gsdl_cgi->generate_ok_message($installation_status . "\nInstallation OK!"); 
     297    print STDOUT $installation_status . "\nInstallation OK!"; 
    268298    } 
    269299    else { 
    270     $gsdl_cgi->generate_error($installation_status); 
     300    print STDOUT $installation_status; 
    271301    } 
    272302} 
     
    563593    } 
    564594 
    565     print STDOUT "Content-type:text/plain\n\n"; 
    566595    foreach my $cgi_arg_name ($gsdl_cgi->param) { 
    567596    my $cgi_arg_value = $gsdl_cgi->clean_param($cgi_arg_name) || ""; 
     
    575604    } 
    576605 
     606    print STDOUT "Content-type:text/plain\n\n"; 
     607 
    577608    my $perl_command = "perl -S $script $perl_args 2>&1"; 
     609 
     610    # IIS 6: redirecting output from STDERR to STDOUT just doesn't work, so we have to let it go 
     611    #   directly out to the page 
     612    if ($iis6_mode) 
     613    { 
     614    $perl_command = "perl -S $script $perl_args"; 
     615    } 
     616 
    578617    my $perl_output = `$perl_command`; 
    579618    my $perl_status = $?; 
     
    582621    } 
    583622 
    584     print STDOUT "Content-type:text/plain\n\n"; 
    585     print STDOUT $perl_output; 
    586  
     623    if (defined($perl_output)) 
     624    { 
     625    print STDOUT $perl_output; 
     626    } 
    587627} 
    588628 
     
    728768    } 
    729769 
     770    print STDOUT "Content-type:text/plain\n\n"; 
     771 
    730772    my $perl_command = "perl -S $script $perl_args 2>&1"; 
     773 
     774    # IIS 6: redirecting output from STDERR to STDOUT just doesn't work, so we have to let it go 
     775    #   directly out to the page 
     776    if ($iis6_mode) 
     777    { 
     778    $perl_command = "perl -S $script $perl_args"; 
     779    } 
     780 
    731781    if (!open(PIN, "$perl_command |")) { 
    732782    $gsdl_cgi->generate_error("Unable to execute command: $perl_command"); 
    733783    } 
    734784 
    735     print STDOUT "Content-type:text/plain\n\n"; 
    736785    while (defined (my $perl_output_line = <PIN>)) { 
    737786    print STDOUT $perl_output_line; 
     
    799848 
    800849    # Read the uploaded data and write it out to file 
     850    # We have to pass the size of the uploaded data in the "fs" argument because IIS 6 seems to be 
     851    #   completely incapable of working this out otherwise (causing the old code to crash) 
    801852    my $buf; 
    802853    my $num_bytes = 0; 
     854    my $num_bytes_remaining = $gsdl_cgi->clean_param("fs"); 
     855    my $bytes_to_read = $num_bytes_remaining; 
     856    if ($bytes_to_read > 1024) { $bytes_to_read = 1024; } 
    803857    binmode(FOUT); 
    804     while (read(STDIN, $buf, 1024) > 0) { 
     858    while (read(STDIN, $buf, $bytes_to_read) > 0) { 
    805859    print FOUT $buf; 
    806860    $num_bytes += length($buf); 
     861    $num_bytes_remaining -= length($buf); 
     862    $bytes_to_read = $num_bytes_remaining; 
     863    if ($bytes_to_read > 1024) { $bytes_to_read = 1024; } 
    807864    } 
    808865    close(FOUT); 
  • gsdl/branches/gsdl-2.74/cgi-bin/gsdlCGI.pm

    r14024 r14270  
    101101    print STDOUT $full_mess; 
    102102 
    103     die $full_mess; 
     103    exit 0; 
    104104} 
    105105 
  • gsdl/branches/gsdl-2.74/macros/style.dm

    r13429 r14270  
    100100# _pagetitle_ 
    101101# _globalscripts_ 
    102 _htmlhead_ {<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"> 
     102_htmlhead_ {<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" 
     103"http://www.w3.org/TR/html4/loose.dtd"> 
    103104 
    104105<html_htmlextra_> 
  • gsdl/branches/gsdl-2.74/perllib/basebuilder.pm

    r14212 r14270  
    2626package basebuilder; 
    2727 
     28use strict; 
     29no strict 'refs'; # allow filehandles to be variables and vice versa 
     30 
    2831use classify; 
    2932use cfgread; 
     
    5659    $outhandle, $no_text, $failhandle, $gli, $disable_OAI) = @_; 
    5760 
    58     $outhandle = STDERR unless defined $outhandle; 
     61    $outhandle = *STDERR unless defined $outhandle; 
    5962    $no_text = 0 unless defined $no_text; 
    60     $failhandle = STDERR unless defined $failhandle; 
     63    $failhandle = *STDERR unless defined $failhandle; 
    6164 
    6265    # create a builder object 
     
    8083 
    8184    $self->{'gli'} = 0 unless defined $self->{'gli'}; 
     85     
     86    # disable_OAI applies to greenstone 3 only and is only passed to &colcfg::write_build_cfg_xml (then cfgread4gs3::write_cfg_file) when writing the buildConfig.xml 
    8287    $self->{'disable_OAI'} = 0 unless defined $self->{'disable_OAI'}; 
    83      
    84     # read in the collection configuration file 
    85     my $colcfgname = "$ENV{'GSDLCOLLECTDIR'}/etc/collect.cfg"; 
    86     if (-e $colcfgname) { 
    87       ##$self->{'collect_cfg'} = &colcfg::read_collection_cfg_xml ($colcfgname); 
    88       $self->{'collect_cfg'} = &colcfg::read_collect_cfg ($colcfgname); 
    89       $gs_mode = "gs2"; 
    90     } 
    91     else { 
    92       my $colcfgname = "$ENV{'GSDLCOLLECTDIR'}/etc/collectionConfig.xml"; 
    93       if (!-e $colcfgname) { 
    94     die "mgbuilder::new - couldn't find collectionConfig.xml for collection $collection\n"; 
    95       } 
    96       else { 
    97     #$self->{'collect_cfg'} = &colcfg::read_collect_cfg ($colcfgname); 
    98         $self->{'collect_cfg'} = &colcfg::read_collection_cfg_xml ($colcfgname); 
    99     $gs_mode = "gs3"; 
    100       } 
     88 
     89    # Read in the collection configuration file. 
     90    my ($colcfgname); 
     91    ($colcfgname, $gs_mode) = &colcfg::get_collect_cfg_name($outhandle); 
     92    if ($gs_mode eq "gs2") { 
     93        $self->{'collect_cfg'} = &colcfg::read_collect_cfg ($colcfgname); 
     94    } elsif ($gs_mode eq "gs3") { 
     95    $self->{'collect_cfg'} = &colcfg::read_collection_cfg_xml ($colcfgname); 
    10196    } 
    10297     
     
    196191    my ($buildprocdir, $buildproctype); 
    197192    my $collection = $self->{'collection'}; 
    198     if (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}buildproc.pm") { 
     193    if (-e "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib/custombuildproc.pm") { 
     194    $buildprocdir = "$ENV{'GSDLCOLLECTDIR'}/custom/${collection}/perllib"; 
     195    $buildproctype = "custombuildproc"; 
     196    } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/custombuildproc.pm") { 
     197    $buildprocdir = "$ENV{'GSDLCOLLECTDIR'}/perllib"; 
     198    $buildproctype = "custombuildproc"; 
     199    } elsif (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}buildproc.pm") { 
    199200    $buildprocdir = "$ENV{'GSDLCOLLECTDIR'}/perllib"; 
    200201    $buildproctype = "${collection}buildproc"; 
     
    240241    $self->{'maxnumeric'} = $maxnumeric; 
    241242} 
    242 # It seems we don't need this sub 
    243 #sub set_disable_OAI { 
    244 #    my $disable_OAI = shift (@_); 
    245 #    my ($disable_OAI = @_; 
    246 # 
    247 #    $self->{'disable_OAI'} = $disable_OAI; 
    248 #} 
    249243sub set_strip_html { 
    250244    my $self = shift (@_); 
     
    279273    # and their directory names (includes subcolls and langs) 
    280274    $self->{'index_mapping'} = $self->create_index_mapping ($indexes); 
    281  
    282     my $indexmap = $self->{'index_mapping'}->{'indexmap'}; 
    283  
     275    
    284276    # build each of the indexes 
    285277    foreach my $index (@$indexes) { 
     
    351343    my ($handle); 
    352344    if ($self->{'debug'}) { 
    353     $handle = STDOUT; 
     345    $handle = *STDOUT; 
    354346    } else { 
    355347    if (!-e "$txt2db_exe" || !open (PIPEOUT, "| txt2db$exe \"$fulldbname\"")) { 
     
    357349        die "builder::make_infodatabase - couldn't run $txt2db_exe\n"; 
    358350    } 
    359     $handle = basebuilder::PIPEOUT; 
     351    $handle = *PIPEOUT; 
    360352    } 
    361353     
     
    439431    $build_cfg->{'numsections'} = $self->{'buildproc'}->get_num_sections(); 
    440432    $build_cfg->{'numbytes'} = $self->{'buildproc'}->get_num_bytes(); 
    441  
    442     # store whether to disable OAI service 
    443     $build_cfg->{'disable_OAI'} = $self->{'disable_OAI'}; 
    444          
     433     
    445434    # store the mapping between the index names and the directory names 
    446435    # the index map is used to determine what indexes there are, so any that are not built should not be put into the map. 
     
    478467 
    479468    if ($gs_mode eq "gs2") { 
    480       #&colcfg::write_build_cfg_xml("$self->{'build_dir'}/buildConfig.xml", $build_cfg, $self->{'collect_cfg'}); 
    481469      &colcfg::write_build_cfg("$self->{'build_dir'}/build.cfg", $build_cfg); 
    482470    } 
    483471    if ($gs_mode eq "gs3") { 
    484       #&colcfg::write_build_cfg("$self->{'build_dir'}/build.cfg", $build_cfg); 
    485       &colcfg::write_build_cfg_xml("$self->{'build_dir'}/buildConfig.xml", $build_cfg, $self->{'collect_cfg'}); 
     472      &colcfg::write_build_cfg_xml("$self->{'build_dir'}/buildConfig.xml", $build_cfg, $self->{'collect_cfg'}, $self->{'disable_OAI'}); 
    486473    }     
    487474 
  • gsdl/branches/gsdl-2.74/perllib/cfgread4gs3.pm

    r14200 r14270  
    337337    } 
    338338 
    339     print "*** collectionConfig.xml internal ***\n"; 
    340     &Display;  
     339    #print "*** collectionConfig.xml internal ***\n"; 
     340    #&Display;  
    341341    return $data; 
    342342} 
     
    350350# Create the buildConfig.xml file for a specific collection 
    351351sub write_cfg_file { 
    352     # this sub is called make_auxiliary_files() in basebuilder.pm 
     352    # this sub is called in make_auxiliary_files() in basebuilder.pm 
    353353    # the received args: $buildoutfile - destination file: buildConfig.xml 
    354354    #                    $buildcfg - all build options, eg, disable_OAI 
    355355    #                    $collectcfg - contents of collectionConfig.xml read in by read_cfg_file sub in cfgread4gs3.pm. 
    356     my ($buildoutfile, $buildcfg, $collectcfg) = @_; 
     356    my ($buildoutfile, $buildcfg, $collectcfg, $disable_OAI) = @_; 
    357357    my $line = []; 
    358358 
    359359    if (!open (COLCFG, ">$buildoutfile")) { 
    360     print STDERR "cfgread::write_cfg_file couldn't write the cfg file $buildoutfile\n"; 
     360    print STDERR "cfgread4gs3::write_cfg_file couldn't write the build config file $buildoutfile\n"; 
    361361    die; 
    362362    } 
     
    391391 
    392392    # This serviceRack enables the collection to provide the oai metadata retrieve service, which is served by the OAIPMH.java class 
    393     # For each collection, we write the following serviceRack in the collection's buildConfig.xml file as follows if the 'disable_OAI' argument is not ticked in GLI (or equivalently, a 'disable_OAI' flag is not specified on the command line). There are also other configurations in the OAIConfig.xml. 
    394     if ($buildcfg->{'disable_OAI'} == 0) { 
     393    # For each collection, we write the following serviceRack in the collection's buildConfig.xml file if the 'disable_OAI' argument is not checked in the GLI (or equivalently, a 'disable_OAI' flag is not specified on the command line). There are also other configurations in the OAIConfig.xml. 
     394    if ($disable_OAI == 0) { 
    395395      &write_line('COLCFG', ["<serviceRack name=\"OAIPMH\">"]); 
    396396      if (defined $buildcfg->{'indexstem'}) { 
  • gsdl/branches/gsdl-2.74/perllib/classify.pm

    r14112 r14270  
    4949 
    5050    # find the classifier 
    51     my $customclassname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "custom", $ENV{'GSDLCOLLECTION'}, 
     51    my $customclassname; 
     52    if (defined($ENV{'GSDLCOLLECTION'})) 
     53    { 
     54    $customclassname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "custom", $ENV{'GSDLCOLLECTION'}, 
    5255                                              "perllib", "classify", "${classifier}.pm"); 
     56    } 
    5357    my $colclassname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "perllib", "classify", "${classifier}.pm"); 
    5458    my $mainclassname = &util::filename_cat($ENV{'GSDLHOME'}, "perllib", "classify", "${classifier}.pm"); 
    5559 
    56     if (-e $customclassname) { require $customclassname; } 
     60    if (defined($customclassname) && -e $customclassname) { require $customclassname; } 
    5761    elsif (-e $colclassname) { require $colclassname; } 
    5862    elsif (-e $mainclassname) { require $mainclassname; } 
  • gsdl/branches/gsdl-2.74/perllib/colcfg.pm

    r14115 r14270  
    100100} 
    101101sub write_build_cfg_xml { 
    102     my ($buildoutfile, $buildcfg, $collectcfg) = @_; 
     102    my ($buildoutfile, $buildcfg, $collectcfg, $disable_OAI) = @_; 
    103103 
    104     return &cfgread4gs3::write_cfg_file ($buildoutfile, $buildcfg, $collectcfg); 
     104    return &cfgread4gs3::write_cfg_file ($buildoutfile, $buildcfg, $collectcfg, $disable_OAI); 
    105105} 
    106106 
     
    148148 
    149149    return &cfgread::read_cfg_file ($filename,  
    150            q/^(builddate|buildtype|numdocs|numsections|numwords|numbytes|maxnumeric|textlevel|indexstem|stemindexes)$/,  
     150           q/^(builddate|buildtype|numdocs|numsections|numwords|numbytes|maxnumeric|textlevel|indexstem|stemindexes)$/, 
    151151           q/^(indexmap|subcollectionmap|languagemap|notbuilt|indexfields|indexfieldmap|indexlevels|levelmap)$/); 
    152152                     
     
    157157 
    158158    &cfgread::write_cfg_file($filename, $data, 
    159            q/^(builddate|buildtype|numdocs|numsections|numwords|numbytes|maxnumeric|textlevel|indexstem|stemindexes)$/,  
     159           q/^(builddate|buildtype|numdocs|numsections|numwords|numbytes|maxnumeric|textlevel|indexstem|stemindexes)$/, 
    160160           q/^(indexmap|subcollectionmap|languagemap|notbuilt|indexfields|indexfieldmap|indexlevels|levelmap)$/);             
    161161} 
  • gsdl/branches/gsdl-2.74/perllib/plugin.pm

    r14112 r14270  
    4848 
    4949    # find the plugin 
    50     my $customplugname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "custom", $ENV{'GSDLCOLLECTION'},  
     50    my $customplugname; 
     51    if (defined($ENV{'GSDLCOLLECTION'})) 
     52    { 
     53    $customplugname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "custom", $ENV{'GSDLCOLLECTION'},  
    5154                                             'perllib', 'plugins', "${pluginname}.pm"); 
     55    } 
    5256    my $colplugname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, 'perllib', 'plugins',  
    5357                      "${pluginname}.pm"); 
    5458    my $mainplugname = &util::filename_cat($ENV{'GSDLHOME'}, 'perllib', 'plugins',  
    5559                       "${pluginname}.pm"); 
    56     if (-e $customplugname) { require $customplugname; } 
     60    if (defined($customplugname) && -e $customplugname) { require $customplugname; } 
    5761    elsif (-e $colplugname) { require $colplugname; } 
    5862    elsif (-e $mainplugname) { require $mainplugname; } 
  • gsdl/branches/gsdl-2.74/perllib/plugins/HTMLPlug.pm

    r14089 r14270  
    11871187 
    11881188    foreach my $field (split /,/, $self->{'metadata_fields'}) { 
     1189        $field =~ s/^\s+//; # remove leading whitespace 
     1190        $field =~ s/\s+$//; # remove trailing whitespace 
     1191 
    11891192    # support tag<tagname> 
    11901193    if ($field =~ /^(.*?)<(.*?)>$/) { 
  • gsdl/branches/gsdl-2.74/perllib/plugins/MediaWikiPlug.pm

    r14108 r14270  
    2424# 
    2525########################################################################### 
    26 # This plugin is to process an HTML file where sections are divided by  
    27 # user-defined headings tags. As it is difficult to predict what user's definition 
    28 # this plugin allows to detect the user-defined titles up to three levels (level1, level2, level3...) 
    29 # as well as allows to get rid of user-defined Table of Content (TOC)... 
    30 # format:e.g. level1 (Abstract_title|ChapterTitle|Referencing Heading) level2(SectionHeading)... 
     26# This plugin processes an HTML file from a MediaWiki website that was downloaded by 
     27# the MediaWikiDownload plug. It trims MediaWiki functional sections such as 
     28# login, discussion, history, etc. Only the navigation and search sections can be preserved. 
     29# The searchbox is modified to search the Greenstone collection instead of the website.
     30# It can also automatically add the table of contents on the website's Main_Page to the 
     31# collection's Home page. 
    3132 
    3233package MediaWikiPlug; 
    3334 
    3435use HTMLPlug; 
    35 use ImagePlug; 
    36 use File::Copy; 
     36# use ImagePlug; 
     37# use File::Copy; 
     38use unicode; 
     39 
    3740 
    3841#use strict; # every perl program should have this! 
     
    4043 
    4144sub BEGIN { 
    42     @MediaWikiPlug::ISA = ('HTMLPlug'); 
     45    @MediaWikiPlug::ISA = ('HTMLPlug');         
    4346} 
    4447 
    4548my $arguments =  
    4649    [           
     50     # show the table of contents on collection's home page 
    4751     { 'name' => "show_toc", 
    4852       'desc' => "{MediaWikiPlug.show_toc}", 
    4953       'type' => "flag", 
    5054       'reqd' => "no"}, 
     55     # set to delete the table of contents section on each MediaWiki page 
     56     { 'name' => "delete_toc", 
     57       'desc' => "{MediaWikiPlug.delete_toc}", 
     58       'type' => "flag", 
     59       'reqd' => "no"}, 
     60     # regexp to match the table of contents 
    5161     { 'name' => "toc_exp", 
    5262       'desc' => "{MediaWikiPlug.toc_exp}", 
    5363       'type' => "regexp", 
    5464       'reqd' => "no", 
    55        'deft' => "" },    
    56      { 'name' => "delete_toc", 
    57        'desc' => "{MediaWikiPlug.delete_toc}", 
    58        'type' => "flag", 
    59        'reqd' => "no"}, 
     65       'deft' => "<table([^>]*)id=(\\\"|')toc(\\\"|')(.|\\n)*</table>\\n" },         
     66     # set to delete the navigation section 
    6067     { 'name' => "delete_nav", 
    6168       'desc' => "{MediaWikiPlug.delete_nav}", 
    6269       'type' => "flag", 
    6370       'reqd' => "no", 
    64        'deft' => ""},      
    65      { 'name' => "nav_exp", 
    66        'desc' => "{MediaWikiPlug.nav_exp}", 
     71       'deft' => ""},  
     72     # regexp to match the navigation section     
     73     { 'name' => "nav_div_exp", 
     74       'desc' => "{MediaWikiPlug.nav_div_exp}", 
    6775       'type' => "regexp", 
    6876       'reqd' => "no", 
    69        'deft' => "" }, 
    70      { 'name' => "tag_sections", 
    71        'desc' => "{MediaWikiPlug.tag_sections}", 
     77       'deft' => "<div([^>]*)id=(\\\"|')p-navigation(\\\"|')(.|\\n)*?<\/div>" }, 
     78     # set to delete the searchbox section 
     79     { 'name' => "delete_searchbox", 
     80       'desc' => "{MediaWikiPlug.delete_searchbox}", 
    7281       'type' => "flag", 
    73        'reqd' => "no"}, 
    74      { 'name' => "description_tags", 
    75        'desc' => "{HTMLPlug.description_tags}", 
    76        'type' => "flag", 
    77        'reqd' => "no"}         
     82       'reqd' => "no", 
     83       'deft' => ""}, 
     84     # regexp to match the searchbox section 
     85     { 'name' => "searchbox_div_exp", 
     86       'desc' => "{MediaWikiPlug.searchbox_div_exp}", 
     87       'type' => "regexp", 
     88       'reqd' => "no", 
     89       'deft' => "<div([^>]*)id=(\\\"|')p-search(\\\"|')(.|\\n)*?<\/div>"},      
     90     # regexp to match title suffix 
     91     # can't use the title_sub option in HTMLPlug instead 
     92     # because title_sub always matches from the beginning       
     93     { 'name' => "remove_title_suffix_exp", 
     94       'desc' => "{MediaWikiPlug.remove_title_suffix_exp}", 
     95       'type' => "regexp", 
     96       'reqd' => "no", 
     97       'deft' => ""} 
    7898     ]; 
    79  
    8099 
    81100my $options = { 'name'     => "MediaWikiPlug", 
     
    85104        'args'     => $arguments }; 
    86105 
    87  
    88106sub new { 
    89107    my ($class) = shift (@_); 
     
    112130     
    113131    $head =~ m/<title>(.+)<\/title>/i; 
    114     my $doctitle = $1 if defined $1; 
     132    my $doctitle = $1 if defined $1;     
    115133     
    116134    if (defined $self->{'metadata_fields'} && $self->{'metadata_fields'}=~ /\S/) { 
     
    126144    # set the title here if we haven't found it yet 
    127145    if (!defined $doc_obj->get_metadata_element ($doc_obj->get_top_section(), "Title")) {     
    128     if (defined $doctitle && $doctitle =~ /\S/) { 
    129         $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Title", $doctitle); 
     146    if (defined $doctitle && $doctitle =~ /\S/) {                
     147            # remove suffix in title if required 
     148            my $remove_suffix_exp = $self->{'remove_title_suffix_exp'}; 
     149        if (defined $remove_suffix_exp && $remove_suffix_exp =~ /\S/){ 
     150           $doctitle =~ s/$remove_suffix_exp//i; 
     151        }        
     152        $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Title", $doctitle); 
    130153    } else { 
    131         $self->title_fallback($doc_obj,$doc_obj->get_top_section(),$file); 
     154        $self->title_fallback($doc_obj,$doc_obj->get_top_section(),$file); 
    132155    } 
    133     }  
    134      
    135     if(defined $base_dir && $base_dir ne ""){  
    136     # find and download stylesheet 
     156    } 
     157 
     158    # we are only interested in the column-contents div <div id="column-content"> 
     159    # remove header section, it may contain header images or additional search boxes 
     160    my $header_exp = "<div([^>]*)id=(\"|')container(\"|')([^>]*)>(.|\\n)*<div([^>]*)id=(\"|')column-content"; 
     161    $body_text =~ s/$header_exp/<div$1id='container'$4><div$6id='column-content/isg; 
     162     
     163    # remove timeline 
     164    $body_text =~ s/<div([^>]*)class=("|')smwtimeline("|')[\s\S]*?<\/div>//mg; 
     165     
     166    # remove extra bits 
     167    my $extra_bits = "Retrieved from(.+)</a>\""; 
     168    $body_text =~ s/$extra_bits//isg; 
     169     
     170    $body_text =~ s/(<p[^>]*><span[^>]*><o:p>&nbsp;<\/o:p><\/span><\/p>)//isg; 
     171    $body_text =~ s/(<p[^>]*><o:p>&nbsp;<\/o:p><\/p>)//isg; 
     172    $body_text =~ s/<!\[if !vml\]>/<![if vml]>/g;  
     173    $body_text =~ s/(&nbsp;)+/&nbsp;/sg; 
     174     
     175    # get rid of the [edit] buttons 
     176    $body_text =~ s/\[<a([^>]*)>edit<\/a>]//g; 
     177    # get rid of the last time edit information at the bottom 
     178    $body_text =~ s/<a href="([^>]*)edit([^>]*)"([^>]*?)>(\w+)<\/a> \d\d:\d\d,([\s|\w]*?)\(PST\)//g;     
     179    # get rid of the (Redirected from ...) 
     180    $body_text =~ s/\(Redirected from <a ([^>]*)>(\w|\s)*?<\/a>\)//isg;   
     181     
     182    # escape texts macros 
     183    $body_text =~ s/_([^\s]*)_/_<span>$1<\/span>_/isg; 
     184    # may change the links, like Greenstone_Documentation_All.html, then change back 
     185    $body_text =~ s/<a([^>]*)_<span>([^>]*)<\/span>_/<a$1_$2_/isg; 
     186     
     187    # define file delimiter for different platforms 
     188    my $file_delimiter; 
     189    if ($ENV{'GSDLOS'} =~ /^windows$/i) { 
     190       $file_delimiter = "\\"; 
     191    } else { 
     192       $file_delimiter = "/";            
     193    }     
     194     
     195    # IMPORTANT: different delimiter for $base_dir and $file 
     196    # $base_dir use forward slash for both windows and linux 
     197    # print "\nbase_dir : $base_dir\n\n"; # windows: C:/Program Files/Greenstone2.73/collect/wiki/import     
     198                                        # linux: /research/lh92/greenstone/greenstone2.73/collect/wiki/import 
     199    # $file use different delimiters : forward slash for linux; backward slash for windows 
     200    # print "\nfile : $file\n\n";         # windows: greenstone.sourceforge.net\wiki\index.php\Access_Processing_using_DBPlug.html     
     201                                        # linux: greenstone.sourceforge.net/wiki/index.php/Using_GreenstoneWiki.html 
     202     
     203    # get the base url for the MediaWiki website 
     204    my $safe_delimiter = &safe_escape_regexp($file_delimiter); 
     205    my @url_dirs=split($safe_delimiter, $file); 
     206    my $url_base = $url_dirs[0];     
     207         
     208    # Re-check css files associated with MediaWiki pages 
     209    if(defined $base_dir && $base_dir ne ""){    
    137210    my @css_files; 
    138211    my $css_file_count = 0; 
    139     # find all the style sheets imported with import statement 
     212     
     213    # find all the stylesheets imported with @import statement   
    140214    while($head =~ m"<style type=\"text/css\"(.+)import \"(.+)\""ig){ 
    141         $css_files[$css_file_count++] = $2 if defined $2; 
    142     }     
     215        $css_files[$css_file_count++] = $2 if defined $2; 
     216    } 
     217         
     218    # download the stylesheets if we haven't downloaded them yet 
     219        # add prefix to each style element, comment out the body element 
     220        # and copy the files to collection's images folder  
     221    for ($css_file_count = 0; $css_file_count < scalar(@css_files); $css_file_count++) {         
     222         
     223        my $css_file = $css_files[$css_file_count];        
     224         
     225        # remove prefix gli/cache directory                  
     226            $css_file =~ s/^(.+)gli(\\|\/)cache(\\|\/)//i; 
     227                         
     228            # change the \ delimiter in $css_file to / for consistency 
     229            $css_file =~ s/\\/\//isg; 
     230            if($css_file !~ /$url_base/) { 
     231              $css_file = $url_base . $css_file;   
     232            } 
     233             
     234            # trim the ? mark append to the end of a stylesheet 
     235        $css_file =~ s/\?(.+)$//isg;   
     236        
     237            my $css_file_path = &util::filename_cat($base_dir, $css_file);       
     238         
     239        # do nothing if we have already downloaded the css files 
     240        if (! -e $css_file_path) {       
     241          
     242             # check the stylesheet's directory in the import folder 
     243             # if the directory doesn't exist, create one             
     244         my @dirs = split(/\//i,$css_file);      
     245         my $path_check = "$base_dir/";             
     246         for (my $i = 0; $i < (scalar(@dirs)-1); $i++) { 
     247        $path_check .= $dirs[$i] . "/";  
     248        mkdir($path_check) if (! -d $path_check ); 
     249         } 
     250          
     251             # NOTE: wget needs configuration to directly access Internet 
     252             # These files should already have been downloaded if we used the MediaWikiDownload              
     253         # downloading             
     254         $css_file = "http://$css_file";         
     255             print "\ndownloading : " . $css_file . "\n\n"; 
     256         system("wget", "--non-verbose", "$css_file", "--output-document=$css_file_path"); 
     257         if ($? != 0) { 
     258              print "[ERROR] Download Failed! Make sure WGet connects to Internet directly \n"; 
     259              print "[ERROR] OR ues the MediaWikiDownload in the GLI DownloadPanel to download from a MediaWiki website\n"; 
     260              unlink("$css_file_path"); 
     261             } 
     262            } # done with download 
     263         
     264        # add a prefix "#wikispecificstyle" to each element 
     265        # because we want to preserve this website's formats and don't want to mess up with Greenstone formats 
     266            # so we will wrap the web page with a div with id = wikispecificstyle 
     267            my $css_content; 
     268        if(open(INPUT, "<$css_file_path")){      
     269        while(my $line = <INPUT>){ 
     270                    # comment out the body element because we change the body to div 
     271                    $line =~ s/^(\s*)body(\s*){(\s*)$/$1\/*body$2*\/{$3/isg; 
     272                                         
     273            if($line =~ m/^(.+)\{/i || $line =~ m/^(\s)*#/i){                     
     274            $line = "#wikispecificstyle " . $line; 
     275            } 
     276            $css_content .= $line; 
     277        } 
     278        close(INPUT);            
     279        open(OUTPUT, ">$css_file_path"); 
     280        print OUTPUT $css_content; 
     281        close(OUTPUT); 
     282        } 
     283             
     284            # Copy the modified stylesheets to collection's images folder 
     285            # for future customization 
     286            my $images_dir = $base_dir; 
     287            $images_dir =~ s/import$/images/; 
     288            $css_file =~ m/(.*)\/(.*)$/; 
     289            $images_dir = &util::filename_cat($images_dir, $2);             
     290             
     291            if(open(OUTPUT, ">$images_dir")){    
     292              print OUTPUT $css_content; 
     293              close(OUTPUT); 
     294            } 
     295    } 
     296    }     
     297     
     298     
     299    # by default, only preserve navigation box and search box 
     300    # others like toolbox, interaction, languages box, will be removed   
     301     
     302    # extract the larger part -- footer section 
     303    my $print_footer = "<div class=\"printfooter\">(.|\n)+</body>"; 
     304    $body_text =~ /$print_footer/; 
     305    my $footer = ""; 
     306    $footer = $& if defined $&; 
     307    $footer =~ s/<\/body>//isg; 
     308     
     309    # trim the comments first     
     310    $footer =~ s/<!--[\s\S]*?--[ \t\n\r]*>//isg; 
     311     
     312    # contain sections that are to be preserved 
     313    my $preserve_sections = "";    
     314     
     315    # process the navigation section     
     316    my $nav_match_exp = "<div([^>]*)id=(\"|')p-navigation(\"|')(.|\n)*?<\/div>"; 
     317    if (defined $self->{'nav_div_exp'}) { 
     318      $nav_match_exp = $self->{'nav_div_exp'} if ($self->{'nav_div_exp'} =~ /\S/) ; 
     319    } 
     320         
     321    if (defined $self->{'delete_nav'} && ($self->{'delete_nav'} eq "1")) {   
     322        # do nothing     
     323    } else {       
     324      if ($footer =~ m/$nav_match_exp/ig) { 
     325        $preserve_sections = $& ; 
     326      } else { 
     327        print $outhandle "Can't find the navigation section with : $nav_match_exp\n"; 
     328      } 
     329      # if($preserve_sections =~/\S/){ 
     330      #  $preserve_sections .= "</div>"; 
     331      # }             
     332    }           
     333             
     334    # process the searchbox section         
     335    my $searchbox_exp = "<div([^>]*)id=(\"|')p-search(\"|')(.|\\n)*?<\/div>"; 
     336    if(defined $self->{'searchbox_div_exp'}) {                 
     337        $searchbox_exp = $self->{'searchbox_div_exp'} if ($self->{'searchbox_div_exp'} =~ /\S/); 
     338    }     
     339                         
     340    my $searchbox_section = "";     
     341    $footer =~ m/$searchbox_exp/ig; 
     342    $searchbox_section = $& if defined $&;     
     343     
     344    # make the searchbox form work in Greenstone 
     345    if($searchbox_section =~ /\S/){         
     346        # replace action 
     347        $searchbox_section =~ s/action="([^>]*)"/action="_gwcgi_"/isg; 
     348                 
     349        # remove buttons 
     350        $searchbox_section =~ s/name="search"/name="q"/isg; 
     351        $searchbox_section =~ s/name="go"//isg; 
     352        $searchbox_section =~ s/name="fulltext"//isg; 
     353                 
     354        # get collection name from $base_dir for c param         
     355        $base_dir =~ m/\/collect\/(.+)\//i; 
     356        my $collection_name = ""; 
     357        $collection_name = $1 if defined $1; 
     358         
     359        # add Greenstone search params 
     360        my $hidden_params = "<input type=\"hidden\" name=\"a\" value=\"q\"/>\n"  
     361            ."<input type=\"hidden\" name=\"c\" value=\"$collection_name\"/>\n"; 
     362            # ."<input type=\"hidden\" name=\"fqf\" value=\"TX\"/>\n" 
     363            # ."<input type=\"hidden\" name=\"r\" value=\"1\">\n"; 
     364         
     365        $searchbox_section =~ s/<form([^>]*)>/<form$1>\n$hidden_params/isg;          
     366         
     367        # $searchbox_section .= "</div>"; 
     368    } else { 
     369      print $outhandle "Can't find the searchbox section with : $searchbox_section\n"; 
     370    }         
     371     
     372    # either delete or replace the searchbox  
     373    if(defined $self->{'delete_searchbox'} && $self->{'delete_searchbox'} eq "1") { 
     374        # do nothing         
     375    } else { 
     376        $preserve_sections .= "\n$searchbox_section\n"; 
     377    }     
     378     
     379     
     380    if($preserve_sections ne ""){ 
     381      $preserve_sections = "<div id=\"column-one\">\n" . $preserve_sections . "\n</div>\n"; 
     382    } 
     383    $preserve_sections = "</div></div></div>\n" . $preserve_sections . "\n</body>";     
     384     
     385    $body_text =~ s/$print_footer/$preserve_sections/isg; 
     386     
     387     
     388    # delete other forms in the page 
     389    my @forms; 
     390    my $form_count = 0; 
     391    while($body_text =~ m/<form([^>]*)name=("|')([^>]*)("|')/isg){ 
     392        next if($3 eq "q"); 
     393        $forms[$form_count++] = $&; 
     394    } 
     395    foreach my $form (@forms) {       
     396      $body_text =~ s/$form[\s\S]*?<\/form>//m; 
     397    }     
     398     
     399     
     400    # process links.  
     401    # because in the current WGET 1.10 the -k and -E options don't work together 
     402    # need to 'manually' convert the links to relative links 
     403    # Dealing with 3 types of links: 
     404    # -- outgoing links 
     405    #   -- if we have downloaded the target files, link to the internal version (relative link) 
     406    #   -- otherwise, link to the external version (absolute links) 
     407    # -- in-page links (relative link) 
     408     
     409    # NOTE: (important) 
     410    #   must use the MediaWikiDownload in GLI Download Panel to download files from a MediaWiki website 
     411    #   otherwise, the internal links may have problems 
     412     
     413    # remove the title attribute of <a> tag 
     414    $body_text =~ s/<a([^>]*)title="(.*?)"/<a$1/isg; 
     415     
     416    # extract all the links 
     417    my @links; 
     418    my $link_count = 0;     
     419    while($body_text =~ m/(href|src)="([^>\s]*)$url_base\/([^>\s]*)"/ig){         
     420        $links[$link_count++] = "$1=\"$2$url_base/$3\"";         
     421    } 
     422     
     423    foreach my $cur_link (@links) {      
     424        # escape greedy match + character 
     425        $cur_link =~ s/\+/\\+/isg; 
     426         
     427        $cur_link =~ m/(.+)"([^>]*)$url_base\/([^>\s]*)"/;           
     428        my $external_file_path = "$1\"http://$url_base/$3\""; 
     429            
     430        $body_text =~ s/$cur_link/$external_file_path/i;  
     431    } 
     432              
     433    # tag links to new wiki pages as red     
     434    $body_text =~ s/<a([^>]*)class="new"([^>]*)>/<a$1style="color:red"$2)>/gi; 
     435     
     436    # tag links to pages external of the MediaWiki website as blue 
     437    $body_text =~ s/<a([^>]*)class='external text'([^>]*)>/<a$1style="color:blue"$2)>/gi; 
     438         
     439     
     440    # process the table-of-contents section 
     441    # if 'show_toc' is set, add Main_Page's toc to the collection's About page, change extra.dm file      
     442    # 1. read _content_ macro from about.dm 
     443    # 2. append the toc, change all links to the Greenstone internal format for relative links  
     444    # 3. write to the extra.dm 
     445    # TODO: we assume the _about:content_ hasn't been specified before 
     446    #       so needs to add function to handle when the macro is already in the extra.dm        
     447    if($self->{'show_toc'}==1 && $file =~ m/Main_Page.(html|htm)$/){ 
     448     
     449      # extract toc of the Main_Page              
     450      my $mainpage_toc = "";   
     451      my $toc_exp = "<table([^>]*)id=(\"|')toc(\"|')(.|\\n)*</table>\\n"; 
     452      if($self->{'toc_exp'} =~ /\S/){ 
     453         $toc_exp = $self->{'toc_exp'};       
     454      } 
     455      if($body_text =~ /$toc_exp/){                          
     456        $mainpage_toc = $&; 
     457      } 
     458         
     459      if($mainpage_toc =~ /\S/) { 
     460         
     461        # change the in-page links to relative links, for example, change <a href="#section1"> to  
     462        # <a href="_httpquery_&a=extlink&rl=1&href=http://www.mediawikisite.com/Main_Page.html#section1">            
     463        my $file_url_format = $file; 
     464        $file_url_format =~ s/\\/\//isg;  
     465    $file_url_format = "http://" . $file_url_format; 
     466        
     467        # encode as URL, otherwise doesn't work on Windows 
     468        $file_url_format =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg; 
     469    $mainpage_toc =~ s/<a href="([^>"#]*)#([^>"]*)"/<a href="_httpquery_&a=extlink&rl=1&href=$file_url_format#$2"/isg; 
     470         
     471         
     472        # read the collection's extra.dm     
     473        my $macro_path = $base_dir; 
     474        $macro_path =~ s/import$/macros/;        
     475        my $extradm_file = &util::filename_cat($macro_path, "extra.dm");         
     476         
     477        my $extra_dm = ""; 
     478        if(open(INPUT, "<$extradm_file")){                   
     479        while(my $line = <INPUT>){ 
     480        $extra_dm .= $line; 
     481        }            
     482        } else { 
     483            print $outhandle "can't open file $extradm_file\n"; 
     484        } 
     485        close(INPUT); 
     486         
     487        # check whether we have changed the macros 
     488        my @packages = split("package ", $extra_dm); 
     489        my $about_package = ""; 
     490        foreach my $package (@packages) { 
     491          $about_package = "package " . $package if($package =~ /^about/); 
     492        }       
     493                 
     494        my $update_extra_dm = 0;         
     495         
     496        if( $about_package =~ /\S/ && $about_package =~ m/_content_(\s*){/ && $about_package =~ m/$mainpage_toc/){   
     497       print $outhandle "_content_ macro already changed!!!!\n"; 
     498    } 
     499        # if extra.dm doesn't have an "about package" 
     500        elsif ($about_package !~ /\S/) {           
     501          # read _content_ macro from $GSDLHOME/macros/about.dm file           
     502      my $global_about_package = &read_content_from_about_dm();      
     503             
     504          # create the extra _content_ macro for this collection            
     505          # add the original content of the _content_ macro 
     506          $global_about_package =~ m/{(.|\n)*<\/div>\n\n/; 
     507           
     508          # append the new about package to extra.dm 
     509          $extra_dm .= "\n\npackage about\n_content_$&\n\n"; 
     510          $extra_dm .= "<div class=\"section\">\n$mainpage_toc\n</div>\n</div>\n}"; 
     511           
     512          $update_extra_dm = 1; 
     513        }  
     514        # the about package exists, but either doesn't have the _content_ macro or  
     515        # the _content_ macro doesn't contain the toc 
     516        else {         
     517          # check if there is a content macro    
     518          my $content_macro_existed = 0; 
     519          $content_macro_existed = ($about_package =~ /(\s*|\n)_content_(\s*){/); 
     520             
     521          # if there is one 
     522          # append a new section div for toc to the end of the document section                     
     523          if($content_macro_existed ==1) { 
     524            $about_package =~ /(\s*|\n)_content_(\s*){(.|\n)*?}/; 
     525            my $content_macro = $&;                           
     526            my $new_content_macro = $content_macro; 
     527            $new_content_macro =~ s/<div[^>]*class="document">(.|\n)*<\/div>/<div$1class="document">$2\n\n<div class="section">\n$mainpage_toc\n<\/div>\n<\/div>/;               
     528            $extra_dm =~ s/$content_macro/$new_content_macro/mg;                                     
     529          } 
     530          # otherwise, append _content_ macro to the about package 
     531          else { 
     532            my $new_about_package = $about_package;             
     533            $content_macro = &read_content_from_about_dm(); 
     534            $content_macro =~ m/{(.|\n)*<\/div>\n\n/;             
     535             
     536            $new_about_package .= "\n\n_content_$&\n\n"; 
     537            $new_about_package .= "<div class=\"section\">\n$mainpage_toc\n</div>\n</div>\n}";               
     538            $extra_dm =~ s/$about_package/$new_about_package/mg;    
     539          }  
     540           
     541          # in either case, we need to update the extra.dm          
     542          $update_extra_dm = 1; 
     543         }           
     544                   
     545         if($update_extra_dm==1){ 
     546            # write to the extra.dm file of the collection 
     547            if (open(OUTPUT, ">$extradm_file")) { 
     548                print OUTPUT $extra_dm; 
     549            } else { 
     550                print "can't open $extradm_file\n"; 
     551            } 
     552            close(OUTPUT); 
     553         } 
     554      } else { 
     555        print $outhandle "Main_Page doesn't have a table-of-contents section\n"; 
     556      } 
     557    } 
    143558     
    144     # check whether the stylesheet exists 
    145     # if not, download it and copy to the collection's images folder 
    146     for($css_file_count = 0; $css_file_count < scalar(@css_files); $css_file_count++){ 
    147         my $css_file = $css_files[$css_file_count];              
    148         $css_file =~ s/^(.+)gli\/cache\///i; 
    149              
    150         my $css_file_path = "$base_dir/$css_file";       
    151          
    152         if (-e $css_file_path){ # the file already exists 
    153             next; 
    154         }   
    155          
    156         # check the css directory and create one if it's not there 
    157         my @dirs = split(/\//i,$css_file); 
    158         my $path_check = "$base_dir/"; 
    159         for(my $i = 0; $i < (scalar(@dirs)-1); $i++){                            
    160             $path_check .= $dirs[$i] . "/";          
    161             if(! -d $path_check ){ 
    162                 mkdir($path_check);  
    163             } 
    164         }        
    165          
    166         # download 
    167         $css_file = "http://$css_file";      
    168         system("wget", "--non-verbose", "$css_file", "--output-document=$css_file_path"); 
    169         if ($? != 0) {unlink("$css_file_path");} 
    170          
    171         # change every style element to #wikispecificstyle ... 
    172         if(open(INPUT, "<$css_file_path")){ 
    173             my $css_content; 
    174             while(my $line = <INPUT>){               
    175                 if($line =~ m/^(.+)\{/i){ 
    176                     $line = "#wikispecificstyle " . $line;               
    177                 } 
    178                 $css_content .= $line; 
    179             } 
    180             close(INPUT);            
    181             open(OUTPUT, ">$css_file_path"); 
    182             print OUTPUT $css_content; 
    183             close(OUTPUT); 
    184         } 
    185          
    186         # copy to images folder 
    187         # do not copy, because collection can only have one specific stylesheet 
    188         # better to add and modify the style sheets manually 
    189         # @dirs = split(/\//i,$base_dir);                        
    190         # my $collection_base_dir;       
    191         # for(my $i = 0; $i < (scalar(@dirs)-1); $i++){ 
    192         #   $collection_base_dir .= $dirs[$i] . "/";             
    193         # } 
    194         # my $images_folder = $collection_base_dir . "images/"; 
    195         # copy($css_file_path, $images_folder) || die "File cannot be copied."; 
     559    # If delete_toc is set, remove toc and tof contents.     
     560    if (defined $self->{'delete_toc'} && ($self->{'delete_toc'} == 1)){ 
     561    if (defined $self->{'toc_exp'} && $self->{'toc_exp'} =~ /\S/){ 
     562          # print "\nit matches toc_exp!!\n" if $body_text =~ /$self->{'toc_exp'}/; 
     563          if ($body_text =~ /$self->{'toc_exp'}/) { 
     564        $body_text =~ s/$self->{'toc_exp'}//i; 
     565          } 
    196566    } 
    197     } 
    198      
    199     # add sections around h2 tag 
    200     # wrap each section with <div id=\"wikispecificstyle\"></div> to get the wiki styles 
    201     # add search box with each section       
    202     if ($self->{'tag_sections'}) { 
    203     my @sections = ($body_text =~ /<h2>(.+)<\/h2>/gi); 
    204     for(my $i=1; $i < scalar(@sections); $i++){ 
    205         my $section_title = $sections[$i];       
    206         $section_title =~ s/<([^>]*)>//g; 
    207         $section_title =~ s/(^\s|\s$)//g; 
    208         my $section_metadata = "<Section>\n<Description>\n<Metadata name=\"Title\">$section_title</Metadata>\n</Description>\n"; 
    209         if($i !=1){ 
    210             $section_metadata = "</Section>\n" . $section_metadata; 
    211         } 
    212         $section_metadata = "\n<!--\n" . $section_metadata . "-->\n"; 
    213          
    214         $section_metadata .= "<div id=\"wikispecificstyle\">\n<div id=\"content\">\n"; 
    215         $section_metadata = "</div></div>\n" . $section_metadata if $i !=1; 
    216          
    217         $body_text =~ s/<h2>$sections[$i]<\/h2>/$section_metadata<h2>$sections[$i]<\/h2>/i; 
    218          
    219         if($i==scalar(@sections)-1) { 
    220             # $body_text =~ s/<div class=\"printfooter\">/<!--\n<\/Section>\n-->\n<div class=\"printfooter\">/i; 
    221             $body_text =~ s/<div class=\"printfooter\">/<\/div>\n<\/div>\n<!--\n<\/Section>\n-->\n<div class=\"printfooter\">/i; 
    222         } 
    223     } 
    224      }     
    225      
    226     # If delete_nav is enabled, it means to get rid of navigation contents. 
    227     # if (defined $self->{'delete_nav'} && ($self->{'delete_nav'} == 1)){ 
    228     #   if (defined $self->{'nav_exp'}&& $self->{'nav_exp'} =~ /\S/){ 
    229     #       print "it matches nav_exp!!\n" if $body_text =~ /$self->{'nav_exp'}/; 
    230     #       $body_text =~ s/$self->{'nav_exp'}//isg;         
    231     #   } 
    232     #} 
    233     my $searchbox = ""; 
    234     if (defined $self->{'delete_nav'} && ($self->{'delete_nav'} == 1)){ 
    235     my $nav_match_express; 
    236     if (defined $self->{'nav_exp'}&& $self->{'nav_exp'} =~ /\S/) { 
    237         $nav_match_express = $self->{'nav_exp'} ; 
    238     } else { # default setting for mediawiki 
    239         $nav_match_express = "<div class=\"printfooter\">(.|\n)*secs. -->"; 
    240     } 
    241      
    242     print "it matches nav_exp!!\n" if $body_text =~ /$self->{'nav_exp'}/; 
    243      
    244     # $body_text =~ m/<div class=\"printfooter\">(.|\n)*secs. -->/isg;     
    245     $body_text =~ m/$nav_match_express/isg; 
    246     my $navigate = $& if defined $&;       
    247      
    248     # find the search box and add it to the document page 
    249     if(defined $navigate && $navigate =~ /\S/){  
    250         $navigate =~ m/<div id="p-search" class="portlet">(.|\n)*<\/form>/; 
    251         $searchbox = $& . "\n<\/div>\n<\/div>"; 
    252         $searchbox =~ s/action="([^>]*)"/action="\/gsdl\/cgi-bin\/library"/isg; 
    253         $searchbox =~ s/name="search"/name="q"/isg; 
    254         $searchbox =~ s/name="go"//isg; 
    255         $searchbox =~ s/name="fulltext"//isg; 
    256         my $hidden_params = "<input type=\"hidden\" name=\"a\" value=\"q\"/>\n"  
    257                     ."<input type=\"hidden\" name=\"c\" value=\"wikitest\"/>\n" 
    258                     ."<input type=\"hidden\" name=\"fqf\" value=\"TX\"/>" 
    259                     ."<input type=\"hidden\" name=\"t\" value=\"1\">"; 
    260         $searchbox =~ s/<\/form>/$hidden_params<\/form>/isg;     
    261         $searchbox = "\n</div>\n</div><div id=\"wikispecificstyle\"><div id=\"column-one\">$searchbox</div></div>"; 
    262     } 
    263      
    264     # $body_text =~ s/<div class=\"printfooter\">(.|\n)*secs. -->/$searchbox/isg; 
    265     $body_text =~ s/$nav_match_express/$searchbox/isg; 
    266     } 
    267      
    268     if ($self->{'tag_sections'}) { 
    269         $body_text =~ s/<!--\n<\/Section>/$searchbox\n<!--\n<\/Section>/ig; 
    270     } 
    271      
    272     # Tidy up extra new lines 
    273     $body_text =~ s/(<p[^>]*><span[^>]*><o:p>&nbsp;<\/o:p><\/span><\/p>)//isg; 
    274     $body_text =~ s/(<p[^>]*><o:p>&nbsp;<\/o:p><\/p>)//isg; 
    275      
    276     $section_text .= "<!--\n<Section>\n-->\n"; 
    277     my $body = "<body".$body_text; 
    278      
    279     $$textref = $body; 
    280      
    281     # get the base dir for convert absolute links to relative links 
    282     $$textref =~ m"href=\"(.*?)/cache/(.*?)/"i; 
    283     my $basedir = $2;    
    284          
    285     $$textref =~ s/<!\[if !vml\]>/<![if vml]>/g;     
    286     $$textref =~ s/(&nbsp;)+/&nbsp;/sg;       
    287      
    288     # get rid of the [edit] button 
    289     $$textref =~ s/\[<a([^>]*)>edit<\/a>]//g; 
    290  
    291     # get rid of the last time edit information at the bottom 
    292     $$textref =~ s/<a href="(.+)edit(.*?)"(.*?)>(\w+)<\/a> \d\d:\d\d,(.*?)(PST)//g; 
    293      
    294     # get rid of the (Redirected from ...) 
    295     $$textref =~ s/(Redirected from <a ([^>]*)>(\w|\s)*<\/a>)//isg; 
    296      
    297     # escape macros 
    298     $$textref =~ s/_([^\s]*)_/_<span>$1<\/span>_/isg; 
    299     # may change the links, like Greenstone_Documentation_All.html, then change back 
    300     $$textref =~ s/<a([^>]*)_<span>([^>]*)<\/span>_/<a$1_$2_/isg;    
    301      
    302     # convert all the urls to relative url, because current wget 1.10 -k and -E option doesn't work together 
    303     # get rid of the title attribute of a tag 
    304     $$textref =~ s/<a([^>]*)title="(.*?)"/<a$1/isg; 
    305     # find the relative path of current directory          
    306     if($basedir ne ""){ 
    307         my @dirs=split("\/", $file); 
    308         my $dirnum = scalar(@dirs); 
    309         my $replace = ""; 
    310         for(my $i=0; $i<$dirnum-2; $i++){ 
    311             $replace .= "../"; 
    312         } 
    313         # test if the linked relative file exists, if not, link to the internet version 
    314         $$textref =~ s/(href|src)="([^>]*)$basedir\/([^>]*)"/$1="$replace$3"/gi;                 
    315         # my @total_links = ($$textref =~ m/(href|src)="([^>]*)$basedir\/([^>]*)"/gi); 
    316         # print $outhandle "\nnumber of total links: " . scalar(@total_links)."\n"; 
    317         # for(my $cur_link_no = 0; $cur_link_no < scalar(@total_links); $cur_link_no++){ 
    318          
    319         #while($$textref =~ m/(href|src)="([^>]*)$basedir\/([^>]*)"/gi){ 
    320             #$total_links[$cur_link_no] =~ m/(href|src)="([^>]*)$basedir\/([^>]*)"/i; 
    321         #   my $prefix = $1; 
    322         #   my $link = $&;  
    323         #   my $rel_file_name = $3;                      
    324         #   my $rel_link = "$replace$rel_file_name"; 
    325             # print $outhandle "catched link==> $link\nrelative link==> $rel_link\n"; 
    326         #   if(-e $rel_link){ 
    327         #       $rel_link = "$prefix=\"$rel_link\""; 
    328         #       $$textref =~ s/$link/$rel_link/i; 
    329         #   }else{ 
    330         #       my $ext_link = "$prefix=\"http:\/\/$basedir\/$rel_file_name\""; 
    331                 # print $outhandle "external link==> $ext_link\n"; 
    332         #       $$textref =~ s/$link/$ext_link/i; #s/$link/$prefix="http:\/\/$rel_file_name"/i; 
    333         #   } 
    334         #}           
    335              
    336          
    337         # tag the link to new wiki pages as red 
    338         $$textref =~ s/(href|src)="$replace([^>]*)&amp;action=edit([^>]*)"/$1="http:\/\/$basedir\/$2&amp;action=edit$3"/gi;      
    339         $$textref =~ s/<a([^>]*)class="new"([^>]*)>/<a$1style="color:red"$2)>/gi; 
    340          
    341         # tag the link to external pages as blue 
    342         $$textref =~ s/<a([^>]*)class='external text'([^>]*)>/<a$1style="color:blue"$2)>/gi;     
    343          
    344         #print $outhandle $$textref; 
    345     } 
    346      
    347     # if 'show_toc' is set, put the table of content on the Wiki Main_Page to the about page of the collection 
    348     # 1. read _content_ macro from about.dm 
    349     # 2. append the toc, change all links to the Greenstone internal format for relative links  
    350     # 3. write to the extra.dm 
    351     # TODO: currently we suppose the _about:content_ hasn't been specified before 
    352     #       so needs to add function to handle when the macro is already in the extra.dm     
    353     if($self->{'show_toc'}==1 && $file =~ m/Main_Page.(html|htm)$/){ 
    354         my $macro_path = $base_dir; 
    355         $macro_path =~ s/import$/macros/; 
    356         my $extra_dm; 
    357         my $extradm_file = "$macro_path/extra.dm"; 
    358         if(open(INPUT, "<$extradm_file")){       
    359             while(my $line = <INPUT>){ 
    360                 $extra_dm .= $line; 
    361         } 
    362             close(INPUT);                            
    363              
    364             if($extra_dm =~ m/package about/ && $extra_dm =~ m/_content_(\s)*{/){   
    365                 print $outhandle "already changed!!!!\n";                
    366             } else { 
    367                 # read _content_ macro from about.dm file 
    368                 my $about_macro = $ENV{'GSDLHOME'} . "/macros/about.dm";                 
    369                 my $about_page_content = ""; 
    370                 if(open(INPUT, "<$about_macro")){ 
    371                     while(my $line=<INPUT>){ 
    372                         $about_page_content .= $line; 
    373                     } 
    374                 }else{ 
    375                     print $outhandle "can't open file $about_macro\n"; 
    376                 }            
    377                 close(INPUT); 
    378                  
    379                 # extract the _content_ macro 
    380                 $about_page_content =~ m/_content_ {(.|\n)*<\/div>\n\n<\/div>\n}/i; 
    381                 $about_page_content = $&; 
    382                      
    383                 # extract toc of the Main_Page 
    384                 my $mainpage_content = ""; 
    385                 if($self->{'toc_exp'} =~ /\S/){ 
    386                     $$textref =~ /$self->{'toc_exp'}/; 
    387                     $mainpage_content = $&; 
    388                 } else { 
    389                     # $mainpage_content =~ s/<!-- start content -->(.|\n)*<!-- end content -->/$1/igs; 
    390                 } 
    391                 # print $outhandle "---------\n$$textref\n--------\n\n";                 
    392                 # print $outhandle "==========\n$mainpage_content\n==========\n\n"; 
    393                      
    394                 # add toc to the _content_ macro 
    395                 $about_page_content =~ m/{(.|\n)*<\/div>\n\n/; 
    396                 $extra_dm .= "package about\n_content_$&\n\n<div class=\"section\">\n$mainpage_content\n</div>\n</div>\n}"; 
    397                      
    398                 # change all links to the internal Greenstone relative link format 
    399                 $extra_dm =~ s/<a href="([^>]*)"/<a href="_httpquery_&a=extlink&rl=1&href=http:\/\/$basedir$1"/isg; 
    400                 $extra_dm =~ s/(\.\.\/)+/\//isg; 
    401                 # print $outhandle "to add---------\n$extra_dm\n--------\n"; 
    402                         
    403                 # write to the extra.dm file of the collection 
    404                 open(OUTPUT, ">$extradm_file"); 
    405                 print OUTPUT $extra_dm; 
    406                 close(OUTPUT);           
    407             }    
    408         } else {  
    409             print $outhandle "can't open file $extradm_file\n"; 
    410         } 
    411     } 
    412      
    413     # If delete_toc is enabled, it means to get rid of toc and tof contents. 
    414     # get rid of TOC and TOF sections and their title 
    415     if (defined $self->{'delete_toc'} && ($self->{'delete_toc'} == 1)){ 
    416         if (defined $self->{'toc_exp'} && $self->{'toc_exp'} =~ /\S/){ 
    417             # $body_text =~ s/<p class=(($self->{'toc_exp'})[^>]*)>(.+?)<\/p>//isg;      
    418             # print "it matches toc_exp!!\n" if $body_text =~ /$self->{'toc_exp'}/;      
    419             # $body_text =~ s/$self->{'toc_exp'}//i; 
    420             print "it matches toc_exp!!\n" if $$textref =~ /$self->{'toc_exp'}/;     
    421             $$textref =~ s/$self->{'toc_exp'}//i; 
    422         }    
    423     } 
    424      
    425     # To add a layer on top of the wiki page 
    426     # so as to keep the wiki style inside the wiki page 
    427     # and keep the Greenstone style at the same time     
    428     $$textref =~ s/<body([^>]*)>/$&\n<div id="wikispecificstyle">\n/is;     
    429     $$textref =~ s/<\/body>/<\/div><\/body>/is;  
    430      
    431     # tag with sections     
    432     $$textref =~ s/<body([^>]*)>/$&\n<!--\n<Section>\n<Description>\n<Metadata name=\"Title\">$doctitle<\/Metadata>\n<\/Description>\n-->\n/is; 
    433     $$textref =~ s/<\/body>/\n<!--\n<\/Section>\n-->\n/is; 
    434      
    435     #print $outhandle "\n\n$$textref\n\n"; 
    436      
    437     # use description tags     
    438     if ($self->{'description_tags'}) { 
    439         my $cursection = $doc_obj->get_top_section(); 
    440         # remove the html header - note that doing this here means any 
    441         # sections defined within the header will be lost (so all <Section> 
    442         # tags must appear within the body of the HTML) 
    443         my ($head_keep) = ($$textref =~ m/^(.*?)<body[^>]*>/is); 
    444  
    445         $$textref =~ s/^.*?<body[^>]*>//is; 
    446         $$textref =~ s/(<\/body[^>]*>|<\/html[^>]*>)//isg; 
    447  
    448         my $opencom = '(?:<!--|&lt;!(?:&mdash;|&#151;|--))'; 
    449         my $closecom = '(?:-->|(?:&mdash;|&#151;|--)&gt;)'; 
    450  
    451         my $lt = '(?:<|&lt;)'; 
    452         my $gt = '(?:>|&gt;)'; 
    453         my $quot = '(?:"|&quot;|&rdquo;|&ldquo;)'; 
    454  
    455         # my $dont_strip = ''; 
    456         # if ($self->{'no_strip_metadata_html'}) { 
    457         #    ($dont_strip = $self->{'no_strip_metadata_html'}) =~ s{,}{|}g; 
    458         # } 
    459  
    460         my $found_something = 0;  
    461         my $top = 1; 
    462         while ($$textref =~ s/^(.*?)$opencom(.*?)$closecom//s) { 
    463             my $text = $1; 
    464             my $comment = $2; 
    465             if (defined $text) { 
    466                 # text before a comment - note that getting to here 
    467                 # doesn't necessarily mean there are Section tags in 
    468                 # the document 
    469                 # print $outhandle "section text:\n$text\n"; 
    470                 $self->process_section(\$text, $base_dir, $file, $doc_obj, $cursection); 
    471             } 
    472             while ($comment =~ s/$lt(.*?)$gt//s) { 
    473                 my $tag = $1; 
    474                 if ($tag eq "Section") { 
    475                     $found_something = 1; 
    476                     $cursection = $doc_obj->insert_section($doc_obj->get_end_child($cursection)) unless $top; 
    477                     $top = 0; 
    478                 } elsif ($tag eq "/Section") { 
    479                     $found_something = 1; 
    480                     $cursection = $doc_obj->get_parent_section ($cursection); 
    481                 } elsif ($tag =~ /^Metadata name=$quot(.*?)$quot/s) { 
    482                     my $metaname = $1; 
    483                     my $accumulate = $tag =~ /mode=${quot}accumulate${quot}/ ? 1 : 0; 
    484                     $comment =~ s/^(.*?)$lt\/Metadata$gt//s; 
    485                     my $metavalue = $1; 
    486                     $metavalue =~ s/^\s+//; 
    487                     $metavalue =~ s/\s+$//; 
    488                     # assume that no metadata value intentionally includes 
    489                     # carriage returns or HTML tags (if they're there they 
    490                     # were probably introduced when converting to HTML from 
    491                     # some other format). 
    492                     # actually some people want to have html tags in their 
    493                     # metadata. 
    494                     $metavalue =~ s/[\cJ\cM]/ /sg; 
    495                     # $metavalue =~ s/<[^>]+>//sg unless $dont_strip && ($dont_strip eq 'all' || $metaname =~ /^($dont_strip)$/); 
    496                     $metavalue =~ s/\s+/ /sg; 
    497                     # print $outhandle "metaname = $metaname\nmetavalue = $metavalue\n"; 
    498                     if ($accumulate) { 
    499                         $doc_obj->add_utf8_metadata($cursection, $metaname, $metavalue); 
    500                     } else { 
    501                         $doc_obj->set_utf8_metadata_element($cursection, $metaname, $metavalue);     
    502                     } 
    503                 } elsif ($tag eq "Description" || $tag eq "/Description") { 
    504                     # do nothing with containing Description tags 
    505                 } else { 
    506                     # simple HTML tag (probably created by the conversion 
    507                     # to HTML from some other format) - we'll ignore it and 
    508                     # hope for the best ;-) 
    509                 } 
    510             } 
    511         }# end while 
    512  
    513         if ($cursection ne "") { 
    514             print $outhandle "HTMLPlug: WARNING: $file contains unmatched <Section></Section> tags\n"; 
    515         } 
    516  
    517         $$textref =~ s/^.*?<body[^>]*>//is; 
    518         $$textref =~ s/(<\/body[^>]*>|<\/html[^>]*>)//isg; 
    519         if ($$textref =~ /\S/) { 
    520             if (!$found_something) { 
    521                 if ($self->{'verbosity'} > 2) { 
    522                     print $outhandle "HTMLPlug: WARNING: $file appears to contain no Section tags so\n"; 
    523                     print $outhandle "          will be processed as a single section document\n"; 
    524                 } 
    525      
    526                 # go ahead and process single-section document 
    527                 $self->process_section($textref, $base_dir, $file, $doc_obj, $cursection); 
    528  
    529             } else { 
    530                 print $outhandle "HTMLPlug: WARNING: $file contains the following text outside\n"; 
    531                 print $outhandle "          of the final closing </Section> tag. This text will\n"; 
    532                 print $outhandle "          be ignored."; 
    533  
    534                 my ($text); 
    535                 if (length($$textref) > 30) { 
    536                     $text = substr($$textref, 0, 30) . "..."; 
    537                 } else { 
    538                     $text = $$textref; 
    539                 } 
    540                 $text =~ s/\n/ /isg; 
    541                 print $outhandle " ($text)\n"; 
    542             } 
    543         } elsif (!$found_something) { 
    544             if ($self->{'verbosity'} > 2) { 
    545             # may get to here if document contained no valid Section 
    546             # tags but did contain some comments. The text will have 
    547             # been processed already but we should print the warning 
    548             # as above and extract metadata 
    549             print $outhandle "HTMLPlug: WARNING: $file appears to contain no Section tags and\n"; 
    550             print $outhandle "          is blank or empty.  Metadata will be assigned if present.\n"; 
    551             } 
    552         } 
    553     } # if $self->{'description_tags'} 
    554     else { 
    555         # remove header and footer 
    556         # if (!$self->{'keep_head'}) { 
    557         #    $$textref =~ s/^.*?<body[^>]*>//is; 
    558         #    $$textref =~ s/(<\/body[^>]*>|<\/html[^>]*>)//isg; 
    559         # } 
    560  
    561         # single section document 
    562         # $self->process_section($textref, $base_dir, $file, $doc_obj, $cursection);         
    563          
    564         # Important: to get the relative links to work, 
    565         # 1: use the below statement instead of the above one 
    566         # 2. cannot have process_section method.  
    567         # why????? 
    568         $self->SUPER::process(@_); 
    569     } 
    570     return 1; 
     567    }         
     568     
     569    $$textref = "<body" . $body_text; 
     570     
     571    # Wrap the whole page with <div id="wikispecificstyle"></div> 
     572    # keep the style of this website and don't mess up with the Greenstone styles 
     573    $$textref =~ s/<body([^>]*)>/$&\n<div id="wikispecificstyle">\n/is; 
     574    $$textref =~ s/<\/body>/<\/div><\/body>/is;      
    571575            
    572     #$self->SUPER::process(@_); 
     576    $self->SUPER::process(@_); 
     577     
     578    return 1; 
    573579} 
    574  
    575  
    576  
    577 # note that process_section may be called multiple times for a single 
    578 # section (relying on the fact that add_utf8_text appends the text to any 
    579 # that may exist already). 
    580 # sub process_section { 
    581 #    my $self = shift (@_); 
    582 #    my ($textref, $base_dir, $file, $doc_obj, $cursection) = @_; 
    583  
    584     # trap links 
    585     # if (!$self->{'nolinks'}) { 
    586     # usemap="./#index" not handled correctly => change to "#index" 
    587     # $$textref =~ s/(<img[^>]*?usemap\s*=\s*[\"\']?)([^\"\'>\s]+)([\"\']?[^>]*>)/ 
    588         #$self->replace_usemap_links($1, $2, $3)/isge; 
    589  
    590     #$$textref =~ s/(<(?:a|area|frame|link|script)\s+[^>]*?\s*(?:href|src)\s*=\s*[\"\']?)([^\"\'>\s]+)([\"\']?[^>]*>)/ 
    591         #$self->replace_href_links ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge; 
    592     #} 
    593  
    594     # trap images 
    595  
    596     # allow spaces if inside quotes - jrm21 
    597     #$$textref =~ s/(<(?:img|embed|table|tr|td)[^>]*?(?:src|background)\s*=\s*)([\"\'][^\"\']+[\"\']|[^\s>]+)([^>]*>)/ 
    598     #$self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge; 
    599  
    600     # add text to document object 
    601     # turn \ into \\ so that the rest of greenstone doesn't think there 
    602     # is an escape code following. (Macro parsing loses them...) 
    603 #    $$textref =~ s/\\/\\\\/go; 
    604      
    605 #    $doc_obj->add_utf8_text($cursection, $$textref); 
    606 #} 
    607580 
    608581 
     
    651624} 
    652625 
     626sub safe_escape_regexp 
     627{ 
     628  my $regexp = shift (@_); 
     629   
     630  # if ($ENV{'GSDLOS'} =~ /^windows$/i) { 
     631    $regexp =~ s/\\/\\\\/isg;     
     632  #} else { 
     633    $regexp =~ s/\//\\\//isg;          
     634  #} 
     635  return $regexp; 
     636} 
     637 
     638sub read_content_from_about_dm 
     639{ 
     640  my $about_macro_file = &util::filename_cat($ENV{'GSDLHOME'}, "macros", "about.dm"); 
     641  my $about_page_content = ""; 
     642  if (open(INPUT, "<$about_macro_file")){ 
     643    while (my $line=<INPUT>){ 
     644      $about_page_content .= $line; 
     645    } 
     646  } else { 
     647    print $outhandle "can't open file $about_macro_file\n"; 
     648  }          
     649  close(INPUT); 
     650             
     651  # extract the _content_ macro 
     652  $about_page_content =~ m/_content_ {(.|\n)*<\/div>\n\n<\/div>\n}/i; 
     653  $about_page_content = $&; 
     654   
     655  return $about_page_content; 
     656} 
     657 
    6536581; 
  • gsdl/branches/gsdl-2.74/perllib/strings.properties

    r14198 r14270  
    6060 
    6161# -- buildcol.pl -- 
    62 buildcol.disable_OAI:tick to make it not providing the OAI service for this collection. 
    6362 
    6463buildcol.archivedir:Where the archives live. 
     
    153152downloadfrom.download_mode:The type of server to download from 
    154153downloadfrom.download_mode.Web:HTTP   
     154downloadfrom.download_mode.MediaWiki:MediaWiki website 
    155155downloadfrom.download_mode.OAI: Open Archives Initiative  
    156156downloadfrom.download_mode.z3950:z3950 server 
     
    547547GenericList.desc:A general and flexible list classifier with most of the abilities of AZCompactList, but with better Unicode, metadata and sorting capabilities. 
    548548GenericList.metadata:Metadata fields used for classification. Use '/' to separate the levels in the hierarchy and ';' to separate metadata fields within each level. 
     549GenericList.partition_name_length:The length of the partition name; defaults to a variable length from 1 up to 3 characters, depending on how many are required to distinguish the partition start from its end. This option only applies when partition_type_within_level is set to 'constant_size'. 
    549550GenericList.partition_size_within_level:The number of items in each partition (only applies when partition_type_within_level is set to 'constant_size'). 
    550551GenericList.partition_type_within_level:The type of partitioning done: either 'per_letter', 'constant_size', or 'none'. 
     
    861862 
    862863MARCXMLPlug.metadata_mapping_file:Name of file that includes mapping details from MARC values to Greenstone metadata names. Defaults to 'marctodc.txt' found in the site's etc directory. 
     864 
     865MediaWikiPlug.desc:Plugin for importing MediaWiki web pages 
     866 
     867MediaWikiPlug.show_toc: Add the 'table of contents' from the MediaWiki website's main page to the collection's About page. Requires a Perl regular expression in toc_exp below that matches the 'table of contents' section. 
     868 
     869MediaWikiPlug.delete_toc:Delete the 'table of contents' section on each HTML page. Requires a Perl regular expression in toc_exp below that matches the 'table of contents' section. 
     870 
     871MediaWikiPlug.toc_exp:A Perl regular expression to match the 'table of contents'. The default value matches common MediaWiki web pages. 
     872 
     873MediaWikiPlug.delete_nav:Delete the navigation section. Requires a Perl regular expression to be specified in nav_div_exp below. 
     874 
     875MediaWikiPlug.nav_div_exp:A Perl regular expression to match the navigation section. The default value matches common MediaWiki web pages.  
     876 
     877MediaWikiPlug.delete_searchbox:Delete the searchbox section. Requires a Perl regular expression to be specified in searchbox_div_id below. 
     878 
     879MediaWikiPlug.searchbox_div_id:A Perl regular expression to match the searchbox section. The default value matches common MediaWiki web pages. 
     880 
     881MediaWikiPlug.remove_title_suffix_exp:A Perl regular expression to trim the extracted title. For example, \\s-(.+) will trim title contents after "-". 
    863882 
    864883MetadataCSVPlug.desc:A plugin for metadata in comma-separated value format. The Filename field in the CSV file is used to determine which document the metadata belongs to. 
     
    10471066 
    10481067BasDownload.desc:Base class for Download modules 
     1068 
     1069MediaWikiDownload.desc:A module for downloading from MediaWiki websites 
     1070MediaWikiDownload.reject_filetype:List of URL patterns to ignore, separated by commas, e.g. *cgi-bin*,*.ppt ignores hyperlinks that contain either 'cgi-bin' or '.ppt' 
     1071MediaWikiDownload.reject_filetype_disp:List of URL patterns to ignore, separated by commas 
     1072MediaWikiDownload.exclude_directories:List of directories to exclude (each must be an absolute path), e.g. /people,/documentation will exclude the 'people' and 'documentation' subdirectories of the site being crawled. 
     1073MediaWikiDownload.exclude_directories_disp:List of directories to exclude, separated by commas 
    10491074 
    10501075OAIDownload.desc:A module for downloading from OAI repositories 
  • gsdl/branches/gsdl-2.74/src/recpt/authenaction.cpp

    r14014 r14270  
    3333#include "infodbclass.h" 
    3434#include "gsdltimes.h" 
    35 #include "userdb.h" 
    3635 
    3736 
     
    129128 
    130129void authenaction::configure (const text_t &key, const text_tarray &cfgline) { 
    131   // get the password filename 
    132   if (cfgline.size() == 1) { 
    133     if (key == "usersfile") usersfile = cfgline[0]; 
    134     else if (key == "keyfile") keyfile = cfgline[0]; 
    135     else if (key == "keydecay") keydecay = cfgline[0].getint(); 
    136   } 
    137  
    138130  action::configure (key, cfgline); 
    139131} 
    140132 
    141133bool authenaction::init (ostream &logout) { 
    142  
    143134  if (gdbmhome.empty()) { 
    144135    logout << "ERROR (authenaction::init) gdbmhome is not set\n"; 
    145136    return false; 
    146137  } 
    147  
    148   if (usersfile.empty()) usersfile = filename_cat (gdbmhome, "etc", "users.db"); 
    149   if (keyfile.empty()) keyfile = filename_cat (gdbmhome, "etc", "key.db"); 
    150138 
    151139  return action::init (logout); 
     
    169157  if (args["uan"].empty()) return true; 
    170158 
    171   userdbclass *user_database = new userdbclass(usersfile); 
    172   keydbclass *key_database = new keydbclass(keyfile); 
    173  
    174159  // failure means we have to redirect to this action to get authentication 
    175160  // (if we are not already doing this) 
     
    188173  else args_us = "failed"; 
    189174 
    190   // make sure we have a username 
    191   if (!args_un.empty() && (user_database->get_user_info (args_un, thisuser) == ERRNO_SUCCEED)) { 
     175  // make sure we have a username                                                                                                  
     176  int status = user_database->get_user_info (args_un, thisuser); 
     177  if (!args_un.empty() && (status == ERRNO_SUCCEED)) { 
    192178    if (!args_pw.empty()) { 
    193179      // we are authenticating using a password 
     
    286272  } 
    287273 
    288   //close the database 
    289   user_database->closedatabase(); 
    290   key_database->closedatabase(); 
    291274  return true; 
    292275} 
  • gsdl/branches/gsdl-2.74/src/recpt/authenaction.h

    r7432 r14270  
    3333#include "action.h" 
    3434#include "text_t.h" 
     35#include "userdb.h" 
    3536#include "receptionist.h" 
    3637 
     
    4142class authenaction : public action { 
    4243protected: 
    43   text_t usersfile; 
    44   text_t keyfile; 
     44  userdbclass *user_database; 
     45  keydbclass *key_database; 
    4546  int keydecay; 
    4647 
     
    5051  authenaction (); 
    5152  virtual ~authenaction () {} 
     53 
     54  void set_userdb(userdbclass *udb) {user_database = udb;} 
     55 
     56  void set_keydb (keydbclass *kdb) {key_database = kdb;} 
    5257 
    5358  void set_receptionist (receptionist *therecpt) {recpt=therecpt;} 
  • gsdl/branches/gsdl-2.74/src/recpt/librarymain.cpp

    r12517 r14270  
    178178  recpt.add_action (adocumentaction); 
    179179 
     180  text_t userdbfile = filename_cat(gsdlhome, "etc", "users.db"); 
     181  userdbclass *udb = new userdbclass(userdbfile); 
     182 
     183  text_t keydbfile = filename_cat(gsdlhome, "etc", "key.db"); 
     184  keydbclass *kdb = new keydbclass(keydbfile); 
     185 
    180186#ifdef GSDL_USE_USERS_ACTION 
    181   recpt.add_action (new usersaction()); 
     187  usersaction *ausersaction = new usersaction(); 
     188  ausersaction->set_userdb(udb); 
     189  recpt.add_action (ausersaction); 
    182190#endif 
    183191 
     
    190198#ifdef GSDL_USE_AUTHEN_ACTION 
    191199  authenaction *aauthenaction = new authenaction(); 
     200  aauthenaction->set_userdb(udb); 
     201  aauthenaction->set_keydb(kdb); 
    192202  aauthenaction->set_receptionist(&recpt); 
    193203  recpt.add_action (aauthenaction); 
     
    272282  cgiwrapper (recpt, ""); 
    273283  delete cservers; 
     284  delete udb; 
     285  delete kdb; 
    274286 
    275287  // clean up the actions 
  • gsdl/branches/gsdl-2.74/src/recpt/userdb.cpp

    r14013 r14270  
    7777userdbclass::userdbclass(const text_t &userdbfilename) 
    7878{ 
    79   activated = (!userdb.opendatabase(userdbfilename, GDBM_WRCREAT, 1000, true)) ? false : true; 
     79  storeduserdbfilename = userdbfilename; 
     80  activated = (!userdb.opendatabase(storeduserdbfilename, GDBM_READER, 1000, true)) ? false : true; 
     81  if (activated == false)  
     82    { 
     83      activated = (!userdb.opendatabase(storeduserdbfilename, GDBM_WRCREAT, 1000, true)) ? false : true; 
     84      if (activated == true) 
     85        { 
     86          userdb.closedatabase(); 
     87          activated = (!userdb.opendatabase(storeduserdbfilename, GDBM_READER, 1000, true)) ? false : true; 
     88        } 
     89    } 
     90     
    8091  external_db = false; 
    8192} 
     
    227238      info["groups"] = userinfo.groups; 
    228239      info["comment"] = userinfo.comment; 
    229        
    230       return (userdb.setinfo (username, info)) ? ERRNO_SUCCEED : ERRNO_GDBMACTIONFILED ; 
     240      userdb.closedatabase(); 
     241      userdb.opendatabase(storeduserdbfilename, GDBM_WRCREAT, 1000, true); 
     242      int result = (userdb.setinfo (username, info)) ? ERRNO_SUCCEED : ERRNO_GDBMACTIONFILED; 
     243      userdb.closedatabase(); 
     244      userdb.opendatabase(storeduserdbfilename, GDBM_READER, 1000, true);          
     245      return  result; 
    231246    } 
    232247  return ERRNO_CONNECTIONFAILED; 
     
    290305  if (activated == true) 
    291306    { 
     307      userdb.closedatabase(); 
     308      userdb.opendatabase(storeduserdbfilename, GDBM_WRCREAT, 1000, true); 
    292309      userdb.deletekey (username); 
     310      userdb.closedatabase(); 
     311      userdb.opendatabase(storeduserdbfilename, GDBM_READER, 1000, true); 
    293312      return ERRNO_SUCCEED; 
    294313    } 
     
    335354  return ERRNO_CONNECTIONFAILED; 
    336355} 
    337  
    338 //an alernative way to colse the database if the class can't reach the destructor 
    339 void userdbclass::closedatabase() 
    340 { 
    341    userdb.closedatabase(); 
    342 } 
    343  
    344356//==========================================// 
    345357//       userdbclass functions (End)        // 
     
    351363keydbclass::keydbclass(const text_t &keydbfilename) 
    352364{ 
    353   activated = (!keydb.opendatabase(keydbfilename, GDBM_WRCREAT, 1000, true)) ? false : true; 
     365  storedkeydbfilename = keydbfilename; 
     366  activated = (!keydb.opendatabase(storedkeydbfilename, GDBM_READER, 1000, true)) ? false : true; 
     367  if (activated == false)  
     368    { 
     369      activated = (!keydb.opendatabase(storedkeydbfilename, GDBM_WRCREAT, 1000, true)) ? false : true; 
     370      if (activated == true) 
     371        { 
     372          keydb.closedatabase(); 
     373          activated = (!keydb.opendatabase(storedkeydbfilename, GDBM_READER, 1000, true)) ? false : true; 
     374        } 
     375    } 
    354376  external_db = false; 
    355377} 
     
    399421      keydata["time"] = time2text(time(NULL)); 
    400422       
     423      keydb.closedatabase(); 
     424      keydb.opendatabase(storedkeydbfilename, GDBM_WRCREAT, 1000, true); 
    401425      if (!keydb.setinfo (crypt_userkey, keydata))  
    402426        { 
    403427          userkey.clear(); // failed 
    404428        } 
     429      keydb.closedatabase(); 
     430      keydb.opendatabase(storedkeydbfilename, GDBM_READER, 1000, true);  
    405431       
    406432      return userkey; 
     
    434460            // succeeded, update the key's time 
    435461            info["time"] = time2text(time(NULL)); 
     462            keydb.closedatabase(); 
     463            keydb.opendatabase(storedkeydbfilename, GDBM_WRCREAT, 1000, true); 
    436464            keydb.setinfo (crypt_key, info); 
     465            keydb.closedatabase(); 
     466            keydb.opendatabase(storedkeydbfilename, GDBM_READER, 1000, true);  
    437467            return true; 
    438468          } 
     
    477507    } 
    478508} 
    479  
    480 //an alernative way to colse the database if the class can't reach the destructor 
    481 void keydbclass::closedatabase() 
    482 { 
    483    keydb.closedatabase(); 
    484 } 
    485509//==========================================// 
    486510//       keydbclass functions (End)         // 
  • gsdl/branches/gsdl-2.74/src/recpt/userdb.h

    r14015 r14270  
    6464  bool external_db; 
    6565  bool activated; 
     66  text_t storeduserdbfilename;  
    6667   
    6768 public: 
     
    114115  // on success 
    115116  int get_user_list (text_tarray &userlist); 
    116  
    117   //an alernative way to colse the database if the class can't reach the destructor 
    118   void closedatabase(); 
    119117}; 
    120118 
     
    126124  bool external_db; 
    127125  bool activated; 
     126   text_t storedkeydbfilename;  
    128127   
    129128 public: 
     
    146145  // use sparingly, it can be quite an expensive function 
    147146  void remove_old_keys (int keydecay); 
    148  
    149  //an alernative way to colse the database if the class can't reach the destructor 
    150   void closedatabase(); 
    151147}; 
    152148 
  • gsdl/branches/gsdl-2.74/src/recpt/usersaction.cpp

    r13844 r14270  
    147147 
    148148void usersaction::configure (const text_t &key, const text_tarray &cfgline) { 
    149   // get the password filename 
    150   if (cfgline.size() == 1) { 
    151     if (key == "usersfile") usersfile = cfgline[0]; 
    152   } 
    153  
    154149  action::configure (key, cfgline); 
    155150} 
     
    161156    return false; 
    162157  } 
    163  
    164   if (usersfile.empty()) usersfile = filename_cat (gdbmhome, "etc", "users.db"); 
    165158 
    166159  return action::init (logout); 
     
    193186                 outconvertclass &outconvert, ostream &textout,  
    194187                 ostream &logout) { 
    195  
    196   // open the user database (it will be used a lot) 
    197   user_database = new userdbclass(usersfile); 
    198188 
    199189  if (args["uma"] == "adduser" || args["uma"] == "edituser") { 
  • gsdl/branches/gsdl-2.74/src/recpt/usersaction.h

    r13844 r14270  
    3232#include "gsdlconf.h" 
    3333#include "action.h" 
     34#include "userdb.h" 
    3435#include "text_t.h" 
    35 #include "userdb.h" 
    3636 
    3737 
    3838class usersaction : public action { 
    3939protected: 
    40   text_t usersfile; 
    41   userdbclass* user_database; 
     40  userdbclass *user_database; 
    4241 
    4342public: 
     
    4847 
    4948  bool init (ostream &logout); 
     49 
     50  void set_userdb(userdbclass *udb) {user_database = udb;} 
    5051   
    5152  text_t get_action_name () {return "um";}