Changeset 25961

Show
Ignore:
Timestamp:
17.07.2012 15:22:12 (7 years ago)
Author:
kjdon
Message:

More cunning document types. GS3 has a new one, pagedhierarchy, for documents with both internal structure and sequences of pages. A new documenttype option, auto, will select the most appropriate document type for each document: paged/hierarchy for GS2, paged/pagedhierarchy for GS3. The documenttype option is displayed differently in GS2 and GS3 modes.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/PagedImagePlugin.pm

    r24548 r25961  
    147147} 
    148148 
    149 my $type_list = 
    150     [ { 'name' => "paged", 
    151         'desc' => "{PagedImagePlugin.documenttype.paged}" }, 
     149my $gs2_type_list = 
     150    [ { 'name' => "auto", 
     151    'desc' => "{PagedImagePlugin.documenttype.auto2}" }, 
     152      { 'name' => "paged", 
     153        'desc' => "{PagedImagePlugin.documenttype.paged2}" }, 
    152154      { 'name' => "hierarchy", 
    153         'desc' => "{PagedImagePlugin.documenttype.hierarchy}" } ]; 
     155        'desc' => "{PagedImagePlugin.documenttype.hierarchy}" } 
     156    ]; 
     157 
     158my $gs3_type_list =      
     159    [ { 'name' => "auto", 
     160    'desc' => "{PagedImagePlugin.documenttype.auto3}" }, 
     161      { 'name' => "paged", 
     162        'desc' => "{PagedImagePlugin.documenttype.paged3}" }, 
     163      { 'name' => "hierarchy", 
     164        'desc' => "{PagedImagePlugin.documenttype.hierarchy}" },  
     165      { 'name' => "pagedhierarchy", 
     166        'desc' => "{PagedImagePlugin.documenttype.pagedhierarchy}" } 
     167    ]; 
    154168 
    155169my $arguments = 
     
    167181    'type' => "flag", 
    168182    'reqd' => "no" }, 
    169       { 'name' => "documenttype", 
    170     'desc' => "{PagedImagePlugin.documenttype}", 
    171     'type' => "enum", 
    172     'list' => $type_list, 
    173     'deft' => "paged", 
    174     'reqd' => "no" }, 
     183#      { 'name' => "documenttype", 
     184#   'desc' => "{PagedImagePlugin.documenttype}", 
     185#   'type' => "enum", 
     186#   'list' => $type_list, 
     187#   'deft' => "auto", 
     188#   'reqd' => "no" }, 
    175189      {'name' => "processing_tmp_files", 
    176190       'desc' => "{BasePlugin.processing_tmp_files}", 
    177191       'type' => "flag", 
    178192       'hiddengli' => "yes"} 
    179 ]; 
    180  
     193    ]; 
     194 
     195my $doc_type_opt = { 'name' => "documenttype", 
     196             'desc' => "{PagedImagePlugin.documenttype}", 
     197             'type' => "enum", 
     198             'deft' => "auto", 
     199             'reqd' => "no" }; 
    181200 
    182201my $options = { 'name'     => "PagedImagePlugin", 
     
    191210    push(@$pluginlist, $class); 
    192211 
     212    push(@{$hashArgOptLists->{"OptList"}},$options); 
     213    
     214    my $imc_self = new ImageConverter($pluginlist, $inputargs, $hashArgOptLists); 
     215     
     216    # we can use this plugin to check gs3 version 
     217    if ($imc_self->{'gs_version'} eq "3") { 
     218    $doc_type_opt->{'list'} = $gs3_type_list; 
     219    } 
     220    else { 
     221    $doc_type_opt->{'list'} = $gs2_type_list; 
     222    } 
     223    push(@$arguments,$doc_type_opt); 
     224    # now we add the args to the list for parsing 
    193225    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
    194     push(@{$hashArgOptLists->{"OptList"}},$options); 
    195      
    196     my $imc_self = new ImageConverter($pluginlist, $inputargs, $hashArgOptLists); 
     226     
    197227    my $rtf_self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists, 1); 
    198228    my $rxf_self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists); 
     
    212242    $rxf_self->{'parser'}->{'PluginObj'} = $self; 
    213243 
     244    print STDERR "doc type = $self->{'documenttype'}\n"; 
    214245    return bless $self, $class; 
    215246} 
     
    449480    $self->{'current_section'} = $doc_obj->get_top_section(); 
    450481    } elsif ($element eq "PageGroup" || $element eq "Page") { 
     482    if ($element eq "PageGroup") { 
     483        $self->{'has_internal_structure'} = 1; 
     484    } 
    451485    # create a new section as a child 
    452486    $self->{'current_section'} = $doc_obj->insert_section($doc_obj->get_end_child($self->{'current_section'})); 
     
    529563    $self->{'xml_file_dir'} = $dir; 
    530564    $self->{'num_pages'} = 0; 
     565    $self->{'has_internal_structure'} = 0; 
    531566 
    532567} 
     
    536571    my $doc_obj = $self->{'doc_obj'}; 
    537572     
     573    my $topsection = $doc_obj->get_top_section(); 
     574 
    538575    # add numpages metadata 
    539     my $topsection = $doc_obj->get_top_section(); 
    540  
    541576    $doc_obj->set_utf8_metadata_element ($topsection, 'NumPages', $self->{'num_pages'}); 
     577 
     578    # set the document type 
     579    print STDERR "close doc, doc type = $self->{'documenttype'}\n"; 
     580    my $final_doc_type = ""; 
     581    if ($self->{'documenttype'} eq "auto") { 
     582    if ($self->{'has_internal_structure'}) { 
     583        if ($self->{'gs_version'} eq "3") { 
     584        $final_doc_type = "pagedhierarchy"; 
     585        } 
     586        else { 
     587        $final_doc_type = "hierarchy"; 
     588        } 
     589    } else { 
     590        $final_doc_type = "paged"; 
     591    } 
     592    } else { 
     593    # set to what doc type option was set to 
     594    $final_doc_type = $self->{'documenttype'}; 
     595    } 
     596    $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", $final_doc_type); 
     597    ### capitalisation???? 
     598#    if ($self->{'documenttype'} eq 'paged') { 
     599    # set the gsdlthistype metadata to Paged - this ensures this document will 
     600    # be treated as a Paged doc, even if Titles are not numeric 
     601#   $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Paged"); 
     602#    } else { 
     603#   $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Hierarchy"); 
     604#    } 
    542605 
    543606    $doc_obj->set_utf8_metadata_element($topsection,"MaxImageWidth",$self->{'MaxImageWidth'}); 
     
    554617 
    555618    my $topsection = $doc_obj->get_top_section(); 
    556  
    557     if ($self->{'documenttype'} eq 'paged') { 
    558     # set the gsdlthistype metadata to Paged - this ensures this document will 
    559     # be treated as a Paged doc, even if Titles are not numeric 
    560     $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Paged"); 
    561     } else { 
    562     $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Hierarchy"); 
    563     } 
    564619 
    565620    my $plugin_filename_encoding = $self->{'filename_encoding'}; 
     
    631686    $self->set_initial_doc_fields($doc_obj, $filename_full_path, $processor, $metadata); 
    632687    my $topsection = $doc_obj->get_top_section(); 
     688    # simple item files are always paged unless user specified 
     689    if ($self->{'documenttype'} eq "auto") { 
     690    $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "paged"); 
     691    } else { 
     692    $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", $self->{'documenttype'}); 
     693    } 
    633694    open (ITEMFILE, $filename_full_path) || die "couldn't open $filename_full_path\n"; 
    634695    my $line = "";