Ignore:
Timestamp:
2012-07-17T15:22:12+12:00 (12 years ago)
Author:
kjdon
Message:

More cunning document types. gs3 has a new one, pagedhierarchy, for documents that have both internal structure and sequences of pages. A new documenttype option, auto, selects the most appropriate doc type for each document: paged or hierarchy for gs2, paged or pagedhierarchy for gs3. The documenttype option is displayed differently in gs2 and gs3 modes.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/PagedImagePlugin.pm

    r24548 r25961  
    147147}
    148148
    149 my $type_list =
    150     [ { 'name' => "paged",
    151         'desc' => "{PagedImagePlugin.documenttype.paged}" },
     149my $gs2_type_list =
     150    [ { 'name' => "auto",
     151    'desc' => "{PagedImagePlugin.documenttype.auto2}" },
     152      { 'name' => "paged",
     153        'desc' => "{PagedImagePlugin.documenttype.paged2}" },
    152154      { 'name' => "hierarchy",
    153         'desc' => "{PagedImagePlugin.documenttype.hierarchy}" } ];
     155        'desc' => "{PagedImagePlugin.documenttype.hierarchy}" }
     156    ];
     157
     158my $gs3_type_list =     
     159    [ { 'name' => "auto",
     160    'desc' => "{PagedImagePlugin.documenttype.auto3}" },
     161      { 'name' => "paged",
     162        'desc' => "{PagedImagePlugin.documenttype.paged3}" },
     163      { 'name' => "hierarchy",
     164        'desc' => "{PagedImagePlugin.documenttype.hierarchy}" },
     165      { 'name' => "pagedhierarchy",
     166        'desc' => "{PagedImagePlugin.documenttype.pagedhierarchy}" }
     167    ];
    154168
    155169my $arguments =
     
    167181    'type' => "flag",
    168182    'reqd' => "no" },
    169       { 'name' => "documenttype",
    170     'desc' => "{PagedImagePlugin.documenttype}",
    171     'type' => "enum",
    172     'list' => $type_list,
    173     'deft' => "paged",
    174     'reqd' => "no" },
     183#      { 'name' => "documenttype",
     184#   'desc' => "{PagedImagePlugin.documenttype}",
     185#   'type' => "enum",
     186#   'list' => $type_list,
     187#   'deft' => "auto",
     188#   'reqd' => "no" },
    175189      {'name' => "processing_tmp_files",
    176190       'desc' => "{BasePlugin.processing_tmp_files}",
    177191       'type' => "flag",
    178192       'hiddengli' => "yes"}
    179 ];
    180 
     193    ];
     194
     195my $doc_type_opt = { 'name' => "documenttype",
     196             'desc' => "{PagedImagePlugin.documenttype}",
     197             'type' => "enum",
     198             'deft' => "auto",
     199             'reqd' => "no" };
    181200
    182201my $options = { 'name'     => "PagedImagePlugin",
     
    191210    push(@$pluginlist, $class);
    192211
     212    push(@{$hashArgOptLists->{"OptList"}},$options);
     213   
     214    my $imc_self = new ImageConverter($pluginlist, $inputargs, $hashArgOptLists);
     215   
     216    # we can use this plugin to check gs3 version
     217    if ($imc_self->{'gs_version'} eq "3") {
     218    $doc_type_opt->{'list'} = $gs3_type_list;
     219    }
     220    else {
     221    $doc_type_opt->{'list'} = $gs2_type_list;
     222    }
     223    push(@$arguments,$doc_type_opt);
     224    # now we add the args to the list for parsing
    193225    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    194     push(@{$hashArgOptLists->{"OptList"}},$options);
    195    
    196     my $imc_self = new ImageConverter($pluginlist, $inputargs, $hashArgOptLists);
     226   
    197227    my $rtf_self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists, 1);
    198228    my $rxf_self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists);
     
    212242    $rxf_self->{'parser'}->{'PluginObj'} = $self;
    213243
     244    print STDERR "doc type = $self->{'documenttype'}\n";
    214245    return bless $self, $class;
    215246}
     
    449480    $self->{'current_section'} = $doc_obj->get_top_section();
    450481    } elsif ($element eq "PageGroup" || $element eq "Page") {
     482    if ($element eq "PageGroup") {
     483        $self->{'has_internal_structure'} = 1;
     484    }
    451485    # create a new section as a child
    452486    $self->{'current_section'} = $doc_obj->insert_section($doc_obj->get_end_child($self->{'current_section'}));
     
    529563    $self->{'xml_file_dir'} = $dir;
    530564    $self->{'num_pages'} = 0;
     565    $self->{'has_internal_structure'} = 0;
    531566
    532567}
     
    536571    my $doc_obj = $self->{'doc_obj'};
    537572   
     573    my $topsection = $doc_obj->get_top_section();
     574
    538575    # add numpages metadata
    539     my $topsection = $doc_obj->get_top_section();
    540 
    541576    $doc_obj->set_utf8_metadata_element ($topsection, 'NumPages', $self->{'num_pages'});
     577
     578    # set the document type
     579    print STDERR "close doc, doc type = $self->{'documenttype'}\n";
     580    my $final_doc_type = "";
     581    if ($self->{'documenttype'} eq "auto") {
     582    if ($self->{'has_internal_structure'}) {
     583        if ($self->{'gs_version'} eq "3") {
     584        $final_doc_type = "pagedhierarchy";
     585        }
     586        else {
     587        $final_doc_type = "hierarchy";
     588        }
     589    } else {
     590        $final_doc_type = "paged";
     591    }
     592    } else {
     593    # set to what doc type option was set to
     594    $final_doc_type = $self->{'documenttype'};
     595    }
     596    $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", $final_doc_type);
     597    ### capiatalisation????
     598#    if ($self->{'documenttype'} eq 'paged') {
     599    # set the gsdlthistype metadata to Paged - this ensures this document will
     600    # be treated as a Paged doc, even if Titles are not numeric
     601#   $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Paged");
     602#    } else {
     603#   $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Hierarchy");
     604#    }
    542605
    543606    $doc_obj->set_utf8_metadata_element($topsection,"MaxImageWidth",$self->{'MaxImageWidth'});
     
    554617
    555618    my $topsection = $doc_obj->get_top_section();
    556 
    557     if ($self->{'documenttype'} eq 'paged') {
    558     # set the gsdlthistype metadata to Paged - this ensures this document will
    559     # be treated as a Paged doc, even if Titles are not numeric
    560     $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Paged");
    561     } else {
    562     $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Hierarchy");
    563     }
    564619
    565620    my $plugin_filename_encoding = $self->{'filename_encoding'};
     
    631686    $self->set_initial_doc_fields($doc_obj, $filename_full_path, $processor, $metadata);
    632687    my $topsection = $doc_obj->get_top_section();
     688    # simple item files are always paged unless user specified
     689    if ($self->{'documenttype'} eq "auto") {
     690    $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "paged");
     691    } else {
     692    $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", $self->{'documenttype'});
     693    }
    633694    open (ITEMFILE, $filename_full_path) || die "couldn't open $filename_full_path\n";
    634695    my $line = "";
Note: See TracChangeset for help on using the changeset viewer.