Context Navigation

source: gsdl/trunk/perllib/basebuildproc.pm@ 17110

Last change on this file since 17110 was 17110, checked in by kjdon, 16 years ago
changed way cjk separation is done. Not done in plugins any more, but is now an indexoption. cnseg called from filter_text method. generate_index_options sets up the field in buildproc
Property svn:keywords set to `Author Date Id Revision`
File size: 18.3 KB

Rev	Line
[9919]	1	###########################################################################
	2	#
	3	# basebuildproc.pm --
	4	# A component of the Greenstone digital library software
	5	# from the New Zealand Digital Library Project at the
	6	# University of Waikato, New Zealand.
	7	#
	8	# Copyright (C) 1999 New Zealand Digital Library Project
	9	#
	10	# This program is free software; you can redistribute it and/or modify
	11	# it under the terms of the GNU General Public License as published by
	12	# the Free Software Foundation; either version 2 of the License, or
	13	# (at your option) any later version.
	14	#
	15	# This program is distributed in the hope that it will be useful,
	16	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	# GNU General Public License for more details.
	19	#
	20	# You should have received a copy of the GNU General Public License
	21	# along with this program; if not, write to the Free Software
	22	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	23	#
	24	###########################################################################
	25
	26	# This document processor outputs a document for indexing (should be
[15688]	27	# implemented by subclass) and storing in the database
[9919]	28
	29	package basebuildproc;
	30
	31	eval {require bytes};
	32
	33	use classify;
[15699]	34	use dbutil;
[9919]	35	use doc;
	36	use docproc;
[15696]	37	use strict; no strict 'subs';
[9919]	38	use util;
	39
	40	BEGIN {
	41	@basebuildproc::ISA = ('docproc');
	42	}
	43
# Constructor.
#
# Positional arguments (each is stored on the object under the key of
# the same name unless noted):
#   $class      - package to bless the new object into
#   $collection
#   $source_dir
#   $build_dir  - also used as the initial 'assocdir'
#   $keepold    - when true, the starting document/section/byte counts
#                 are seeded from an existing build.cfg (if one is found)
#   $verbosity
#   $outhandle  - where all the debugging info goes; defaults to STDERR
#
# Returns the new object, which starts life as a docproc object.
sub new
{
    my ($class, $collection, $source_dir, $build_dir, $keepold, $verbosity, $outhandle) = @_;
    my $self = docproc->new();

    # outhandle is where all the debugging info goes
    # output_handle is where the output of the plugins is piped
    # to (i.e. mg, database etc.)
    $outhandle = 'STDERR' unless defined $outhandle;

    $self->{'collection'} = $collection;
    $self->{'source_dir'} = $source_dir;
    $self->{'build_dir'} = $build_dir;
    $self->{'keepold'} = $keepold;
    $self->{'verbosity'} = $verbosity;
    $self->{'outhandle'} = $outhandle;

    $self->{'classifiers'} = [];
    $self->{'mode'} = "text";
    $self->{'assocdir'} = $build_dir;
    $self->{'dontdb'} = {};
    $self->{'store_metadata_coverage'} = "false";

    $self->{'index'} = "section:text";
    $self->{'indexexparr'} = [];

    $self->{'separate_cjk'} = 0;

    my $found_num_data = 0;
    my $buildconfigfile = undef;

    if ($keepold) {
        # For incremental building need to seed num_docs etc from values
        # stored in build.cfg (if present)
        $buildconfigfile = &util::filename_cat($build_dir, "build.cfg");
        if (-e $buildconfigfile) {
            $found_num_data = 1;
        }
        else {
            # try the index dir
            $buildconfigfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},
                                                   "index", "build.cfg");
            if (-e $buildconfigfile) {
                $found_num_data = 1;
            }
        }
    }

    my $buildcfg = {};
    if ($found_num_data) {
        # colcfg is not loaded at the top of this file, so make sure it is
        # available before calling into it.
        require colcfg;
        $buildcfg = &colcfg::read_build_cfg($buildconfigfile) || {};
    }

    # A count that is missing from build.cfg (or no build.cfg at all)
    # starts from zero rather than from undef.
    $self->{'starting_num_docs'}     = $buildcfg->{'numdocs'}     || 0;
    $self->{'starting_num_sections'} = $buildcfg->{'numsections'} || 0;
    $self->{'starting_num_bytes'}    = $buildcfg->{'numbytes'}    || 0;

    $self->{'output_handle'} = "STDOUT";
    $self->{'num_docs'} = $self->{'starting_num_docs'};
    $self->{'num_sections'} = $self->{'starting_num_sections'};
    $self->{'num_bytes'} = $self->{'starting_num_bytes'};

    $self->{'num_processed_bytes'} = 0;
    $self->{'store_text'} = 1;

    # what level (section/document) the database - indexer intersection is
    $self->{'db_level'} = "section";

    # used by browse interface
    $self->{'doclist'} = [];

    $self->{'indexing_text'} = 0;

    return bless $self, $class;
}
	130
	131	sub reset {
	132	my $self = shift (@_);
[10159]	133
	134	$self->{'num_docs'} = $self->{'starting_num_docs'};
	135	$self->{'num_sections'} = $self->{'starting_num_sections'};
	136	$self->{'num_bytes'} = $self->{'starting_num_bytes'};
[9919]	137
	138	$self->{'num_processed_bytes'} = 0;
	139	}
	140
# Set every running counter (documents, sections, bytes, processed bytes)
# to zero, ignoring the starting values.
sub zero_reset {
    my ($self) = @_;

    @{$self}{qw(num_docs num_sections num_bytes)} = (0, 0, 0);

    $self->{'num_processed_bytes'} = 0;
}
	150
# Report whether this build processor supports incremental building.
# The base class always answers 'no' (0): it is safer to assume
# non-incremental and let inherited classes that are capable override this.
sub is_incremental_capable
{
    my $capable = 0;
    return $capable;
}
	159
# Return the current value of the 'num_docs' counter.
sub get_num_docs {
    my ($self) = @_;
    return $self->{'num_docs'};
}
	165
	166	sub get_num_sections {
	167	my $self = shift (@_);
	168
	169	return $self->{'num_sections'};
	170	}
	171
	172	# num_bytes is the actual number of bytes in the collection
	173	# this is normally the same as what's processed during text compression
	174	sub get_num_bytes {
	175	my $self = shift (@_);
	176
	177	return $self->{'num_bytes'};
	178	}
	179
	180	# num_processed_bytes is the number of bytes actually passed
	181	# to mg for the current index
	182	sub get_num_processed_bytes {
	183	my $self = shift (@_);
	184
	185	return $self->{'num_processed_bytes'};
	186	}
	187
	188	sub set_output_handle {
	189	my $self = shift (@_);
	190	my ($handle) = @_;
	191
	192	$self->{'output_handle'} = $handle;
	193	}
	194
	195
	196	sub set_mode {
	197	my $self = shift (@_);
	198	my ($mode) = @_;
	199
	200	$self->{'mode'} = $mode;
	201	}
	202
# Return the currently selected processing mode.
sub get_mode {
    my ($self) = @_;
    return $self->{'mode'};
}
	208
# Set the directory under which assoc_files() places a document's
# associated files.
sub set_assocdir {
    my ($self, $assocdir) = @_;
    $self->{'assocdir'} = $assocdir;
}
	215
# Set the hash (keyed by metadata field name) of fields that infodb()
# must not store in the database.
sub set_dontdb {
    my ($self, $dontdb) = @_;
    $self->{'dontdb'} = $dontdb;
}
	222
# Record the database type that is handed to dbutil when entries are
# written.
sub set_infodbtype
{
    my ($self, $infodbtype) = @_;
    $self->{'infodbtype'} = $infodbtype;
}
	229
# Set the current index specification. The optional second argument
# replaces the list of subcollection expressions; when it is omitted the
# existing list is kept.
sub set_index {
    my ($self, $index, $indexexparr) = @_;

    $self->{'index'} = $index;
    if (defined $indexexparr) {
        $self->{'indexexparr'} = $indexexparr;
    }
}
	237
	238	sub set_index_languages {
	239	my $self = shift (@_);
	240	my ($lang_meta, $langarr) = @_;
	241	$self->{'lang_meta'} = $lang_meta;
	242	$self->{'langarr'} = $langarr;
	243	}
	244
	245	sub get_index {
	246	my $self = shift (@_);
	247
	248	return $self->{'index'};
	249	}
	250
	251	sub set_classifiers {
	252	my $self = shift (@_);
	253	my ($classifiers) = @_;
	254
	255	$self->{'classifiers'} = $classifiers;
	256	}
	257
	258	sub set_indexing_text {
	259	my $self = shift (@_);
	260	my ($indexing_text) = @_;
	261
	262	$self->{'indexing_text'} = $indexing_text;
	263	}
	264
	265	sub get_indexing_text {
	266	my $self = shift (@_);
	267
	268	return $self->{'indexing_text'};
	269	}
	270
	271	sub set_store_text {
	272	my $self = shift (@_);
	273	my ($store_text) = @_;
	274
	275	$self->{'store_text'} = $store_text;
	276	}
[16222]	277
	278	sub set_store_metadata_coverage {
	279	my $self = shift (@_);
	280	my ($store_metadata_coverage) = @_;
	281
	282	$self->{'store_metadata_coverage'} = $store_metadata_coverage \|\| "";
	283	}
	284
# Return (as a list) the OIDs collected in 'doclist' by infodb().
sub get_doc_list {
    my ($self) = @_;
    my $doclist = $self->{'doclist'};
    return @$doclist;
}
	290
# Change the database level. It is "section" by default; set it to
# "document" to have infodb() write one entry per document only.
sub set_db_level {
    my ($self, $db_level) = @_;
    $self->{'db_level'} = $db_level;
}
	298
# Store the given index type under 'sections_index_document_metadata'.
sub set_sections_index_document_metadata {
    my ($self, $index_type) = @_;
    $self->{'sections_index_document_metadata'} = $index_type;
}
[17110]	305
	306	sub set_separate_cjk {
	307	my $self = shift (@_);
	308	my ($sep_cjk) = @_;
	309
	310	$self->{'separate_cjk'} = $sep_cjk;
	311	}
	312
# Dispatch to the method named by the current mode, forwarding every
# remaining argument to it.
sub process {
    my $self = shift @_;
    my $handler = $self->{'mode'};

    return $self->$handler(@_);
}
	319
# Post-process text depending on field. Currently the only processing
# done here is CJK separation.
#
#   $field - name of the field the text belongs to (currently unused)
#   $text  - the text to filter
#
# Returns $text unchanged unless 'separate_cjk' is set, in which case the
# result of cnseg::segment($text) is returned.
sub filter_text {
    my ($self, $field, $text) = @_;

    return $text unless $self->{'separate_cjk'};

    # cnseg is not loaded at the top of this file, so load it on demand
    # the first time segmentation is actually requested.
    require cnseg;
    my $segmented = &cnseg::segment($text);
    return $segmented;
}
[14934]	333
[17110]	334
# Keep statistics on which metadata sets are used and how often each
# field occurs within a set. Counts go into two tallies:
# 'doc_mdprefix_fields' (scoped to the current document) and
# 'mdprefix_fields' (across the whole collection).
#
# A field of the form "prefix.name" is counted under its prefix; a field
# without a prefix that starts with an upper-case letter is counted under
# the implicit 'ex' metadata set. Any other field is ignored.
sub infodb_metadata_stats
{
    my ($self, $field) = @_;

    my ($mdset, $name);
    if ($field =~ m/^(.+)\.(.*)$/) {
        ($mdset, $name) = ($1, $2);
    }
    elsif ($field =~ m/^[[:upper:]]/) {
        # implicit 'ex' metadata set
        ($mdset, $name) = ('ex', $field);
    }
    else {
        return;
    }

    foreach my $tally ('doc_mdprefix_fields', 'mdprefix_fields') {
        $self->{$tally}->{$mdset}->{$name}++;
    }
}
	361
	362
# Write the database entries for one document and update the build
# statistics (num_docs, num_sections, num_bytes, doclist).
#
#   $doc_obj  - the document object to store
#   $filename - defined when doc_obj was derived directly from a file;
#               undefined when doc_obj was reconstructed from the database
#               (has metadata and doc structure but no text)
#
# Only "indexed_doc" and "info_doc" documents are output; anything else
# returns immediately. One entry is written per section (or a single one
# per document when 'db_level' is "document"), followed, except for
# lucenebuildproc objects, by an entry mapping the document number back
# to the OID.
sub infodb {
    my $self = shift (@_);
    my ($doc_obj, $filename) = @_;

    # only output this document if it is a "indexed_doc" or "info_doc" (database only) document
    my $doctype = $doc_obj->get_doc_type();
    return if ($doctype ne "indexed_doc" && $doctype ne "info_doc");

    my $archivedir = "";
    if (defined $filename)
    {
        # doc_obj derived directly from file: the archive directory is
        # everything before the last path separator ('/' or '\')
        my ($dir) = $filename =~ /^(.*?)(?:\/|\\)[^\/\\]*$/;
        $dir = "" unless defined $dir;
        $dir =~ s/\\/\//g;   # normalise separators to forward slashes
        $dir =~ s/^\/+//;    # strip leading slashes
        $dir =~ s/\/+$//;    # strip trailing slashes

        $archivedir = $dir;

        # resolve the final filenames of the files associated with this document
        $self->assoc_files ($doc_obj, $archivedir);
    }
    else
    {
        # doc_obj reconstructed from database (has metadata, doc structure but no text)
        my $top_section = $doc_obj->get_top_section();
        $archivedir = $doc_obj->get_metadata_element($top_section,"archivedir");
    }

    # NOTE: the "classification" guards below are always satisfied here,
    # because any doctype other than indexed_doc/info_doc returned above.

    # add this document to the browse structure
    push(@{$self->{'doclist'}},$doc_obj->get_OID())
        unless ($doctype eq "classification");

    # classify this document
    &classify::classify_doc ($self->{'classifiers'}, $doc_obj);

    # this is another document
    $self->{'num_docs'} += 1 unless ($doctype eq "classification");

    # is this a paged or a hierarchical document
    my ($thistype, $childtype) = $self->get_document_type ($doc_obj);

    my $section = $doc_obj->get_top_section ();
    my $doc_OID = $doc_obj->get_OID();
    my $first = 1;
    my $infodb_handle = $self->{'output_handle'};

    # per-document metadata statistics start afresh for each document
    $self->{'doc_mdprefix_fields'} = {};

    while (defined $section)
    {
        # the top section is stored under the bare document OID
        my $section_OID = $doc_OID;
        if ($section ne "")
        {
            $section_OID = $doc_OID . "." . $section;
        }
        my %section_infodb = ();

        # update a few statistics
        $self->{'num_bytes'} += $doc_obj->get_text_length ($section);
        $self->{'num_sections'} += 1 unless ($doctype eq "classification");

        # output the fact that this document is a document (unless doctype
        # has been set to something else from within a plugin
        my $dtype = $doc_obj->get_metadata_element ($section, "doctype");
        if (!defined $dtype || $dtype !~ /\w/) {
            $section_infodb{"doctype"} = [ "doc" ];
        }

        # Output whether this node contains text
        #
        # If doc_obj reconstructed from database file then no need to
        # explicitly add <hastxt> as this is preserved as metadata when
        # the database file is loaded in
        if (defined $filename)
        {
            # doc_obj derived directly from file
            if ($doc_obj->get_text_length($section) > 0) {
                $section_infodb{"hastxt"} = [ "1" ];
            } else {
                $section_infodb{"hastxt"} = [ "0" ];
            }
        }

        # output all the section metadata
        my $metadata = $doc_obj->get_all_metadata ($section);
        foreach my $pair (@$metadata) {
            my ($field, $value) = (@$pair);

            if ($field ne "Identifier" && $field !~ /^gsdl/ &&
                defined $value && $value ne "") {

                # escape problematic stuff
                $value =~ s/\\/\\\\/g;
                $value =~ s/\n/\\n/g;
                $value =~ s/\r/\\r/g;

                # special case for URL metadata: also write an entry keyed
                # by the URL itself that points back at this section
                if ($field =~ /^URL$/i) {
                    &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $value, { 'section' => [ $section_OID ] });
                }

                if (!defined $self->{'dontdb'}->{$field}) {
                    push(@{$section_infodb{$field}}, $value);

                    if ($section eq "" && $self->{'store_metadata_coverage'} =~ /^true$/i)
                    {
                        $self->infodb_metadata_stats($field);
                    }
                }
            }
        }

        # the top section also records which metadata sets and fields the
        # document uses, and how often each field occurs
        if ($section eq "")
        {
            my $doc_mdprefix_fields = $self->{'doc_mdprefix_fields'};

            foreach my $prefix (keys %$doc_mdprefix_fields)
            {
                push(@{$section_infodb{"metadataset"}}, $prefix);

                foreach my $field (keys %{$doc_mdprefix_fields->{$prefix}})
                {
                    push(@{$section_infodb{"metadatalist-$prefix"}}, $field);

                    my $val = $doc_mdprefix_fields->{$prefix}->{$field};
                    push(@{$section_infodb{"metadatafreq-$prefix-$field"}}, $val);
                }
            }
        }

        # If doc_obj reconstructed from database file then no need to
        # explicitly add <archivedir> as this is preserved as metadata when
        # the database file is loaded in
        if (defined $filename)
        {
            # output archivedir if at top level
            if ($section eq $doc_obj->get_top_section()) {
                $section_infodb{"archivedir"} = [ $archivedir ];
            }
        }

        # output document display type
        if ($first) {
            $section_infodb{"thistype"} = [ $thistype ];
        }

        if ($self->{'db_level'} eq "document") {
            # doc num is num_docs not num_sections
            # output the matching document number
            $section_infodb{"docnum"} = [ $self->{'num_docs'} ];
        }
        else {
            # output a list of children
            my $children = $doc_obj->get_children ($section);
            if (scalar(@$children) > 0) {
                $section_infodb{"childtype"} = [ $childtype ];
                my $contains = "";
                foreach my $child (@$children)
                {
                    $contains .= ";" unless ($contains eq "");
                    # each child is written as a double quote followed by
                    # ".N", where N is the child's last numeric component
                    # when it has one
                    if ($child =~ /^.*?\.(\d+)$/)
                    {
                        $contains .= "\".$1";
                    }
                    else
                    {
                        $contains .= "\".$child";
                    }
                }
                $section_infodb{"contains"} = [ $contains ];
            }
            # output the matching doc number
            $section_infodb{"docnum"} = [ $self->{'num_sections'} ];
        }

        &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $section_OID, \%section_infodb);

        # output a database entry for the document number, except for Lucene (which no longer needs this information)
        unless (ref($self) eq "lucenebuildproc")
        {
            if ($self->{'db_level'} eq "document") {
                &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $self->{'num_docs'}, { 'section' => [ $doc_OID ] });
            }
            else {
                &dbutil::write_infodb_entry($self->{'infodbtype'}, $infodb_handle, $self->{'num_sections'}, { 'section' => [ $section_OID ] });
            }
        }

        $first = 0;
        $section = $doc_obj->get_next_section($section);
        last if ($self->{'db_level'} eq "document"); # if no sections wanted, only add the docs
    }
}
[9919]	558
[15696]	559
# Placeholder for the text-processing mode: subclasses must override it.
# The base implementation reports that fact on the debugging handle
# ('outhandle') and then dies.
#
#   $doc_obj - the document to process (unused here)
sub text {
    my ($self, $doc_obj) = @_;

    my $handle = $self->{'outhandle'};
    {
        # 'outhandle' may be a handle *name* (new() defaults it to the
        # string STDERR). Printing to a name is a symbolic reference, so
        # strict refs must be relaxed here for the message to get out.
        no strict 'refs';
        print $handle "basebuildproc::text function must be implemented in sub classes\n";
    }
    die "\n";
}
	568
	569	# should the document be indexed - according to the subcollection and language
	570	# specification.
	571	sub is_subcollection_doc {
	572	my $self = shift (@_);
	573	my ($doc_obj) = @_;
	574
	575	my $indexed_doc = 1;
	576	foreach my $indexexp (@{$self->{'indexexparr'}}) {
	577	$indexed_doc = 0;
	578	my ($field, $exp, $options) = split /\//, $indexexp;
	579	if (defined ($field) && defined ($exp)) {
	580	my ($bool) = $field =~ /^(.)/;
	581	$field =~ s/^.// if $bool eq '!';
[10028]	582	my @metadata_values;
[9919]	583	if ($field =~ /^filename$/i) {
[10028]	584	push(@metadata_values, $doc_obj->get_source_filename());
[9919]	585	}
[10028]	586	else {
	587	@metadata_values = @{$doc_obj->get_metadata($doc_obj->get_top_section(), $field)};
	588	}
	589	next unless @metadata_values;
	590	foreach my $metadata_value (@metadata_values) {
	591	if ($bool eq '!') {
	592	if ($options =~ /^i$/i) {
	593	if ($metadata_value !~ /$exp/i) {$indexed_doc = 1; last;}
	594	} else {
	595	if ($metadata_value !~ /$exp/) {$indexed_doc = 1; last;}
	596	}
[9919]	597	} else {
[10028]	598	if ($options =~ /^i$/i) {
	599	if ($metadata_value =~ /$exp/i) {$indexed_doc = 1; last;}
	600	} else {
	601	if ($metadata_value =~ /$exp/) {$indexed_doc = 1; last;}
	602	}
[9919]	603	}
	604	}
[10028]	605
	606	last if ($indexed_doc == 1);
[9919]	607	}
	608	}
	609
	610	# if this doc is so far in the sub collection, and we have lang info,
	611	# now we check the languages to see if it matches
	612	if($indexed_doc && defined $self->{'lang_meta'}) {
	613	$indexed_doc = 0;
	614	my $field = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'lang_meta'});
	615	if (defined $field) {
	616	foreach my $lang (@{$self->{'langarr'}}) {
	617	my ($bool) = $lang =~ /^(.)/;
	618	if ($bool eq '!') {
	619	$lang =~ s/^.//;
	620	if ($field !~ /$lang/) {
	621	$indexed_doc = 1; last;
	622	}
	623	} else {
	624	if ($field =~ /$lang/) {
	625	$indexed_doc = 1; last;
	626	}
	627	}
	628	}
	629	}
	630	}
	631	return $indexed_doc;
	632
	633	}
	634
	635	# use 'Paged' if document has no more than 2 levels
	636	# and each section at second level has a number for
	637	# Title metadata
	638	# also use Paged if gsdlthistype metadata is set to Paged
	639	sub get_document_type {
	640	my $self = shift (@_);
	641	my ($doc_obj) = @_;
	642
	643	my $thistype = "VList";
	644	my $childtype = "VList";
	645	my $title;
	646	my @tmp = ();
	647
	648	my $section = $doc_obj->get_top_section ();
	649
	650	my $gsdlthistype = $doc_obj->get_metadata_element ($section, "gsdlthistype");
	651	if (defined $gsdlthistype) {
	652	if ($gsdlthistype eq "Paged") {
	653	$childtype = "Paged";
	654	if ($doc_obj->get_text_length ($doc_obj->get_top_section())) {
	655	$thistype = "Paged";
	656	} else {
	657	$thistype = "Invisible";
	658	}
	659
	660	return ($thistype, $childtype);
	661	} elsif ($gsdlthistype eq "Hierarchy") {
	662	return ($thistype, $childtype); # use VList, VList
	663	}
	664	}
	665	my $first = 1;
	666	while (defined $section) {
	667	@tmp = split /\./, $section;
	668	if (scalar(@tmp) > 1) {
	669	return ($thistype, $childtype);
	670	}
	671	if (!$first) {
	672	$title = $doc_obj->get_metadata_element ($section, "Title");
	673	if (!defined $title \|\| $title !~ /^\d+$/) {
	674	return ($thistype, $childtype);
	675	}
	676	}
	677	$first = 0;
	678	$section = $doc_obj->get_next_section($section);
	679	}
	680	if ($doc_obj->get_text_length ($doc_obj->get_top_section())) {
	681	$thistype = "Paged";
	682	} else {
	683	$thistype = "Invisible";
	684	}
	685	$childtype = "Paged";
	686	return ($thistype, $childtype);
	687	}
	688
# Hard-link every file associated with a document into the assoc
# directory.
#
#   $doc_obj    - document whose get_assoc_files() list is processed; the
#                 first element of each entry is used as the link source
#                 and the second as the target name
#   $archivedir - the document's directory relative to 'assocdir'
#
# If the target name starts with a slash (or backslash) it is placed
# relative to the assoc dir itself, otherwise relative to the document's
# own directory inside the assoc dir.
sub assoc_files {
    my $self = shift (@_);
    my ($doc_obj, $archivedir) = @_;

    foreach my $assoc_file (@{$doc_obj->get_assoc_files()}) {
        my ($source, $target) = @$assoc_file;

        my $afile;
        if ($target =~ m@^[/\\]@) {
            $afile = &util::filename_cat($self->{'assocdir'}, $target);
        } else {
            $afile = &util::filename_cat($self->{'assocdir'}, $archivedir, $target);
        }
        &util::hard_link ($source, $afile);
    }
}
	706

Note: See TracBrowser for help on using the repository browser.

Download in other formats: