Index: /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASH019c5dca.dir/doc.xml
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASH019c5dca.dir/doc.xml (revision 27980)
+++ /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASH019c5dca.dir/doc.xml (revision 27981)
@@ -9,8 +9,8 @@
Bronwyn
biblio_for_dl_scientometrics.do
- http://research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000551/pdf03.html
- http://research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000551/pdf03.html
+ http://research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/pdf03.html
+ http://research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/pdf03.html
import/pdf03.pdf
- tmp/1373000551/pdf03.html
+ tmp/1375688850/pdf03.html
pdf03.html
pdf03.pdf
@@ -24,27 +24,27 @@
doc.pdf
17
+ Sally Jo Cunningham
+ Applications for Bibliometric Research in the Emerging Digital Libraries
+ 8.57
+ /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/import
+ 2013:08:02 19:30:46+12:00
+ pdf03.pdf
+ 644
+ 35935
+ PDF
+ application/pdf
+ Bronwyn
+ 1999:09:27 16:05:06
+ Microsoft Word
+ false
+ 1.1
+ 17
Acrobat PDFWriter 2.0 for Macintosh
- 1.1
- Applications for Bibliometric Research in the Emerging Digital Libraries
- 644
- pdf03.pdf
- /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/import
- Microsoft Word
- 35935
- 2013:06:28 18:27:50+12:00
- Sally Jo Cunningham
- 17
- Bronwyn
- PDF
- false
- 1999:09:27 16:05:06
biblio_for_dl_scientometrics.do
- 8.57
- application/pdf
HASH019c5dca7f5bb781460a6b9c
- 1372400870
- 20130628
- 1373000552
- 20130705
+ 1375428646
+ 20130802
+ 1375688850
+ 20130805
HASH019c5dca.dir
doc.pdf:application/pdf:
Index: /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASH07915444.dir/doc.xml
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASH07915444.dir/doc.xml (revision 27980)
+++ /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASH07915444.dir/doc.xml (revision 27981)
@@ -8,8 +8,8 @@
utf8
Authorship patterns in Information Systems
- http://research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/rtf01.html
- http://research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/rtf01.html
+ http://research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/rtf01.html
+ http://research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/rtf01.html
import/rtf01.rtf
- tmp/1373000552/rtf01.html
+ tmp/1375688850/rtf01.html
rtf01.html
rtf01.rtf
@@ -25,8 +25,8 @@
Stuart M. Dillon
HASH079154443e2ecce7bb4208
- 1372400870
- 20130628
- 1373000552
- 20130705
+ 1375428646
+ 20130802
+ 1375688850
+ 20130805
HASH07915444.dir
doc.rtf:application/rtf:
Index: /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASH1a9cea0f.dir/doc.xml
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASH1a9cea0f.dir/doc.xml (revision 27980)
+++ /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASH1a9cea0f.dir/doc.xml (revision 27981)
@@ -9,8 +9,8 @@
Bronwyn
Greenstone: A Comprehensive Open-Source Digital Library Software...
- http://research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000551/pdf01.html
- http://research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000551/pdf01.html
+ http://research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688849/pdf01.html
+ http://research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688849/pdf01.html
import/pdf01.pdf
- tmp/1373000551/pdf01.html
+ tmp/1375688849/pdf01.html
pdf01.html
pdf01.pdf
@@ -24,29 +24,29 @@
doc.pdf
9
- Acrobat PDFWriter 4.0 for Power Macintosh
- 1.2
- Greenstone: A comprehensive open-source digital library software system
- 644
- pdf01.pdf
- /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/import
- Microsoft Word
- 269487
- 2013:06:28 18:27:50+12:00
Ian H. Witten
Rodger J. McNab
Stefan J. Boddie
David Bainbridge
+ Greenstone: A comprehensive open-source digital library software system
+ 8.57
+ /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/import
+ 2013:08:02 19:30:46+12:00
+ pdf01.pdf
+ 644
+ 269487
+ PDF
+ application/pdf
Bronwyn
+ 2000:03:02 15:21:24
+ Microsoft Word
+ false
+ 1.2
9
- PDF
- 2000:03:02 15:21:24
- false
- 8.57
- application/pdf
+ Acrobat PDFWriter 4.0 for Power Macintosh
HASH1a9cea0f239f754007681b
- 1372400870
- 20130628
- 1373000551
- 20130705
+ 1375428646
+ 20130802
+ 1375688850
+ 20130805
HASH1a9cea0f.dir
pdf01-2_1.jpg:image/jpeg:
Index: /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASH8bbe6da0.dir/doc.xml
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASH8bbe6da0.dir/doc.xml (revision 27980)
+++ /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASH8bbe6da0.dir/doc.xml (revision 27981)
@@ -9,5 +9,5 @@
Bronwyn; page: 1 of 1 1 Using language models for generic entity extraction
import/langmodl.ps
- tmp/1373000551/langmodl.text
+ tmp/1375688849/langmodl.text
langmodl.text
langmodl.ps
@@ -25,8 +25,8 @@
W.J. Teahan
HASH8bbe6da0374b413b1b355c
- 1372400870
- 20130628
- 1373000551
- 20130705
+ 1375428646
+ 20130802
+ 1375688849
+ 20130805
HASH8bbe6da0.dir
doc.ps:application/postscript:
Index: /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASHeaa2992e.dir/doc.xml
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASHeaa2992e.dir/doc.xml (revision 27980)
+++ /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASHeaa2992e.dir/doc.xml (revision 27981)
@@ -9,8 +9,8 @@
wvWare/wvWare version 1.2.4
1997-00 Listing of Working Papers
- http://research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/word01.html
- http://research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/word01.html
+ http://research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/word01.html
+ http://research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/word01.html
import/word01.doc
- tmp/1373000552/word01.html
+ tmp/1375688850/word01.html
word01.html
word01.doc
@@ -24,8 +24,8 @@
doc.doc
HASHeaa2992e081949673150f3
- 1372400870
- 20130628
- 1373000552
- 20130705
+ 1375428646
+ 20130802
+ 1375688851
+ 20130805
HASHeaa2992e.dir
doc.doc:application/msword:
Index: /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASHeaa29d2e.dir/doc.xml
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASHeaa29d2e.dir/doc.xml (revision 27980)
+++ /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASHeaa29d2e.dir/doc.xml (revision 27981)
@@ -9,8 +9,8 @@
wvWare/wvWare version 1.2.4
Greenstone: A Comprehensive Open-Source
- http://research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/word03.html
- http://research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/word03.html
+ http://research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688851/word03.html
+ http://research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688851/word03.html
import/word03.doc
- tmp/1373000552/word03.html
+ tmp/1375688851/word03.html
word03.html
word03.doc
@@ -29,8 +29,8 @@
Greenstone: A comprehensive open-source digital library software system
HASHeaa29d2e081149673150f3
- 1372400870
- 20130628
- 1373000553
- 20130705
+ 1375428646
+ 20130802
+ 1375688851
+ 20130805
HASHeaa29d2e.dir
word030.png:image/png:
Index: /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASHeaa2a12e.dir/doc.xml
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASHeaa2a12e.dir/doc.xml (revision 27980)
+++ /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASHeaa2a12e.dir/doc.xml (revision 27981)
@@ -9,8 +9,8 @@
wvWare/wvWare version 1.2.4
How to build your own digital library with Greenstone
- http://research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000553/word05.html
- http://research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000553/word05.html
+ http://research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688851/word05.html
+ http://research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688851/word05.html
import/word05.doc
- tmp/1373000553/word05.html
+ tmp/1375688851/word05.html
word05.html
word05.doc
@@ -27,8 +27,8 @@
GREENSTONE DIGITAL LIBRARY INSTALLERâS GUIDE
HASHeaa2a12e080949673150f3
- 1372400870
- 20130628
- 1373000553
- 20130705
+ 1375428646
+ 20130802
+ 1375688852
+ 20130805
HASHeaa2a12e.dir
word050.wmf:unknown:
Index: /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASHeaa2a32e.dir/doc.xml
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASHeaa2a32e.dir/doc.xml (revision 27980)
+++ /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASHeaa2a32e.dir/doc.xml (revision 27981)
@@ -9,8 +9,8 @@
wvWare/wvWare version 1.2.4
Evolving Tool Support for Digital Librarians
- http://research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000554/word06.html
- http://research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000554/word06.html
+ http://research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688852/word06.html
+ http://research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688852/word06.html
import/word06.doc
- tmp/1373000554/word06.html
+ tmp/1375688852/word06.html
word06.html
word06.doc
@@ -27,8 +27,8 @@
COMPUTATIONAL SENSE: THE ROLE OF TECHNOLOGY IN THE EDUCATION OF DIGITAL LIBRARIANS
HASHeaa2a32e080549673150f3
- 1372400870
- 20130628
- 1373000554
- 20130705
+ 1375428646
+ 20130802
+ 1375688852
+ 20130805
HASHeaa2a32e.dir
doc.doc:application/msword:
Index: /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/earliestDatestamp
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/earliestDatestamp (revision 27980)
+++ /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/earliestDatestamp (revision 27981)
@@ -1,1 +1,1 @@
-1373000550
+1375688848
Index: /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/index/build.cfg
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/index/build.cfg (revision 27980)
+++ /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/index/build.cfg (revision 27981)
@@ -1,5 +1,5 @@
-builddate 1373000556
+builddate 1375688854
buildtype mgpp
-earliestdatestamp 1373000550
+earliestdatestamp 1375688848
indexfieldmap text->TX dc.Title,ex.dc.Title,Title->TI dc.Creator->CR
indexfields text dc.Title,ex.dc.Title,Title dc.Creator
Index: her-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372400872243.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372400872243.txt (revision 27980)
+++ (revision )
@@ -1,137 +1,0 @@
-s
-Command: perl -S /research/ak19/GS286bin_26Jun2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS286bin_26Jun2013/collect wordpdfb
-import.pl> Global file scan checking directory: /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/pdf03.pdf EXIF block
-import.pl> Converting cluster.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400873/err.log" -output text "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400873/cluster.ps"
-import.pl> TextPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400873/cluster.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/cluster.ps"
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400873/err.log" -output text "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400873/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400873/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/langmodl.ps"
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400874/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400874/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400874/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400874/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400874/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400874/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400874/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400874/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400874/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400874/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400874/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400874/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400874/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400874/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400874/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400875/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400875/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400875/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400875/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400875/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400875/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400875/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400875/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400875/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400875/vh40.gif to vh40.gif
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400876/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400876/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400876/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400876/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372400876/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 9 documents were considered for processing
-import.pl> * 9 were processed and included in the collection
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS286bin_26Jun2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS286bin_26Jun2013/collect wordpdfb
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH2ce671f3.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH2a3afe7a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH2ce671f3.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH2a3afe7a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;Source; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH2ce671f3.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH2a3afe7a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;Source;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;Source;: 326938
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH2ce671f3.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH2a3afe7a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;Source;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;Source;: 326938
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH2ce671f3.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH2a3afe7a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: her-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372401612710.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372401612710.txt (revision 27980)
+++ (revision )
@@ -1,140 +1,0 @@
-s
-Command: perl -S /research/ak19/GS286bin_26Jun2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS286bin_26Jun2013/collect wordpdfb
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/pdf03.pdf EXIF block
-import.pl> Converting cluster.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401613/err.log" -output text "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401613/cluster.ps"
-import.pl> TextPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401613/cluster.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/cluster.ps"
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401614/err.log" -output text "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401614/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401614/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/langmodl.ps"
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401614/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401614/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401614/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401614/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401614/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401614/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401615/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401615/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401615/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401615/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401615/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401615/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401615/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401615/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401615/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401615/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401615/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401615/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401615/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401615/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401616/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401616/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401616/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401616/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401616/vh40.gif to vh40.gif
-import.pl> Wide character in print at /research/ak19/GS286bin_26Jun2013/perllib/plugouts/BasePlugout.pm line 899.
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401617/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401617/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401617/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401617/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401617/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 9 documents were considered for processing
-import.pl> * 9 were processed and included in the collection
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS286bin_26Jun2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS286bin_26Jun2013/collect wordpdfb
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH011178d4.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHbc2ee015.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH011178d4.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHbc2ee015.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;Source; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH011178d4.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHbc2ee015.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;Source;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;Source;: 327283
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH011178d4.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHbc2ee015.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;Source;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;Source;: 327283
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH011178d4.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHbc2ee015.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: her-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372401998212.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372401998212.txt (revision 27980)
+++ (revision )
@@ -1,140 +1,0 @@
-s
-Command: perl -S /research/ak19/GS286bin_26Jun2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS286bin_26Jun2013/collect wordpdfb
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/pdf03.pdf EXIF block
-import.pl> Converting cluster.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401999/err.log" -output text "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401999/cluster.ps"
-import.pl> TextPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401999/cluster.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/cluster.ps"
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401999/err.log" -output text "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401999/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401999/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/langmodl.ps"
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401999/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401999/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372401999/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402000/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402000/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402000/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402000/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402000/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402000/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402000/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402000/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402000/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402000/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402000/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402000/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402001/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402001/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402001/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402001/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402001/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402001/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402001/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402001/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402001/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402001/vh40.gif to vh40.gif
-import.pl> Wide character in print at /research/ak19/GS286bin_26Jun2013/perllib/plugouts/BasePlugout.pm line 899.
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402002/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402002/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402002/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402002/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402002/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 9 documents were considered for processing
-import.pl> * 9 were processed and included in the collection
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS286bin_26Jun2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS286bin_26Jun2013/collect wordpdfb
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH011a1dd4.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHbc2ee10f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH011a1dd4.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHbc2ee10f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH011a1dd4.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHbc2ee10f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH011a1dd4.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHbc2ee10f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH011a1dd4.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHbc2ee10f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: her-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372402009444.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372402009444.txt (revision 27980)
+++ (revision )
@@ -1,140 +1,0 @@
-s
-Command: perl -S /research/ak19/GS286bin_26Jun2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS286bin_26Jun2013/collect wordpdfb
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/pdf03.pdf EXIF block
-import.pl> Converting cluster.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402010/err.log" -output text "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402010/cluster.ps"
-import.pl> TextPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402010/cluster.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/cluster.ps"
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/err.log" -output text "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/langmodl.ps"
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402011/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402012/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402012/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402012/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402012/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402012/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402013/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402013/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402013/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402013/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402013/vh40.gif to vh40.gif
-import.pl> Wide character in print at /research/ak19/GS286bin_26Jun2013/perllib/plugouts/BasePlugout.pm line 899.
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402013/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402013/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402013/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402013/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402013/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 9 documents were considered for processing
-import.pl> * 9 were processed and included in the collection
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS286bin_26Jun2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS286bin_26Jun2013/collect wordpdfb
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASHbc2ee10f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH011115d4.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASHbc2ee10f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH011115d4.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASHbc2ee10f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH011115d4.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASHbc2ee10f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH011115d4.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASHbc2ee10f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0f55374a.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH014d6653.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1e8bdd2b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH017ebea0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH0194429e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH01b5ae76.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHfe0860a0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH011115d4.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: her-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372402379223.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372402379223.txt (revision 27980)
+++ (revision )
@@ -1,141 +1,0 @@
-s
-Command: perl -S /research/ak19/GS286bin_26Jun2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS286bin_26Jun2013/collect wordpdfb
-import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/pdf03.pdf EXIF block
-import.pl> Converting cluster.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402380/err.log" -output text "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402380/cluster.ps"
-import.pl> TextPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402380/cluster.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/cluster.ps"
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402380/err.log" -output text "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402380/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402380/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/import/langmodl.ps"
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402380/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402380/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402380/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402381/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402381/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402381/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402381/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402381/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402381/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402381/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402381/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402381/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402381/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402381/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402381/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402382/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402382/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402382/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402382/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402382/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402382/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402382/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402382/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402382/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402382/vh40.gif to vh40.gif
-import.pl> Wide character in print at /research/ak19/GS286bin_26Jun2013/perllib/plugouts/BasePlugout.pm line 899.
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402383/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402383/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402383/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402383/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/tmp/1372402383/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 9 documents were considered for processing
-import.pl> * 9 were processed and included in the collection
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS286bin_26Jun2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS286bin_26Jun2013/collect wordpdfb
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/wordpdfb/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: her-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372402986474.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372402986474.txt (revision 27980)
+++ (revision )
@@ -1,141 +1,0 @@
-s
-Command: perl -S /research/ak19/GS286bin_26Jun2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS286bin_26Jun2013/collect Word-PDF-Basic
-import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/import/pdf03.pdf EXIF block
-import.pl> Converting cluster.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402987/err.log" -output text "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402987/cluster.ps"
-import.pl> TextPlugin processing /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402987/cluster.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/import/cluster.ps"
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/err.log" -output text "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/import/langmodl.ps"
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402988/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402989/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402989/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402989/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402989/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402989/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402989/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402989/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402989/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402989/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402989/vh40.gif to vh40.gif
-import.pl> Wide character in print at /research/ak19/GS286bin_26Jun2013/perllib/plugouts/BasePlugout.pm line 899.
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402990/err.log" -output html "/research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402990/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402990/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402990/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/tmp/1372402990/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 9 documents were considered for processing
-import.pl> * 9 were processed and included in the collection
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS286bin_26Jun2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS286bin_26Jun2013/collect Word-PDF-Basic
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS286bin_26Jun2013/collect/Word-PDF-Basic/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: her-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372910152660.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372910152660.txt (revision 27980)
+++ (revision )
@@ -1,216 +1,0 @@
-s
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01-2_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-2_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-3_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-3_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-4_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-4_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-5_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-5_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-7_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-7_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-8_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-8_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf03.pdf EXIF block
-import.pl> Converting cluster.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910154/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910154/cluster.ps"
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910154/cluster.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/cluster.ps"
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910155/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910155/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910155/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/langmodl.ps"
-import.pl> ImagePlugin processing pdf01-2_1.jpg
-import.pl> Converting image pdf01-2_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-2_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> ImagePlugin processing pdf01-3_1.jpg
-import.pl> Converting image pdf01-3_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-3_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> ImagePlugin processing pdf01-4_1.jpg
-import.pl> Converting image pdf01-4_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-4_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> ImagePlugin processing pdf01-5_1.jpg
-import.pl> Converting image pdf01-5_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-5_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> ImagePlugin processing pdf01-7_1.jpg
-import.pl> Converting image pdf01-7_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-7_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> ImagePlugin processing pdf01-8_1.jpg
-import.pl> Converting image pdf01-8_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-8_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> WARNING: No plugin could recognise pdf01.html
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910157/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910157/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910157/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910157/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910157/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910157/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910157/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910157/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910157/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910157/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910158/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910158/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910158/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910158/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910158/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910158/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910158/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910158/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910158/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910158/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910159/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910159/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910159/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910159/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910159/vh40.gif to vh40.gif
-import.pl> Wide character in print at /research/ak19/GS2bin_4Jul2013/perllib/plugouts/BasePlugout.pm line 899.
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910159/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910159/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910159/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910159/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910159/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 16 documents were considered for processing
-import.pl> * 15 were processed and included in the collection
-import.pl> * 1 was unrecognised
-import.pl> See /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/etc/fail.log for a list of unrecognised and/or rejected documents
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c408b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c89ab.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018d1beb.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8c651b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cae3b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cf75b1c.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 745111
-buildcol.pl> Total bytes in text: 745126
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c408b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c89ab.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018d1beb.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8c651b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cae3b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cf75b1c.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 745111
-buildcol.pl> Total bytes in text: 745126
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c408b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c89ab.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018d1beb.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8c651b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cae3b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cf75b1c.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 745111
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327833
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c408b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c89ab.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018d1beb.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8c651b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cae3b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cf75b1c.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 745111
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327833
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c408b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c89ab.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018d1beb.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8c651b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cae3b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cf75b1c.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: her-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372910427066.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372910427066.txt (revision 27980)
+++ (revision )
@@ -1,216 +1,0 @@
-s
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01-2_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-2_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-3_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-3_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-4_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-4_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-5_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-5_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-7_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-7_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-8_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-8_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf03.pdf EXIF block
-import.pl> Converting cluster.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910428/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910428/cluster.ps"
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910428/cluster.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/cluster.ps"
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910429/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910429/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910429/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/langmodl.ps"
-import.pl> ImagePlugin processing pdf01-2_1.jpg
-import.pl> Converting image pdf01-2_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-2_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> ImagePlugin processing pdf01-3_1.jpg
-import.pl> Converting image pdf01-3_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-3_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> ImagePlugin processing pdf01-4_1.jpg
-import.pl> Converting image pdf01-4_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-4_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> ImagePlugin processing pdf01-5_1.jpg
-import.pl> Converting image pdf01-5_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-5_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> ImagePlugin processing pdf01-7_1.jpg
-import.pl> Converting image pdf01-7_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-7_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> ImagePlugin processing pdf01-8_1.jpg
-import.pl> Converting image pdf01-8_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-8_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> WARNING: No plugin could recognise pdf01.html
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910431/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910431/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910431/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910431/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910431/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910431/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910431/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910431/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910431/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910431/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910432/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910432/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910432/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910432/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910432/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910432/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910432/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910432/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910432/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910432/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910432/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910432/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910432/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910432/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910432/vh40.gif to vh40.gif
-import.pl> Wide character in print at /research/ak19/GS2bin_4Jul2013/perllib/plugouts/BasePlugout.pm line 899.
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910433/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910433/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910433/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910433/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910433/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 16 documents were considered for processing
-import.pl> * 15 were processed and included in the collection
-import.pl> * 1 was unrecognised
-import.pl> See /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/etc/fail.log for a list of unrecognised and/or rejected documents
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c408b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c89ab.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018d1beb.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8c651b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cae3b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cf75b1c.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 745111
-buildcol.pl> Total bytes in text: 745126
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c408b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c89ab.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018d1beb.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8c651b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cae3b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cf75b1c.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 745111
-buildcol.pl> Total bytes in text: 745126
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c408b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c89ab.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018d1beb.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8c651b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cae3b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cf75b1c.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 745111
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327833
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c408b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c89ab.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018d1beb.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8c651b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cae3b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cf75b1c.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 745111
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327833
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c408b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c89ab.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018d1beb.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8c651b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cae3b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cf75b1c.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: her-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372910460105.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372910460105.txt (revision 27980)
+++ (revision )
@@ -1,216 +1,0 @@
-s
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01-2_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-2_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-3_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-3_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-4_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-4_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-5_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-5_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-7_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-7_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-8_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-8_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf03.pdf EXIF block
-import.pl> Converting cluster.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910461/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910461/cluster.ps"
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910461/cluster.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/cluster.ps"
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910462/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910462/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910462/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/langmodl.ps"
-import.pl> ImagePlugin processing pdf01-2_1.jpg
-import.pl> Converting image pdf01-2_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-2_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> ImagePlugin processing pdf01-3_1.jpg
-import.pl> Converting image pdf01-3_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-3_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> ImagePlugin processing pdf01-4_1.jpg
-import.pl> Converting image pdf01-4_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-4_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> ImagePlugin processing pdf01-5_1.jpg
-import.pl> Converting image pdf01-5_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-5_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> ImagePlugin processing pdf01-7_1.jpg
-import.pl> Converting image pdf01-7_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-7_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> ImagePlugin processing pdf01-8_1.jpg
-import.pl> Converting image pdf01-8_1.jpg to: THUMB gif ...
-import.pl> ...done
-import.pl> Converting image pdf01-8_1.jpg to: SCREEN jpeg ...
-import.pl> ...done
-import.pl> WARNING: No plugin could recognise pdf01.html
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910464/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910464/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910464/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910464/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910464/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910464/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910464/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910464/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910464/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910464/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910465/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910465/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910465/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910465/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910465/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910465/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910465/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910465/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910465/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910465/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910466/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910466/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910466/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910466/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910466/vh40.gif to vh40.gif
-import.pl> Wide character in print at /research/ak19/GS2bin_4Jul2013/perllib/plugouts/BasePlugout.pm line 899.
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910466/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910466/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910466/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910466/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910466/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 16 documents were considered for processing
-import.pl> * 15 were processed and included in the collection
-import.pl> * 1 was unrecognised
-import.pl> See /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/etc/fail.log for a list of unrecognised and/or rejected documents
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c408b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c89ab.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018d1beb.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8c651b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cae3b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cf75b1c.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 745111
-buildcol.pl> Total bytes in text: 745126
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c408b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c89ab.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018d1beb.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8c651b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cae3b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cf75b1c.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 745111
-buildcol.pl> Total bytes in text: 745126
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c408b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c89ab.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018d1beb.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8c651b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cae3b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cf75b1c.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 745111
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327833
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c408b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c89ab.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018d1beb.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8c651b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cae3b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cf75b1c.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 745111
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327833
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c408b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018c89ab.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH018d1beb.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8c651b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cae3b1b.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8cf75b1c.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: her-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372910566465.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372910566465.txt (revision 27980)
+++ (revision )
@@ -1,162 +1,0 @@
-s
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01-2_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-2_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-3_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-3_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-4_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-4_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-5_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-5_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-7_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-7_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01-8_1.jpg
-import.pl> Extracted 19 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01-8_1.jpg EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf03.pdf EXIF block
-import.pl> Converting cluster.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910568/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910568/cluster.ps"
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910568/cluster.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/cluster.ps"
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910568/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910568/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910568/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/langmodl.ps"
-import.pl> WARNING: No plugin could recognise pdf01-2_1.jpg
-import.pl> WARNING: No plugin could recognise pdf01-3_1.jpg
-import.pl> WARNING: No plugin could recognise pdf01-4_1.jpg
-import.pl> WARNING: No plugin could recognise pdf01-5_1.jpg
-import.pl> WARNING: No plugin could recognise pdf01-7_1.jpg
-import.pl> WARNING: No plugin could recognise pdf01-8_1.jpg
-import.pl> WARNING: No plugin could recognise pdf01.html
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910569/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910569/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910569/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910569/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910569/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910569/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910569/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910569/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910569/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910569/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910569/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910569/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910569/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910569/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910569/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910570/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910570/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910570/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910570/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910570/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910570/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910570/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910570/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910570/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910570/vh40.gif to vh40.gif
-import.pl> Wide character in print at /research/ak19/GS2bin_4Jul2013/perllib/plugouts/BasePlugout.pm line 899.
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910571/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910571/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910571/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910571/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910571/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 16 documents were considered for processing
-import.pl> * 9 were processed and included in the collection
-import.pl> * 7 were unrecognised
-import.pl> See /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/etc/fail.log for a list of unrecognised and/or rejected documents
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: her-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372910584680.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372910584680.txt (revision 27980)
+++ (revision )
@@ -1,141 +1,0 @@
-s
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf03.pdf EXIF block
-import.pl> Converting cluster.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910585/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910585/cluster.ps"
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910585/cluster.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/cluster.ps"
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910586/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910586/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910586/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/langmodl.ps"
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910586/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910586/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910586/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910586/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910586/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910586/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910586/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910586/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910586/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910586/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910587/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910587/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910587/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910587/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910587/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910587/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910587/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910587/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910587/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910587/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910587/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910587/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910587/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910587/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910587/vh40.gif to vh40.gif
-import.pl> Wide character in print at /research/ak19/GS2bin_4Jul2013/perllib/plugouts/BasePlugout.pm line 899.
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910588/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910588/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910588/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910588/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910588/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 9 documents were considered for processing
-import.pl> * 9 were processed and included in the collection
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: her-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372910598088.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372910598088.txt (revision 27980)
+++ (revision )
@@ -1,141 +1,0 @@
-s
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf03.pdf EXIF block
-import.pl> Converting cluster.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910599/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910599/cluster.ps"
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910599/cluster.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/cluster.ps"
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910599/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910599/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910599/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/langmodl.ps"
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910599/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910599/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910599/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910600/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910600/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910600/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910600/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910600/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910600/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910600/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910600/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910600/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910600/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910600/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910600/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910600/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910600/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910600/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910600/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910600/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910601/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910601/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910601/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910601/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910601/vh40.gif to vh40.gif
-import.pl> Wide character in print at /research/ak19/GS2bin_4Jul2013/perllib/plugouts/BasePlugout.pm line 899.
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910601/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910601/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910601/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910601/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910601/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 9 documents were considered for processing
-import.pl> * 9 were processed and included in the collection
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: her-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372910758337.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372910758337.txt (revision 27980)
+++ (revision )
@@ -1,139 +1,0 @@
-s
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf03.pdf EXIF block
-import.pl> Converting cluster.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910759/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910759/cluster.ps"
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910759/cluster.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/cluster.ps"
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910759/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910759/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910759/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/langmodl.ps"
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910760/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910760/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910760/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910760/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910760/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910760/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910760/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910760/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910760/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910760/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910760/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910760/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910760/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910760/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910760/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910761/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910761/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910761/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910761/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910761/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910761/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910761/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910761/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910761/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910761/vh40.gif to vh40.gif
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910762/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910762/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910762/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910762/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910762/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 9 documents were considered for processing
-import.pl> * 9 were processed and included in the collection
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 326760
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 326760
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: her-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372910935241.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372910935241.txt (revision 27980)
+++ (revision )
@@ -1,139 +1,0 @@
-s
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf03.pdf EXIF block
-import.pl> Converting cluster.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910936/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910936/cluster.ps"
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910936/cluster.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/cluster.ps"
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910936/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910936/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910936/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/langmodl.ps"
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910936/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910936/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910936/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910937/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910937/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910937/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910937/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910937/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910937/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910937/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910937/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910937/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910937/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910937/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910937/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910938/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910938/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910938/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910938/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910938/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910938/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910938/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910938/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910938/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910938/vh40.gif to vh40.gif
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910939/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910939/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910939/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910939/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372910939/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 9 documents were considered for processing
-import.pl> * 9 were processed and included in the collection
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 326760
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 326760
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372911032346.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372911032346.txt (revision 27980)
+++ (revision )
@@ -1,141 +1,0 @@
-s
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf03.pdf EXIF block
-import.pl> Converting cluster.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911033/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911033/cluster.ps"
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911033/cluster.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/cluster.ps"
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911033/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911033/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911033/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/langmodl.ps"
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911034/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911034/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911034/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911034/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911034/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911034/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911034/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911034/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911034/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911034/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911034/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911034/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911034/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911034/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911034/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911035/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911035/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911035/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911035/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911035/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911035/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911035/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911035/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911035/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911035/vh40.gif to vh40.gif
-import.pl> Wide character in print at /research/ak19/GS2bin_4Jul2013/perllib/plugouts/BasePlugout.pm line 899.
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911036/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911036/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911036/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911036/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911036/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 9 documents were considered for processing
-import.pl> * 9 were processed and included in the collection
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372911588707.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372911588707.txt (revision 27980)
+++ (revision )
@@ -1,141 +1,0 @@
-s
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf03.pdf EXIF block
-import.pl> Converting cluster.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911589/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911589/cluster.ps"
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911589/cluster.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/cluster.ps"
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/langmodl.ps"
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911590/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911591/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911591/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911591/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911591/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911591/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911591/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911591/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911591/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911591/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911591/vh40.gif to vh40.gif
-import.pl> Wide character in print at /research/ak19/GS2bin_4Jul2013/perllib/plugouts/BasePlugout.pm line 899.
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911592/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911592/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911592/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911592/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911592/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 9 documents were considered for processing
-import.pl> * 9 were processed and included in the collection
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372911609747.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1372911609747.txt (revision 27980)
+++ (revision )
@@ -1,141 +1,0 @@
-s
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/pdf03.pdf EXIF block
-import.pl> Converting cluster.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911610/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911610/cluster.ps"
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911610/cluster.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/cluster.ps"
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911611/err.log" -output text "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911611/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911611/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/import/langmodl.ps"
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911611/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911611/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911611/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911611/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911611/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911611/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911611/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911611/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911611/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911611/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911612/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911612/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911612/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911612/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911612/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911612/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911612/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911612/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911612/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911612/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911613/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911613/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911613/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911613/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911613/vh40.gif to vh40.gif
-import.pl> Wide character in print at /research/ak19/GS2bin_4Jul2013/perllib/plugouts/BasePlugout.pm line 899.
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911613/err.log" -output html "/research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911613/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911613/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911613/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/tmp/1372911613/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 9 documents were considered for processing
-import.pl> * 9 were processed and included in the collection
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS2bin_4Jul2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS2bin_4Jul2013/collect Word-PDF-Formatting
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text: 744964
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 744955
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 327503
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_4Jul2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH015936f5.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1373000223047.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1373000223047.txt (revision 27980)
+++ (revision )
@@ -1,132 +1,0 @@
-s
-Command: perl -S /research/ak19/GS2bin_5July2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS2bin_5July2013/collect Word-PDF-Formatting
-import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/import/pdf03.pdf EXIF block
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000224/err.log" -output text "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000224/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000224/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/import/langmodl.ps"
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000224/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000224/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000224/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000224/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000224/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000224/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000224/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000224/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000224/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000224/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000225/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000225/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000225/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000225/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000225/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000225/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000225/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000225/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000225/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000225/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000225/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000225/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000225/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000225/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000225/vh40.gif to vh40.gif
-import.pl> Wide character in print at /research/ak19/GS2bin_5July2013/perllib/plugouts/BasePlugout.pm line 899.
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000226/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000226/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000226/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000226/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000226/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 8 documents were considered for processing
-import.pl> * 8 were processed and included in the collection
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS2bin_5July2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS2bin_5July2013/collect Word-PDF-Formatting
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 717786
-buildcol.pl> Total bytes in text: 717794
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 717786
-buildcol.pl> Total bytes in text: 717794
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 717786
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 300222
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 717786
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 300222
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: her-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1373000527360.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1373000527360.txt (revision 27980)
+++ (revision )
@@ -1,132 +1,0 @@
-s
-Command: perl -S /research/ak19/GS2bin_5July2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS2bin_5July2013/collect Word-PDF-Formatting
-import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/import/pdf03.pdf EXIF block
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000528/err.log" -output text "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000528/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000528/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/import/langmodl.ps"
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000528/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000528/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000528/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000528/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000528/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000528/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000529/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000529/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000529/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000529/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000529/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000529/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000529/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000529/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000529/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000529/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000529/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000529/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000529/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000529/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000530/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000530/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000530/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000530/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000530/vh40.gif to vh40.gif
-import.pl> Wide character in print at /research/ak19/GS2bin_5July2013/perllib/plugouts/BasePlugout.pm line 899.
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000531/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000531/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000531/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000531/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000531/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 8 documents were considered for processing
-import.pl> * 8 were processed and included in the collection
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS2bin_5July2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS2bin_5July2013/collect Word-PDF-Formatting
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 717786
-buildcol.pl> Total bytes in text: 717794
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 717786
-buildcol.pl> Total bytes in text: 717794
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 717786
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 300222
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 717786
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 300222
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: her-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1373000550311.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1373000550311.txt (revision 27980)
+++ (revision )
@@ -1,132 +1,0 @@
-s
-Command: perl -S /research/ak19/GS2bin_5July2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS2bin_5July2013/collect Word-PDF-Formatting
-import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
-import.pl> Removing current contents of the archives directory...
-import.pl> Removing contents of the collection "tmp" directory...
-import.pl> Global file scan checking directory: /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/import
-import.pl> MetadataXMLPlugin: processing metadata.xml
-import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
-import.pl> Extracted 15 pieces of metadata from /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/import/pdf01.pdf EXIF block
-import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
-import.pl> Extracted 16 pieces of metadata from /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/import/pdf03.pdf EXIF block
-import.pl> Converting langmodl.ps to text format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000551/err.log" -output text "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000551/langmodl.ps"
-import.pl> Warning: Error executing gs: couldn't run.
-import.pl> Stripping text from postscript
-import.pl> TextPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000551/langmodl.text
-import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/import/langmodl.ps"
-import.pl> Converting pdf01.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000551/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000551/pdf01.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000551/pdf01.html
-import.pl> Converting pdf03.pdf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000551/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000551/pdf03.pdf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000551/pdf03.html
-import.pl> Converting rtf01.rtf to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/rtf01.rtf"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/rtf01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/rtf011.gif to rtf011.gif
-import.pl> Converting word01.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/word01.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/word01.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/vh40.gif to vh40.gif
-import.pl> Converting word03.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/word03.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/word03.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000552/vh40.gif to vh40.gif
-import.pl> Converting word05.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000553/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000553/word05.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000553/word05.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000553/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000553/vh40.gif to vh40.gif
-import.pl> Wide character in print at /research/ak19/GS2bin_5July2013/perllib/plugouts/BasePlugout.pm line 899.
-import.pl> Converting word06.doc to html format
-import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000554/err.log" -output html "/research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000554/word06.doc"
-import.pl> HTMLPlugin processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000554/word06.html
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000554/wvSmall.gif to wvSmall.gif
-import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/tmp/1373000554/vh40.gif to vh40.gif
-import.pl> *********************************************
-import.pl> Import complete
-import.pl> *********************************************
-import.pl> * 8 documents were considered for processing
-import.pl> * 8 were processed and included in the collection
-import.pl> Command complete.
-import.pl> Extracting new metadata from archive files.
-import.pl> Archived metadata extraction complete.
-Command: perl -S /research/ak19/GS2bin_5July2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS2bin_5July2013/collect Word-PDF-Formatting
-buildcol.pl> *** creating the compressed text
-buildcol.pl> collecting text statistics (mgpp_passes -T1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 717786
-buildcol.pl> Total bytes in text: 717794
-buildcol.pl> creating the compression dictionary
-buildcol.pl> compressing the text (mgpp_passes -T2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Compressing text from text)
-buildcol.pl> Total bytes in collection: 717786
-buildcol.pl> Total bytes in text: 717794
-buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
-buildcol.pl> creating index dictionary (mgpp_passes -I1)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 717786
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 300222
-buildcol.pl> inverting the text (mgpp_passes -I2)
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
-buildcol.pl> Total bytes in collection: 717786
-buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 300222
-buildcol.pl> create the weights file
-buildcol.pl> creating 'on-disk' stemmed dictionary
-buildcol.pl> creating stem indexes
-buildcol.pl> BuildDir: /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/building
-buildcol.pl> *** creating the info database and processing associated files
-buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5July2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
-buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
-buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
-buildcol.pl> *** outputting information for classifier: CL1
-buildcol.pl> *** outputting information for classifier: CL2
-buildcol.pl> *** outputting information for classifier: oai
-buildcol.pl> *** creating auxiliary files
-buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
-buildcol.pl> Command complete.
Index: /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1375429123824.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1375429123824.txt (revision 27981)
+++ /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1375429123824.txt (revision 27981)
@@ -0,0 +1,134 @@
+s
+Command: perl -S /research/ak19/GS2bin_1Aug2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS2bin_1Aug2013/collect Word-PDF-Formatting
+import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
+import.pl> AutoLoadConverters: PDFBox Extension to Greenstone detected for PDFPlugin
+import.pl> Removing current contents of the archives directory...
+import.pl> Removing contents of the collection "tmp" directory...
+import.pl> Global file scan checking directory: /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/import
+import.pl> MetadataXMLPlugin: processing metadata.xml
+import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
+import.pl> Extracted 15 pieces of metadata from /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/import/pdf01.pdf EXIF block
+import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
+import.pl> Extracted 16 pieces of metadata from /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/import/pdf03.pdf EXIF block
+import.pl> Converting langmodl.ps to text format
+import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429124/err.log" -output text "/research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429124/langmodl.ps"
+import.pl> Warning: Error executing gs: couldn't run.
+import.pl> Stripping text from postscript
+import.pl> TextPlugin processing /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429124/langmodl.text
+import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/import/langmodl.ps"
+import.pl> Converting pdf01.pdf to html format
+import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429125/err.log" -output html "/research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429125/pdf01.pdf"
+import.pl> HTMLPlugin processing /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429125/pdf01.html
+import.pl> Converting pdf03.pdf to html format
+import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429125/err.log" -output html "/research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429125/pdf03.pdf"
+import.pl> HTMLPlugin processing /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429125/pdf03.html
+import.pl> Converting rtf01.rtf to html format
+import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429125/err.log" -output html "/research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429125/rtf01.rtf"
+import.pl> HTMLPlugin processing /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429125/rtf01.html
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429125/rtf011.gif to rtf011.gif
+import.pl> Converting word01.doc to html format
+import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429126/err.log" -output html "/research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429126/word01.doc"
+import.pl> HTMLPlugin processing /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429126/word01.html
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429126/wvSmall.gif to wvSmall.gif
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429126/vh40.gif to vh40.gif
+import.pl> Converting word03.doc to html format
+import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429126/err.log" -output html "/research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429126/word03.doc"
+import.pl> HTMLPlugin processing /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429126/word03.html
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429126/wvSmall.gif to wvSmall.gif
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429126/vh40.gif to vh40.gif
+import.pl> Converting word05.doc to html format
+import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429127/err.log" -output html "/research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429127/word05.doc"
+import.pl> HTMLPlugin processing /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429127/word05.html
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429127/wvSmall.gif to wvSmall.gif
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429127/vh40.gif to vh40.gif
+import.pl> Wide character in print at /research/ak19/GS2bin_1Aug2013/perllib/plugouts/BasePlugout.pm line 946.
+import.pl> Converting word06.doc to html format
+import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429127/err.log" -output html "/research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429127/word06.doc"
+import.pl> HTMLPlugin processing /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429127/word06.html
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429127/wvSmall.gif to wvSmall.gif
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/tmp/1375429127/vh40.gif to vh40.gif
+import.pl> *********************************************
+import.pl> Import complete
+import.pl> *********************************************
+import.pl> * 8 documents were considered for processing
+import.pl> * 8 were processed and included in the collection
+import.pl> Command complete.
+import.pl> Extracting new metadata from archive files.
+import.pl> Archived metadata extraction complete.
+Command: perl -S /research/ak19/GS2bin_1Aug2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS2bin_1Aug2013/collect Word-PDF-Formatting
+buildcol.pl> AutoLoadConverters: PDFBox Extension to Greenstone detected for PDFPlugin
+buildcol.pl> *** creating the compressed text
+buildcol.pl> collecting text statistics (mgpp_passes -T1)
+buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
+buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
+buildcol.pl> Stats (Compressing text from text)
+buildcol.pl> Total bytes in collection: 717786
+buildcol.pl> Total bytes in text: 717794
+buildcol.pl> creating the compression dictionary
+buildcol.pl> compressing the text (mgpp_passes -T2)
+buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
+buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
+buildcol.pl> Stats (Compressing text from text)
+buildcol.pl> Total bytes in collection: 717786
+buildcol.pl> Total bytes in text: 717794
+buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
+buildcol.pl> creating index dictionary (mgpp_passes -I1)
+buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
+buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
+buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
+buildcol.pl> Total bytes in collection: 717786
+buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 300222
+buildcol.pl> inverting the text (mgpp_passes -I2)
+buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
+buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
+buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
+buildcol.pl> Total bytes in collection: 717786
+buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 300222
+buildcol.pl> create the weights file
+buildcol.pl> creating 'on-disk' stemmed dictionary
+buildcol.pl> creating stem indexes
+buildcol.pl> BuildDir: /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/building
+buildcol.pl> *** creating the info database and processing associated files
+buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_1Aug2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
+buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
+buildcol.pl> *** outputting information for classifier: CL1
+buildcol.pl> *** outputting information for classifier: CL2
+buildcol.pl> *** outputting information for classifier: oai
+buildcol.pl> *** creating auxiliary files
+buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
+buildcol.pl> Command complete.
Index: /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1375688848609.txt
===================================================================
--- /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1375688848609.txt (revision 27981)
+++ /other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/log/build_log.1375688848609.txt (revision 27981)
@@ -0,0 +1,132 @@
+s
+Command: perl -S /research/ak19/GS2bin_5Aug2013/bin/script/full-import.pl -gli -language en -collectdir /research/ak19/GS2bin_5Aug2013/collect Word-PDF-Formatting
+import.pl> Detected -sortmeta. To effect the stipulated sorting by metadata (or OID) remember this option should be paired with either the '-reversesort' or '-sort' option to ArchivesInfPlugin.
+import.pl> Removing current contents of the archives directory...
+import.pl> Removing contents of the collection "tmp" directory...
+import.pl> Global file scan checking directory: /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/import
+import.pl> MetadataXMLPlugin: processing metadata.xml
+import.pl> EmbeddedMetadataPlugin: processing pdf01.pdf
+import.pl> Extracted 15 pieces of metadata from /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/import/pdf01.pdf EXIF block
+import.pl> EmbeddedMetadataPlugin: processing pdf03.pdf
+import.pl> Extracted 16 pieces of metadata from /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/import/pdf03.pdf EXIF block
+import.pl> Converting langmodl.ps to text format
+import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688849/err.log" -output text "/research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688849/langmodl.ps"
+import.pl> Warning: Error executing gs: couldn't run.
+import.pl> Stripping text from postscript
+import.pl> TextPlugin processing /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688849/langmodl.text
+import.pl> PostScriptPlugin: extracting PostScript metadata from "/research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/import/langmodl.ps"
+import.pl> Converting pdf01.pdf to html format
+import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688849/err.log" -output html "/research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688849/pdf01.pdf"
+import.pl> HTMLPlugin processing /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688849/pdf01.html
+import.pl> Converting pdf03.pdf to html format
+import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -pdf_zoom 2 -errlog "/research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/err.log" -output html "/research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/pdf03.pdf"
+import.pl> HTMLPlugin processing /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/pdf03.html
+import.pl> Converting rtf01.rtf to html format
+import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/err.log" -output html "/research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/rtf01.rtf"
+import.pl> HTMLPlugin processing /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/rtf01.html
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/rtf011.gif to rtf011.gif
+import.pl> Converting word01.doc to html format
+import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/err.log" -output html "/research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/word01.doc"
+import.pl> HTMLPlugin processing /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/word01.html
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/wvSmall.gif to wvSmall.gif
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688850/vh40.gif to vh40.gif
+import.pl> Converting word03.doc to html format
+import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688851/err.log" -output html "/research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688851/word03.doc"
+import.pl> HTMLPlugin processing /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688851/word03.html
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688851/wvSmall.gif to wvSmall.gif
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688851/vh40.gif to vh40.gif
+import.pl> Converting word05.doc to html format
+import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688851/err.log" -output html "/research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688851/word05.doc"
+import.pl> HTMLPlugin processing /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688851/word05.html
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688851/wvSmall.gif to wvSmall.gif
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688851/vh40.gif to vh40.gif
+import.pl> Wide character in print at /research/ak19/GS2bin_5Aug2013/perllib/plugouts/BasePlugout.pm line 946.
+import.pl> Converting word06.doc to html format
+import.pl> calling cmd "/usr/bin/perl" -S gsConvert.pl -verbose 2 -errlog "/research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688852/err.log" -output html "/research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688852/word06.doc"
+import.pl> HTMLPlugin processing /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688852/word06.html
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688852/wvSmall.gif to wvSmall.gif
+import.pl> BasePlugout::process couldn't copy the associated file /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/tmp/1375688852/vh40.gif to vh40.gif
+import.pl> *********************************************
+import.pl> Import complete
+import.pl> *********************************************
+import.pl> * 8 documents were considered for processing
+import.pl> * 8 were processed and included in the collection
+import.pl> Command complete.
+import.pl> Extracting new metadata from archive files.
+import.pl> Archived metadata extraction complete.
+Command: perl -S /research/ak19/GS2bin_5Aug2013/bin/script/full-buildcol.pl -gli -language en -collectdir /research/ak19/GS2bin_5Aug2013/collect Word-PDF-Formatting
+buildcol.pl> *** creating the compressed text
+buildcol.pl> collecting text statistics (mgpp_passes -T1)
+buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
+buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
+buildcol.pl> Stats (Compressing text from text)
+buildcol.pl> Total bytes in collection: 717786
+buildcol.pl> Total bytes in text: 717794
+buildcol.pl> creating the compression dictionary
+buildcol.pl> compressing the text (mgpp_passes -T2)
+buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
+buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
+buildcol.pl> Stats (Compressing text from text)
+buildcol.pl> Total bytes in collection: 717786
+buildcol.pl> Total bytes in text: 717794
+buildcol.pl> *** building index text;dc.Title,ex.dc.Title,Title;dc.Creator; in subdirectory idx
+buildcol.pl> creating index dictionary (mgpp_passes -I1)
+buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
+buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
+buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
+buildcol.pl> Total bytes in collection: 717786
+buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 300222
+buildcol.pl> inverting the text (mgpp_passes -I2)
+buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
+buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
+buildcol.pl> Stats (Creating index text;dc.Title,ex.dc.Title,Title;dc.Creator;)
+buildcol.pl> Total bytes in collection: 717786
+buildcol.pl> Total bytes in text;dc.Title,ex.dc.Title,Title;dc.Creator;: 300222
+buildcol.pl> create the weights file
+buildcol.pl> creating 'on-disk' stemmed dictionary
+buildcol.pl> creating stem indexes
+buildcol.pl> BuildDir: /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/building
+buildcol.pl> *** creating the info database and processing associated files
+buildcol.pl> ArchivesInfPlugin: processing /research/ak19/GS2bin_5Aug2013/collect/Word-PDF-Formatting/archives/archiveinf-doc.gdb
+buildcol.pl> GreenstoneXMLPlugin: processing HASH019c5dca.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH07915444.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH1a9cea0f.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASH8bbe6da0.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2992e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa29d2e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a12e.dir/doc.xml
+buildcol.pl> GreenstoneXMLPlugin: processing HASHeaa2a32e.dir/doc.xml
+buildcol.pl> *** outputting information for classifier: CL1
+buildcol.pl> *** outputting information for classifier: CL2
+buildcol.pl> *** outputting information for classifier: oai
+buildcol.pl> *** creating auxiliary files
+buildcol.pl> Copying rss-items.rdf file from archives to building (eventually to index)
+buildcol.pl> Command complete.