source: main/trunk/model-sites-dev/von-sparql/collect/nz-natlib-cat/pre-import/RUN_NZ.sh@ 28923

Last change on this file since 28923 was 28923, checked in by ak19, 10 years ago

Tidy up and changes resulting from UTF8 encoding issues

  • Property svn:executable set to *
File size: 980 bytes
Line 
#!/bin/bash
#
# RUN_NZ.sh - prepare the MARC-XML data file and run the 'split' Java class.
#
# Usage: RUN_NZ.sh [arguments forwarded unchanged to 'split' ...]
#
# All paths are relative to the current working directory.
#
# Steps:
#   1. If NZDataFull.xml does not exist, build it: an XML declaration and an
#      opening MARC21 <collection> tag, followed by the decompressed contents
#      of pubsnzmetadata.xml.gz, followed by the closing </collection> tag.
#   2. If NZDataFull-UTF8.xml does not exist, run the UTF8Fix Java class with
#      NZDataFull.xml and NZDataFull-UTF8.xml as its two arguments.
#   3. Run the 'split' Java class with this script's own arguments.
#
# Steps 1 and 2 are skipped when their target file already exists, so a
# half-written target must never be left behind: it would be mistaken for a
# finished one on the next run. Any output still marked as incomplete is
# therefore deleted when the script exits.
#
# Exit status: 1 if a preparation step fails, otherwise the status of the
# final 'split' invocation.

full_filename=NZDataFull.xml
full_filename_utf8=NZDataFull-UTF8.xml
compressed_source=pubsnzmetadata.xml.gz
split_classpath=marcXML_Split/lib/marc4j-2.6.0.jar:marcXML_Split/lib/guava-15.0.jar:marcXML_Split/bin

# Path of the output file currently being generated; empty when none is.
incomplete_output=

# Delete whatever output was still being generated when the script exits.
cleanup() {
  if [[ -n "$incomplete_output" ]]; then
    rm -f -- "$incomplete_output"
  fi
}
trap cleanup EXIT

# Print the given message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [[ ! -f "$full_filename" ]]; then
  echo "Did not detect uncompressed MARC-XML file '$full_filename'"

  # Assemble under a temporary name in the same directory and rename only
  # once every part has been written successfully.
  incomplete_output="${full_filename}.partial"

  echo " => Including UTF-8 character encoding XML processing instruction at start"
  {
    printf '%s\n' '<?xml version="1.0" encoding="utf-8"?>'
    printf '%s\n' '<collection xmlns="http://www.loc.gov/MARC21/slim">'
  } > "$incomplete_output" \
    || die "Failed to write '$incomplete_output'"

  echo " => Appending uncompressed data to '$full_filename' ..."
  gzip -d --stdout -- "$compressed_source" >> "$incomplete_output" \
    || die "Failed to decompress '$compressed_source'"
  printf '%s\n' '</collection>' >> "$incomplete_output" \
    || die "Failed to write '$incomplete_output'"

  mv -- "$incomplete_output" "$full_filename" \
    || die "Failed to rename '$incomplete_output' to '$full_filename'"
  incomplete_output=
  echo " => ... Done"
fi

if [[ ! -f "$full_filename_utf8" ]]; then

  echo "Fixing 'alien' character encodings issues within a UTF-8 file"

  # NOTE(review): the second argument is presumably the file UTF8Fix writes;
  # confirm against the UTF8Fix source. The target did not exist before this
  # step, so removing it after a failed run cannot destroy earlier work.
  incomplete_output="$full_filename_utf8"
  java -cp UTF8_Fix/bin UTF8Fix "$full_filename" "$full_filename_utf8" \
    || die "UTF8Fix failed while processing '$full_filename'"
  incomplete_output=

fi

echo "Splitting '$full_filename', this may take some time ..."
# "$@" forwards each argument as its own word; an unquoted $* would re-split
# arguments on whitespace and expand any glob characters they contain.
java -classpath "$split_classpath" split "$@"
28
Note: See TracBrowser for help on using the repository browser.