Last change
on this file since 28923 was 28923, checked in by ak19, 10 years ago |
Tidy up and changes resulting from UTF8 encoding issues
|
-
Property svn:executable
set to
*
|
File size:
980 bytes
|
Line | |
---|
#!/bin/bash
#
# Prepares the NZ MARC-XML data set and then runs the record splitter.
#
# Steps:
#   1. If NZDataFull.xml is absent, build it from pubsnzmetadata.xml.gz by
#      wrapping the uncompressed data in an XML declaration and a MARC21
#      <collection> element.
#   2. If NZDataFull-UTF8.xml is absent, run the UTF8Fix Java class with
#      NZDataFull.xml and NZDataFull-UTF8.xml as its two arguments
#      (presumably input and output, in that order -- confirm against UTF8Fix).
#   3. Run the 'split' Java class, forwarding this script's own arguments
#      to it unchanged.
#
# Usage: <this script> [arguments for 'split' ...]
#
# Exit status: 1 if a preparation step fails, otherwise the exit status of
# the 'split' Java process.

full_filename=NZDataFull.xml
full_filename_utf8=NZDataFull-UTF8.xml
compressed_filename=pubsnzmetadata.xml.gz

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [[ ! -f "$full_filename" ]]; then
  echo "Did not detect uncompressed MARC-XML file '$full_filename'"

  [[ -f "$compressed_filename" ]] \
    || die "Cannot build '$full_filename': '$compressed_filename' not found"

  # Build into a scratch file and rename it only once it is complete.
  # The existence of $full_filename is what marks this step as done, so a
  # half-written file must never be left under that name.
  partial_filename="${full_filename}.partial"

  echo " => Including UTF-8 character encoding XML processing instruction at start"
  printf '%s\n' \
    '<?xml version="1.0" encoding="utf-8"?>' \
    '<collection xmlns="http://www.loc.gov/MARC21/slim">' \
    > "$partial_filename" \
    || die "Cannot write to '$partial_filename'"

  echo " => Appending uncompressed data to '$full_filename' ..."
  if ! gzip -d --stdout "$compressed_filename" >> "$partial_filename"; then
    rm -f -- "$partial_filename"
    die "Failed to uncompress '$compressed_filename'"
  fi

  if ! printf '%s\n' '</collection>' >> "$partial_filename"; then
    rm -f -- "$partial_filename"
    die "Cannot write to '$partial_filename'"
  fi

  if ! mv -- "$partial_filename" "$full_filename"; then
    rm -f -- "$partial_filename"
    die "Cannot move '$partial_filename' to '$full_filename'"
  fi
  echo " => ... Done"
fi

if [[ ! -f "$full_filename_utf8" ]]; then

  echo "Fixing 'alien' character encodings issues within a UTF-8 file"

  if ! java -cp UTF8_Fix/bin UTF8Fix "$full_filename" "$full_filename_utf8"; then
    # The file did not exist when this branch was entered, so anything there
    # now was produced by the failed run; remove it so that the next
    # invocation retries instead of reusing it.
    rm -f -- "$full_filename_utf8"
    die "UTF8Fix failed on '$full_filename'"
  fi

fi

echo "Splitting '$full_filename', this may take some time ..."
# "$@" forwards every argument as its own word, so values containing spaces
# or glob characters reach the Java program intact.
java -classpath marcXML_Split/lib/marc4j-2.6.0.jar:marcXML_Split/lib/guava-15.0.jar:marcXML_Split/bin split "$@"
Note:
See
TracBrowser
for help on using the repository browser.