Last change
on this file was 32965, checked in by davidb, 5 years ago |
Further changes after test-run
|
-
Property svn:executable
set to
*
|
File size:
674 bytes
|
Line | |
---|
#!/bin/bash
#
# Reduce a gzip-compressed, tab-delimited HathiTrust metadata dump to a
# brief four-column file (input columns 1, 3, 20 and 24, tab-separated).
#
# Usage: <this-script> [input.gz] [output.txt]
#   $1  compressed dump to read   (default: hathi_full_20190301.txt.gz)
#   $2  brief file to write       (default: hathi_brief_20190301.txt)
#
# Exits non-zero if the input cannot be read or the extraction pipeline
# fails, instead of reporting "Done" over an empty or partial output file.

# pipefail makes a decompression failure visible even though awk, the last
# stage of the pipeline, would still exit 0.
set -euo pipefail

input=${1:-'hathi_full_20190301.txt.gz'}
output=${2:-'hathi_brief_20190301.txt'}

echo ""
echo "===="
echo " Script to extract Format (and related fields, such as copyright)"
echo " from HathiTrust tab-delimited metadata dump"
echo "===="

echo ""
echo "Reading in : $input"
echo "Writing out : $output"
echo ""

# Fail early with a clear message rather than letting the decompressor
# complain halfway through the run.
if [[ ! -r "$input" ]]; then
  echo "Error: cannot read input file: $input" >&2
  exit 1
fi

echo "Processing ..."
# gzip -dc is equivalent to zcat for .gz files, but also works on systems
# whose zcat only accepts the legacy .Z suffix.
if ! gzip -dc -- "$input" \
  | awk -F '\t' '{print $1 "\t" $3 "\t" $20 "\t" $24} ' \
  > "$output"; then
  echo "Error: extraction failed; $output may be incomplete" >&2
  exit 1
fi

echo "... Done"
echo ""

echo "===="
echo " Next, extract entries that are Music Format, Public Domain and"
echo " NOT scanned by Google (so called 'open-open' files):"
echo " ./HATHI-EXTRACT-PD-NON-GOOGLE.sh"
echo "===="
Note:
See
TracBrowser
for help on using the repository browser.