root/other-projects/nightly-tasks/diffcol/trunk/gen-model-colls.sh @ 28037

Revision 28037, 11.9 KB (checked in by ak19, 6 years ago)

The script to generate the MODEL collections now also handles the svn rm and svn add if passed the svndelete flag, and just the copying over of the rebuilt archives and index into the model-collect (svn) folder if passed in the svnupdate flag.

Line 
1#!/bin/bash
2
3# PURPOSE
4# This is not a nightly script. You use it to regenerate the model-collections
5# if Greenstone has changed fundamentally, such as what HASH OIDs get assigned
6# to documents or something that changes the contents of the index and
7# archives folders. This has happened now with the commits
8# http://trac.greenstone.org/changeset/28022 and
9# http://trac.greenstone.org/changeset/28021
10# These commits generate new stable HASH OIDs for the existing documents.
11
12
13# USAGE
14# Put this file in the toplevel of the Greenstone 2 binary/compiled SVN installation
15# that you want to generate the model collections with.
16# You can provide a list of collection names or none, in which case all the collections
17# are processed.
18
19# Pass in --svnupdate to copy across the contents of archives and index in the
20# rebuilt collection, overwriting their equivalents in the svn model collection,
21# but not removing any extraneous HASH folders already present.
22
23# Pass in --svndelete to remove what's in archives and index from svn and replace
24# the contents of archives and index with the matching contents from the rebuild
25# collection. Useful for when the HASH directory naming has changed and everything
26# in archives and index has to be wiped out and moved back in from the rebuilt col.
27
28# If neither is passed in, then the collections are rebuilt but the svn model-collect
29# is not updated and the repository is not updated.
30
31# This program leaves you to do the final commit on the (svn) model-collect folder!
32
33# Examples of usage:
34# ./gen-model-colls.sh
35# ./gen-model-colls.sh --svndelete
36# ./gen-model-colls.sh --svnupdate Tudor-Basic Tudor-Enhanced
37
38# The first just rebuilds all the collections in a new folder called collect and stops there
39# The second rebuilds all the collections and then removes all contents of the archives and the
40# index folders from the svn checked-out model-collect and removes them from svn. Then it copies
41# across all the contents of the rebuilt archives and index into model-collect and svn adds them.
# The third example checks out all the model-collections again, but rebuilds only the 2 collections
# specified in the new collect folder. Then it copies across the contents of the archives and index
# folders of those 2 collections into their model-collect equivalents.
45
46# You then still have to do the final svn commit on the model-collect folder.
47
48# Also valid examples:
49# ./gen-model-colls.sh Tudor-Basic Tudor-Enhanced
50# ./gen-model-colls.sh --svndelete Tudor-Basic Tudor-Enhanced
51# ./gen-model-colls.sh --svnupdate
52
53# PSEUDOCODE
54# This script:
55# Checks out the model-collections folder from SVN
56# Makes a copy
57# In the copy: gets rid of their .svn folders, and builds each collection in turn, moving building to index once done
58# If --svndelete was passed in: svn removes model-collect/archives/* and model-collect/index/*, copies over index/*
59# and archives/* from collect into model-collect and svn adds model-collect/archives/* and model-collect/index/*
60# If --svnupdate was passed in: copies collect/archives/* into model-collect/archives/*, and copies collect/index/*
61# into model-collect/index/*, overwriting files that already existed but have now been updated upon rebuild. However,
62# --svnupdate will leave untouched any files and folders unique to model-collect.
63
# Sync one rebuilt collection from collect/ into the svn checkout in model-collect/.
#
# Arguments:
#   $1 - collection name or path (e.g. "Tudor-Basic" or "collect/Tudor-Basic");
#        only the last path component is used
#   $2 - mode: "svndelete" additionally runs svn rm on the old contents of
#        archives/ and index/ and svn add on the new ones; any other value
#        (e.g. "svnupdate") only copies the rebuilt files across
# Outputs:
#   Status messages on stdout. Diff lines that mention neither "archives" nor
#   "index" are appended to report.txt in the current directory.
# Returns:
#   1 if the rebuilt collection is missing from collect/; otherwise the status
#   of the last command run.
function update_single_collection () {
    local collection mode diff_result line
    mode=$2

    # Only the basename matters. Quoting (rather than backslash-escaping) keeps
    # names containing spaces intact.
    collection=$(basename -- "$1")

    if [[ ! -e "model-collect/$collection" ]]; then
        echo "$collection does not exist in model-collect, commit separately"
        return
    fi

    # Without the rebuilt copy diff would only fail, and the empty result would
    # be misreported below as "No differences".
    if [[ ! -d "collect/$collection" ]]; then
        echo "$collection does not exist in collect, nothing to update from" >&2
        return 1
    fi

    # Diff the svn model and rebuilt model collections, dropping lines that
    # mention svn metadata. -F makes ".svn" a literal string, not a regex.
    diff_result=$(diff -rq "model-collect/$collection" "collect/$collection" | grep -vF ".svn")

    # If there are no differences in the current collection, we're done
    if [[ -z "$diff_result" ]]; then
        echo "No differences in collection $collection"
        return
    fi

    # Any difference outside the archives and index folders is not handled
    # here: send that diff line to the report so it can be looked at by hand.
    # Reading line by line avoids both word splitting and glob expansion.
    while IFS= read -r line; do
        if [[ "$line" != *archives* && "$line" != *index* ]]; then
            printf '%s\n' "$line" >> report.txt
        fi
    done <<< "$diff_result"

    # Now svn remove what's in model-collect's archives and index
    # see http://stackoverflow.com/questions/7502261/delete-folder-content-and-remove-from-version-control
    if [[ "$mode" == "svndelete" ]]; then
        svn rm "model-collect/$collection/archives/"*
        svn rm "model-collect/$collection/index/"*
    fi

    # Copy the contents of the rebuilt collection's index and archives over
    # to the svn model-collect
    cp -r "collect/$collection/archives/"* "model-collect/$collection/archives/."
    cp -r "collect/$collection/index/"* "model-collect/$collection/index/."

    # ... and svn add what has been rebuilt
    if [[ "$mode" == "svndelete" ]]; then
        svn add "model-collect/$collection/archives/"*
        svn add "model-collect/$collection/index/"*
    fi

    # To undo the changes made by svndelete, run the following manually
    #   svn revert --depth infinity model-collect/$collection/archives/*
    #   svn revert --depth infinity model-collect/$collection/index/*
    # then remove both the local archives and index, and do an svn up to get
    # the original checkout back

    # if etc/collect.cfg is different, copy it across too?
}
144
# Re-build a single collection in "collect", which is a copy of model-collect.
#
# Runs import.pl and buildcol.pl (both with -removeold) on the collection and
# then replaces the collection's index folder with the freshly produced
# building folder.
#
# Arguments:
#   $1 - collection name or path (e.g. "Tudor-Basic" or "collect/Tudor-Basic");
#        only the last path component is used
# Returns:
#   1 if no building folder exists after the build (the existing index is then
#   left alone), 0 otherwise.
# NOTE(review): import.pl and buildcol.pl are presumably put on PATH by
# sourcing setup.bash before this is called - confirm.
function single_collection () {
    local collection

    # Only the basename matters. Quoting (rather than backslash-escaping) keeps
    # names containing spaces intact.
    collection=$(basename -- "$1")

    import.pl -removeold "$collection"
    buildcol.pl -removeold "$collection"

    # Don't throw the old index away unless there is something to replace it
    # with; otherwise a failed build would leave the collection with no index.
    if [[ ! -d "collect/$collection/building" ]]; then
        echo "No building folder was produced for $collection, leaving its index untouched" >&2
        return 1
    fi

    rm -rf "collect/$collection/index"
    mv "collect/$collection/building" "collect/$collection/index"

    echo
    echo "*********************"
    echo "Done processing $collection"
    echo "*********************"
    echo
}
166
# unused
# Re-build all collections in "collect", which is a copy of model-collect:
# every entry directly under collect/ is handed to single_collection in turn.
function all_collections() {
    local collection

    for collection in collect/*; do
        # With no matches the glob stays as the literal "collect/*"; skip it
        [[ -e "$collection" ]] || continue

        # import, build, move building to index
        single_collection "$collection"
        #update_single_collection "$collection"
    done
}
177
178# http://stackoverflow.com/questions/16483119/example-of-how-to-use-getopt-in-bash
# Print the command-line synopsis to stdout and terminate the script with
# exit status 1.
function usage() {
    echo "*******************************************"
    echo "Usage: $0 [--svnupdate|--svndelete] [col1 col2 col3 ...]"
    echo "If no collections are provided, all collections will be processed."
    # No mode is assumed by default: without either flag the script only
    # rebuilds the copy in collect.
    echo "If neither --svnupdate nor --svndelete is provided, the collections are rebuilt but model-collect is left untouched."
    echo "*******************************************"
    exit 1
}
189
190
# The program starts here

# Process the optional command line arguments.
# http://blog.onetechnical.com/2012/07/16/bash-getopt-versus-getopts/
# getopt rewrites the command line into a canonical form: recognised options
# first, then "--", then the remaining (collection) arguments.
if ! ARGS=$(getopt -o ud -l "svnupdate,svndelete" -n "$0" -- "$@"); then
    # Bad arguments
    usage
    exit 1
fi

# Replace the positional parameters with getopt's canonical form
eval set -- "$ARGS"

# mode can be svndelete or svnupdate; empty means neither flag was given
mode=

while true; do
    case "$1" in
        -d|--svndelete)
            shift
            if [[ "$mode" == "svnupdate" ]]; then
                echo
                echo "Can't use both svndelete and svnupdate"
                usage
                exit 1
            fi
            mode=svndelete
            ;;
        -u|--svnupdate)
            shift
            if [[ "$mode" == "svndelete" ]]; then
                echo
                echo "Can't use both svndelete and svnupdate"
                usage
                exit 1
            fi
            mode=svnupdate
            ;;
        --)
            # End of options: what is left are the collection names
            shift
            break
            ;;
        *)
            # The canonical form always contains "--", so anything else means
            # the parsed command line is not what this loop expects. Bail out
            # instead of spinning forever on an argument that is never shifted.
            usage
            exit 1
            ;;
    esac
done
240
241# If no mode provided (svndelete|svnupdate) as cmd line arg, then don't modify
242# the svn model-collect folder. We leave it at rebuilding its copy in collect
243
244#http://www.cyberciti.biz/faq/linux-unix-sleep-bash-scripting/
245#if [ "x$mode" == "x" ]; then
246#    echo
247#    echo "***Mode svndelete or svnupdate not provided. Defaulting to svnupdate... 3s to change to your mind"
248#    echo
249#    sleep 3s
250#    mode=svnupdate
251#fi
252
253# the remaining arguments to the script are assumed to be collections
254
255# debugging
256#for collection in "$@"; do
257#    collection=collect/$collection
258#    echo "Collection: $collection"
259#done
260
261# finished processing arguments
262
263
# report.txt will contain the diff output lines for files that differ outside
# of the archives and index folders. Start each run with a fresh one.
if [[ -f report.txt ]]; then
    rm -- report.txt
fi

# Need pdfbox for the PDFBox tutorial
if [[ ! -e ext/pdf-box ]]; then
    # Work in a subshell so that a failing cd can never leave the rest of the
    # script running from the wrong directory.
    (
        cd ext || exit 1
        # The tarball path is relative to ext now that we are inside it
        if [[ ! -e pdf-box-java.tar.gz ]]; then
            wget http://trac.greenstone.org/export/head/gs2-extensions/pdf-box/trunk/pdf-box-java.tar.gz || exit 1
        fi
        # Unpack even when the tarball was already present: reaching this
        # point means ext/pdf-box itself is still missing.
        tar -xvzf pdf-box-java.tar.gz
    ) || echo "Warning: could not set up the pdf-box extension in ext" >&2
fi

# Get an up to date checkout of the model collections
if [[ -e model-collect ]]; then
    svn up model-collect
else
    svn co http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk/model-collect
fi

# Everything below works from a copy of model-collect. Stop before touching
# the existing collect folder if there is nothing to copy.
if [[ ! -d model-collect ]]; then
    echo "No model-collect folder available (svn checkout failed?). Giving up." >&2
    exit 1
fi

# Move the existing collect folder out of the way, but only the first time:
# once collect_orig exists, collect is just the copy left by a previous run
# and can be discarded.
if [[ -e collect ]]; then
    if [[ -e collect_orig ]]; then
        rm -rf collect
    else
        mv collect collect_orig
    fi
fi

# Make a copy of the model-collect named as the new collect
# and remove the copy's .svn folders
cp -r model-collect collect

# -prune stops find from trying to descend into the folders being deleted
find collect -name ".svn" -type d -prune -exec rm -rf {} +


# Set up the Greenstone environment for building.
# The explicit ./ makes sure it is the setup.bash of this installation that
# gets sourced rather than one found through PATH.
source ./setup.bash
308
# Whatever is left on the command line after option parsing is the list of
# collections to process.
# http://stackoverflow.com/questions/12711786/bash-convert-command-line-arguments-into-array
# http://stackoverflow.com/questions/255898/how-to-iterate-over-arguments-in-bash-script

# The loop variable is deliberately not called "collection": the functions
# called from the loop may assign to a variable of that name.
if [[ $# -eq 0 ]]; then
    # No collections named: for each collection in collect, import, build,
    # move building to index
    for col_path in collect/*; do
        # With no matches the glob stays as the literal "collect/*"; skip it
        [[ -e "$col_path" ]] || continue

        if ! single_collection "$col_path"; then
            echo "Rebuilding $col_path failed. Not updating model-collect for it." >&2
            continue
        fi
        # Only touch model-collect when svnupdate or svndelete was requested
        if [[ -n "$mode" ]]; then
            update_single_collection "$col_path" "$mode"
        fi
    done
else
    # Command-line args are a list of collections,
    # process each command-line arg, after confirming such a collection exists
    for col_name in "$@"; do
        col_path=collect/$col_name
        if [[ ! -e "$col_path" ]]; then
            echo "Can't find collection $col_path. Skipping."
            continue
        fi

        if ! single_collection "$col_path"; then
            echo "Rebuilding $col_path failed. Not updating model-collect for it." >&2
            continue
        fi
        if [[ -n "$mode" ]]; then
            update_single_collection "$col_path" "$mode"
        fi
    done
fi
338
339
# Closing summary for the user, framed by two lines of asterisks.
banner="*****************************************"

echo
echo "$banner"

# mode is only set when we were svn updating/deleting collections. If a report
# with additional differences was generated in that case, point the user to it.
if [[ -n "$mode" && -f report.txt ]]; then
    printf '%s\n\n' "Some files outside of archives and index folders were different. See report.txt"
fi

printf '%s\n\n' "The original collect directory has been left renamed as collect_orig"

# What is left for the user to do depends on whether model-collect was touched
case "$mode" in
    "")
        echo "The model-collect folder has not been altered. Changes have only been made to collect"
        ;;
    *)
        echo "You still need to run svn status and svn commit on the model-collect folder"
        ;;
esac

echo "$banner"
echo
358
359
Note: See TracBrowser for help on using the browser.