source: other-projects/nightly-tasks/diffcol/trunk/gen-model-colls.sh@ 28933

Last change on this file since 28933 was 28925, checked in by ak19, 10 years ago

Minor changes

File size: 30.2 KB
RevLine 
#!/bin/bash

# PURPOSE
# This is not a nightly script. You use it to regenerate the model-collections
# if Greenstone has changed fundamentally, such as what HASH OIDs get assigned
# to documents or something that changes the contents of the index and
# archives folders. This happened with the commits
# http://trac.greenstone.org/changeset/28022 and
# http://trac.greenstone.org/changeset/28021
# These commits generate new stable HASH OIDs for the existing documents.


# USAGE
# Put this file in the toplevel of the Greenstone 2 binary/compiled SVN installation
# that you want to generate the model collections with.
# You can provide a list of collection names or none, in which case all the collections
# are processed.

# Pass in --svnupdate to copy across the contents of archives and index in the
# rebuilt collection, overwriting their equivalents in the svn model collection,
# but not removing any extraneous HASH folders already present.
# !!!!! IMPORTANT: if you pass in --svnupdate, it leaves you to do the final commit on
# the (svn) model-collect folder!

# Pass in --svndelete to remove the archives and index from svn in the model-collect
# and replace them with the rebuilt archives and index.
# The --svndelete flag is useful for when the HASH directory naming has changed and everything
# in archives and index has to be wiped out and moved back in from the rebuilt col.
# Passing in --svndelete will do the final commits on the model-collect folder.

# If neither flag is passed in, then the collections are rebuilt but the svn model-collect
# is not updated and the repository is not updated.

# Examples of usage:
# ./gen-model-colls.sh
# ./gen-model-colls.sh --svndelete
# ./gen-model-colls.sh --svnupdate Tudor-Basic Tudor-Enhanced

# The first just rebuilds all the collections in a new folder called collect and stops there.

# The second rebuilds all the collections in collect and svn removes the archives and the index
# folders in model-collect. Then it copies across the rebuilt archives and index into model-collect
# and svn adds them.

# The third example checks out all the model-collections again, but rebuilds only the 2 collections
# specified in the new collect folder. Then it copies across the *contents* of the archives and
# index folders of those 2 collections into their model-collect equivalents. You then still have to
# do the final svn commit on the model-collect folder after looking over the differences.

# Also valid examples:
# ./gen-model-colls.sh Tudor-Basic Tudor-Enhanced
# ./gen-model-colls.sh --svndelete Tudor-Basic Tudor-Enhanced
# ./gen-model-colls.sh --svnupdate

# PSEUDOCODE
# This script:
# Checks out the model-collections folder from SVN
# Makes a copy
# In the copy: gets rid of the .svn folders, and builds each collection in turn, moving building to index once done
# If --svndelete was passed in: svn removes model-collect/archives and model-collect/index, copies over collect/index
# and collect/archives into model-collect and svn adds model-collect/archives and model-collect/index. Then SVN COMMITS
# model-collect/archives and model-collect/index.
# If --svnupdate was passed in: copies collect/archives/* into model-collect/archives/*, and copies collect/index/*
# into model-collect/index/*, overwriting files that already existed but have now been updated upon rebuild. However,
# --svnupdate will leave untouched any files and folders unique to model-collect. No SVN commit, that's LEFT UP TO YOU.

# See earlier versions of this script:
# To svn remove what's unique to model-collect and svn add what's been rebuilt in index and archives
# see http://stackoverflow.com/questions/7502261/delete-folder-content-and-remove-from-version-control

# http://stackoverflow.com/questions/5044214/how-do-i-detect-and-or-delete-empty-subversion-directories
# http://stackoverflow.com/questions/1301203/removing-svn-files-from-all-directories

# To checkout just this file and other files at this level from svn, see
# http://stackoverflow.com/questions/11650156/svn-checkout-depth
# http://svnbook.red-bean.com/en/1.7/svn.advanced.sparsedirs.html
# So you would do:
# svn co http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk diffcol --depth files

#*******************************GLOBAL VARIABLES***************************

# mode selects what is done to the svn model-collect folder after rebuilding:
# svndelete, svnupdate or svnaddnew. Empty means the collections are only rebuilt.
mode=
# set to 1 by --debug: the svn commits, svn rm and svn up steps are skipped
debug_mode=0
# custom svn commit message given with --message; when empty, default messages are used
commit_message=

#*****************************FUNCTIONS*****************************

# Print the last path component of a collection path,
# e.g. collect/Tudor-Basic -> Tudor-Basic
#
# Bash functions cannot return strings, so the result is written to stdout and
# callers capture it with: name=$(get_col_basename "$path")
# DON'T ADD ANY FURTHER OUTPUT STATEMENTS TO THIS FUNCTION: anything else written
# to stdout becomes part of the "returned" string.
# see http://stackoverflow.com/questions/3236871/how-to-return-a-string-value-from-a-bash-function
#
# Arguments: $1 - collection name or path (may contain spaces)
# Outputs:   the basename of $1 on stdout
function get_col_basename () {
    # Quoting, not backslash-escaping, is what keeps a name containing spaces
    # in one piece: word splitting of an unquoted expansion ignores backslashes.
    basename -- "$1"
}

# Copy a freshly built collection from collect into model-collect and schedule
# it for addition to svn. Nothing is committed here.
#
# Files that should not end up in svn are excluded while copying. The matching
# svn:ignore setup can be done by hand:
# model-collect>svn -R propset svn:ignore -F .customignore .
# where .customignore is a file containing:
# log
# earliestDatestamp
# cache
# model-collect>svn proplist -v
# shows the svn properties, including the svn:ignore property. So it shows what files svn will ignore
#
# Arguments: $1 - collection name or path (only its basename is used)
# Returns:   0 when the collection was added or already exists in model-collect,
#            non-zero when the copy or the svn add failed
function svn_add_new_collection () {
    local collection
    #get just the basename
    collection=$(get_col_basename "$1")

    if [ -e "model-collect/$collection" ]; then
        echo "svn_add_new_collection: $collection already exists in model-collect, can't add it to svn."
        return 0
    fi

    # Using rsync to copy folders while excluding files/subfolders, BUT rsync is not available on lsb
    # http://www.linuxquestions.org/questions/linux-software-2/copy-svn-working-dir-without-svn-hidden-dirs-and-files-620586/
    # See also http://www.thegeekstuff.com/2011/01/rsync-exclude-files-and-folders/,
    # section "Exclude multiple files and directories at the same time" (can also use a file to blacklist folders/files)

    # for GS3 we have a custom ignore file
#    if [ "x$GSDL3SRCHOME" != "x" ]; then
#        mkdir model-collect/$collection
#        svn add --force model-collect/$collection
#        svn propset -R svn:ignore -F model-collect/.customignore model-collect/$collection
#
#        if [ "x$commit_message" == "x" ]; then
#            commit_message="Adding new model collections 1/2: new empty collection dir with svn-ignore set."
#        fi
#        if [ "$debug_mode" -eq "0" ]; then
#            svn commit -m "AUTOCOMMIT by gen-model-colls.sh script. Message: $commit_message" model-collect/$collection
#        fi
#    fi

    # need slash on end of src dir collect/$collection/ !
    local -a copy_excludes=(
        --exclude=.svn/ --exclude=log/ --exclude=cached/ --exclude=cache/
        --exclude=earliestDatestamp --exclude=fail.log --exclude=collectionConfig.bak
    )
    if ! rsync -r "${copy_excludes[@]}" "collect/$collection/" "model-collect/$collection"; then
        # don't schedule a partially copied collection for addition
        echo "svn_add_new_collection: failed to copy collect/$collection into model-collect, not adding it to svn." >&2
        return 1
    fi

#    find collect/$collection -name ".svn" -type d -exec rm -rf {} \;
#    cp -r collect/$collection model-collect/$collection

    # http://www.thegeekstuff.com/2010/06/bash-array-tutorial/
#    ignorelist=('log' 'cache' 'archives/earliestDatestamp');
#    for ignored in "${ignorelist[@]}"; do
#        if [ -f model-collect/$collection/$ignored ]; then
#            rm model-collect/$collection/$ignored
#        elif [ -d model-collect/$collection/$ignored ]; then
#            rm -rf model-collect/$collection/$ignored
#        fi
#    done

    # http://stackoverflow.com/questions/15880249/subclipse-svn-first-commit-ignore-certain-directories
    # http://wolfram.kriesing.de/blog/index.php/2005/svnignore-and-svnkeywords
    # http://boblet.tumblr.com/post/35755799/setting-up-and-using-svn-ignore-with-subversion
    # http://www.petefreitag.com/item/662.cfm
    # http://svnbook.red-bean.com/en/1.7/svn.advanced.props.special.ignore.html
    # http://stackoverflow.com/questions/116074/how-to-ignore-a-directory-with-svn

    # Dr Bainbridge's way of doing an svn ignore is better and involves fewer steps:
    # create the empty collection folder (-p for subcollections), svn add it,
    # svn:ignore all the files to be ignored
    # copy the contents of the collection across,
    # do an svn add --force on the collection folder

    #mkdir -p model-collect/$collection
    #svn add model-collect/$collection
    #ignorelist=('log' 'cache' 'archives/earliestDatestamp');
    #for ignored in "${ignorelist[@]}"; do
    #    svn propset svn:ignore $ignored model-collect/$collection/.
    #done
    #cp -r collect/$collection/* model-collect/$collection/*
    #svn add --force model-collect/$collection

    svn add --force "model-collect/$collection"
}

# Function that handles the --svndelete flag (mode) of this script.
#
# Steps:
#   svn remove archives and index in each collection
#   commit them all (1/2)
#   svn up, so that what was svn-removed is also deleted locally
#   copy over newly rebuilt archives and index into each model-collection
#   svn add the new archives and index folders of each collection
#   commit them all (2/2)
#
# Globals:   debug_mode (read)     - when not 0, no svn commit / svn up is run
#            commit_message (read) - custom message used for both commits;
#                                    when empty, a default 1/2 and 2/2 message is used
# Arguments: collection names; when none are given, every entry of collect/* is processed
# Returns:   0 on success, 1 if there was nothing to process or an svn commit failed
function svn_delete () {
    local -a requested=()
    local -a targets=()
    local entry name msg

    if [ -z "${1:-}" ]; then
        requested=(collect/*)
    else
        requested=("$@")
    fi

    # Keep track of which model-collect folders have to be committed. Only
    # collections that exist in model-collect are listed: svn commit would fail
    # as a whole if it were handed a path that does not exist.
    for entry in "${requested[@]}"; do
        _del_col_archives_index "$entry"
        name=$(get_col_basename "$entry")
        if [ -n "$name" ] && [ -e "model-collect/$name" ]; then
            targets+=("model-collect/$name")
        fi
    done

    # With no target paths, svn commit would operate on the current directory.
    if [ "${#targets[@]}" -eq 0 ]; then
        echo "svn_delete: none of the requested collections exist in model-collect, nothing to do." >&2
        return 1
    fi

    # svn commit in one go all the svn rm statements done above:
    # don't do `svn up` before this commit, as doing so would retrieve all the folders that just were svn-removed
    # Numerical comparisons: http://tldp.org/LDP/abs/html/comparison-ops.html
    if [ "$debug_mode" -eq 0 ]; then
        # The default is chosen per commit, so that the second commit gets its own 2/2 message
        msg=${commit_message:-"Clean rebuild of model collections 1/2. Clearing out deprecated archives and index."}
        if ! svn commit -m "AUTOCOMMIT by gen-model-colls.sh script. Message: $msg" "${targets[@]}"; then
            echo "svn_delete: committing the removal of archives and index failed, stopping here." >&2
            return 1
        fi

        # Having svn committed the deletes, do an svn up to locally delete what was svn-removed above,
        # BEFORE copying from the rebuilt archives and index folders
        svn up model-collect
    fi

    # copy from the rebuilt archives and index over into the svn model-collect and svn add them
    for entry in "${requested[@]}"; do
        _add_col_archives_index "$entry"
    done

    # commit all the svn add statements done just above in one go
    if [ "$debug_mode" -eq 0 ]; then
        msg=${commit_message:-"Clean rebuild of model collections 2/2. Adding rebuilt archives and index."}
        if ! svn commit -m "AUTOCOMMIT by gen-model-colls.sh script. Message: $msg" "${targets[@]}"; then
            echo "svn_delete: committing the rebuilt archives and index failed." >&2
            return 1
        fi
    fi

    echo
    echo "*********************"
    echo "Done svn-deleting rebuilt model-collection: ${targets[*]}"
    echo "*********************"
    echo
    return 0
}

# To undo the changes made by svndelete (before they are committed), run the following manually
# svn revert --depth infinity model-collect/<collection>/archives
# svn revert --depth infinity model-collect/<collection>/index
# then remove both the local archives and index, and do an svn up to get original checkout back

# svn delete this collection's archives and index folders in model-collect.
# (On returning from this subroutine, the commit will be done in one step for
# all collections on which this function was called.)
#
# Globals:   debug_mode (read) - 0: svn rm the folders; 1: only delete them locally
# Arguments: $1 - collection name or path (only its basename is used)
# Returns:   0 when the collection does not exist in model-collect, otherwise
#            the status of the last removal command
function _del_col_archives_index () {
    local collection
    #get just the basename
    collection=$(get_col_basename "$1")

    # an empty name would make the paths below point at model-collect itself
    if [ -z "$collection" ] || [ ! -e "model-collect/$collection" ]; then
        echo "del_col_archives_index: $collection does not exist in model-collect"
        return 0
    fi

    # remove the entire archives and index folders from svn
    if [ "$debug_mode" -eq 0 ]; then
        svn rm --force "model-collect/$collection/archives"
        svn rm --force "model-collect/$collection/index"
    elif [ "$debug_mode" -eq 1 ]; then
        # debug mode leaves svn alone and only removes the local folders
        rm -rf "model-collect/$collection/archives"
        rm -rf "model-collect/$collection/index"
    fi
}


# Copy the collection's rebuilt archives and index folders (and a modified
# etc/collect.cfg) from collect into model-collect, then svn add them.
#
# Globals:   debug_mode (read) - the svn add is only run when this is 0
# Arguments: $1 - collection name or path (only its basename is used)
# Returns:   0 on success or when the collection does not exist in
#            model-collect, 1 when copying archives or index failed
function _add_col_archives_index () {
    local collection cfg_status
    #get just the basename
    collection=$(get_col_basename "$1")

    if [ ! -e "model-collect/$collection" ]; then
        echo "add_col_archives_index: $collection does not exist in model-collect"
        return 0
    fi

    # copy across collect.cfg file if it has been modified
    # diff exits with 1 when the files differ (0: identical, 2: trouble).
    # The diff must not be wrapped in backticks: that would run diff's output
    # as a command and lose diff's own exit status.
    diff -q "collect/$collection/etc/collect.cfg" "model-collect/$collection/etc/collect.cfg" > /dev/null
    cfg_status=$?
    if [ "$cfg_status" -eq 1 ]; then
        cp "collect/$collection/etc/collect.cfg" "model-collect/$collection/etc/collect.cfg"
    fi

    # copy across the entire rebuilt index and archives folders to the svn model-collect
    if ! rsync -r --exclude=.svn/ --exclude=cached/ --exclude=cache/ --exclude=earliestDatestamp \
            "collect/$collection/archives" "model-collect/$collection"; then
        echo "add_col_archives_index: copying archives of $collection failed" >&2
        return 1
    fi
    if ! rsync -r --exclude=.svn/ --exclude=cached/ --exclude=cache/ \
            "collect/$collection/index" "model-collect/$collection"; then
        echo "add_col_archives_index: copying index of $collection failed" >&2
        return 1
    fi

    # need a --force to skip all the svn:ignored files (archives/earliestDatestamp)
    # when doing the recursive svn add on the archives and index directories
    if [ "$debug_mode" -eq 0 ]; then
        svn add --force "model-collect/$collection/archives"
        svn add --force "model-collect/$collection/index"
    fi
}


# UNUSED, but useful for spotting differences between the collect and model-collect
# after rebuild, before svn updating/deleting, as opposed to at the end of the script.
#
# Diffs model-collect/<collection> against collect/<collection> (ignoring .svn
# entries) and appends to report.txt every diff line that mentions neither
# "archives" nor "index".
#
# Arguments: $1 - collection name or path (only its basename is used)
# Outputs:   a message on stdout when the collection is missing from
#            model-collect or has no differences; lines appended to report.txt
function svn_process_single_collection () {
    local collection diff_result line
    #get just the basename
    collection=$(get_col_basename "$1")

    if [ ! -e "model-collect/$collection" ]; then
        echo "svn_process_single_collection: $collection does not exist in model-collect"
        return 0
    fi

    # diff the svn model and rebuilt model collections
    # (-F: ".svn" is matched as a literal string, not as a regular expression)
    diff_result=$(diff -rq "model-collect/$collection" "collect/$collection" | grep -vF ".svn")
#    echo "Diff result for collection $collection: $diff_result"

    # if no differences in the current collection, then we're done
    if [ -z "$diff_result" ]; then
        echo "No differences in collection $collection"
        return 0
    fi

    # check that none of the lines mention files outside the archives or index folders
    # Reading line by line avoids having to change the global IFS.
    # http://stackoverflow.com/questions/229551/string-contains-in-bash
    while IFS= read -r line; do
        if [[ "$line" != *archives* && "$line" != *index* ]]; then
            # the file that is different is neither in index nor in archives, send this diffline to the report
            printf '%s\n' "$line" >> report.txt
        fi
    done <<< "$diff_result"
    return 0
}

# Function that takes care of the --svnupdate flag mode of this script for a single collection.
#
# Copies the *contents* of the rebuilt archives and index folders (and a
# modified etc/collect.cfg) over their model-collect equivalents and svn adds
# whatever is new. Nothing is committed; that is left to the user.
#
# Arguments: $1 - collection name or path (only its basename is used)
function update_single_collection () {
    local collection cfg_status
    #get just the basename
    collection=$(get_col_basename "$1")

    if [ ! -e "model-collect/$collection" ]; then
        echo "update_single_collection: $collection does not exist in model-collect"
        return 0
    fi

    # copy across collect.cfg file if it has been modified
    # diff exits with 1 when the files differ (0: identical, 2: trouble).
    # The diff must not be wrapped in backticks: that would run diff's output
    # as a command and lose diff's own exit status.
    diff -q "collect/$collection/etc/collect.cfg" "model-collect/$collection/etc/collect.cfg" > /dev/null
    cfg_status=$?
    if [ "$cfg_status" -eq 1 ]; then
        cp "collect/$collection/etc/collect.cfg" "model-collect/$collection/etc/collect.cfg"
    fi

    # copy across the contents of the rebuilt model-collection's index and archives to the svn model-collect
    # (the * is deliberately left outside the quotes so that it still globs)
    rsync -r --exclude=.svn/ --exclude=cached/ --exclude=cache/ --exclude=earliestDatestamp \
        "collect/$collection/archives/"* "model-collect/$collection/archives"
    rsync -r --exclude=.svn/ --exclude=cached/ --exclude=cache/ \
        "collect/$collection/index/"* "model-collect/$collection/index"

    # now svn add any and all the NEW items in model-collect's archives and index
    # see http://stackoverflow.com/questions/1071857/how-do-i-svn-add-all-unversioned-files-to-svn
    # see also http://stackoverflow.com/questions/116074/how-to-ignore-a-directory-with-svn
#    if [ "$debug_mode" -eq "0" ]; then
    svn add --force "model-collect/$collection/archives/"* --auto-props --parents --depth infinity -q
    svn add --force "model-collect/$collection/index/"* --auto-props --parents --depth infinity -q
#    fi

    echo "svn model-collect update process complete. CHECK AND COMMIT THE model-collect FOLDER!"

    echo
    echo "*********************"
    echo "Done updating the rebuilt LOCAL model-collection: model-collect/$collection"
    echo "*********************"
    echo
}


# re-build a single collection in "collect" which is a copy of model-collect
#
# Runs import.pl and buildcol.pl (with "-site localsite" when GSDL3SRCHOME is
# set) and then replaces the collection's index folder with the new building
# folder. The existing index is only removed once the build has succeeded and
# produced a building folder.
#
# Arguments: $1 - collection name or path (only its basename is used)
# Returns:   0 on success, 1 when no name was given, import/build failed or
#            no building folder was produced
function build_single_collection () {
    local collection
    local -a site_args=()
    collection=$(get_col_basename "$1")

    # an empty name would make the rm -rf below operate on collect//index
    if [ -z "$collection" ]; then
        echo "build_single_collection: no collection name given" >&2
        return 1
    fi

    # GS2 or GS3 building
    if [ -n "$GSDL3SRCHOME" ]; then
        site_args=(-site localsite)
    fi

    if ! import.pl "${site_args[@]}" -removeold "$collection"; then
        echo "build_single_collection: import.pl failed for $collection, leaving its index untouched" >&2
        return 1
    fi
    if ! buildcol.pl "${site_args[@]}" -removeold "$collection"; then
        echo "build_single_collection: buildcol.pl failed for $collection, leaving its index untouched" >&2
        return 1
    fi
    if [ ! -d "collect/$collection/building" ]; then
        echo "build_single_collection: no building folder was produced for $collection, leaving its index untouched" >&2
        return 1
    fi

    # move building to index
    rm -rf "collect/$collection/index"
    mv "collect/$collection/building" "collect/$collection/index" || return 1

    echo
    echo "*********************"
    echo "Done rebuilding model collection: $collection"
    echo "*********************"
    echo
}


# Print the usage message to stdout and exit.
# http://stackoverflow.com/questions/16483119/example-of-how-to-use-getopt-in-bash
#
# Arguments: $1 - optional exit status, defaults to 1 (so that a plain
#                 "usage" call keeps signalling an error, while "usage 0"
#                 can be used for --help)
function usage () {
    local exit_status=${1:-1}

    echo "*******************************************"
    echo "Usage: $0 [--svnupdate|--svndelete|--svnaddnew] [--debug] [--message 'custom commit message'] [col1, col2, col3,...]";
    echo "If no collections are provided, all collections will be processed.";
    echo "If none of svnupdate, svndelete or svnaddnew are provided,"
    echo "the specified collections are just rebuilt in the collect folder.";
    echo "*******************************************"
    exit "$exit_status"
}


#*******************************MAIN PROGRAM***************************

# process optional command line arguments
# http://blog.onetechnical.com/2012/07/16/bash-getopt-versus-getopts/

# Record the svn mode asked for on the command line. The three modes are
# mutually exclusive: asking for a second, different one prints the usage
# message and exits with status 1.
# $1 - the requested mode: svnupdate, svndelete or svnaddnew
# $2 - the names of the other modes, as shown in the error message
function _set_mode () {
    if [ -n "$mode" ] && [ "$mode" != "$1" ]; then
        echo
        echo "Can't use both $1 and $2"
        usage
        exit 1
    fi
    mode=$1
}

# Execute getopt; on bad arguments it reports the problem itself and fails
if ! ARGS=$(getopt -o m:uxadh -l "message:,svnupdate,svndelete,svnaddnew,debug,help" -n "$0" -- "$@"); then
    usage
    exit 1
fi

eval set -- "$ARGS"

while true; do
    case "$1" in
        -h|--help)
            shift
            usage 0
            exit 0
            ;;
        -a|--svnaddnew)
            shift
            _set_mode svnaddnew "svndelete/svnupdate"
            ;;
        -x|--svndelete)
            shift
            _set_mode svndelete "svnupdate/svnaddnew"
            ;;
        -u|--svnupdate)
            shift
            _set_mode svnupdate "svndelete/svnaddnew"
            ;;
        -d|--debug)
            shift
            debug_mode=1
            ;;
        -m|--message)
            # getopt always passes the argument of -m along as the next word,
            # even when it is the empty string, so both words are always
            # consumed. (Leaving an empty argument in place would make this
            # loop spin forever.)
            commit_message=$2
            shift 2
            ;;
        --)
            shift
            break
            ;;
        *)
            echo "Unexpected argument while parsing the options: $1" >&2
            usage
            exit 1
            ;;
    esac
done

#echo "commit message: $commit_message"
#echo "Debug mode is: $debug_mode"
#exit


# Set up the Greenstone environment, this is mainly for building, but also for locating a
# Greenstone installation folder, in case this script doesn't live in one.
# Then cd into the collect folder's parent for the Greenstone installation.
# Test for GS3 home env then for GS2 home and if found, cd into the GS2/GS3 home location,
# else try to find gs3-setup.sh/setup.bash in the current location and source it.
# Else print a message saying that no Greenstone home is set and exit.
# Every cd is checked: all later steps use paths relative to that folder.
if [ -n "$GSDL3SRCHOME" ]; then
    echo "cd-ing into Greenstone 3 home directory: $GSDL3SRCHOME"
    cd "$GSDL3SRCHOME/web/sites/localsite" || exit 1
elif [ -n "$GSDLHOME" ]; then
    echo "cd-ing into Greenstone home directory: $GSDLHOME"
    cd "$GSDLHOME" || exit 1
else
    echo "** No GS envvars set. Attempting to source the Greenstone setup script"
    if [ -e gs3-setup.sh ]; then
        source ./gs3-setup.sh
        cd "$GSDL3SRCHOME/web/sites/localsite" || exit 1
    elif [ -e setup.bash ]; then
        source ./setup.bash
    else
        echo "No Greenstone Home set and no setup script found in current folder."
        echo "You need to source the setup script in a Greenstone installation. Exiting."
        exit 1
    fi
fi


# If no mode (svndelete|svnupdate|svnaddnew) is provided as cmd line arg, then don't modify
# the svn model-collect folder. Then this script stops after rebuilding the model-copy in collect

# the remaining arguments to the script are assumed to be collections

# debugging
#for collection in "$@"; do
#    collection=collect/$collection
#    echo "Collection: $collection"
#done

# finished processing arguments


# report.txt receives the diff lines that svn_process_single_collection finds
# outside the archives and index folders; start every run without an old report
rm -f report.txt

# Need the pdfbox extension for the PDFBox tutorial
# The PDFBox ext has to be placed in the *GSDLHOME*/ext,
# also in GS3's case where GSDLHOME is GS3/gs2build
# Go into ext and at end popd to get back into the collect folder's parent for the GS installation
# This step is best-effort: problems are reported, but the script carries on.
if [ -z "$GSDLHOME" ]; then
    # without GSDLHOME the paths below would point at /ext
    echo "** GSDLHOME is not set, not checking for the PDFBox extension" >&2
elif [ ! -e "$GSDLHOME/ext/pdf-box" ]; then
    echo "** Getting PDFBox"
    if pushd "$GSDLHOME/ext"; then
        if [ ! -e pdf-box-java.tar.gz ]; then
            if ! wget http://trac.greenstone.org/export/head/gs2-extensions/pdf-box/trunk/pdf-box-java.tar.gz; then
                echo "** Downloading the PDFBox extension failed" >&2
                # don't leave a partial download behind for the next run to unpack
                rm -f pdf-box-java.tar.gz
            fi
        fi
        if [ -e pdf-box-java.tar.gz ]; then
            tar -xvzf pdf-box-java.tar.gz || echo "** Unpacking the PDFBox extension failed" >&2
        fi
        popd
    else
        echo "** Can't cd into $GSDLHOME/ext, not installing the PDFBox extension" >&2
    fi
fi


# move the existing collect folder out of the way
# unless we are adding a new collection to svn, in which case, we'll grab them from whatever collect folder exists
if [ "$mode" != "svnaddnew" ] && [ -e collect ] && [ ! -e collect_orig ]; then
    echo "** Moving collect out of the way"
    # If the move fails, stop: carrying on would rebuild (import.pl -removeold)
    # inside the original collect folder.
    if ! mv collect collect_orig; then
        echo "Could not move collect to collect_orig. Exiting." >&2
        exit 1
    fi
fi


# get model-collect from svn
# if we already have it, svn update the entire model-collect folder if processing all collections
# or svn update just any collections specified in the model-collect folder
if [ -e model-collect ]; then
    echo "** SVN updating model-collect"
    if [ -z "${1:-}" ]; then
        svn up model-collect
    else
        for collection in "$@"; do
            svn up "model-collect/$collection"
        done
    fi
else
    echo "** Getting the model-collect folder from SVN"
    if [ -n "$GSDL3SRCHOME" ]; then
        model_collect_url=http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk/gs3-model-collect
    else
        model_collect_url=http://svn.greenstone.org/other-projects/nightly-tasks/diffcol/trunk/model-collect
    fi
    # everything that follows works on model-collect, so stop if it can't be checked out
    if ! svn co "$model_collect_url" model-collect; then
        echo "Checking out model-collect from SVN failed. Exiting." >&2
        exit 1
    fi
fi


# Not using rsync to copy folders while excluding files/subfolders, since rsync is not available on lsb
# http://www.linuxquestions.org/questions/linux-software-2/copy-svn-working-dir-without-svn-hidden-dirs-and-files-620586/
# rsync -r --exclude=.svn/ model-collect/ collect

# Remove all the .svn folders below the given folder.
# -prune stops find from descending into a .svn folder that is being removed.
# $1 - folder to clean
function _strip_svn_dirs () {
    find "$1" -name ".svn" -type d -prune -exec rm -rf {} +
}

# Make a copy of the model-collect named as the new collect
# (or if collections are specified in the cmdline arguments, copy just these over from model-collect into collect)
# Then remove the copy's .svn folders
if [ "$mode" != "svnaddnew" ] && [ -e collect_orig ]; then

    echo "***********************************************"
    echo "Creating a copy of the model-collect folder as folder collect and removing the .svn subfolders from the copy:"
    echo

    if [ ! -e collect ] || [ -z "${1:-}" ]; then
        # no collect folder yet, or all collections are to be processed: take a fresh copy of everything
        rm -rf collect
        cp -r model-collect collect || exit 1
        _strip_svn_dirs collect
    else
        for collection in "$@"; do
            # an empty name would make the rm -rf below wipe the whole collect folder
            if [ -z "$collection" ]; then
                continue
            fi
            rm -rf "collect/$collection"
            if [ -e "model-collect/$collection" ]; then
                cp -r "model-collect/$collection" "collect/$collection"
                _strip_svn_dirs "collect/$collection"
            else
                echo "model-collect/$collection does not exist, nothing to copy into collect." >&2
            fi
        done
    fi
    echo "***********************************************"
fi


# Rebuild the collections and apply the svn step that goes with the chosen mode.
# http://stackoverflow.com/questions/12711786/bash-convert-command-line-arguments-into-array
# http://stackoverflow.com/questions/255898/how-to-iterate-over-arguments-in-bash-script

# Without collection arguments all of collect/* is processed, otherwise just
# the collections named on the command line.
build_targets=()
if [ -z "${1:-}" ]; then
    build_targets=(collect/*)
else
    for collection in "$@"; do
        build_targets+=("collect/$collection")
    done
fi

#for each collection: import, build, move building to index
build_failures=0
for col_path in "${build_targets[@]}"; do
    # confirm such a collection exists
    if [ ! -e "$col_path" ]; then
        echo
        echo "Can't find collection $col_path. Skipping."
        echo
        continue
    fi

    if ! build_single_collection "$col_path"; then
        echo "Rebuilding $col_path failed, not applying any svn changes for it." >&2
        build_failures=$((build_failures + 1))
        continue
    fi

    #svn_process_single_collection "$col_path"

    case "$mode" in
        svnupdate)
            update_single_collection "$col_path"
            ;;
        svnaddnew)
            svn_add_new_collection "$col_path"
            ;;
    esac
done

# having rebuilt the collections above, just the processing for svndelete remains
if [ "$mode" == "svndelete" ]; then
    if [ "$build_failures" -gt 0 ]; then
        # svndelete commits automatically, so don't let it commit a failed rebuild
        echo "$build_failures collection(s) failed to rebuild: skipping the svndelete step, nothing was committed." >&2
    elif [ -z "${1:-}" ]; then
        svn_delete
    else
        svn_delete "$@"
    fi
fi


# Final summary: what was (not) done to model-collect and what still differs
echo
echo "*****************************************"
echo
# NO LONGER NECESSARY: WE'RE DOING A DIFF BETWEEN collect AND model-collect AT THIS SCRIPT'S END
# if we were svn updating/deleting collections, then mode was set
# if in that case a report was generated with additional differences, point the user to it
#if [ -f report.txt ] && [ "x$mode" != "x" ]; then
#    echo "Some files or folders outside of archives and index directories were different. See report.txt"
#    echo
#fi

# if not svnupdating or svndeleting, then inform the user that model-collect is unchanged
# if svnupdating, then warn the user that model-collect still needs committing
# if svndeleting, then inform the user that model-collect has been changed and committed
if [ -z "$mode" ]; then
    echo "* The model-collect folder has not been altered. Changes have only been made to collect"
elif [ "$mode" == "svnupdate" ]; then
    echo "* TO DO: You still need to run svn status and svn commit on the model-collect folder. Besides that:"
elif [ "$mode" == "svndelete" ]; then
    echo "* The model-collect folder's archives and index subfolders have been updated and committed to svn."
elif [ "$mode" == "svnaddnew" ]; then
    echo "* The new collection(s) have been built, copied to model-collect and added to svn."
    echo "* TO DO: You still need to run svn status and svn commit on the model-collect folder. Besides that:"
fi
echo

if [ -n "$mode" ]; then
    echo "* DIFFERENCES REMAINING BETWEEN model-collect AND collect (skipping .svn folders):"
    echo
    # the start marker is printed in both cases, to pair up with the end marker
    echo "---START DIFF---"
    # grep -F: ".svn" is matched as a literal string, not as a regular expression
    if [ -z "${1:-}" ]; then
        diff -rq model-collect collect | grep -vF ".svn"
    else
        for collection in "$@"; do
            echo "--COLLECTION: $collection"
            diff -rq "model-collect/$collection" "collect/$collection" | grep -vF ".svn"
            echo "--"
        done
    fi
    echo "---END DIFF---"
    echo
fi

if [ -e collect_orig ]; then
    echo "* The original collect directory has been left renamed as collect_orig"
    echo
fi

if [ "$debug_mode" -eq 1 ]; then
    echo "* This script was run in DEBUG MODE, nothing has been changed in svn"
fi
echo
echo "*****************************************"
echo


# deletes empty dirs
# find collect/$collection/archives/HASH* -type d -empty -delete
# find collect/$collection/index/assoc/HASH* -type d -empty -delete

# To recursively delete all empty dirs in the copy of model-collect (since the dirs will not have .svn folders in them anymore)
# http://www.commandlinefu.com/commands/view/5131/recursively-remove-all-empty-directories
#find collect -type d -empty -delete

# The following, when put in a separate script file, will delete all folders from model-collect that are
# empty in the copied collection (all folders which contain only a .svn subfolder in model-collect)
# ---------------------------------------------
#!/bin/bash

#for collection in collect/*; do
    #escape the filename (in case of space)
#    collection=`echo $collection | sed 's@ @\\\ @g'`

    #get just the basename
#    collection=`basename $collection`

    # HASH dirs that are empty in local collect's archives and index/assoc,
    # need to be removed from the svn in model-collect

#    for line in `find collect/$collection/archives/HASH* -type d -empty`; do
#        modelline="model-$line"
#        echo "LINE: $modelline"

        # remove from svn of model collect
#        svn rm $modelline
##        rm -rf $modelline
        # remove physically from local collect
#        rm -rf $line
#    done

#    for line in `find collect/$collection/index/assoc/HASH* -type d -empty`; do
#        modelline="model-$line"
#        echo "LINE: $modelline"

        # remove from svn of model collect
#        svn rm $modelline
##        rm -rf $modelline
        # remove physically from local collect
#        rm -rf $line
#    done

#done
# ---------------------------------------------
Note: See TracBrowser for help on using the repository browser.