source: gs3-extensions/html-to-expeditee/trunk/src/src/cgi-bin/html-to-expeditee.pl.in@ 26725

Last change on this file since 26725 was 26725, checked in by davidb, 11 years ago

Updated a comment regarding the method for retrieving metadata.

File size: 11.7 KB
RevLine 
[26596]1 #!/cygdrive/c/strawberry/perl/bin/perl -w
[24916]2
3# Need to specify the full path of Perl above, e.g. for Windows something like
4#!C:\\Perl32\\bin\\perl -w
5
6
use strict;
use warnings;

# Set this to 1 to work around IIS 6 craziness
my $iis6_mode = 0;


# IIS 6: for some reason, IIS runs this script with the working directory set to the Greenstone
# directory rather than the cgi-bin directory, causing lots of stuff to fail
if ($iis6_mode)
{
    # Change into cgi-bin directory.  A failure is reported on STDERR rather
    # than ignored, but is not fatal: the module load below will produce the
    # user-visible error if the working directory really is wrong.
    chdir("cgi-bin")
        or warn "html-to-expeditee.pl: unable to chdir to cgi-bin: $!\n";
}


# We use require inside an eval here (instead of "use") to catch any errors
# loading the module (for IIS).  A block eval is enough for that, and the
# trailing true value lets us detect failure without relying on $@ alone.
my $gsdlcgi_loaded = eval { require "gsdlCGI.pm"; 1; };
if (!$gsdlcgi_loaded)
{
    my $load_error = $@ || "unknown error loading gsdlCGI.pm";

    # Report the problem as a plain-text CGI response so that it is visible
    # in the browser instead of as an opaque server error.
    print STDOUT "Content-type:text/plain\n\n";
    print STDOUT "ERROR: $load_error\n";
    exit 0;
}
30
31
# Escape the characters that are significant inside an HTML attribute value.
# An undefined argument is treated as the empty string.
sub _escape_html_attr
{
    my ($text) = @_;

    return '' unless defined $text;

    $text =~ s/&/&amp;/g;   # must come first so later entities are not re-escaped
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&#39;/g;

    return $text;
}

# Print (to the currently selected output handle) a complete CGI response:
# a text/html header followed by the "HTML to Expeditee Frames" page.
#
# Parameters:
#   $isGSDL2  - true when running against Greenstone 2; emitted into the page
#               as the JavaScript literal 1 or 0
#   $site     - site name (may be undef); pre-fills the hidden "site" field
#   $collect  - collection name (may be undef); pre-fills the "collect" field
#   $cl       - classifier id (may be undef); pre-fills the "cl" field
#
# The page's JavaScript walks the classifier pages collecting document links,
# loads each document into an iframe, converts its gs_content element with
# htmlToExpeditee() and posts the JSON back to cgi-bin/html-to-expeditee.pl.
sub generate_html_form
{
    my ($isGSDL2,$site,$collect,$cl) = @_;

    # The three text values come from request parameters and are placed inside
    # double-quoted attribute values, so escape them (and map undef to '').
    $site    = _escape_html_attr($site);
    $collect = _escape_html_attr($collect);
    $cl      = _escape_html_attr($cl);

    # This value is interpolated as a bare JavaScript literal; force it to
    # 1 or 0 so an undefined flag cannot produce "var gs2=;".
    $isGSDL2 = $isGSDL2 ? 1 : 0;

    # NOTE: this is an interpolating heredoc.  "\$" yields a literal "$" for
    # jQuery, and "\\" yields a single backslash in the generated JavaScript.
    my $html_form = <<EOT;
<html>
  <head>
    <title>HTML to Expeditee Frames</title>

    <base href=".." />

    <link type="text/css" href="ext/html-to-expeditee/jquery/css/le-frog/jquery-ui-1.8.16.custom.css" rel="stylesheet" />
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-1.6.2.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-ui-1.8.16.custom.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/gsajax-min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/html-to-expeditee.js"></script>



  </head>
  <body>
    <form class="ui-widget">
      Convert the collection
      <input type="text"
             class="ui-corner-all" style="padding: 4px;"
             name="collect" value="$collect" id="collect" />
      to Expeditee frames by traversing the classifier
      <input type="text"
             class="ui-corner-all" style="padding: 4px;"
             name="cl" value="$cl" id="cl" />

      <input value="$site" name="site" id="site" type="hidden">
      <input value="Go" id="go"
             class="ui-button ui-widget ui-state-default ui-corner-all"
             type="submit">

    </form>
    <script type="text/javascript">
      var docOIDs = [];
      var numDocOIDs;

      \$(function() {
        \$('#progressbar').progressbar();

        \$('#go').button().click(function() {
          var collect = document.getElementById("collect").value;
          if (collect.match(/^\\s*\$/)) {
            alert("No collection specified");
            return false;
          }

          var cl = document.getElementById("cl").value;
          if (cl.match(/^\\s*\$/)) {
            alert("No classifier specified");
            return false;
          }
          if (cl.match(/^\\d+\$/)) {
            cl = "CL" + cl;
          }

          var gs2=$isGSDL2;

          var url;
          if (gs2) {
            url = "library.cgi";
            url += "?c="+collect +"&a=d&cl=" + cl;
          }
          else {
            url = "library";
            url += "?c="+collect +"&a=b&rt=s&s=ClassifierBrowse&cl=" + cl;
            url += "&excerptid=gs_content";
          }

          /* processing animation */

          docOIDs = [];
          var outstandingURLs = [];
          var visitedURLs = {};

          outstandingURLs.push(url);
          visitedURLs[url] = 1;

          while (outstandingURLs.length>0) {
            url = outstandingURLs.shift();

            var clHtml = urlGetSync(url);

            var aElems;
            var workingTrav = document.getElementById("workingTraverse");
            workingTrav.innerHTML = clHtml;
            aElems = workingTrav.getElementsByTagName("a");


            /* any links with a=(b|d) ... cl=??? => outstandingURLS */
            /* any links with a=d ... d=??? => docOIDS */

/*
            var actionRE = new RegExp("(\\\\?|&)a=(?:d|b)(&|\$)");
            var clRE = new RegExp("(\\\\?|&)cl=" + cl + "(\\\\.\\\\d+)+(&|\$)");
            var docRE = new RegExp("(?:\\\\?|&)d=(.*?)(?:&|\$)");
*/
            var actionRE = new RegExp("(\\\\/)(?:document|browse)(\\\\/)");
            var clRE = new RegExp("(\\\\/)browse\\\\/" + cl + "(\\\\/\\\\d+)+(\$)");
            var docRE = new RegExp("(?:\\\\/)document\\\\/(.*?)(?:\$)");

            for (var i=0; i<aElems.length; i++) {
              var aElem = aElems[i];
              var href=aElem.href;
              if (href && href.match(actionRE)) {
                if (href.match(clRE)) {
                  if (!visitedURLs[href]) {
                    // console.log("found a new CL line: " + href);
                    outstandingURLs.push(href);
                    visitedURLs[href] = 1;
                  }
                }
                else if (href.match(docRE)) {
                  if(!visitedURLs[href]){
                    var docMatch = docRE.exec(href);
                    var docOID = docMatch[1];

                    // console.log("found a new doc line: " + docOID);
                    visitedURLs[href] = 1;
                    docOIDs.push(docOID);
                  }
                }
              }
            }
          }

          numDocOIDs = docOIDs.length;

          /* Nothing to convert: stop here rather than loading a document
             called "undefined" and dividing by zero in the progress bar. */
          if (numDocOIDs == 0) {
            alert("No documents found for classifier " + cl);
            return false;
          }

          var iframe = document.getElementById("iframe");

          var docOID = docOIDs.shift();
          //console.log("doc oid = " + docOID);

          var url;
          if (gs2) {
            url = "library.cgi";
            url += "?c="+collect +"&a=d&d=" + docOID;
          }
          else {
            url = "library";
            url += "?c="+collect +"&a=d&d=" + docOID;

            /** The line below has been commented out. This is because the incorrect page is being loaded up in the iframe (i.e. just the document image and heading is being displayed but not any html customizations made in the collectionConfig.xml file**/
            //url += "&ed=1&dt=hierarchy";
            // url += "&excerptid=gs_content";
          }

          //url += "&p.showExpediteeAttributes=1";
          url += "&p.showAssocFilePath=1";


          /* load iframe with document drawn from Greenstone collection */
          iframe.src = url;

          var progressbar = document.getElementById("progressbar");
          progressbar.style.display = "block";


          return false; });
      });

      function pageLoaded()
      {
        var iframe = document.getElementById("iframe");

        if (iframe.src) {
          if (iframe.style.display != "block") {
            iframe.height = "90%";
            iframe.style.display = "block";
          }

          var iframeDoc;
          if ( iframe.contentDocument )
          { /* FF */
            iframeDoc = iframe.contentDocument;
          }
          else if ( iframe.contentWindow )
          { /* IE */
            iframeDoc = iframe.contentWindow.document;
          }

          var xmlUrl = iframe.src + "&o=xml";

          var iter = (numDocOIDs - docOIDs.length);
          var progressPercent = iter/numDocOIDs * 100;

          /*Check if a frame-id metadata element already exists,
            otherwise generate a new frame number based on the
            number of documents to process.*/
          var frameNum = getMetadata(xmlUrl,'frameID');

          if(frameNum === null){
            frameNum = iter;
            console.log("no frameNum found, use iter: " + frameNum);
          }else{
            console.log("frameNum metadata found: " + frameNum);
          }

          \$(function() {
            \$('#progressbar').progressbar({ value: progressPercent })
          });

          var gsContent = iframeDoc.getElementById("gs_content");

          /*
          var gsContentPos = getElementPosition(gsContent);
          var pxl = gsContentPos.xl;
          var pxr = gsContentPos.xr;
          */

          //Check if an assocfilepath annotation already exists on the page, otherwise obtain it from the xml.
          var assocElem = iframeDoc.getElementById('assocfilepath');
          var assoc = null;

          if(assocElem === undefined || assocElem === null){
            assoc = getMetadata(xmlUrl,'assocfilepath');
          }

          var expFrameTree = htmlToExpeditee(gsContent);

          var expFrame = JSON.stringify(expFrameTree);

          console.log("******");
          console.log(expFrame);

          //alert(expFrame);

          var collect = document.getElementById("collect").value;
          var site = document.getElementById("site").value;

          /* Every value placed in the POST body is percent-encoded with
             encodeURIComponent so that "+", "&", "=" and non-ASCII text
             survive form decoding on the server. */
          var url = "cgi-bin/html-to-expeditee.pl";
          var params = "c=" + encodeURIComponent(collect);
          if (site.match(/\\w/)) {
            params += "&site=" + encodeURIComponent(site);
          }

          //params += "&a=generate-frame&fn=" + iter;
          params += "&a=generate-frame&fn=" + encodeURIComponent(frameNum);
          params += "&json=" + encodeURIComponent(expFrame);

          //add an assocfilepath parameter but only if it is defined.
          if(assoc !== null){
            params += "&assoc=" + encodeURIComponent(assoc);
          }

          var clHtml = urlPostSync(url,params);

          if (!clHtml.match(/html-to-expeditee saved frame/)) {
            alert("Error processing url: " + url);
          }

          if (docOIDs.length>0) {
            var docOID = docOIDs.shift();

            // console.log("doc oid = " + docOID);

            // is the following line used ????
            var cl = document.getElementById("cl").value;

            var gs2 = $isGSDL2;
            var url;

            if (gs2) {
              url = "library.cgi";
              url += "?c="+collect +"&a=d&d=" + docOID;
            }
            else {
              url = "library";
              url += "?c="+collect +"&a=d&d=" + docOID;

              /** Refer to comment earlier in this code in regards to the commented out line below.**/
              //url += "&ed=1&dt=hierarchy";
              // url += "&excerptid=gs_content";
            }

            //url += "&p.showExpediteeAttributes=1";
            url += "&p.showAssocFilePath=1";

            iframe.src = url;
          }
          else {
            var progressbar = document.getElementById("progressbar");
            progressbar.style.display = "none";

            iframe.style.display = "none";
            delete iframe.src;
          }
        }

      }

      /**
       * This method is used at this stage to retrieve assocfilepath
       * and frame number metadata values from the document's xml.
       **/
      function getMetadata(xmlUrl,nameValue){

        var metadata = null;

        \$.ajax({
          type: "GET",
          async: false,
          url: xmlUrl,
          dataType: "xml",
          success: function(xml){


            \$(xml).find('metadata').each(function(){

              var name = \$(this).attr('name');

              if(name === nameValue){
                if(metadata === null){
                  metadata = \$(this).text();
                  //console.log(metadata);
                }

              }
            });

          }

        });

        return metadata;
      }

    </script>

    <div id="progressbar" width="100%"
         style="display: none; margin: 10px; height: 10px;"></div>
    <div id="workingTraverse" style="display: none"></div>

    <hr style="margin: 10px;">

    <iframe width="100%" id="iframe" style="display: none;"
            onload="pageLoaded()"></iframe>

  </body>
</html>
EOT

    print "Content-type:text/html\n\n";
    print $html_form;
}
[24916]381
# Script entry point.
#
# Sets up the Greenstone CGI environment and then does one of two things:
#   * when the "fn" (frame number) parameter is present, hands the request to
#     HtmlToExpediteeAction->do_action();
#   * otherwise prints the conversion form, pre-filled from the "collect",
#     "cl" and (for non-GS2 installations) "site" parameters.
sub main
{

# $ENV{'QUERY_STRING'} = "a=...";set-import-metadata&c=espresso-music&d=HASH012d6f72cde5dc48162f4a1d.1&metaname=annotation&metapos=0&metavalue=adfadfad";
# $ENV{'REQUEST_METHOD'} = "GET";

    my $gsdl_cgi = gsdlCGI->new();

    # Load the Greenstone modules that we need to use
    $gsdl_cgi->setup_gsdl();

    my $gsdlhome = $ENV{'GSDLHOME'};
    $gsdl_cgi->checked_chdir($gsdlhome);

    # Loaded here rather than at the top of the file, i.e. only after
    # setup_gsdl() has run.
    require cgiactions::HtmlToExpediteeAction;

    # Useful debug statement for seeing what packages have been included
####    printf("%-45s%-s\n",$_,$INC{$_}) foreach (sort keys %INC);

    $gsdl_cgi->parse_cgi_args();

    # We don't want the gsdlCGI module to return errors and warnings in XML
    $gsdl_cgi->{'xml'} = 0;

    my $fn = $gsdl_cgi->clean_param("fn"); # frame number

    if (defined $fn) {
        # A frame number means the page's JavaScript is posting a converted
        # frame back to us.
        my $action = HtmlToExpediteeAction->new($gsdl_cgi,$iis6_mode);

        $action->do_action();
        return;
    }

    # generate form, pre-filled out with any useful values such
    # as the collection and classifier value

    my $collect = $gsdl_cgi->clean_param("collect");
    my $cl      = $gsdl_cgi->clean_param("cl");

    # Establish collect_dir using defining 'site' along the way if GS3

    my $site    = undef;
    my $isGSDL2 = 0;

    if ($gsdl_cgi->greenstone_version() == 2) {
        $isGSDL2 = 1;
    }
    else {
        # GS3 (and possible future versions) make use of 'site'
        $site = $gsdl_cgi->clean_param("site");
        if (!defined $site) {
            $gsdl_cgi->generate_error("No site specified.");

            # NOTE(review): generate_error presumably does not return; if it
            # ever does, stop here instead of appending the form to the
            # error response.
            return;
        }
    }

    generate_html_form($isGSDL2,$site,$collect,$cl);
    return;
}
442
443
444
# Run the script.  Called without the "&" sigil, which is the legacy call
# form and bypasses prototype checking.
main();
Note: See TracBrowser for help on using the repository browser.