source: gs3-extensions/html-to-expeditee/trunk/src/src/cgi-bin/html-to-expeditee.pl.in@ 26731

Last change on this file since 26731 was 26731, checked in by davidb, 10 years ago

Text colours and sizes are now obtained correctly when generating Expeditee frames. Firefox is also handled (it reports font-weight as "700" rather than the word "bold"). The width of each text item is calculated and written to Expeditee. Parameters let the user choose between using the computed font information and text width or falling back to default values.

File size: 12.5 KB
Line 
1 #!/cygdrive/c/strawberry/perl/bin/perl -w
2
3# Need to specify the full path of Perl above, e.g. for Windows something like
4#!C:\\Perl32\\bin\\perl -w
5
6
7use strict;
8
# Set this to 1 to work around IIS 6 craziness
my $iis6_mode = 0;


# IIS 6: for some reason, IIS runs this script with the working directory set to the Greenstone
# directory rather than the cgi-bin directory, causing lots of stuff to fail
if ($iis6_mode)
{
    # Change into cgi-bin directory.  A failure is reported on STDERR (the
    # web server's error log) but is not fatal: the module load below
    # produces the user-visible error if we really are in the wrong place.
    chdir("cgi-bin")
        or warn "html-to-expeditee: unable to chdir to cgi-bin: $!\n";
}


# We use require inside an eval here (instead of "use") to catch any errors
# loading the module (for IIS).  Block eval is used rather than string eval:
# the code is fixed, so it can be compiled once with the rest of the script,
# and the trailing "1" gives a reliable success flag.
my $gsdlcgi_loaded = eval { require "gsdlCGI.pm"; 1 };
if (!$gsdlcgi_loaded)
{
    # Copy $@ straight away; it is a global that later evals can clobber.
    my $load_error = $@ || "unknown error while loading gsdlCGI.pm";

    print STDOUT "Content-type:text/plain\n\n";
    print STDOUT "ERROR: $load_error\n";
    exit 0;
}
30
31
# Escape a value for safe inclusion inside a double-quoted HTML attribute.
# An undefined value becomes the empty string so that optional CGI
# parameters can be interpolated without "uninitialized value" warnings.
sub _escape_html_attr
{
    my ($text) = @_;

    return '' unless defined $text;

    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&#39;',
    );
    $text =~ s/([&<>"'])/$entity_for{$1}/g;

    return $text;
}

# Print (to the currently selected output handle) the HTML page containing
# the conversion form and the JavaScript that drives the conversion.
#
# Parameters:
#   $isGSDL2 - true when running against Greenstone 2; emitted into the page
#              as the JavaScript value 1 or 0
#   $site    - site name, placed in a hidden form field (may be undef)
#   $collect - collection name used to pre-fill the form (may be undef)
#   $cl      - classifier used to pre-fill the form (may be undef)
#
# The three string values originate from CGI parameters in main(), so they
# are HTML-escaped before being written into attribute values.
# NOTE(review): this assumes clean_param() does not already HTML-escape its
# result -- confirm against gsdlCGI.pm.
sub generate_html_form
{
    my ($isGSDL2,$site,$collect,$cl) = @_;

    my $safe_site    = _escape_html_attr($site);
    my $safe_collect = _escape_html_attr($collect);
    my $safe_cl      = _escape_html_attr($cl);

    # Always emit a valid JavaScript literal, even if the flag is undef.
    my $gs2_flag = $isGSDL2 ? 1 : 0;

    my $html_form = <<EOT;
<html>
  <head>
    <title>HTML to Expeditee Frames</title>

    <base href=".." />

    <link type="text/css" href="ext/html-to-expeditee/jquery/css/le-frog/jquery-ui-1.8.16.custom.css" rel="stylesheet" />
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-1.6.2.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-ui-1.8.16.custom.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/gsajax-min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/html-to-expeditee.js"></script>



  </head>
  <body>
    <form class="ui-widget">
      Convert the collection
      <input type="text"
             class="ui-corner-all" style="padding: 4px;"
             name="collect" value="$safe_collect" id="collect" />
      to Expeditee frames by traversing the classifier
      <input type="text"
             class="ui-corner-all" style="padding: 4px;"
             name="cl" value="$safe_cl" id="cl" />

      <input value="$safe_site" name="site" id="site" type="hidden">

      <p>
        <input type="checkbox" id="checkBoxBrowsing" name="generate_browsing" value="generate_browsing">Generate Browsing Frameset<br/>
      </p>

      <p style="font-weight: bold;">Extra Expeditee Frame Output Options:</p>
      <input type="checkbox" id="checkBoxFont" name="compute_font" value="compute_font">Compute Font<br/>
      <input type="checkbox" id="checkBoxWidth" name="compute_width" value="compute_width">Compute Width<br/>

      <p><input value="Go" id="go" class="ui-button ui-widget ui-state-default ui-corner-all" type="submit"></p>

    </form>
    <script type="text/javascript">
      var docOIDs = [];
      var numDocOIDs;
      var compute_font = false;
      var compute_width = false;
      var generate_browsing = false; //whether to generate a matching browsing frameset

      \$(function() {
        \$('#progress').progressbar();

        \$('#go').button().click(function() {
          var collect = document.getElementById("collect").value;
          if (collect.match(/^\\s*\$/)) {
            alert("No collection specified");
            return false;
          }

          var cl = document.getElementById("cl").value;
          if (cl.match(/^\\s*\$/)) {
            alert("No classifier specified");
            return false;
          }
          if (cl.match(/^\\d+\$/)) {
            cl = "CL" + cl;
          }

          compute_font = document.getElementById("checkBoxFont").checked;
          compute_width = document.getElementById("checkBoxWidth").checked;

          generate_browsing = document.getElementById("checkBoxBrowsing").checked;

          var gs2=$gs2_flag;

          var url;
          if (gs2) {
            url = "library.cgi";
            url += "?c="+collect +"&a=d&cl=" + cl;
          }
          else {
            url = "library";
            url += "?c="+collect +"&a=b&rt=s&s=ClassifierBrowse&cl=" + cl;
            url += "&excerptid=gs_content";
          }

          console.log("*** CHECKING URL: " + url);

          /* processing animation */

          docOIDs = [];
          var outstandingURLs = [];
          var visitedURLs = {};

          outstandingURLs.push(url);
          visitedURLs[url] = 1;

          while (outstandingURLs.length>0) {
            url = outstandingURLs.shift();

            var clHtml = urlGetSync(url);

            var aElems;
            var workingTrav = document.getElementById("workingTraverse");
            workingTrav.innerHTML = clHtml;
            aElems = workingTrav.getElementsByTagName("a");


            /* any links with a=(b|d) ... cl=??? => outstandingURLS */
            /* any links with a=d ... d=??? => docOIDS */

/*
            var actionRE = new RegExp("(\\\\?|&)a=(?:d|b)(&|\$)");
            var clRE = new RegExp("(\\\\?|&)cl=" + cl + "(\\\\.\\\\d+)+(&|\$)");
            var docRE = new RegExp("(?:\\\\?|&)d=(.*?)(?:&|\$)");
*/
            var actionRE = new RegExp("(\\\\/)(?:document|browse)(\\\\/)");
            var clRE = new RegExp("(\\\\/)browse\\\\/" + cl + "(\\\\/\\\\d+)+(\$)");
            var docRE = new RegExp("(?:\\\\/)document\\\\/(.*?)(?:\$)");

            for (var i=0; i<aElems.length; i++) {
              var aElem = aElems[i];
              var href=aElem.href;
              if (href && href.match(actionRE)) {
                if (href.match(clRE)) {
                  if (!visitedURLs[href]) {
                    // console.log("found a new CL line: " + href);
                    outstandingURLs.push(href);
                    visitedURLs[href] = 1;
                  }
                }
                else if (href.match(docRE)) {
                  if(!visitedURLs[href]){
                    var docMatch = docRE.exec(href);
                    var docOID = docMatch[1];

                    // console.log("found a new doc line: " + docOID);
                    visitedURLs[href] = 1;
                    docOIDs.push(docOID);
                  }
                }
              }
            }
          }

          numDocOIDs = docOIDs.length;

          /* nothing to convert: avoid loading "d=undefined" and a 0/0 progress value */
          if (numDocOIDs === 0) {
            alert("No documents found for classifier " + cl);
            return false;
          }

          var iframe = document.getElementById("iframe");

          var docOID = docOIDs.shift();
          //console.log("doc oid = " + docOID);

          var url;
          if (gs2) {
            url = "library.cgi";
            url += "?c="+collect +"&a=d&d=" + docOID;
          }
          else {
            url = "library";
            url += "?c="+collect +"&a=d&d=" + docOID;

            /** The line below has been commented out. This is because the incorrect page is being loaded up in the iframe (i.e. just the document image and heading is being displayed but not any html customizations made in the collectionConfig.xml file**/
            //url += "&ed=1&dt=hierarchy";
            // url += "&excerptid=gs_content";
          }

          //url += "&p.showExpediteeAttributes=1";
          url += "&p.showAssocFilePath=1";


          /* load iframe with document drawn from Greenstone collection */
          iframe.src = url;

          var progressbar = document.getElementById("progressbar");
          progressbar.style.display = "block";


          return false; });
      });

      function pageLoaded()
      {
        var iframe = document.getElementById("iframe");

        if (iframe.src) {
          if (iframe.style.display != "block") {
            iframe.height = "90%";
            iframe.style.display = "block";
          }

          var iframeDoc;
          if ( iframe.contentDocument )
          { /* FF */
            iframeDoc = iframe.contentDocument;
          }
          else if ( iframe.contentWindow )
          { /* IE */
            iframeDoc = iframe.contentWindow.document;
          }

          var xmlUrl = iframe.src + "&o=xml";

          var iter = (numDocOIDs - docOIDs.length);
          var progressPercent = iter/numDocOIDs * 100;

          /*Check if a frame-id metadata element already exists,
            otherwise generate a new frame number based on the
            number of documents to process.*/
          var frameNum = getMetadata(xmlUrl,'frameID');

          if(frameNum === null){
            frameNum = iter;
          }

          \$(function() {
            \$('#progressbar').progressbar({ value: progressPercent })
          });

          var gsContent = iframeDoc.getElementById("gs_content");

          /*
          var gsContentPos = getElementPosition(gsContent);
          var pxl = gsContentPos.xl;
          var pxr = gsContentPos.xr;
          */

          //Check if an assocfilepath annotation already exists on the page, otherwise obtain it from the xml.
          var assocElem = iframeDoc.getElementById('assocfilepath');
          var assoc = null;

          if(assocElem === undefined || assocElem === null){
            assoc = getMetadata(xmlUrl,'assocfilepath');
          }

          var expFrameTree = htmlToExpeditee(gsContent,compute_font,compute_width);

          var expFrame = JSON.stringify(expFrameTree);
          //console.log(expFrame);
          //alert(expFrame);

          var collect = document.getElementById("collect").value;
          var site = document.getElementById("site").value;

          var url = "cgi-bin/html-to-expeditee.pl";
          var params = "c=" + collect;
          if (site.match(/\\w/)) {
            params += "&site=" + site;
          }

          //params += "&a=generate-frame&fn=" + iter;
          params += "&a=generate-frame&fn=" + frameNum;
          params += "&json=" + escape(expFrame);

          //add an assocfilepath parameter but only if it is defined.
          if(assoc !== null){
            params += "&assoc=" + assoc;
          }

          params += "&compute-font=" + compute_font;

          var clHtml = urlPostSync(url,params);

          if (!clHtml.match(/html-to-expeditee saved frame/)) {
            alert("Error processing url: " + url);
          }

          if (docOIDs.length>0) {
            var docOID = docOIDs.shift();

            // console.log("doc oid = " + docOID);

            // is the following line used ????
            var cl = document.getElementById("cl").value;

            var gs2 = $gs2_flag;
            var url;

            if (gs2) {
              url = "library.cgi";
              url += "?c="+collect +"&a=d&d=" + docOID;
            }
            else {
              url = "library";
              url += "?c="+collect +"&a=d&d=" + docOID;

              /** Refer to comment earlier in this code in regards to the commented out line below.**/
              //url += "&ed=1&dt=hierarchy";
              // url += "&excerptid=gs_content";
            }

            //url += "&p.showExpediteeAttributes=1";
            url += "&p.showAssocFilePath=1";

            iframe.src = url;
          }
          else {
            var progressbar = document.getElementById("progressbar");
            progressbar.style.display = "none";

            iframe.style.display = "none";
            delete iframe.src;
          }
        }

      }

      /**
       * This method is used at this stage to retrieve assocfilepath
       * and frame number metadata values from the document's xml.
       **/
      function getMetadata(xmlUrl,nameValue){

        var metadata = null;

        \$.ajax({
          type: "GET",
          async: false,
          url: xmlUrl,
          dataType: "xml",
          success: function(xml){


            \$(xml).find('metadata').each(function(){

              var name = \$(this).attr('name');

              if(name === nameValue){
                if(metadata === null){
                  metadata = \$(this).text();
                  //console.log(metadata);
                }

              }
            });

          }

        });

        return metadata;
      }

    </script>

    <div id="progressbar" width="100%"
         style="display: none; margin: 10px; height: 10px;"></div>
    <div id="workingTraverse" style="display: none"></div>

    <hr style="margin: 10px;">

    <iframe width="100%" id="iframe" style="display: none;"
            onload="pageLoaded()"></iframe>

  </body>
</html>
EOT

    print "Content-type:text/html\n\n";
    print $html_form;

    return;
}
394
# Script entry point.
#
# Sets up the Greenstone CGI environment, then dispatches on the "fn"
# (frame number) CGI parameter: when it is present the request is handed to
# HtmlToExpediteeAction; otherwise the HTML conversion form is generated,
# pre-filled from the "collect", "cl" and (for non-GS2 installs) "site"
# parameters.
sub main
{

#    $ENV{'QUERY_STRING'} = "a=...";set-import-metadata&c=espresso-music&d=HASH012d6f72cde5dc48162f4a1d.1&metaname=annotation&metapos=0&metavalue=adfadfad";
#    $ENV{'REQUEST_METHOD'} = "GET";

    # Direct method-call syntax; the indirect form ("new gsdlCGI()") relies
    # on parser heuristics and breaks if a sub named "new" is ever in scope.
    my $gsdl_cgi = gsdlCGI->new();

    # Load the Greenstone modules that we need to use
    $gsdl_cgi->setup_gsdl();

    my $gsdlhome = $ENV{'GSDLHOME'};
    $gsdl_cgi->checked_chdir($gsdlhome);

    require cgiactions::HtmlToExpediteeAction;

    # Useful debug statement for seeing what packages have been included
####    printf("%-45s%-s\n",$_,$INC{$_}) foreach (sort keys %INC);

    $gsdl_cgi->parse_cgi_args();

    # We don't want the gsdlCGI module to return errors and warnings in XML
    $gsdl_cgi->{'xml'} = 0;

    my $fn = $gsdl_cgi->clean_param("fn"); # frame number

    if (defined $fn) {

        my $action = HtmlToExpediteeAction->new($gsdl_cgi,$iis6_mode);

        $action->do_action();
    }
    else {
        # generate form, pre-filled out with any useful values such
        # as the collection and classifier value

        my $collect = $gsdl_cgi->clean_param("collect");
        my $cl      = $gsdl_cgi->clean_param("cl");

        # Establish collect_dir using defining 'site' along the way if GS3

        my $site    = undef;
        my $isGSDL2 = undef;

        if ($gsdl_cgi->greenstone_version() == 2) {
            $isGSDL2 = 1;
        }
        else {
            $isGSDL2 = 0;

            # GS3 (and possible future versions) make use of 'site'
            $site = $gsdl_cgi->clean_param("site");
            if (!defined $site) {
                $gsdl_cgi->generate_error("No site specified.");

                # NOTE(review): generate_error() presumably terminates the
                # request itself; returning here guarantees that no form is
                # emitted after the error if it ever does come back.
                return;
            }
        }

        generate_html_form($isGSDL2,$site,$collect,$cl);
    }

    return;
}
455
456
457
# Run the CGI handler.
main();
Note: See TracBrowser for help on using the repository browser.