source: gs3-extensions/html-to-expeditee/trunk/src/src/cgi-bin/html-to-expeditee.pl.in@ 26518

Last change on this file since 26518 was 26518, checked in by davidb, 11 years ago

Added an if statement to check whether a doc URL line obtained has already been processed. This should stop duplicate Expeditee frames being produced.

File size: 9.7 KB
RevLine 
[24918]1#!@FULL_PERL_EXE@ -w
[24916]2
3# Need to specify the full path of Perl above, e.g. for Windows something like
4#!C:\\Perl32\\bin\\perl -w
5
6
7use strict;
8
# Set this to 1 to work around IIS 6 craziness
my $iis6_mode = 0;


# IIS 6: for some reason, IIS runs this script with the working directory set to the Greenstone
# directory rather than the cgi-bin directory, causing lots of stuff to fail
if ($iis6_mode)
{
    # Change into cgi-bin directory.  The result is deliberately not checked
    # here: if the directory change did not work, the module load below is
    # where the problem gets reported to the browser.
    chdir("cgi-bin");
}


# We use require inside an eval here (instead of "use") to catch any errors
# loading the module (for IIS).  A block eval is used rather than a string
# eval, and success is detected through the block's own true return value so
# that a failure is noticed even if $@ were to end up empty.
my $gsdlcgi_loaded = eval { require "gsdlCGI.pm"; 1; };
if (!$gsdlcgi_loaded)
{
    my $load_error = $@ || "unknown error";

    # Report the failure as a plain-text CGI response so it is visible
    # in the browser.
    print STDOUT "Content-type:text/plain\n\n";
    print STDOUT "ERROR: $load_error\n";
    exit 0;
}
30
31
# Escape a value so that it can be placed inside a double-quoted HTML
# attribute.  An undefined value is treated as the empty string.
sub _escape_html_attr
{
    my ($value) = @_;

    return "" if !defined $value;

    $value =~ s/&/&amp;/g;
    $value =~ s/</&lt;/g;
    $value =~ s/>/&gt;/g;
    $value =~ s/"/&quot;/g;

    return $value;
}

# Print (to the currently selected output handle) a CGI response consisting of
# a "Content-type:text/html" header followed by the HTML-to-Expeditee form page.
#
# Parameters:
#   $isGSDL2 - true when running against Greenstone 2; emitted into the page's
#              JavaScript as 1 or 0 to choose between the "library.cgi" and
#              "library" URL styles
#   $site    - value for the hidden "site" input (may be undef)
#   $collect - initial value of the "collect" text input (may be undef)
#   $cl      - initial value of the "cl" text input (may be undef)
#
# The embedded JavaScript walks the classifier pages starting from the given
# classifier, collects document OIDs from the links it finds, then loads each
# document into an iframe; pageLoaded() posts the converted frame back to
# cgi-bin/html-to-expeditee.pl and moves on to the next document.
#
# Fixes relative to the previous revision:
#   * the "already seen this document link" check used the misspelt name
#     visitedURLS, an undeclared variable, instead of visitedURLs
#   * the progress bar was initialised on '#progress', but the element in the
#     page has id "progressbar"
#   * the form values are HTML-escaped and undefined values become "" so that
#     they cannot break out of the attribute or trigger warnings under -w
#   * $isGSDL2 is normalised to 1/0 so the generated JavaScript is always valid
sub generate_html_form
{
    my ($isGSDL2,$site,$collect,$cl) = @_;

    my $gs2_flag     = $isGSDL2 ? 1 : 0;
    my $site_attr    = _escape_html_attr($site);
    my $collect_attr = _escape_html_attr($collect);
    my $cl_attr      = _escape_html_attr($cl);

    my $html_form = <<EOT;
<html>
  <head>
    <title>HTML to Expeditee Frames</title>

    <base href=".." />

    <link type="text/css" href="ext/html-to-expeditee/jquery/css/le-frog/jquery-ui-1.8.16.custom.css" rel="stylesheet" />
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-1.6.2.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-ui-1.8.16.custom.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/gsajax-min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/html-to-expeditee.js"></script>



  </head>
  <body>
    <form class="ui-widget">
      Convert the collection
      <input type="text"
             class="ui-corner-all" style="padding: 4px;"
             name="collect" value="$collect_attr" id="collect" />
      to Expeditee frames by traversing the classifier
      <input type="text"
             class="ui-corner-all" style="padding: 4px;"
             name="cl" value="$cl_attr" id="cl" />

      <input value="$site_attr" id="site" type="hidden">
      <input value="Go" id="go"
             class="ui-button ui-widget ui-state-default ui-corner-all"
             type="submit">

    </form>
    <script type="text/javascript">
      var docOIDs = [];
      var numDocOIDs;

      \$(function() {
        \$('#progressbar').progressbar();

        \$('#go').button().click(function() {
          var collect = document.getElementById("collect").value;
          if (collect.match(/^\\s*\$/)) {
            alert("No collection specified");
            return false;
          }

          var cl = document.getElementById("cl").value;
          if (cl.match(/^\\s*\$/)) {
            alert("No classifier specified");
            return false;
          }
          if (cl.match(/^\\d+\$/)) {
            cl = "CL" + cl;
          }

          var gs2=$gs2_flag;

          var url;
          if (gs2) {
            url = "library.cgi";
            url += "?c="+collect +"&a=d&cl=" + cl;
          }
          else {
            url = "library";
            url += "?c="+collect +"&a=b&rt=s&s=ClassifierBrowse&cl=" + cl;
            url += "&excerptid=gs_content";
          }

          /* processing animation */

          docOIDs = [];
          var outstandingURLs = [];
          var visitedURLs = {};

          outstandingURLs.push(url);
          visitedURLs[url] = 1;

          while (outstandingURLs.length>0) {
            url = outstandingURLs.shift();

            var clHtml = urlGetSync(url);

            var aElems;
            var workingTrav = document.getElementById("workingTraverse");
            workingTrav.innerHTML = clHtml;
            aElems = workingTrav.getElementsByTagName("a");


            /* any links with a=(b|d) ... cl=??? => outstandingURLS */
            /* any links with a=d ... d=??? => docOIDS */

/*
            var actionRE = new RegExp("(\\\\?|&)a=(?:d|b)(&|\$)");
            var clRE = new RegExp("(\\\\?|&)cl=" + cl + "(\\\\.\\\\d+)+(&|\$)");
            var docRE = new RegExp("(?:\\\\?|&)d=(.*?)(?:&|\$)");
*/
            var actionRE = new RegExp("(\\\\/)(?:document|browse)(\\\\/)");
            var clRE = new RegExp("(\\\\/)browse\\\\/" + cl + "(\\\\/\\\\d+)+(\$)");
            var docRE = new RegExp("(?:\\\\/)document\\\\/(.*?)(?:\$)");

            for (var i=0; i<aElems.length; i++) {
              var aElem = aElems[i];
              var href=aElem.href;
              if (href && href.match(actionRE)) {
                if (href.match(clRE)) {
                  if (!visitedURLs[href]) {
                    // console.log("found a new CL line: " + href);
                    outstandingURLs.push(href);
                    visitedURLs[href] = 1;
                  }
                }
                else if (href.match(docRE)) {
                  if (!visitedURLs[href]) {
                    var docMatch = docRE.exec(href);
                    var docOID = docMatch[1];

                    // console.log("found a new doc line: " + docOID);
                    visitedURLs[href] = 1;
                    docOIDs.push(docOID);
                  }
                }
              }
            }
          }

          numDocOIDs = docOIDs.length;

          var iframe = document.getElementById("iframe");

          var docOID = docOIDs.shift();
          //console.log("doc oid = " + docOID);

          var url;
          if (gs2) {
            url = "library.cgi";
            url += "?c="+collect +"&a=d&d=" + docOID;
          }
          else {
            url = "library";
            url += "?c="+collect +"&a=d&d=" + docOID;
            url += "&ed=1&dt=hierarchy";
            // url += "&excerptid=gs_content";
          }

          url += "&p.showExpediteeAttributes=1";

          /* load iframe with document drawn from Greenstone collection */
          iframe.src = url;

          var progressbar = document.getElementById("progressbar");
          progressbar.style.display = "block";


          return false; });
      });

      function pageLoaded()
      {
        var iframe = document.getElementById("iframe");

        if (iframe.src) {
          if (iframe.style.display != "block") {
            iframe.height = "90%";
            iframe.style.display = "block";
          }

          var iframeDoc;
          if ( iframe.contentDocument )
          { /* FF */
            iframeDoc = iframe.contentDocument;
          }
          else if ( iframe.contentWindow )
          { /* IE */
            iframeDoc = iframe.contentWindow.document;
          }

          var frameID = (numDocOIDs - docOIDs.length);
          var progressPercent = frameID/numDocOIDs * 100;

          \$(function() {
            \$('#progressbar').progressbar({ value: progressPercent })
          });

          var gsContent = iframeDoc.getElementById("gs_content");

          /*
          var gsContentPos = getElementPosition(gsContent);
          var pxl = gsContentPos.xl;
          var pxr = gsContentPos.xr;
          */

          var expFrameTree = htmlToExpeditee(gsContent);

          var expFrame = JSON.stringify(expFrameTree);
          //alert(expFrame);

          var collect = document.getElementById("collect").value;
          var site = document.getElementById("site").value;

          var url = "cgi-bin/html-to-expeditee.pl";
          var params = "c=" + collect;
          if (site.match(/\\w/)) {
            params += "&site=" + site;
          }
          params += "&a=generate-frame&fn=" + frameID;
          params += "&json=" + escape(expFrame);

          var clHtml = urlPostSync(url,params);

          if (!clHtml.match(/html-to-expeditee saved frame/)) {
            alert("Error processing url: " + url);
          }

          if (docOIDs.length>0) {
            var docOID = docOIDs.shift();

            // console.log("doc oid = " + docOID);

            // is the following line used ????
            var cl = document.getElementById("cl").value;

            var gs2 = $gs2_flag;
            var url;

            if (gs2) {
              url = "library.cgi";
              url += "?c="+collect +"&a=d&d=" + docOID;
            }
            else {
              url = "library";
              url += "?c="+collect +"&a=d&d=" + docOID;
              url += "&ed=1&dt=hierarchy";
              // url += "&excerptid=gs_content";
            }

            url += "&p.showExpediteeAttributes=1";

            iframe.src = url;
          }
          else {
            var progressbar = document.getElementById("progressbar");
            progressbar.style.display = "none";

            iframe.style.display = "none";
            delete iframe.src;
          }
        }

      }

    </script>

    <div id="progressbar" width="100%"
         style="display: none; margin: 10px; height: 10px;"></div>
    <div id="workingTraverse" style="display: none"></div>

    <hr style="margin: 10px;">

    <iframe width="100%" id="iframe" style="display: none;"
            onload="pageLoaded()"></iframe>

  </body>
</html>
EOT

    print "Content-type:text/html\n\n";
    print $html_form;
}
[24916]305
# Entry point for the CGI script.
#
# Sets up the Greenstone CGI environment, then either
#   * runs HtmlToExpediteeAction when an "fn" (frame number) argument is
#     present, or
#   * prints the HTML form, pre-filled with the "c" and "cl" arguments.
#
# Reads the file-level $iis6_mode flag and $ENV{'GSDLHOME'}.
sub main
{

#    $ENV{'QUERY_STRING'} = "a=...";set-import-metadata&c=espresso-music&d=HASH012d6f72cde5dc48162f4a1d.1&metaname=annotation&metapos=0&metavalue=adfadfad";
#    $ENV{'REQUEST_METHOD'} = "GET";

    # Direct method-call syntax is used for constructors; the indirect-object
    # form ("new Class(...)") depends on how Perl happens to parse the
    # bareword at compile time.
    my $gsdl_cgi = gsdlCGI->new();

    # Load the Greenstone modules that we need to use
    $gsdl_cgi->setup_gsdl();

    my $gsdlhome = $ENV{'GSDLHOME'};
    $gsdl_cgi->checked_chdir($gsdlhome);

    require cgiactions::HtmlToExpediteeAction;

    # Useful debug statement for seeing what packages have been included
####    printf("%-45s%-s\n",$_,$INC{$_}) foreach (sort keys %INC);

    $gsdl_cgi->parse_cgi_args();

    # We don't want the gsdlCGI module to return errors and warnings in XML
    $gsdl_cgi->{'xml'} = 0;

    my $fn = $gsdl_cgi->clean_param("fn"); # frame number

    if (defined $fn) {

        my $action = HtmlToExpediteeAction->new($gsdl_cgi,$iis6_mode);

        $action->do_action();
    }
    else {
        # generate form, pre-filled out with any useful values such
        # as the collection and classifier value

        my $collect = $gsdl_cgi->clean_param("c");
        my $cl = $gsdl_cgi->clean_param("cl");

        # Establish collect_dir using defining 'site' along the way if GS3

        my $site = undef;
        my $isGSDL2 = undef;

        if ($gsdl_cgi->greenstone_version() == 2) {
            $isGSDL2 = 1;
        }
        else {
            $isGSDL2 = 0;

            # GS3 (and possible future versions) make use of 'site'
            $site = $gsdl_cgi->clean_param("site");
            if (!defined $site) {
                # NOTE(review): presumably generate_error() ends the request;
                # if it returns, the form below is still generated with an
                # undefined site - confirm against gsdlCGI.pm.
                $gsdl_cgi->generate_error("No site specified.");
            }
        }

        generate_html_form($isGSDL2,$site,$collect,$cl);
    }
}



main();
Note: See TracBrowser for help on using the repository browser.