source: gsdl/trunk/src/recpt/dynamicclassifieraction.cpp@ 16065

Last change on this file since 16065 was 16065, checked in by mdewsnip, 16 years ago

(Adding dynamic classifiers) Changed the links generated for parent/current classifier nodes in vlistbrowserclass to use ".pr" instead of trying to work out the parent OID, to avoid this code needing to know about hierarchy separators.

Added code in dynamicclassifieraction for resolving the ".pr" bits.

  • Property svn:executable set to *
File size: 22.3 KB
Line 
1/**********************************************************************
2 *
3 * dynamicclassifieraction.cpp --
4 * Copyright (C) 2008 DL Consulting Ltd
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "dynamicclassifieraction.h"
27#include "recptprototools.h"
28
29
30dynamicclassifieraction::dynamicclassifieraction ()
31{
32 recpt = NULL;
33
34 cgiarginfo arg_ainfo;
35 arg_ainfo.shortname = "dcl";
36 arg_ainfo.longname = "dynamic classifier ID";
37 arg_ainfo.multiplechar = true;
38 arg_ainfo.defaultstatus = cgiarginfo::weak;
39 arg_ainfo.argdefault = "";
40 arg_ainfo.savedarginfo = cgiarginfo::must;
41 argsinfo.addarginfo (NULL, arg_ainfo);
42
43 arg_ainfo.shortname = "dcn";
44 arg_ainfo.longname = "dynamic classifier node";
45 arg_ainfo.multiplechar = true;
46 arg_ainfo.defaultstatus = cgiarginfo::weak;
47 arg_ainfo.argdefault = "";
48 arg_ainfo.savedarginfo = cgiarginfo::must;
49 argsinfo.addarginfo (NULL, arg_ainfo);
50}
51
52
53dynamicclassifieraction::~dynamicclassifieraction()
54{
55}
56
57
58bool dynamicclassifieraction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args,
59 recptprotolistclass *protos, ostream &logout)
60{
61 return true;
62}
63
64
65void dynamicclassifieraction::get_cgihead_info (cgiargsclass &args, recptprotolistclass *protos,
66 response_t &response,text_t &response_data,
67 ostream &logout)
68{
69 response = content;
70 response_data = "text/html";
71}
72
73
74// define all the macros which might be used by other actions to produce pages.
75void dynamicclassifieraction::define_external_macros (displayclass &disp, cgiargsclass &args,
76 recptprotolistclass *protos, ostream &logout)
77{
78 // A valid collection server is vital
79 recptproto *collectproto = protos->getrecptproto (args["c"], logout);
80 if (collectproto == NULL)
81 {
82 logout << "dynamicclassifieraction::define_external_macros called with NULL collectproto\n";
83 return;
84 }
85
86 // Define _dynamicclassifiernavbarentries_ to add buttons to the navigation bar for the dynamic classifiers
87 text_t navigation_bar_entries = "";
88 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
89 text_tmap::iterator dynamic_classifier_iterator = cinfo->dynamic_classifiers.begin();
90 while (dynamic_classifier_iterator != cinfo->dynamic_classifiers.end())
91 {
92 text_t dynamic_classifier_id = (*dynamic_classifier_iterator).first;
93 navigation_bar_entries += "_navbarspacer_";
94 navigation_bar_entries += "_navtab_(_gwcgi_?c=" + args["c"] + "&amp;a=dc&amp;dcl=" + dynamic_classifier_id + "," + dynamic_classifier_id;
95 if (args["a"] == "dc" && args["dcl"] == dynamic_classifier_id)
96 {
97 navigation_bar_entries += ",selected";
98 }
99 navigation_bar_entries += ")";
100 dynamic_classifier_iterator++;
101 }
102
103 disp.setmacro("dynamicclassifiernavbarentries", displayclass::defaultpackage, navigation_bar_entries);
104}
105
106
107// define all the macros which are related to pages generated
108// by this action. we also load up the formatinfo structure
109// here (it's used in do_action as well as here)
110void dynamicclassifieraction::define_internal_macros (displayclass &disp, cgiargsclass &args,
111 recptprotolistclass *protos, ostream &logout)
112{
113 // define_internal_macros sets the following macros:
114}
115
116
117bool dynamicclassifieraction::do_action(cgiargsclass &args, recptprotolistclass *protos,
118 browsermapclass *browsers, displayclass &disp,
119 outconvertclass &outconvert, ostream &textout,
120 ostream &logout)
121{
122 // A valid collection server is vital
123 recptproto *collectproto = protos->getrecptproto (args["c"], logout);
124 if (collectproto == NULL)
125 {
126 logout << "dynamicclassifieraction::do_action called with NULL collectproto\n";
127 return false;
128 }
129
130 textout << outconvert << disp << "_dynamicclassifier:header_\n";
131 textout << outconvert << disp << "_dynamicclassifier:content_\n";
132
133 // Check a dynamic classifier ID has been specified
134 text_t arg_dcl = args["dcl"];
135 if (arg_dcl.empty())
136 {
137 textout << outconvert << disp << "Error: Missing dcl argument.\n";
138 textout << outconvert << disp << "_dynamicclassifier:footer_\n";
139 return true;
140 }
141
142 // Check the dynamic classifier ID is valid (ie. there is an entry in the collect.cfg file for it)
143 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
144 if (cinfo->dynamic_classifiers.find(arg_dcl) == cinfo->dynamic_classifiers.end())
145 {
146 textout << outconvert << disp << "Error: Invalid dcl value \"" << arg_dcl << "\".\n";
147 textout << outconvert << disp << "_dynamicclassifier:footer_\n";
148 return true;
149 }
150
151 // Parse the classifier options from the specification
152 text_t classifier_specification = cinfo->dynamic_classifiers[arg_dcl];
153 text_tmap classifier_options = parse_classifier_options (classifier_specification, args);
154
155 // Output the "<ID>Header" format statement if there is one
156 text_t classifier_header_format_statement = "";
157 get_formatstring (arg_dcl + "Header", cinfo->format, classifier_header_format_statement);
158 textout << outconvert << disp << classifier_header_format_statement << "\n";
159
160 // Resolve any ".pr" bits at the end of the "dcn" argument
161 if (ends_with (args["dcn"], ".pr"))
162 {
163 // Change the "dcn" argument to be the OID of the parent of the specified classifier node
164 text_t::iterator parent_classifier_node_OID_end = findlastchar (args["dcn"].begin(), args["dcn"].end(), '|');
165 if (parent_classifier_node_OID_end != args["dcn"].end())
166 {
167 args["dcn"] = substr (args["dcn"].begin(), parent_classifier_node_OID_end);
168 }
169 else
170 {
171 args["dcn"] = "";
172 }
173 }
174
175 // Output the dynamic classifier, beginning with the (optional) grouping nodes
176 text_t selected_grouping_node_OID = "";
177 if (!classifier_options["-group_using"].empty())
178 {
179 selected_grouping_node_OID = output_grouping_nodes (classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
180 }
181 text_t classifier_node_OID = args["dcn"]; // args["dcn"] may have been modified by output_grouping_nodes()
182 int classifier_node_indent = 0;
183
184 // Simple case at the top level: just output the child classifier nodes
185 if (classifier_node_OID == selected_grouping_node_OID)
186 {
187 text_t metadata_value_filter = selected_grouping_node_OID + "*";
188 output_child_classifier_nodes (classifier_node_OID, "", metadata_value_filter, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
189 }
190
191 // More complex case below the top level
192 else
193 {
194 // This is the classifier node OID without any grouping information
195 text_t classifier_node_OID_sans_grouping = classifier_node_OID;
196 if (starts_with (classifier_node_OID, selected_grouping_node_OID + "|"))
197 {
198 classifier_node_OID_sans_grouping = substr (classifier_node_OID.begin() + (selected_grouping_node_OID + "|").size(), classifier_node_OID.end());
199 }
200
201 // Determine the parent classifier node labels
202 text_tlist parent_classifier_node_labels;
203 splitchar(classifier_node_OID_sans_grouping.begin(), classifier_node_OID_sans_grouping.end(), '|', parent_classifier_node_labels);
204
205 // Output the parent classifier nodes and the current classifier node
206 output_upper_classifier_nodes (selected_grouping_node_OID, parent_classifier_node_labels, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
207
208 // Output the child classifier nodes
209 text_t classifier_node_metadata_value = classifier_node_OID_sans_grouping;
210 text_t metadata_value_filter = classifier_node_OID_sans_grouping + "|*";
211 output_child_classifier_nodes (classifier_node_OID, classifier_node_metadata_value, metadata_value_filter, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
212
213 // Get the document nodes at this level
214 text_t metadata_element_name = classifier_options["metadata_element_name"];
215 text_t sort_documents_by = classifier_options["-sort_documents_by"];
216 FilterResponse_t documents_response;
217 get_documents_with_metadata_value (metadata_element_name, classifier_node_metadata_value, sort_documents_by, args["c"], collectproto, documents_response, logout);
218
219 // Display the document nodes
220 display_document_nodes (documents_response, classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
221 }
222
223 // Output the "<ID>Footer" format statement if there is one
224 text_t classifier_footer_format_statement = "";
225 get_formatstring (arg_dcl + "Footer", cinfo->format, classifier_footer_format_statement);
226 textout << outconvert << disp << classifier_footer_format_statement << "\n";
227
228 textout << outconvert << disp << "_dynamicclassifier:footer_\n";
229 return true;
230}
231
232
233text_tmap dynamicclassifieraction::parse_classifier_options (text_t classifier_specification, cgiargsclass &args)
234{
235 text_tmap classifier_options;
236
237 // Split the classifier specification string by spaces
238 text_tlist classifier_specification_parts;
239 splitchar (classifier_specification.begin(), classifier_specification.end(), ' ', classifier_specification_parts);
240
241 // The metadata element to classify by should be the first value
242 classifier_options["metadata_element_name"] = classifier_specification_parts.front();
243 classifier_specification_parts.pop_front();
244
245 // Parse options from the remainder of the classifier specification
246 while (!classifier_specification_parts.empty())
247 {
248 // Parse the option name
249 text_t classifier_option_name = classifier_specification_parts.front();
250 classifier_specification_parts.pop_front();
251
252 // Check if the option has a value (it may just be a flag, in which case we use "1" as the value)
253 text_t classifier_option_value = "1";
254 if (!classifier_specification_parts.empty() && !starts_with(classifier_specification_parts.front(), "-"))
255 {
256 classifier_option_value = classifier_specification_parts.front();
257 classifier_specification_parts.pop_front();
258 }
259
260 // Record the option
261 classifier_options[classifier_option_name] = classifier_option_value;
262 }
263
264 return classifier_options;
265}
266
267
268text_t dynamicclassifieraction::output_grouping_nodes (text_tmap classifier_options, cgiargsclass &args,
269 recptproto *collectproto, browsermapclass *browsers,
270 displayclass &disp, outconvertclass &outconvert,
271 ostream &textout, ostream &logout)
272{
273 // Get all the metadata values for the specified element, and group them according to the "-group_using" value
274 text_t metadata_element_name = classifier_options["metadata_element_name"];
275 text_t metadata_value_grouping_expression = classifier_options["-group_using"];
276 FilterResponse_t grouping_nodes_response;
277 bool request_success = get_metadata_values (metadata_element_name, "", metadata_value_grouping_expression, args["c"], collectproto, grouping_nodes_response, logout);
278
279 // If the request failed then it's probably because the collection isn't using an SQL infodbtype
280 if (request_success == false)
281 {
282 textout << outconvert << disp << "Error: Dynamic classifier functionality is not available. Please check you are using an SQL infodbtype and the collection has been rebuilt.\n";
283 return "";
284 }
285
286 // Check some grouping nodes were returned
287 if (grouping_nodes_response.docInfo.empty())
288 {
289 return "";
290 }
291
292 // If no classifier node has been specified automatically go to the first grouping node
293 if (args["dcn"] == "")
294 {
295 args["dcn"] = grouping_nodes_response.docInfo.front().OID;
296 }
297
298 text_t selected_grouping_node_OID = "";
299 ResultDocInfo_tarray::iterator grouping_node_iterator = grouping_nodes_response.docInfo.begin();
300 while (grouping_node_iterator != grouping_nodes_response.docInfo.end())
301 {
302 // Is this the grouping node that is currently selected?
303 if (starts_with (args["dcn"], (*grouping_node_iterator).OID))
304 {
305 selected_grouping_node_OID = (*grouping_node_iterator).OID;
306 }
307
308 // Add the necessary metadata required to display the grouping nodes correctly
309 (*grouping_node_iterator).metadata["doctype"].values.push_back ("classify");
310 (*grouping_node_iterator).metadata["haschildren"].values.push_back ("1");
311 (*grouping_node_iterator).metadata["numleafdocs"].values.push_back ("?"); // We can't determine this without more database requests
312 (*grouping_node_iterator).metadata["Title"].values.push_back ((*grouping_node_iterator).OID);
313 grouping_node_iterator++;
314 }
315
316 // Get the format statement for this classifier if there is one, or use the browser's default otherwise
317 text_t formatstring;
318 text_t classifier_type = "HList";
319 browserclass *bptr = browsers->getbrowser (classifier_type);
320 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
321 if (!get_formatstring (args["dcl"], classifier_type, cinfo->format, formatstring))
322 {
323 formatstring = bptr->get_default_formatstring();
324 }
325 format_t *formatlistptr = new format_t();
326 text_tset metadata;
327 bool getParents = false;
328 parse_formatstring (formatstring, formatlistptr, metadata, getParents);
329 bool use_table = is_table_content (formatlistptr);
330
331 // Display the grouping nodes
332 bptr->output_section_group (grouping_nodes_response, args, args["c"], 0, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
333
334 return selected_grouping_node_OID;
335}
336
337
338void dynamicclassifieraction::output_upper_classifier_nodes (text_t root_classifier_node_OID,
339 text_tlist parent_classifier_node_labels,
340 int& classifier_node_indent,
341 text_tmap classifier_options, cgiargsclass &args,
342 recptproto *collectproto, browsermapclass *browsers,
343 displayclass &disp, outconvertclass &outconvert,
344 ostream &textout, ostream &logout)
345{
346 // Display the parent classifier nodes
347 text_t parent_classifier_node_OID = root_classifier_node_OID;
348 text_tlist::iterator parent_classifier_node_labels_iterator = parent_classifier_node_labels.begin();
349 while (parent_classifier_node_labels_iterator != parent_classifier_node_labels.end())
350 {
351 parent_classifier_node_OID += (parent_classifier_node_OID != "" ? "|" : "");
352 parent_classifier_node_OID += *parent_classifier_node_labels_iterator;
353 text_t parent_classifier_node_label = *parent_classifier_node_labels_iterator;
354 text_t parent_classifier_node_numleafdocs = "?"; // We can't determine this without more database requests
355 display_classifier_node (parent_classifier_node_OID, parent_classifier_node_label, parent_classifier_node_numleafdocs, classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
356 classifier_node_indent++;
357
358 parent_classifier_node_labels_iterator++;
359 }
360}
361
362
363void dynamicclassifieraction::output_child_classifier_nodes (text_t classifier_node_OID,
364 text_t classifier_node_metadata_value,
365 text_t metadata_value_filter,
366 int& classifier_node_indent,
367 text_tmap classifier_options, cgiargsclass &args,
368 recptproto *collectproto, browsermapclass *browsers,
369 displayclass &disp, outconvertclass &outconvert,
370 ostream &textout, ostream &logout)
371{
372 // Get all the metadata values for the specified element that match the filter
373 text_t metadata_element_name = classifier_options["metadata_element_name"];
374 FilterResponse_t metadata_values_response;
375 bool request_success = get_metadata_values (metadata_element_name, metadata_value_filter, "", args["c"], collectproto, metadata_values_response, logout);
376
377 // If the request failed then it's probably because the collection isn't using an SQL infodbtype
378 if (request_success == false)
379 {
380 textout << outconvert << disp << "Error: Dynamic classifier functionality is not available. Please check you are using an SQL infodbtype and the collection has been rebuilt.\n";
381 return;
382 }
383
384 // After processing any hierarchical metadata values we're left with the child classifer nodes
385 map<text_t, int, lttext_t> child_classifier_nodes;
386 ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values_response.docInfo.begin();
387 while (metadata_value_iterator != metadata_values_response.docInfo.end())
388 {
389 text_t metadata_value = (*metadata_value_iterator).OID;
390 // logout << "Metadata value: " << metadata_value << ", current position: " << current_position << endl;
391
392 // If we're not at the top-level we need to remove the current position from the metadata values
393 if (starts_with(metadata_value, classifier_node_metadata_value + "|"))
394 {
395 metadata_value = substr(metadata_value.begin() + (classifier_node_metadata_value + "|").size(), metadata_value.end());
396 }
397
398 // Is this metadata value hierarchical?
399 text_t::iterator hierarchy_split_position = findchar(metadata_value.begin(), metadata_value.end(), '|');
400 if (hierarchy_split_position != metadata_value.end())
401 {
402 // Yes, so use the first part of the hierarchy only
403 metadata_value = substr(metadata_value.begin(), hierarchy_split_position);
404 }
405
406 // Create a node for this metadata value if we haven't seen it before
407 if (child_classifier_nodes.find(metadata_value) == child_classifier_nodes.end())
408 {
409 child_classifier_nodes[metadata_value] = 0;
410 }
411
412 // Increment the occurrence count
413 child_classifier_nodes[metadata_value] += (*metadata_value_iterator).result_num;
414
415 metadata_value_iterator++;
416 }
417
418 // Display the child classifier nodes
419 map<text_t, int, lttext_t>::iterator child_classifier_nodes_iterator = child_classifier_nodes.begin();
420 while (child_classifier_nodes_iterator != child_classifier_nodes.end())
421 {
422 text_t child_classifier_node_OID = (*child_classifier_nodes_iterator).first;
423 if (classifier_node_OID != "")
424 {
425 child_classifier_node_OID = classifier_node_OID + "|" + child_classifier_node_OID;
426 }
427 text_t child_classifier_node_label = (*child_classifier_nodes_iterator).first;
428 int child_classifier_node_numleafdocs = (*child_classifier_nodes_iterator).second;
429 display_classifier_node (child_classifier_node_OID, child_classifier_node_label, child_classifier_node_numleafdocs, classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
430 child_classifier_nodes_iterator++;
431 }
432}
433
434
435void dynamicclassifieraction::display_classifier_node (text_t classifier_node_OID, text_t classifier_node_label,
436 text_t classifier_node_numleafdocs, int classifier_node_indent,
437 cgiargsclass &args, recptproto *collectproto,
438 browsermapclass *browsers, displayclass &disp,
439 outconvertclass &outconvert, ostream &textout,
440 ostream &logout)
441{
442 // Generate the ResultDocInfo_t containing the information for the classifier node
443 ResultDocInfo_t classifier_node;
444 classifier_node.OID = classifier_node_OID;
445 classifier_node.metadata["doctype"].values.push_back ("classify");
446 classifier_node.metadata["haschildren"].values.push_back ("1");
447 classifier_node.metadata["numleafdocs"].values.push_back (classifier_node_numleafdocs);
448 classifier_node.metadata["Title"].values.push_back (classifier_node_label);
449
450 // Get the format statement for this classifier if there is one, or use the browser's default otherwise
451 text_t formatstring;
452 text_t classifier_type = "VList";
453 browserclass *bptr = browsers->getbrowser (classifier_type);
454 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
455 if (!get_formatstring (args["dcl"], classifier_type, cinfo->format, formatstring))
456 {
457 formatstring = bptr->get_default_formatstring();
458 }
459 format_t *formatlistptr = new format_t();
460 text_tset metadata;
461 bool getParents = false;
462 parse_formatstring (formatstring, formatlistptr, metadata, getParents);
463 bool use_table = is_table_content (formatlistptr);
464
465 // Display the classifier node
466 bptr->output_section_group (classifier_node, args, args["c"], classifier_node_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
467}
468
469
470void dynamicclassifieraction::display_document_nodes (FilterResponse_t documents_response, int document_nodes_indent,
471 cgiargsclass &args, recptproto *collectproto,
472 browsermapclass *browsers, displayclass &disp,
473 outconvertclass &outconvert, ostream &textout,
474 ostream &logout)
475{
476 // Get the format statement for this classifier if there is one, or use the browser's default otherwise
477 text_t formatstring;
478 text_t classifier_type = "VList";
479 browserclass *bptr = browsers->getbrowser (classifier_type);
480 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
481 if (!get_formatstring (args["dcl"], classifier_type, cinfo->format, formatstring))
482 {
483 formatstring = bptr->get_default_formatstring();
484 }
485 format_t *formatlistptr = new format_t();
486 text_tset metadata;
487 bool getParents = false;
488 parse_formatstring (formatstring, formatlistptr, metadata, getParents);
489 bool use_table = is_table_content (formatlistptr);
490
491 // Request the necessary metadata for displaying the documents
492 text_tarray document_OIDs;
493 ResultDocInfo_tarray::iterator document_iterator = documents_response.docInfo.begin();
494 while (document_iterator != documents_response.docInfo.end())
495 {
496 document_OIDs.push_back ((*document_iterator).OID);
497 document_iterator++;
498 }
499 FilterResponse_t document_nodes_response;
500 get_info (document_OIDs, args["c"], args["l"], metadata, getParents, collectproto, document_nodes_response, logout);
501
502 // Display the document nodes
503 bptr->output_section_group (document_nodes_response, args, args["c"], document_nodes_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
504}
Note: See TracBrowser for help on using the repository browser.