source: gsdl/trunk/src/recpt/dynamicclassifieraction.cpp@ 16036

Last change on this file since 16036 was 16035, checked in by mdewsnip, 16 years ago

(Adding dynamic classifiers) Added support for the "-sort_documents_by" option.

  • Property svn:executable set to *
File size: 19.3 KB
Line 
1/**********************************************************************
2 *
3 * dynamicclassifieraction.cpp --
4 * Copyright (C) 2008 DL Consulting Ltd
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "dynamicclassifieraction.h"
27#include "recptprototools.h"
28
29
30dynamicclassifieraction::dynamicclassifieraction ()
31{
32 recpt = NULL;
33
34 cgiarginfo arg_ainfo;
35 arg_ainfo.shortname = "dcl";
36 arg_ainfo.longname = "dynamic classifier ID";
37 arg_ainfo.multiplechar = true;
38 arg_ainfo.defaultstatus = cgiarginfo::weak;
39 arg_ainfo.argdefault = "";
40 arg_ainfo.savedarginfo = cgiarginfo::must;
41 argsinfo.addarginfo (NULL, arg_ainfo);
42
43 arg_ainfo.shortname = "dcn";
44 arg_ainfo.longname = "dynamic classifier node";
45 arg_ainfo.multiplechar = true;
46 arg_ainfo.defaultstatus = cgiarginfo::weak;
47 arg_ainfo.argdefault = "";
48 arg_ainfo.savedarginfo = cgiarginfo::must;
49 argsinfo.addarginfo (NULL, arg_ainfo);
50}
51
52
53dynamicclassifieraction::~dynamicclassifieraction()
54{
55}
56
57
58bool dynamicclassifieraction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args,
59 recptprotolistclass *protos, ostream &logout)
60{
61 return true;
62}
63
64
65void dynamicclassifieraction::get_cgihead_info (cgiargsclass &args, recptprotolistclass *protos,
66 response_t &response,text_t &response_data,
67 ostream &logout)
68{
69 response = content;
70 response_data = "text/html";
71}
72
73
74// define all the macros which might be used by other actions to produce pages.
75void dynamicclassifieraction::define_external_macros (displayclass &disp, cgiargsclass &args,
76 recptprotolistclass *protos, ostream &logout)
77{
78 // A valid collection server is vital
79 recptproto *collectproto = protos->getrecptproto (args["c"], logout);
80 if (collectproto == NULL)
81 {
82 logout << "dynamicclassifieraction::define_external_macros called with NULL collectproto\n";
83 return;
84 }
85
86 // Define _dynamicclassifiernavbarentries_ to add buttons to the navigation bar for the dynamic classifiers
87 text_t navigation_bar_entries = "";
88 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
89 text_tmap::iterator dynamic_classifier_iterator = cinfo->dynamic_classifiers.begin();
90 while (dynamic_classifier_iterator != cinfo->dynamic_classifiers.end())
91 {
92 text_t dynamic_classifier_id = (*dynamic_classifier_iterator).first;
93 navigation_bar_entries += "_navbarspacer_";
94 navigation_bar_entries += "_navtab_(_gwcgi_?c=" + args["c"] + "&amp;a=dc&amp;dcl=" + dynamic_classifier_id + "," + dynamic_classifier_id;
95 if (args["a"] == "dc" && args["dcl"] == dynamic_classifier_id)
96 {
97 navigation_bar_entries += ",selected";
98 }
99 navigation_bar_entries += ")";
100 dynamic_classifier_iterator++;
101 }
102
103 disp.setmacro("dynamicclassifiernavbarentries", displayclass::defaultpackage, navigation_bar_entries);
104}
105
106
107// define all the macros which are related to pages generated
108// by this action. we also load up the formatinfo structure
109// here (it's used in do_action as well as here)
110void dynamicclassifieraction::define_internal_macros (displayclass &disp, cgiargsclass &args,
111 recptprotolistclass *protos, ostream &logout)
112{
113 // define_internal_macros sets the following macros:
114}
115
116
117bool dynamicclassifieraction::do_action(cgiargsclass &args, recptprotolistclass *protos,
118 browsermapclass *browsers, displayclass &disp,
119 outconvertclass &outconvert, ostream &textout,
120 ostream &logout)
121{
122 // A valid collection server is vital
123 recptproto *collectproto = protos->getrecptproto (args["c"], logout);
124 if (collectproto == NULL)
125 {
126 logout << "dynamicclassifieraction::do_action called with NULL collectproto\n";
127 return false;
128 }
129
130 textout << outconvert << disp << "_dynamicclassifier:header_\n";
131 textout << outconvert << disp << "_dynamicclassifier:content_\n";
132
133 // Check a dynamic classifier ID has been specified
134 text_t arg_dcl = args["dcl"];
135 if (arg_dcl.empty())
136 {
137 textout << outconvert << disp << "Error: Missing dcl argument.\n";
138 textout << outconvert << disp << "_dynamicclassifier:footer_\n";
139 return true;
140 }
141
142 // Check the dynamic classifier ID is valid (ie. there is an entry in the collect.cfg file for it)
143 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
144 if (cinfo->dynamic_classifiers.find(arg_dcl) == cinfo->dynamic_classifiers.end())
145 {
146 textout << outconvert << disp << "Error: Invalid dcl value \"" << arg_dcl << "\".\n";
147 textout << outconvert << disp << "_dynamicclassifier:footer_\n";
148 return true;
149 }
150
151 // Parse the classifier options from the specification
152 text_t classifier_specification = cinfo->dynamic_classifiers[arg_dcl];
153 text_tmap classifier_options = parse_classifier_options (classifier_specification, args);
154
155 // Output the "<ID>Header" format statement if there is one
156 text_t classifier_header_format_statement = "";
157 get_formatstring (arg_dcl + "Header", cinfo->format, classifier_header_format_statement);
158 textout << outconvert << disp << classifier_header_format_statement << "\n";
159
160 // Output the dynamic classifier
161 if (args["dcn"].empty())
162 {
163 // Simple case for the top-level page
164 output_top_level_page (classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
165 }
166 else
167 {
168 // More complex case for an internal page
169 output_internal_page (classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
170 }
171
172 // Output the "<ID>Footer" format statement if there is one
173 text_t classifier_footer_format_statement = "";
174 get_formatstring (arg_dcl + "Footer", cinfo->format, classifier_footer_format_statement);
175 textout << outconvert << disp << classifier_footer_format_statement << "\n";
176
177 textout << outconvert << disp << "_dynamicclassifier:footer_\n";
178 return true;
179}
180
181
182text_tmap dynamicclassifieraction::parse_classifier_options (text_t classifier_specification, cgiargsclass &args)
183{
184 text_tmap classifier_options;
185 classifier_options["current_position"] = args["dcn"];
186
187 // Split the classifier specification string by spaces
188 text_tlist classifier_specification_parts;
189 splitchar (classifier_specification.begin(), classifier_specification.end(), ' ', classifier_specification_parts);
190
191 // The metadata element to classify by should be the first value
192 classifier_options["metadata_element_name"] = classifier_specification_parts.front();
193 classifier_specification_parts.pop_front();
194
195 // Parse options from the remainder of the classifier specification
196 while (!classifier_specification_parts.empty())
197 {
198 // Parse the option name
199 text_t classifier_option_name = classifier_specification_parts.front();
200 classifier_specification_parts.pop_front();
201
202 // Check if the option has a value (it may just be a flag, in which case we use "1" as the value)
203 text_t classifier_option_value = "1";
204 if (!classifier_specification_parts.empty() && !starts_with(classifier_specification_parts.front(), "-"))
205 {
206 classifier_option_value = classifier_specification_parts.front();
207 classifier_specification_parts.pop_front();
208 }
209
210 // Record the option
211 classifier_options[classifier_option_name] = classifier_option_value;
212 }
213
214 return classifier_options;
215}
216
217
218void dynamicclassifieraction::output_top_level_page (text_tmap classifier_options, cgiargsclass &args,
219 recptproto *collectproto, browsermapclass *browsers,
220 displayclass &disp, outconvertclass &outconvert,
221 ostream &textout, ostream &logout)
222{
223 // Get all the metadata values for the specified element (these become the classifier nodes at the top level)
224 text_t metadata_element_name = classifier_options["metadata_element_name"];
225 FilterResponse_t metadata_values_response;
226 bool request_success = get_metadata_values (metadata_element_name, "", args["c"], collectproto, metadata_values_response, logout);
227
228 // If the request failed then it's probably because the collection isn't using an SQL infodbtype
229 if (request_success == false)
230 {
231 textout << outconvert << disp << "Error: Dynamic classifier functionality is not available. Please check you are using an SQL infodbtype and the collection has been rebuilt.\n";
232 return;
233 }
234
235 // After processing any hierarchical metadata values, we're left with the top-level classifer nodes
236 map<text_t, int, lttext_t> classifier_nodes = process_metadata_values (classifier_options, metadata_values_response.docInfo);
237
238 // Display the top-level classifier nodes
239 map<text_t, int, lttext_t>::iterator classifier_nodes_iterator = classifier_nodes.begin();
240 while (classifier_nodes_iterator != classifier_nodes.end())
241 {
242 text_t classifier_node_OID = (*classifier_nodes_iterator).first;
243 text_t classifier_node_label = (*classifier_nodes_iterator).first;
244 int classifier_node_numleafdocs = (*classifier_nodes_iterator).second;
245 output_classifier_node (classifier_node_OID, classifier_node_label, classifier_node_numleafdocs, 0, args, collectproto, browsers, disp, outconvert, textout, logout);
246 classifier_nodes_iterator++;
247 }
248}
249
250
251void dynamicclassifieraction::output_internal_page (text_tmap classifier_options, cgiargsclass &args,
252 recptproto *collectproto, browsermapclass *browsers,
253 displayclass &disp, outconvertclass &outconvert,
254 ostream &textout, ostream &logout)
255{
256 text_t classifier_node_OID = classifier_options["current_position"];
257 text_t classifier_node_metadata_value = classifier_options["current_position"];
258 int node_indent = 0;
259
260 // Get all the classifier nodes at this level
261 text_t metadata_element_name = classifier_options["metadata_element_name"];
262 text_t metadata_value_filter = classifier_node_metadata_value + "|*";
263 FilterResponse_t metadata_values_response;
264 bool request_success = get_metadata_values (metadata_element_name, metadata_value_filter, args["c"], collectproto, metadata_values_response, logout);
265
266 // If the request failed then it's probably because the collection isn't using an SQL infodbtype
267 if (request_success == false)
268 {
269 textout << outconvert << disp << "Error: Dynamic classifier functionality is not available. Please check you are using an SQL infodbtype and the collection has been rebuilt.\n";
270 return;
271 }
272
273 // Get all the documents at this level
274 text_t sort_documents_by = "";
275 if (classifier_options.find("-sort_documents_by") != classifier_options.end())
276 {
277 sort_documents_by = classifier_options["-sort_documents_by"];
278 }
279 FilterResponse_t documents_response;
280 get_documents_with_metadata_value (metadata_element_name, classifier_node_metadata_value, sort_documents_by, args["c"], collectproto, documents_response, logout);
281
282 // If there are no classifier nodes or documents at this level then the classifier node value is invalid
283 if (metadata_values_response.docInfo.empty() && documents_response.docInfo.empty())
284 {
285 textout << outconvert << disp << "Error: Invalid classifier node \"" << classifier_node_OID << "\".\n";
286 return;
287 }
288
289 // Determine the parent classifier nodes
290 text_tarray parent_classifier_node_labels;
291 splitchar(classifier_node_OID.begin(), classifier_node_OID.end(), '|', parent_classifier_node_labels);
292 text_t classifier_node_label = parent_classifier_node_labels.back();
293 parent_classifier_node_labels.pop_back();
294
295 // Display the parent classifier nodes
296 text_t parent_classifier_node_OID = "";
297 text_tarray::iterator parent_classifier_node_labels_iterator = parent_classifier_node_labels.begin();
298 while (parent_classifier_node_labels_iterator != parent_classifier_node_labels.end())
299 {
300 parent_classifier_node_OID += (parent_classifier_node_OID != "" ? "|" : "");
301 parent_classifier_node_OID += *parent_classifier_node_labels_iterator;
302 text_t parent_classifier_node_label = *parent_classifier_node_labels_iterator;
303 text_t parent_classifier_node_numleafdocs = "?"; // We can't determine this without more database requests
304 output_classifier_node (parent_classifier_node_OID, parent_classifier_node_label, parent_classifier_node_numleafdocs, node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
305 node_indent++;
306
307 parent_classifier_node_labels_iterator++;
308 }
309
310 // Display the selected classifier node
311 int classifier_node_numleafdocs = metadata_values_response.docInfo.size() + documents_response.docInfo.size();
312 output_classifier_node (classifier_node_OID, classifier_node_label, classifier_node_numleafdocs, node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
313 node_indent++;
314
315 // After processing any hierarchical metadata values, we're left with the child classifer nodes
316 map<text_t, int, lttext_t> child_classifier_nodes = process_metadata_values (classifier_options, metadata_values_response.docInfo);
317
318 // Display the child classifier nodes
319 map<text_t, int, lttext_t>::iterator child_classifier_nodes_iterator = child_classifier_nodes.begin();
320 while (child_classifier_nodes_iterator != child_classifier_nodes.end())
321 {
322 text_t child_classifier_node_OID = classifier_node_OID + "|" + (*child_classifier_nodes_iterator).first;
323 text_t child_classifier_node_label = (*child_classifier_nodes_iterator).first;
324 int child_classifier_node_numleafdocs = (*child_classifier_nodes_iterator).second;
325 output_classifier_node (child_classifier_node_OID, child_classifier_node_label, child_classifier_node_numleafdocs, node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
326 child_classifier_nodes_iterator++;
327 }
328
329 // Display the documents at this level
330 output_document_nodes (documents_response, node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
331}
332
333
334map<text_t, int, lttext_t> dynamicclassifieraction::process_metadata_values (text_tmap classifier_options,
335 ResultDocInfo_tarray metadata_values)
336{
337 map<text_t, int, lttext_t> metadata_values_grouped;
338
339 text_t current_position = classifier_options["current_position"];
340 ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values.begin();
341 while (metadata_value_iterator != metadata_values.end())
342 {
343 text_t metadata_value = (*metadata_value_iterator).OID;
344
345 // If we're not at the top-level we need to remove the current position from the metadata values
346 if (current_position != "" && starts_with(metadata_value, current_position + "|"))
347 {
348 metadata_value = substr(metadata_value.begin() + (current_position + "|").size(), metadata_value.end());
349 }
350
351 // Is this metadata value hierarchical?
352 text_t::iterator hierarchy_split_position = findchar(metadata_value.begin(), metadata_value.end(), '|');
353 if (hierarchy_split_position != metadata_value.end())
354 {
355 // Yes, so use the first part of the hierarchy only
356 metadata_value = substr(metadata_value.begin(), hierarchy_split_position);
357 }
358
359 // Create a node for this metadata value if we haven't seen it before
360 if (metadata_values_grouped.find(metadata_value) == metadata_values_grouped.end())
361 {
362 metadata_values_grouped[metadata_value] = 0;
363 }
364
365 // Increment the occurrence count
366 metadata_values_grouped[metadata_value] += (*metadata_value_iterator).result_num;
367
368 metadata_value_iterator++;
369 }
370
371 return metadata_values_grouped;
372}
373
374
375void dynamicclassifieraction::output_classifier_node (text_t classifier_node_OID, text_t classifier_node_label,
376 text_t classifier_node_numleafdocs, int classifier_node_indent,
377 cgiargsclass &args, recptproto *collectproto,
378 browsermapclass *browsers, displayclass &disp,
379 outconvertclass &outconvert, ostream &textout,
380 ostream &logout)
381{
382 // Generate the ResultDocInfo_t containing the information for the classifier node
383 ResultDocInfo_t classifier_node;
384 classifier_node.OID = classifier_node_OID;
385 classifier_node.metadata["doctype"].values.push_back ("classify");
386 classifier_node.metadata["haschildren"].values.push_back ("1");
387 classifier_node.metadata["numleafdocs"].values.push_back (classifier_node_numleafdocs);
388 classifier_node.metadata["Title"].values.push_back (classifier_node_label);
389
390 // Get the format statement for this classifier if there is one, or use the browser's default otherwise
391 text_t formatstring;
392 text_t classifier_type = "VList";
393 browserclass *bptr = browsers->getbrowser (classifier_type);
394 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
395 if (!get_formatstring (args["dcl"], classifier_type, cinfo->format, formatstring))
396 {
397 formatstring = bptr->get_default_formatstring();
398 }
399 format_t *formatlistptr = new format_t();
400 text_tset metadata;
401 bool getParents = false;
402 parse_formatstring (formatstring, formatlistptr, metadata, getParents);
403 bool use_table = is_table_content (formatlistptr);
404
405 // Display the classifier node
406 bptr->output_section_group (classifier_node, args, args["c"], classifier_node_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
407}
408
409
410void dynamicclassifieraction::output_document_nodes (FilterResponse_t documents_response, int document_nodes_indent,
411 cgiargsclass &args, recptproto *collectproto,
412 browsermapclass *browsers, displayclass &disp,
413 outconvertclass &outconvert, ostream &textout,
414 ostream &logout)
415{
416 // Get the format statement for this classifier if there is one, or use the browser's default otherwise
417 text_t formatstring;
418 text_t classifier_type = "VList";
419 browserclass *bptr = browsers->getbrowser (classifier_type);
420 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
421 if (!get_formatstring (args["dcl"], classifier_type, cinfo->format, formatstring))
422 {
423 formatstring = bptr->get_default_formatstring();
424 }
425 format_t *formatlistptr = new format_t();
426 text_tset metadata;
427 bool getParents = false;
428 parse_formatstring (formatstring, formatlistptr, metadata, getParents);
429 bool use_table = is_table_content (formatlistptr);
430
431 // Request the necessary metadata for displaying the documents
432 text_tarray document_OIDs;
433 ResultDocInfo_tarray::iterator document_iterator = documents_response.docInfo.begin();
434 while (document_iterator != documents_response.docInfo.end())
435 {
436 document_OIDs.push_back ((*document_iterator).OID);
437 document_iterator++;
438 }
439 FilterResponse_t document_nodes_response;
440 get_info (document_OIDs, args["c"], args["l"], metadata, getParents, collectproto, document_nodes_response, logout);
441
442 // Display the document nodes
443 bptr->output_section_group (document_nodes_response, args, args["c"], document_nodes_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
444}
Note: See TracBrowser for help on using the repository browser.