source: gsdl/trunk/src/recpt/dynamicclassifieraction.cpp@ 15999

Last change on this file since 15999 was 15999, checked in by mdewsnip, 16 years ago

(Adding dynamic classifiers) Some modifications in preparation for allowing options to be specified to the dynamic classifiers in the collect.cfg file.

  • Property svn:executable set to *
File size: 17.5 KB
Line 
1/**********************************************************************
2 *
3 * dynamicclassifieraction.cpp --
4 * Copyright (C) 2008 DL Consulting Ltd
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "dynamicclassifieraction.h"
27#include "recptprototools.h"
28
29
30dynamicclassifieraction::dynamicclassifieraction ()
31{
32 recpt = NULL;
33
34 cgiarginfo arg_ainfo;
35 arg_ainfo.shortname = "dcl";
36 arg_ainfo.longname = "dynamic classifier ID";
37 arg_ainfo.multiplechar = true;
38 arg_ainfo.defaultstatus = cgiarginfo::weak;
39 arg_ainfo.argdefault = "";
40 arg_ainfo.savedarginfo = cgiarginfo::must;
41 argsinfo.addarginfo (NULL, arg_ainfo);
42
43 arg_ainfo.shortname = "dcn";
44 arg_ainfo.longname = "dynamic classifier node";
45 arg_ainfo.multiplechar = true;
46 arg_ainfo.defaultstatus = cgiarginfo::weak;
47 arg_ainfo.argdefault = "";
48 arg_ainfo.savedarginfo = cgiarginfo::must;
49 argsinfo.addarginfo (NULL, arg_ainfo);
50}
51
52
53dynamicclassifieraction::~dynamicclassifieraction()
54{
55}
56
57
58bool dynamicclassifieraction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args,
59 recptprotolistclass *protos, ostream &logout)
60{
61 return true;
62}
63
64
65void dynamicclassifieraction::get_cgihead_info (cgiargsclass &args, recptprotolistclass *protos,
66 response_t &response,text_t &response_data,
67 ostream &logout)
68{
69 response = content;
70 response_data = "text/html";
71}
72
73
74// define all the macros which might be used by other actions to produce pages.
75void dynamicclassifieraction::define_external_macros (displayclass &disp, cgiargsclass &args,
76 recptprotolistclass *protos, ostream &logout)
77{
78 // A valid collection server is vital
79 recptproto *collectproto = protos->getrecptproto (args["c"], logout);
80 if (collectproto == NULL)
81 {
82 logout << "dynamicclassifieraction::define_external_macros called with NULL collectproto\n";
83 return;
84 }
85
86 // Define _dynamicclassifiernavbarentries_ to add buttons to the navigation bar for the dynamic classifiers
87 text_t navigation_bar_entries = "";
88 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
89 text_tmap::iterator dynamic_classifier_iterator = cinfo->dynamic_classifiers.begin();
90 while (dynamic_classifier_iterator != cinfo->dynamic_classifiers.end())
91 {
92 text_t dynamic_classifier_id = (*dynamic_classifier_iterator).first;
93 navigation_bar_entries += "_navbarspacer_";
94 navigation_bar_entries += "_navtab_(_gwcgi_?c=" + args["c"] + "&amp;a=dc&amp;dcl=" + dynamic_classifier_id + "," + dynamic_classifier_id;
95 if (args["a"] == "dc" && args["dcl"] == dynamic_classifier_id)
96 {
97 navigation_bar_entries += ",selected";
98 }
99 navigation_bar_entries += ")";
100 dynamic_classifier_iterator++;
101 }
102
103 disp.setmacro("dynamicclassifiernavbarentries", displayclass::defaultpackage, navigation_bar_entries);
104}
105
106
107// define all the macros which are related to pages generated
108// by this action. we also load up the formatinfo structure
109// here (it's used in do_action as well as here)
110void dynamicclassifieraction::define_internal_macros (displayclass &disp, cgiargsclass &args,
111 recptprotolistclass *protos, ostream &logout)
112{
113 // define_internal_macros sets the following macros:
114}
115
116
117bool dynamicclassifieraction::do_action(cgiargsclass &args, recptprotolistclass *protos,
118 browsermapclass *browsers, displayclass &disp,
119 outconvertclass &outconvert, ostream &textout,
120 ostream &logout)
121{
122 // A valid collection server is vital
123 recptproto *collectproto = protos->getrecptproto (args["c"], logout);
124 if (collectproto == NULL)
125 {
126 logout << "dynamicclassifieraction::do_action called with NULL collectproto\n";
127 return false;
128 }
129
130 textout << outconvert << disp << "_dynamicclassifier:header_\n";
131 textout << outconvert << disp << "_dynamicclassifier:content_\n";
132
133 // Check a dynamic classifier ID has been specified
134 text_t arg_dcl = args["dcl"];
135 if (arg_dcl.empty())
136 {
137 textout << outconvert << disp << "Error: Missing dcl argument.\n";
138 textout << outconvert << disp << "_dynamicclassifier:footer_\n";
139 return true;
140 }
141
142 // Check the dynamic classifier ID is valid (ie. there is an entry in the collect.cfg file for it)
143 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
144 if (cinfo->dynamic_classifiers.find(arg_dcl) == cinfo->dynamic_classifiers.end())
145 {
146 textout << outconvert << disp << "Error: Invalid dcl value \"" << arg_dcl << "\".\n";
147 textout << outconvert << disp << "_dynamicclassifier:footer_\n";
148 return true;
149 }
150
151 // Parse the classifier options from the specification
152 text_t classifier_specification = cinfo->dynamic_classifiers[arg_dcl];
153 text_tmap classifier_options;
154
155 // The metadata element to classify by should be left after all the options have been parsed off
156 classifier_options["metadata_element_name"] = classifier_specification;
157
158 // Output the "<ID>Header" format statement if there is one
159 text_t classifier_header_format_statement = "";
160 get_formatstring (arg_dcl + "Header", cinfo->format, classifier_header_format_statement);
161 textout << outconvert << disp << classifier_header_format_statement << "\n";
162
163 // Output the dynamic classifier
164 if (args["dcn"].empty())
165 {
166 // Simple case for the top-level page
167 output_top_level_page (classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
168 }
169 else
170 {
171 // More complex case for an internal page
172 output_internal_page (classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
173 }
174
175 // Output the "<ID>Footer" format statement if there is one
176 text_t classifier_footer_format_statement = "";
177 get_formatstring (arg_dcl + "Footer", cinfo->format, classifier_footer_format_statement);
178 textout << outconvert << disp << classifier_footer_format_statement << "\n";
179
180 textout << outconvert << disp << "_dynamicclassifier:footer_\n";
181 return true;
182}
183
184
185void dynamicclassifieraction::output_top_level_page (text_tmap classifier_options, cgiargsclass &args,
186 recptproto *collectproto, browsermapclass *browsers,
187 displayclass &disp, outconvertclass &outconvert,
188 ostream &textout, ostream &logout)
189{
190 // Get all the metadata values for the specified element (these become the classifier nodes at the top level)
191 text_t metadata_element_name = classifier_options["metadata_element_name"];
192 FilterResponse_t metadata_values_response;
193 get_metadata_values (metadata_element_name, "", args["c"], collectproto, metadata_values_response, logout);
194
195 // Deal with any hierarchical metadata values
196 map<text_t, int, lttext_t> classifier_nodes;
197 ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values_response.docInfo.begin();
198 while (metadata_value_iterator != metadata_values_response.docInfo.end())
199 {
200 // Is this metadata value hierarchical?
201 text_t metadata_value = (*metadata_value_iterator).OID;
202 text_t::iterator hierarchy_split_position = findchar(metadata_value.begin(), metadata_value.end(), '|');
203 if (hierarchy_split_position != metadata_value.end())
204 {
205 // Yes, so use the first part of the hierarchy only
206 metadata_value = substr(metadata_value.begin(), hierarchy_split_position);
207 }
208
209 // Create a node for this metadata value if we haven't seen it before
210 if (classifier_nodes.find(metadata_value) == classifier_nodes.end())
211 {
212 classifier_nodes[metadata_value] = 0;
213 }
214
215 // Increment the occurrence count
216 classifier_nodes[metadata_value] += (*metadata_value_iterator).result_num;
217
218 metadata_value_iterator++;
219 }
220
221 // Display the top-level classifier nodes
222 map<text_t, int, lttext_t>::iterator classifier_nodes_iterator = classifier_nodes.begin();
223 while (classifier_nodes_iterator != classifier_nodes.end())
224 {
225 text_t classifier_node_OID = (*classifier_nodes_iterator).first;
226 text_t classifier_node_label = (*classifier_nodes_iterator).first;
227 int classifier_node_numleafdocs = (*classifier_nodes_iterator).second;
228 output_classifier_node (classifier_node_OID, classifier_node_label, classifier_node_numleafdocs, 0, args, collectproto, browsers, disp, outconvert, textout, logout);
229 classifier_nodes_iterator++;
230 }
231}
232
233
234void dynamicclassifieraction::output_internal_page (text_tmap classifier_options, cgiargsclass &args,
235 recptproto *collectproto, browsermapclass *browsers,
236 displayclass &disp, outconvertclass &outconvert,
237 ostream &textout, ostream &logout)
238{
239 text_t arg_dcn = args["dcn"];
240 text_t classifier_node_OID = arg_dcn;
241 text_t classifier_node_metadata_value = arg_dcn;
242 int node_indent = 0;
243
244 // Get all the classifier nodes at this level
245 text_t metadata_element_name = classifier_options["metadata_element_name"];
246 text_t metadata_value_filter = classifier_node_metadata_value + "|*";
247 FilterResponse_t metadata_values_response;
248 get_metadata_values (metadata_element_name, metadata_value_filter, args["c"], collectproto, metadata_values_response, logout);
249
250 // Get all the documents at this level
251 FilterResponse_t document_OIDs_response;
252 get_documents_with_metadata_value (metadata_element_name, classifier_node_metadata_value, "dls.Title", args["c"], collectproto, document_OIDs_response, logout);
253
254 // Check there are some classifier nodes or some documents at this level, otherwise the "dcn" argument was invalid
255 if (metadata_values_response.docInfo.empty() && document_OIDs_response.docInfo.empty())
256 {
257 textout << outconvert << disp << "Error: Invalid dcn value \"" << arg_dcn << "\".\n";
258 return;
259 }
260
261 // Determine the parent classifier nodes
262 text_tarray parent_classifier_node_labels;
263 splitchar(classifier_node_OID.begin(), classifier_node_OID.end(), '|', parent_classifier_node_labels);
264 text_t classifier_node_label = parent_classifier_node_labels.back();
265 parent_classifier_node_labels.pop_back();
266
267 // Display the parent classifier nodes
268 text_t parent_classifier_node_OID = "";
269 text_tarray::iterator parent_classifier_node_labels_iterator = parent_classifier_node_labels.begin();
270 while (parent_classifier_node_labels_iterator != parent_classifier_node_labels.end())
271 {
272 parent_classifier_node_OID += (parent_classifier_node_OID != "" ? "|" : "");
273 parent_classifier_node_OID += *parent_classifier_node_labels_iterator;
274 text_t parent_classifier_node_label = *parent_classifier_node_labels_iterator;
275 text_t parent_classifier_node_numleafdocs = "?"; // We can't determine this without more database requests
276 output_classifier_node (parent_classifier_node_OID, parent_classifier_node_label, parent_classifier_node_numleafdocs, node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
277 node_indent++;
278
279 parent_classifier_node_labels_iterator++;
280 }
281
282 // Display the selected classifier node
283 int classifier_node_numleafdocs = metadata_values_response.docInfo.size() + document_OIDs_response.docInfo.size();
284 output_classifier_node (classifier_node_OID, classifier_node_label, classifier_node_numleafdocs, node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
285 node_indent++;
286
287 // Determine the child classifier nodes
288 map<text_t, int, lttext_t> child_classifier_nodes;
289 ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values_response.docInfo.begin();
290 while (metadata_value_iterator != metadata_values_response.docInfo.end())
291 {
292 text_t metadata_value = (*metadata_value_iterator).OID;
293
294 // If the classifier is hierarchical we need to remove the current position from the metadata value
295 if (starts_with(metadata_value, arg_dcn + "|"))
296 {
297 metadata_value = substr(metadata_value.begin() + (arg_dcn + "|").size(), metadata_value.end());
298 }
299
300 // Is this metadata value hierarchical?
301 text_t::iterator hierarchy_split_position = findchar(metadata_value.begin(), metadata_value.end(), '|');
302 if (hierarchy_split_position != metadata_value.end())
303 {
304 // Yes, so split off the first part of the hierarchy for the classifier node
305 metadata_value = substr(metadata_value.begin(), hierarchy_split_position);
306 }
307
308 // Create a node for this metadata value if we haven't seen it before
309 if (child_classifier_nodes.find(metadata_value) == child_classifier_nodes.end())
310 {
311 child_classifier_nodes[metadata_value] = 0;
312 }
313
314 // Increment the occurrence count
315 child_classifier_nodes[metadata_value] += (*metadata_value_iterator).result_num;
316
317 metadata_value_iterator++;
318 }
319
320 // Display the child classifier nodes
321 map<text_t, int, lttext_t>::iterator child_classifier_nodes_iterator = child_classifier_nodes.begin();
322 while (child_classifier_nodes_iterator != child_classifier_nodes.end())
323 {
324 text_t child_classifier_node_OID = classifier_node_OID + "|" + (*child_classifier_nodes_iterator).first;
325 text_t child_classifier_node_label = (*child_classifier_nodes_iterator).first;
326 int child_classifier_node_numleafdocs = (*child_classifier_nodes_iterator).second;
327 output_classifier_node (child_classifier_node_OID, child_classifier_node_label, child_classifier_node_numleafdocs, node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
328 child_classifier_nodes_iterator++;
329 }
330
331 // Display the documents at this level
332 text_tarray document_OIDs;
333 ResultDocInfo_tarray::iterator document_OID_iterator = document_OIDs_response.docInfo.begin();
334 while (document_OID_iterator != document_OIDs_response.docInfo.end())
335 {
336 document_OIDs.push_back ((*document_OID_iterator).OID);
337 document_OID_iterator++;
338 }
339
340 output_document_nodes (document_OIDs, node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
341}
342
343
344void dynamicclassifieraction::output_classifier_node (text_t classifier_node_OID, text_t classifier_node_label,
345 text_t classifier_node_numleafdocs, int classifier_node_indent,
346 cgiargsclass &args, recptproto *collectproto,
347 browsermapclass *browsers, displayclass &disp,
348 outconvertclass &outconvert, ostream &textout,
349 ostream &logout)
350{
351 // Generate the ResultDocInfo_t containing the information for the classifier node
352 ResultDocInfo_t classifier_node;
353 classifier_node.OID = classifier_node_OID;
354 classifier_node.metadata["doctype"].values.push_back ("classify");
355 classifier_node.metadata["haschildren"].values.push_back ("1");
356 classifier_node.metadata["numleafdocs"].values.push_back (classifier_node_numleafdocs);
357 classifier_node.metadata["Title"].values.push_back (classifier_node_label);
358
359 // Get the format statement for this classifier if there is one, or use the browser's default otherwise
360 text_t formatstring;
361 text_t classifier_type = "VList";
362 browserclass *bptr = browsers->getbrowser (classifier_type);
363 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
364 if (!get_formatstring (args["dcl"], classifier_type, cinfo->format, formatstring))
365 {
366 formatstring = bptr->get_default_formatstring();
367 }
368 format_t *formatlistptr = new format_t();
369 text_tset metadata;
370 bool getParents = false;
371 parse_formatstring (formatstring, formatlistptr, metadata, getParents);
372 bool use_table = is_table_content (formatlistptr);
373
374 // Display the classifier node
375 bptr->output_section_group (classifier_node, args, args["c"], classifier_node_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
376}
377
378
379void dynamicclassifieraction::output_document_nodes (text_tarray document_OIDs, int document_nodes_indent,
380 cgiargsclass &args, recptproto *collectproto,
381 browsermapclass *browsers, displayclass &disp,
382 outconvertclass &outconvert, ostream &textout,
383 ostream &logout)
384{
385 // Get the format statement for this classifier if there is one, or use the browser's default otherwise
386 text_t formatstring;
387 text_t classifier_type = "VList";
388 browserclass *bptr = browsers->getbrowser (classifier_type);
389 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
390 if (!get_formatstring (args["dcl"], classifier_type, cinfo->format, formatstring))
391 {
392 formatstring = bptr->get_default_formatstring();
393 }
394 format_t *formatlistptr = new format_t();
395 text_tset metadata;
396 bool getParents = false;
397 parse_formatstring (formatstring, formatlistptr, metadata, getParents);
398 bool use_table = is_table_content (formatlistptr);
399
400 // Request the necessary metadata for the documents
401 FilterResponse_t document_OIDs_response;
402 get_info (document_OIDs, args["c"], args["l"], metadata, getParents, collectproto, document_OIDs_response, logout);
403
404 // Display the document nodes
405 bptr->output_section_group (document_OIDs_response, args, args["c"], document_nodes_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
406}
Note: See TracBrowser for help on using the repository browser.