source: gsdl/trunk/src/recpt/dynamicclassifieraction.cpp@ 16004

Last change on this file since 16004 was 16004, checked in by mdewsnip, 16 years ago

(Adding dynamic classifiers) Moved some common code out of output_top_level_page() and output_internal_page() into a new process_metadata_values() function, in preparation for adding special date support.

  • Property svn:executable set to *
File size: 17.1 KB
Line 
1/**********************************************************************
2 *
3 * dynamicclassifieraction.cpp --
4 * Copyright (C) 2008 DL Consulting Ltd
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "dynamicclassifieraction.h"
27#include "recptprototools.h"
28
29
30dynamicclassifieraction::dynamicclassifieraction ()
31{
32 recpt = NULL;
33
34 cgiarginfo arg_ainfo;
35 arg_ainfo.shortname = "dcl";
36 arg_ainfo.longname = "dynamic classifier ID";
37 arg_ainfo.multiplechar = true;
38 arg_ainfo.defaultstatus = cgiarginfo::weak;
39 arg_ainfo.argdefault = "";
40 arg_ainfo.savedarginfo = cgiarginfo::must;
41 argsinfo.addarginfo (NULL, arg_ainfo);
42
43 arg_ainfo.shortname = "dcn";
44 arg_ainfo.longname = "dynamic classifier node";
45 arg_ainfo.multiplechar = true;
46 arg_ainfo.defaultstatus = cgiarginfo::weak;
47 arg_ainfo.argdefault = "";
48 arg_ainfo.savedarginfo = cgiarginfo::must;
49 argsinfo.addarginfo (NULL, arg_ainfo);
50}
51
52
53dynamicclassifieraction::~dynamicclassifieraction()
54{
55}
56
57
58bool dynamicclassifieraction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args,
59 recptprotolistclass *protos, ostream &logout)
60{
61 return true;
62}
63
64
65void dynamicclassifieraction::get_cgihead_info (cgiargsclass &args, recptprotolistclass *protos,
66 response_t &response,text_t &response_data,
67 ostream &logout)
68{
69 response = content;
70 response_data = "text/html";
71}
72
73
74// define all the macros which might be used by other actions to produce pages.
75void dynamicclassifieraction::define_external_macros (displayclass &disp, cgiargsclass &args,
76 recptprotolistclass *protos, ostream &logout)
77{
78 // A valid collection server is vital
79 recptproto *collectproto = protos->getrecptproto (args["c"], logout);
80 if (collectproto == NULL)
81 {
82 logout << "dynamicclassifieraction::define_external_macros called with NULL collectproto\n";
83 return;
84 }
85
86 // Define _dynamicclassifiernavbarentries_ to add buttons to the navigation bar for the dynamic classifiers
87 text_t navigation_bar_entries = "";
88 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
89 text_tmap::iterator dynamic_classifier_iterator = cinfo->dynamic_classifiers.begin();
90 while (dynamic_classifier_iterator != cinfo->dynamic_classifiers.end())
91 {
92 text_t dynamic_classifier_id = (*dynamic_classifier_iterator).first;
93 navigation_bar_entries += "_navbarspacer_";
94 navigation_bar_entries += "_navtab_(_gwcgi_?c=" + args["c"] + "&amp;a=dc&amp;dcl=" + dynamic_classifier_id + "," + dynamic_classifier_id;
95 if (args["a"] == "dc" && args["dcl"] == dynamic_classifier_id)
96 {
97 navigation_bar_entries += ",selected";
98 }
99 navigation_bar_entries += ")";
100 dynamic_classifier_iterator++;
101 }
102
103 disp.setmacro("dynamicclassifiernavbarentries", displayclass::defaultpackage, navigation_bar_entries);
104}
105
106
107// define all the macros which are related to pages generated
108// by this action. we also load up the formatinfo structure
109// here (it's used in do_action as well as here)
110void dynamicclassifieraction::define_internal_macros (displayclass &disp, cgiargsclass &args,
111 recptprotolistclass *protos, ostream &logout)
112{
113 // define_internal_macros sets the following macros:
114}
115
116
117bool dynamicclassifieraction::do_action(cgiargsclass &args, recptprotolistclass *protos,
118 browsermapclass *browsers, displayclass &disp,
119 outconvertclass &outconvert, ostream &textout,
120 ostream &logout)
121{
122 // A valid collection server is vital
123 recptproto *collectproto = protos->getrecptproto (args["c"], logout);
124 if (collectproto == NULL)
125 {
126 logout << "dynamicclassifieraction::do_action called with NULL collectproto\n";
127 return false;
128 }
129
130 textout << outconvert << disp << "_dynamicclassifier:header_\n";
131 textout << outconvert << disp << "_dynamicclassifier:content_\n";
132
133 // Check a dynamic classifier ID has been specified
134 text_t arg_dcl = args["dcl"];
135 if (arg_dcl.empty())
136 {
137 textout << outconvert << disp << "Error: Missing dcl argument.\n";
138 textout << outconvert << disp << "_dynamicclassifier:footer_\n";
139 return true;
140 }
141
142 // Check the dynamic classifier ID is valid (ie. there is an entry in the collect.cfg file for it)
143 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
144 if (cinfo->dynamic_classifiers.find(arg_dcl) == cinfo->dynamic_classifiers.end())
145 {
146 textout << outconvert << disp << "Error: Invalid dcl value \"" << arg_dcl << "\".\n";
147 textout << outconvert << disp << "_dynamicclassifier:footer_\n";
148 return true;
149 }
150
151 // Parse the classifier options from the specification
152 text_t classifier_specification = cinfo->dynamic_classifiers[arg_dcl];
153 text_tmap classifier_options;
154
155 // The metadata element to classify by should be left after all the options have been parsed off
156 classifier_options["metadata_element_name"] = classifier_specification;
157 classifier_options["current_position"] = args["dcn"];
158
159 // Output the "<ID>Header" format statement if there is one
160 text_t classifier_header_format_statement = "";
161 get_formatstring (arg_dcl + "Header", cinfo->format, classifier_header_format_statement);
162 textout << outconvert << disp << classifier_header_format_statement << "\n";
163
164 // Output the dynamic classifier
165 if (args["dcn"].empty())
166 {
167 // Simple case for the top-level page
168 output_top_level_page (classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
169 }
170 else
171 {
172 // More complex case for an internal page
173 output_internal_page (classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
174 }
175
176 // Output the "<ID>Footer" format statement if there is one
177 text_t classifier_footer_format_statement = "";
178 get_formatstring (arg_dcl + "Footer", cinfo->format, classifier_footer_format_statement);
179 textout << outconvert << disp << classifier_footer_format_statement << "\n";
180
181 textout << outconvert << disp << "_dynamicclassifier:footer_\n";
182 return true;
183}
184
185
186void dynamicclassifieraction::output_top_level_page (text_tmap classifier_options, cgiargsclass &args,
187 recptproto *collectproto, browsermapclass *browsers,
188 displayclass &disp, outconvertclass &outconvert,
189 ostream &textout, ostream &logout)
190{
191 // Get all the metadata values for the specified element (these become the classifier nodes at the top level)
192 text_t metadata_element_name = classifier_options["metadata_element_name"];
193 FilterResponse_t metadata_values_response;
194 get_metadata_values (metadata_element_name, "", args["c"], collectproto, metadata_values_response, logout);
195
196 // After processing any hierarchical metadata values, we're left with the top-level classifer nodes
197 map<text_t, int, lttext_t> classifier_nodes = process_metadata_values (classifier_options, metadata_values_response.docInfo);
198
199 // Display the top-level classifier nodes
200 map<text_t, int, lttext_t>::iterator classifier_nodes_iterator = classifier_nodes.begin();
201 while (classifier_nodes_iterator != classifier_nodes.end())
202 {
203 text_t classifier_node_OID = (*classifier_nodes_iterator).first;
204 text_t classifier_node_label = (*classifier_nodes_iterator).first;
205 int classifier_node_numleafdocs = (*classifier_nodes_iterator).second;
206 output_classifier_node (classifier_node_OID, classifier_node_label, classifier_node_numleafdocs, 0, args, collectproto, browsers, disp, outconvert, textout, logout);
207 classifier_nodes_iterator++;
208 }
209}
210
211
212void dynamicclassifieraction::output_internal_page (text_tmap classifier_options, cgiargsclass &args,
213 recptproto *collectproto, browsermapclass *browsers,
214 displayclass &disp, outconvertclass &outconvert,
215 ostream &textout, ostream &logout)
216{
217 text_t arg_dcn = args["dcn"];
218 text_t classifier_node_OID = arg_dcn;
219 text_t classifier_node_metadata_value = arg_dcn;
220 int node_indent = 0;
221
222 // Get all the classifier nodes at this level
223 text_t metadata_element_name = classifier_options["metadata_element_name"];
224 text_t metadata_value_filter = classifier_node_metadata_value + "|*";
225 FilterResponse_t metadata_values_response;
226 get_metadata_values (metadata_element_name, metadata_value_filter, args["c"], collectproto, metadata_values_response, logout);
227
228 // Get all the documents at this level
229 FilterResponse_t document_OIDs_response;
230 get_documents_with_metadata_value (metadata_element_name, classifier_node_metadata_value, "dls.Title", args["c"], collectproto, document_OIDs_response, logout);
231
232 // Check there are some classifier nodes or some documents at this level, otherwise the "dcn" argument was invalid
233 if (metadata_values_response.docInfo.empty() && document_OIDs_response.docInfo.empty())
234 {
235 textout << outconvert << disp << "Error: Invalid dcn value \"" << arg_dcn << "\".\n";
236 return;
237 }
238
239 // Determine the parent classifier nodes
240 text_tarray parent_classifier_node_labels;
241 splitchar(classifier_node_OID.begin(), classifier_node_OID.end(), '|', parent_classifier_node_labels);
242 text_t classifier_node_label = parent_classifier_node_labels.back();
243 parent_classifier_node_labels.pop_back();
244
245 // Display the parent classifier nodes
246 text_t parent_classifier_node_OID = "";
247 text_tarray::iterator parent_classifier_node_labels_iterator = parent_classifier_node_labels.begin();
248 while (parent_classifier_node_labels_iterator != parent_classifier_node_labels.end())
249 {
250 parent_classifier_node_OID += (parent_classifier_node_OID != "" ? "|" : "");
251 parent_classifier_node_OID += *parent_classifier_node_labels_iterator;
252 text_t parent_classifier_node_label = *parent_classifier_node_labels_iterator;
253 text_t parent_classifier_node_numleafdocs = "?"; // We can't determine this without more database requests
254 output_classifier_node (parent_classifier_node_OID, parent_classifier_node_label, parent_classifier_node_numleafdocs, node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
255 node_indent++;
256
257 parent_classifier_node_labels_iterator++;
258 }
259
260 // Display the selected classifier node
261 int classifier_node_numleafdocs = metadata_values_response.docInfo.size() + document_OIDs_response.docInfo.size();
262 output_classifier_node (classifier_node_OID, classifier_node_label, classifier_node_numleafdocs, node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
263 node_indent++;
264
265 // After processing any hierarchical metadata values, we're left with the child classifer nodes
266 map<text_t, int, lttext_t> child_classifier_nodes = process_metadata_values (classifier_options, metadata_values_response.docInfo);
267
268 // Display the child classifier nodes
269 map<text_t, int, lttext_t>::iterator child_classifier_nodes_iterator = child_classifier_nodes.begin();
270 while (child_classifier_nodes_iterator != child_classifier_nodes.end())
271 {
272 text_t child_classifier_node_OID = classifier_node_OID + "|" + (*child_classifier_nodes_iterator).first;
273 text_t child_classifier_node_label = (*child_classifier_nodes_iterator).first;
274 int child_classifier_node_numleafdocs = (*child_classifier_nodes_iterator).second;
275 output_classifier_node (child_classifier_node_OID, child_classifier_node_label, child_classifier_node_numleafdocs, node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
276 child_classifier_nodes_iterator++;
277 }
278
279 // Display the documents at this level
280 text_tarray document_OIDs;
281 ResultDocInfo_tarray::iterator document_OID_iterator = document_OIDs_response.docInfo.begin();
282 while (document_OID_iterator != document_OIDs_response.docInfo.end())
283 {
284 document_OIDs.push_back ((*document_OID_iterator).OID);
285 document_OID_iterator++;
286 }
287
288 output_document_nodes (document_OIDs, node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
289}
290
291
292map<text_t, int, lttext_t> dynamicclassifieraction::process_metadata_values (text_tmap classifier_options,
293 ResultDocInfo_tarray metadata_values)
294{
295 map<text_t, int, lttext_t> metadata_values_grouped;
296
297 text_t current_position = classifier_options["current_position"];
298 ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values.begin();
299 while (metadata_value_iterator != metadata_values.end())
300 {
301 text_t metadata_value = (*metadata_value_iterator).OID;
302
303 // If we're not at the top-level we need to remove the current position from the metadata values
304 if (current_position != "" && starts_with(metadata_value, current_position + "|"))
305 {
306 metadata_value = substr(metadata_value.begin() + (current_position + "|").size(), metadata_value.end());
307 }
308
309 // Is this metadata value hierarchical?
310 text_t::iterator hierarchy_split_position = findchar(metadata_value.begin(), metadata_value.end(), '|');
311 if (hierarchy_split_position != metadata_value.end())
312 {
313 // Yes, so use the first part of the hierarchy only
314 metadata_value = substr(metadata_value.begin(), hierarchy_split_position);
315 }
316
317 // Create a node for this metadata value if we haven't seen it before
318 if (metadata_values_grouped.find(metadata_value) == metadata_values_grouped.end())
319 {
320 metadata_values_grouped[metadata_value] = 0;
321 }
322
323 // Increment the occurrence count
324 metadata_values_grouped[metadata_value] += (*metadata_value_iterator).result_num;
325
326 metadata_value_iterator++;
327 }
328
329 return metadata_values_grouped;
330}
331
332
333void dynamicclassifieraction::output_classifier_node (text_t classifier_node_OID, text_t classifier_node_label,
334 text_t classifier_node_numleafdocs, int classifier_node_indent,
335 cgiargsclass &args, recptproto *collectproto,
336 browsermapclass *browsers, displayclass &disp,
337 outconvertclass &outconvert, ostream &textout,
338 ostream &logout)
339{
340 // Generate the ResultDocInfo_t containing the information for the classifier node
341 ResultDocInfo_t classifier_node;
342 classifier_node.OID = classifier_node_OID;
343 classifier_node.metadata["doctype"].values.push_back ("classify");
344 classifier_node.metadata["haschildren"].values.push_back ("1");
345 classifier_node.metadata["numleafdocs"].values.push_back (classifier_node_numleafdocs);
346 classifier_node.metadata["Title"].values.push_back (classifier_node_label);
347
348 // Get the format statement for this classifier if there is one, or use the browser's default otherwise
349 text_t formatstring;
350 text_t classifier_type = "VList";
351 browserclass *bptr = browsers->getbrowser (classifier_type);
352 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
353 if (!get_formatstring (args["dcl"], classifier_type, cinfo->format, formatstring))
354 {
355 formatstring = bptr->get_default_formatstring();
356 }
357 format_t *formatlistptr = new format_t();
358 text_tset metadata;
359 bool getParents = false;
360 parse_formatstring (formatstring, formatlistptr, metadata, getParents);
361 bool use_table = is_table_content (formatlistptr);
362
363 // Display the classifier node
364 bptr->output_section_group (classifier_node, args, args["c"], classifier_node_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
365}
366
367
368void dynamicclassifieraction::output_document_nodes (text_tarray document_OIDs, int document_nodes_indent,
369 cgiargsclass &args, recptproto *collectproto,
370 browsermapclass *browsers, displayclass &disp,
371 outconvertclass &outconvert, ostream &textout,
372 ostream &logout)
373{
374 // Get the format statement for this classifier if there is one, or use the browser's default otherwise
375 text_t formatstring;
376 text_t classifier_type = "VList";
377 browserclass *bptr = browsers->getbrowser (classifier_type);
378 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
379 if (!get_formatstring (args["dcl"], classifier_type, cinfo->format, formatstring))
380 {
381 formatstring = bptr->get_default_formatstring();
382 }
383 format_t *formatlistptr = new format_t();
384 text_tset metadata;
385 bool getParents = false;
386 parse_formatstring (formatstring, formatlistptr, metadata, getParents);
387 bool use_table = is_table_content (formatlistptr);
388
389 // Request the necessary metadata for the documents
390 FilterResponse_t document_OIDs_response;
391 get_info (document_OIDs, args["c"], args["l"], metadata, getParents, collectproto, document_OIDs_response, logout);
392
393 // Display the document nodes
394 bptr->output_section_group (document_OIDs_response, args, args["c"], document_nodes_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
395}
Note: See TracBrowser for help on using the repository browser.