1 | /**********************************************************************
|
---|
2 | *
|
---|
3 | * dynamicclassifieraction.cpp --
|
---|
4 | * Copyright (C) 2008 DL Consulting Ltd
|
---|
5 | *
|
---|
6 | * A component of the Greenstone digital library software
|
---|
7 | * from the New Zealand Digital Library Project at the
|
---|
8 | * University of Waikato, New Zealand.
|
---|
9 | *
|
---|
10 | * This program is free software; you can redistribute it and/or modify
|
---|
11 | * it under the terms of the GNU General Public License as published by
|
---|
12 | * the Free Software Foundation; either version 2 of the License, or
|
---|
13 | * (at your option) any later version.
|
---|
14 | *
|
---|
15 | * This program is distributed in the hope that it will be useful,
|
---|
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
18 | * GNU General Public License for more details.
|
---|
19 | *
|
---|
20 | * You should have received a copy of the GNU General Public License
|
---|
21 | * along with this program; if not, write to the Free Software
|
---|
22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
23 | *
|
---|
24 | *********************************************************************/
|
---|
25 |
|
---|
26 | #include "dynamicclassifieraction.h"
|
---|
27 | #include "recptprototools.h"
|
---|
28 |
|
---|
29 |
|
---|
30 | dynamicclassifieraction::dynamicclassifieraction ()
|
---|
31 | {
|
---|
32 | recpt = NULL;
|
---|
33 |
|
---|
34 | cgiarginfo arg_ainfo;
|
---|
35 | arg_ainfo.shortname = "dcl";
|
---|
36 | arg_ainfo.longname = "dynamic classifier ID";
|
---|
37 | arg_ainfo.multiplechar = true;
|
---|
38 | arg_ainfo.defaultstatus = cgiarginfo::weak;
|
---|
39 | arg_ainfo.argdefault = "";
|
---|
40 | arg_ainfo.savedarginfo = cgiarginfo::must;
|
---|
41 | argsinfo.addarginfo (NULL, arg_ainfo);
|
---|
42 |
|
---|
43 | arg_ainfo.shortname = "dcn";
|
---|
44 | arg_ainfo.longname = "dynamic classifier node";
|
---|
45 | arg_ainfo.multiplechar = true;
|
---|
46 | arg_ainfo.defaultstatus = cgiarginfo::weak;
|
---|
47 | arg_ainfo.argdefault = "";
|
---|
48 | arg_ainfo.savedarginfo = cgiarginfo::must;
|
---|
49 | argsinfo.addarginfo (NULL, arg_ainfo);
|
---|
50 | }
|
---|
51 |
|
---|
52 |
|
---|
53 | dynamicclassifieraction::~dynamicclassifieraction()
|
---|
54 | {
|
---|
55 | }
|
---|
56 |
|
---|
57 |
|
---|
58 | bool dynamicclassifieraction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args,
|
---|
59 | recptprotolistclass *protos, ostream &logout)
|
---|
60 | {
|
---|
61 | return true;
|
---|
62 | }
|
---|
63 |
|
---|
64 |
|
---|
65 | void dynamicclassifieraction::get_cgihead_info (cgiargsclass &args, recptprotolistclass *protos,
|
---|
66 | response_t &response,text_t &response_data,
|
---|
67 | ostream &logout)
|
---|
68 | {
|
---|
69 | response = content;
|
---|
70 | response_data = "text/html";
|
---|
71 | }
|
---|
72 |
|
---|
73 |
|
---|
74 | // define all the macros which might be used by other actions to produce pages.
|
---|
75 | void dynamicclassifieraction::define_external_macros (displayclass &disp, cgiargsclass &args,
|
---|
76 | recptprotolistclass *protos, ostream &logout)
|
---|
77 | {
|
---|
78 | // A valid collection server is vital
|
---|
79 | recptproto *collectproto = protos->getrecptproto (args["c"], logout);
|
---|
80 | if (collectproto == NULL)
|
---|
81 | {
|
---|
82 | logout << "dynamicclassifieraction::define_external_macros called with NULL collectproto\n";
|
---|
83 | return;
|
---|
84 | }
|
---|
85 |
|
---|
86 | // Define _dynamicclassifiernavbarentries_ to add buttons to the navigation bar for the dynamic classifiers
|
---|
87 | text_t navigation_bar_entries = "";
|
---|
88 | ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
|
---|
89 | text_tmap::iterator dynamic_classifier_iterator = cinfo->dynamic_classifiers.begin();
|
---|
90 | while (dynamic_classifier_iterator != cinfo->dynamic_classifiers.end())
|
---|
91 | {
|
---|
92 | text_t dynamic_classifier_id = (*dynamic_classifier_iterator).first;
|
---|
93 | navigation_bar_entries += "_navbarspacer_";
|
---|
94 | navigation_bar_entries += "_navtab_(_gwcgi_?c=" + args["c"] + "&a=dc&dcl=" + dynamic_classifier_id + "," + dynamic_classifier_id;
|
---|
95 | if (args["a"] == "dc" && args["dcl"] == dynamic_classifier_id)
|
---|
96 | {
|
---|
97 | navigation_bar_entries += ",selected";
|
---|
98 | }
|
---|
99 | navigation_bar_entries += ")";
|
---|
100 | dynamic_classifier_iterator++;
|
---|
101 | }
|
---|
102 |
|
---|
103 | disp.setmacro("dynamicclassifiernavbarentries", displayclass::defaultpackage, navigation_bar_entries);
|
---|
104 | }
|
---|
105 |
|
---|
106 |
|
---|
107 | // define all the macros which are related to pages generated
|
---|
108 | // by this action. we also load up the formatinfo structure
|
---|
109 | // here (it's used in do_action as well as here)
|
---|
110 | void dynamicclassifieraction::define_internal_macros (displayclass &disp, cgiargsclass &args,
|
---|
111 | recptprotolistclass *protos, ostream &logout)
|
---|
112 | {
|
---|
113 | // define_internal_macros sets the following macros:
|
---|
114 | }
|
---|
115 |
|
---|
116 |
|
---|
117 | bool dynamicclassifieraction::do_action(cgiargsclass &args, recptprotolistclass *protos,
|
---|
118 | browsermapclass *browsers, displayclass &disp,
|
---|
119 | outconvertclass &outconvert, ostream &textout,
|
---|
120 | ostream &logout)
|
---|
121 | {
|
---|
122 | // A valid collection server is vital
|
---|
123 | recptproto *collectproto = protos->getrecptproto (args["c"], logout);
|
---|
124 | if (collectproto == NULL)
|
---|
125 | {
|
---|
126 | logout << "dynamicclassifieraction::do_action called with NULL collectproto\n";
|
---|
127 | return false;
|
---|
128 | }
|
---|
129 |
|
---|
130 | textout << outconvert << disp << "_dynamicclassifier:header_\n";
|
---|
131 | textout << outconvert << disp << "_dynamicclassifier:content_\n";
|
---|
132 |
|
---|
133 | // Check a dynamic classifier ID has been specified
|
---|
134 | text_t arg_dcl = args["dcl"];
|
---|
135 | if (arg_dcl.empty())
|
---|
136 | {
|
---|
137 | textout << outconvert << disp << "Error: Missing dcl argument.\n";
|
---|
138 | textout << outconvert << disp << "_dynamicclassifier:footer_\n";
|
---|
139 | return true;
|
---|
140 | }
|
---|
141 |
|
---|
142 | // Check the dynamic classifier ID is valid (ie. there is an entry in the collect.cfg file for it)
|
---|
143 | ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
|
---|
144 | if (cinfo->dynamic_classifiers.find(arg_dcl) == cinfo->dynamic_classifiers.end())
|
---|
145 | {
|
---|
146 | textout << outconvert << disp << "Error: Invalid dcl value \"" << arg_dcl << "\".\n";
|
---|
147 | textout << outconvert << disp << "_dynamicclassifier:footer_\n";
|
---|
148 | return true;
|
---|
149 | }
|
---|
150 |
|
---|
151 | // Parse the classifier options from the specification
|
---|
152 | text_t classifier_specification = cinfo->dynamic_classifiers[arg_dcl];
|
---|
153 | text_tmap classifier_options = parse_classifier_options (classifier_specification, args);
|
---|
154 |
|
---|
155 | // Output the "<ID>Header" format statement if there is one
|
---|
156 | text_t classifier_header_format_statement = "";
|
---|
157 | get_formatstring (arg_dcl + "Header", cinfo->format, classifier_header_format_statement);
|
---|
158 | textout << outconvert << disp << classifier_header_format_statement << "\n";
|
---|
159 |
|
---|
160 | // Resolve any ".pr" bits at the end of the "dcn" argument
|
---|
161 | text_t classifier_node_separator = classifier_options["-split_using_hierarchy_separator"];
|
---|
162 | if (ends_with (args["dcn"], ".pr"))
|
---|
163 | {
|
---|
164 | // Change the "dcn" argument to be the OID of the parent of the specified classifier node
|
---|
165 | text_tlist args_dcn_parts;
|
---|
166 | splitword (args["dcn"].begin(), args["dcn"].end(), classifier_node_separator, args_dcn_parts);
|
---|
167 | args_dcn_parts.pop_back(); // Remove the last part
|
---|
168 | joinchar (args_dcn_parts, classifier_node_separator, args["dcn"]);
|
---|
169 | }
|
---|
170 |
|
---|
171 | // Prepare to output the dynamic classifier
|
---|
172 | text_t current_classifier_node_OID = "";
|
---|
173 | text_t current_metadata_value_filter = "";
|
---|
174 | int classifier_node_indent = 0;
|
---|
175 |
|
---|
176 | // (Optional) Output an hlist to group the classifier nodes into buckets based on the first character
|
---|
177 | if (classifier_options["-group_by_first_character"] == "1")
|
---|
178 | {
|
---|
179 | text_t selected_grouping_node_OID = output_hlist_classifier_nodes (current_classifier_node_OID, current_metadata_value_filter, "substr(value,1,1)", classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
180 |
|
---|
181 | current_classifier_node_OID = selected_grouping_node_OID + ".";
|
---|
182 | current_metadata_value_filter = selected_grouping_node_OID + "*";
|
---|
183 | }
|
---|
184 |
|
---|
185 | // (Optional) Output an hlist instead of a vlist at the top level
|
---|
186 | if (classifier_options["-use_hlist_at_top"] == "1")
|
---|
187 | {
|
---|
188 | text_t selected_hlist_node_OID = output_hlist_classifier_nodes (current_classifier_node_OID, current_metadata_value_filter, "", classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
189 |
|
---|
190 | current_classifier_node_OID = selected_hlist_node_OID + classifier_node_separator;
|
---|
191 | }
|
---|
192 |
|
---|
193 | // args["dcn"] may have been modified by output_hlist_classifier_nodes() above
|
---|
194 | text_t classifier_node_OID = args["dcn"];
|
---|
195 |
|
---|
196 | // If grouping is enabled remove the grouping node bit from the start of the OID
|
---|
197 | text_t classifier_node_OID_sans_grouping = classifier_node_OID;
|
---|
198 | if (classifier_options["-group_by_first_character"] == "1")
|
---|
199 | {
|
---|
200 | text_t::iterator grouping_node_separator = findchar (classifier_node_OID.begin(), classifier_node_OID.end(), '.');
|
---|
201 | if (grouping_node_separator != classifier_node_OID.end())
|
---|
202 | {
|
---|
203 | classifier_node_OID_sans_grouping = substr (grouping_node_separator + 1, classifier_node_OID.end());
|
---|
204 | }
|
---|
205 | else
|
---|
206 | {
|
---|
207 | classifier_node_OID_sans_grouping = "";
|
---|
208 | }
|
---|
209 | }
|
---|
210 |
|
---|
211 | // Split the classifier node OID into its hierarchical parts, then remove any we've already dealt with (HLists)
|
---|
212 | text_tlist classifier_node_OID_parts_remaining;
|
---|
213 | splitword (classifier_node_OID_sans_grouping.begin(), classifier_node_OID_sans_grouping.end(), classifier_node_separator, classifier_node_OID_parts_remaining);
|
---|
214 | if (classifier_options["-use_hlist_at_top"] == "1")
|
---|
215 | {
|
---|
216 | classifier_node_OID_parts_remaining.pop_front();
|
---|
217 | }
|
---|
218 |
|
---|
219 | text_t classifier_node_metadata_value = classifier_node_OID_sans_grouping;
|
---|
220 | text_t classifier_node_metadata_value_filter = classifier_node_metadata_value + classifier_node_separator + "*";
|
---|
221 |
|
---|
222 | // If "-split_greenstone_dates" is on the metadata value and filter doesn't quite match the OID -- fix this
|
---|
223 | if (classifier_options["-split_greenstone_dates"] == "1")
|
---|
224 | {
|
---|
225 | classifier_node_metadata_value.replace (classifier_node_separator, "");
|
---|
226 | classifier_node_metadata_value_filter = classifier_node_metadata_value;
|
---|
227 | if (classifier_node_metadata_value_filter.size() == 8)
|
---|
228 | {
|
---|
229 | classifier_node_metadata_value_filter += classifier_node_separator;
|
---|
230 | }
|
---|
231 | classifier_node_metadata_value_filter += "*";
|
---|
232 | }
|
---|
233 |
|
---|
234 | // Simple case at the top level: just output the child classifier nodes
|
---|
235 | if (classifier_node_metadata_value == "")
|
---|
236 | {
|
---|
237 | output_child_classifier_nodes (current_classifier_node_OID, "", current_metadata_value_filter, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
238 | }
|
---|
239 |
|
---|
240 | // More complex case below the top level
|
---|
241 | else
|
---|
242 | {
|
---|
243 | // Output the parent classifier nodes and the current classifier node
|
---|
244 | output_upper_classifier_nodes (current_classifier_node_OID, classifier_node_OID_parts_remaining, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
245 |
|
---|
246 | // Output the child classifier nodes
|
---|
247 | current_classifier_node_OID = classifier_node_OID + classifier_node_separator;
|
---|
248 | output_child_classifier_nodes (current_classifier_node_OID, classifier_node_metadata_value, classifier_node_metadata_value_filter, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
249 |
|
---|
250 | // Get the document nodes at this level
|
---|
251 | text_t metadata_elements = classifier_options["metadata_elements"];
|
---|
252 | text_t sort_documents_by = classifier_options["-sort_documents_by"];
|
---|
253 | FilterResponse_t documents_response;
|
---|
254 | get_documents_with_metadata_value (metadata_elements, classifier_node_metadata_value, sort_documents_by, args["c"], collectproto, documents_response, logout);
|
---|
255 |
|
---|
256 | // Display the document nodes
|
---|
257 | display_document_nodes (documents_response, classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
258 | }
|
---|
259 |
|
---|
260 | // Output the "<ID>Footer" format statement if there is one
|
---|
261 | text_t classifier_footer_format_statement = "";
|
---|
262 | get_formatstring (arg_dcl + "Footer", cinfo->format, classifier_footer_format_statement);
|
---|
263 | textout << outconvert << disp << classifier_footer_format_statement << "\n";
|
---|
264 |
|
---|
265 | textout << outconvert << disp << "_dynamicclassifier:footer_\n";
|
---|
266 | return true;
|
---|
267 | }
|
---|
268 |
|
---|
269 |
|
---|
270 | text_tmap dynamicclassifieraction::parse_classifier_options (text_t classifier_specification, cgiargsclass &args)
|
---|
271 | {
|
---|
272 | text_tmap classifier_options;
|
---|
273 |
|
---|
274 | // Default values
|
---|
275 | classifier_options["-split_using_hierarchy_separator"] = "|";
|
---|
276 |
|
---|
277 | // Split the classifier specification string by spaces
|
---|
278 | text_tlist classifier_specification_parts;
|
---|
279 | splitchar (classifier_specification.begin(), classifier_specification.end(), ' ', classifier_specification_parts);
|
---|
280 |
|
---|
281 | // The metadata element(s) to classify by should be the first value
|
---|
282 | classifier_options["metadata_elements"] = classifier_specification_parts.front();
|
---|
283 | classifier_specification_parts.pop_front();
|
---|
284 |
|
---|
285 | // Parse options from the remainder of the classifier specification
|
---|
286 | while (!classifier_specification_parts.empty())
|
---|
287 | {
|
---|
288 | // Parse the option name
|
---|
289 | text_t classifier_option_name = classifier_specification_parts.front();
|
---|
290 | classifier_specification_parts.pop_front();
|
---|
291 |
|
---|
292 | // Check if the option has a value (it may just be a flag, in which case we use "1" as the value)
|
---|
293 | text_t classifier_option_value = "1";
|
---|
294 | if (!classifier_specification_parts.empty() && !starts_with(classifier_specification_parts.front(), "-"))
|
---|
295 | {
|
---|
296 | classifier_option_value = classifier_specification_parts.front();
|
---|
297 | classifier_specification_parts.pop_front();
|
---|
298 | }
|
---|
299 |
|
---|
300 | // Record the option
|
---|
301 | classifier_options[classifier_option_name] = classifier_option_value;
|
---|
302 | }
|
---|
303 |
|
---|
304 | return classifier_options;
|
---|
305 | }
|
---|
306 |
|
---|
307 |
|
---|
308 | text_t dynamicclassifieraction::output_hlist_classifier_nodes (text_t parent_classifier_node_OID,
|
---|
309 | text_t metadata_value_filter,
|
---|
310 | text_t metadata_value_grouping_expression,
|
---|
311 | text_tmap classifier_options, cgiargsclass &args,
|
---|
312 | recptproto *collectproto, browsermapclass *browsers,
|
---|
313 | displayclass &disp, outconvertclass &outconvert,
|
---|
314 | ostream &textout, ostream &logout)
|
---|
315 | {
|
---|
316 | // Get all the metadata values for the specified element(s) that match the filter
|
---|
317 | text_t metadata_elements = classifier_options["metadata_elements"];
|
---|
318 | FilterResponse_t metadata_values_response;
|
---|
319 | bool request_success = get_metadata_values (metadata_elements, metadata_value_filter, metadata_value_grouping_expression, args["c"], collectproto, metadata_values_response, logout);
|
---|
320 |
|
---|
321 | // If the request failed then it's probably because the collection isn't using an SQL infodbtype
|
---|
322 | if (request_success == false)
|
---|
323 | {
|
---|
324 | textout << outconvert << disp << "Error: Dynamic classifier functionality is not available. Please check you are using an SQL infodbtype and the collection has been rebuilt.\n";
|
---|
325 | return "";
|
---|
326 | }
|
---|
327 |
|
---|
328 | // Check some metadata values were returned
|
---|
329 | if (metadata_values_response.docInfo.empty())
|
---|
330 | {
|
---|
331 | return "";
|
---|
332 | }
|
---|
333 |
|
---|
334 | // After processing any hierarchical metadata values we're left with the hlist classifer nodes
|
---|
335 | map<text_t, int, lttext_t> hlist_classifier_nodes;
|
---|
336 | ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values_response.docInfo.begin();
|
---|
337 | while (metadata_value_iterator != metadata_values_response.docInfo.end())
|
---|
338 | {
|
---|
339 | text_t hierarchical_metadata_value = split_metadata_value ((*metadata_value_iterator).OID, classifier_options);
|
---|
340 |
|
---|
341 | // Assume for now we're always at the top
|
---|
342 | text_t hlist_metadata_value = hierarchical_metadata_value;
|
---|
343 |
|
---|
344 | // Determine the label for the hlist classifier node from the metadata value
|
---|
345 | text_tlist hlist_metadata_value_parts;
|
---|
346 | text_t hlist_node_separator = classifier_options["-split_using_hierarchy_separator"];
|
---|
347 | splitword (hlist_metadata_value.begin(), hlist_metadata_value.end(), hlist_node_separator, hlist_metadata_value_parts);
|
---|
348 | text_t hlist_classifier_node_label = hlist_metadata_value_parts.front();
|
---|
349 |
|
---|
350 | // Create a node for this value if we haven't seen it before
|
---|
351 | if (hlist_classifier_nodes.find(hlist_classifier_node_label) == hlist_classifier_nodes.end())
|
---|
352 | {
|
---|
353 | hlist_classifier_nodes[hlist_classifier_node_label] = 0;
|
---|
354 | }
|
---|
355 |
|
---|
356 | // Increment the occurrence count
|
---|
357 | hlist_classifier_nodes[hlist_classifier_node_label] += (*metadata_value_iterator).result_num;
|
---|
358 |
|
---|
359 | metadata_value_iterator++;
|
---|
360 | }
|
---|
361 |
|
---|
362 | // Add the necessary metadata to the hlist classifier nodes
|
---|
363 | text_t selected_hlist_node_OID = "";
|
---|
364 | FilterResponse_t hlist_classifier_nodes_response;
|
---|
365 | map<text_t, int, lttext_t>::iterator hlist_classifier_nodes_iterator = hlist_classifier_nodes.begin();
|
---|
366 | while (hlist_classifier_nodes_iterator != hlist_classifier_nodes.end())
|
---|
367 | {
|
---|
368 | text_t hlist_classifier_node_OID = parent_classifier_node_OID + (*hlist_classifier_nodes_iterator).first;
|
---|
369 |
|
---|
370 | // Is this the hlist node that is currently selected?
|
---|
371 | if (starts_with (args["dcn"], hlist_classifier_node_OID))
|
---|
372 | {
|
---|
373 | selected_hlist_node_OID = hlist_classifier_node_OID;
|
---|
374 | }
|
---|
375 |
|
---|
376 | // Add the necessary metadata required to display the hlist nodes correctly
|
---|
377 | ResultDocInfo_t hlist_classifier_node;
|
---|
378 | hlist_classifier_node.OID = hlist_classifier_node_OID;
|
---|
379 | hlist_classifier_node.metadata["doctype"].values.push_back ("classify");
|
---|
380 | hlist_classifier_node.metadata["haschildren"].values.push_back ("1");
|
---|
381 | hlist_classifier_node.metadata["numleafdocs"].values.push_back ("?"); // We can't determine this without more database requests
|
---|
382 | hlist_classifier_node.metadata["Title"].values.push_back ((*hlist_classifier_nodes_iterator).first);
|
---|
383 | hlist_classifier_nodes_response.docInfo.push_back (hlist_classifier_node);
|
---|
384 |
|
---|
385 | hlist_classifier_nodes_iterator++;
|
---|
386 | }
|
---|
387 |
|
---|
388 | // Automatically select the first hlist node if necessary
|
---|
389 | if (selected_hlist_node_OID == "")
|
---|
390 | {
|
---|
391 | selected_hlist_node_OID = (*hlist_classifier_nodes_response.docInfo.begin()).OID;
|
---|
392 |
|
---|
393 | // Don't really like messing with this here, but it needs to be done before display_classifier_nodes() below
|
---|
394 | if (starts_with (parent_classifier_node_OID, args["dcn"]))
|
---|
395 | {
|
---|
396 | args["dcn"] = selected_hlist_node_OID;
|
---|
397 | }
|
---|
398 | }
|
---|
399 |
|
---|
400 | // Display the hlist nodes
|
---|
401 | display_classifier_nodes (hlist_classifier_nodes_response, "HList", 0, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
402 |
|
---|
403 | return selected_hlist_node_OID;
|
---|
404 | }
|
---|
405 |
|
---|
406 |
|
---|
407 | void dynamicclassifieraction::output_upper_classifier_nodes (text_t root_classifier_node_OID,
|
---|
408 | text_tlist upper_classifier_node_labels,
|
---|
409 | int& classifier_node_indent,
|
---|
410 | text_tmap classifier_options, cgiargsclass &args,
|
---|
411 | recptproto *collectproto, browsermapclass *browsers,
|
---|
412 | displayclass &disp, outconvertclass &outconvert,
|
---|
413 | ostream &textout, ostream &logout)
|
---|
414 | {
|
---|
415 | // Display the upper classifier nodes
|
---|
416 | text_t upper_classifier_node_OID = root_classifier_node_OID;
|
---|
417 | text_tlist::iterator upper_classifier_node_labels_iterator = upper_classifier_node_labels.begin();
|
---|
418 | while (upper_classifier_node_labels_iterator != upper_classifier_node_labels.end())
|
---|
419 | {
|
---|
420 | upper_classifier_node_OID += *upper_classifier_node_labels_iterator;
|
---|
421 |
|
---|
422 | ResultDocInfo_t upper_classifier_node;
|
---|
423 | upper_classifier_node.OID = upper_classifier_node_OID;
|
---|
424 | upper_classifier_node.metadata["doctype"].values.push_back ("classify");
|
---|
425 | upper_classifier_node.metadata["haschildren"].values.push_back ("1");
|
---|
426 | upper_classifier_node.metadata["numleafdocs"].values.push_back ("?"); // We can't determine this without more database requests
|
---|
427 | upper_classifier_node.metadata["Title"].values.push_back (*upper_classifier_node_labels_iterator);
|
---|
428 |
|
---|
429 | FilterResponse_t upper_classifier_node_response;
|
---|
430 | upper_classifier_node_response.docInfo.push_back(upper_classifier_node);
|
---|
431 | display_classifier_nodes (upper_classifier_node_response, "VList", classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
432 | classifier_node_indent++;
|
---|
433 |
|
---|
434 | upper_classifier_node_OID += classifier_options["-split_using_hierarchy_separator"];
|
---|
435 | upper_classifier_node_labels_iterator++;
|
---|
436 | }
|
---|
437 | }
|
---|
438 |
|
---|
439 |
|
---|
440 | void dynamicclassifieraction::output_child_classifier_nodes (text_t classifier_node_OID,
|
---|
441 | text_t classifier_node_metadata_value,
|
---|
442 | text_t metadata_value_filter,
|
---|
443 | int& classifier_node_indent,
|
---|
444 | text_tmap classifier_options, cgiargsclass &args,
|
---|
445 | recptproto *collectproto, browsermapclass *browsers,
|
---|
446 | displayclass &disp, outconvertclass &outconvert,
|
---|
447 | ostream &textout, ostream &logout)
|
---|
448 | {
|
---|
449 | // Get all the metadata values for the specified element(s) that match the filter
|
---|
450 | text_t metadata_elements = classifier_options["metadata_elements"];
|
---|
451 | FilterResponse_t metadata_values_response;
|
---|
452 | bool request_success = get_metadata_values (metadata_elements, metadata_value_filter, "", args["c"], collectproto, metadata_values_response, logout);
|
---|
453 |
|
---|
454 | // If the request failed then it's probably because the collection isn't using an SQL infodbtype
|
---|
455 | if (request_success == false)
|
---|
456 | {
|
---|
457 | textout << outconvert << disp << "Error: Dynamic classifier functionality is not available. Please check you are using an SQL infodbtype and the collection has been rebuilt.\n";
|
---|
458 | return;
|
---|
459 | }
|
---|
460 |
|
---|
461 | // After processing any hierarchical metadata values we're left with the child classifer nodes
|
---|
462 | map<text_t, int, lttext_t> child_classifier_nodes;
|
---|
463 | ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values_response.docInfo.begin();
|
---|
464 | while (metadata_value_iterator != metadata_values_response.docInfo.end())
|
---|
465 | {
|
---|
466 | text_t hierarchical_metadata_value = split_metadata_value ((*metadata_value_iterator).OID, classifier_options);
|
---|
467 | text_t classifier_node_hierarchical_metadata_value = split_metadata_value (classifier_node_metadata_value, classifier_options);
|
---|
468 |
|
---|
469 | // We need to remove the current position from the metadata value to leave the child metadata value
|
---|
470 | text_t child_metadata_value = hierarchical_metadata_value;
|
---|
471 | text_t child_node_separator = classifier_options["-split_using_hierarchy_separator"];
|
---|
472 | if (starts_with(hierarchical_metadata_value, classifier_node_hierarchical_metadata_value + child_node_separator))
|
---|
473 | {
|
---|
474 | child_metadata_value = substr(hierarchical_metadata_value.begin() + (classifier_node_hierarchical_metadata_value + child_node_separator).size(), hierarchical_metadata_value.end());
|
---|
475 | }
|
---|
476 |
|
---|
477 | // Determine the label for the child classifier node from the metadata value
|
---|
478 | text_tlist child_metadata_value_parts;
|
---|
479 | splitword (child_metadata_value.begin(), child_metadata_value.end(), child_node_separator, child_metadata_value_parts);
|
---|
480 | text_t child_classifier_node_label = child_metadata_value_parts.front();
|
---|
481 |
|
---|
482 | // Create a node for this value if we haven't seen it before
|
---|
483 | if (child_classifier_nodes.find(child_classifier_node_label) == child_classifier_nodes.end())
|
---|
484 | {
|
---|
485 | child_classifier_nodes[child_classifier_node_label] = 0;
|
---|
486 | }
|
---|
487 |
|
---|
488 | // Increment the occurrence count
|
---|
489 | child_classifier_nodes[child_classifier_node_label] += (*metadata_value_iterator).result_num;
|
---|
490 |
|
---|
491 | metadata_value_iterator++;
|
---|
492 | }
|
---|
493 |
|
---|
494 | // Add the necessary metadata to the child classifier nodes
|
---|
495 | FilterResponse_t child_classifier_nodes_response;
|
---|
496 | map<text_t, int, lttext_t>::iterator child_classifier_nodes_iterator = child_classifier_nodes.begin();
|
---|
497 | while (child_classifier_nodes_iterator != child_classifier_nodes.end())
|
---|
498 | {
|
---|
499 | text_t child_classifier_node_OID = classifier_node_OID + (*child_classifier_nodes_iterator).first;
|
---|
500 |
|
---|
501 | ResultDocInfo_t child_classifier_node;
|
---|
502 | child_classifier_node.OID = child_classifier_node_OID;
|
---|
503 | child_classifier_node.metadata["doctype"].values.push_back ("classify");
|
---|
504 | child_classifier_node.metadata["haschildren"].values.push_back ("1");
|
---|
505 | child_classifier_node.metadata["numleafdocs"].values.push_back ((*child_classifier_nodes_iterator).second);
|
---|
506 | child_classifier_node.metadata["Title"].values.push_back ((*child_classifier_nodes_iterator).first);
|
---|
507 | child_classifier_nodes_response.docInfo.push_back (child_classifier_node);
|
---|
508 |
|
---|
509 | child_classifier_nodes_iterator++;
|
---|
510 | }
|
---|
511 |
|
---|
512 | // Display the child classifier nodes
|
---|
513 | display_classifier_nodes (child_classifier_nodes_response, "VList", classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
514 | }
|
---|
515 |
|
---|
516 |
|
---|
517 | text_t dynamicclassifieraction::split_metadata_value (text_t metadata_value, text_tmap classifier_options)
|
---|
518 | {
|
---|
519 | text_t hierarchical_metadata_value = metadata_value;
|
---|
520 | text_t hierarchy_separator = classifier_options["-split_using_hierarchy_separator"];
|
---|
521 |
|
---|
522 | // Add hierarchy separators into Greenstone date values (e.g. YYYYMMDD -> YYYY|MM|DD)
|
---|
523 | if (classifier_options["-split_greenstone_dates"] == "1")
|
---|
524 | {
|
---|
525 | if (metadata_value.size() == 4 || metadata_value.size() == 6 || metadata_value.size() == 8)
|
---|
526 | {
|
---|
527 | // Add year
|
---|
528 | hierarchical_metadata_value = substr (metadata_value.begin(), metadata_value.begin() + 4);
|
---|
529 | if (metadata_value.size() == 6 || metadata_value.size() == 8)
|
---|
530 | {
|
---|
531 | // Add month
|
---|
532 | hierarchical_metadata_value += hierarchy_separator;
|
---|
533 | hierarchical_metadata_value += substr (metadata_value.begin() + 4, metadata_value.begin() + 6);
|
---|
534 | if (metadata_value.size() == 8)
|
---|
535 | {
|
---|
536 | // Add day
|
---|
537 | hierarchical_metadata_value += hierarchy_separator;
|
---|
538 | hierarchical_metadata_value += substr (metadata_value.begin() + 6, metadata_value.begin() + 8);
|
---|
539 | }
|
---|
540 | }
|
---|
541 | }
|
---|
542 | }
|
---|
543 |
|
---|
544 | return hierarchical_metadata_value;
|
---|
545 | }
|
---|
546 |
|
---|
547 |
|
---|
548 | void dynamicclassifieraction::display_classifier_nodes (FilterResponse_t classifier_nodes_response,
|
---|
549 | text_t classifier_nodes_type,
|
---|
550 | int classifier_nodes_indent,
|
---|
551 | cgiargsclass &args, recptproto *collectproto,
|
---|
552 | browsermapclass *browsers, displayclass &disp,
|
---|
553 | outconvertclass &outconvert, ostream &textout,
|
---|
554 | ostream &logout)
|
---|
555 | {
|
---|
556 | // Check there are some classifier nodes to display
|
---|
557 | if (classifier_nodes_response.docInfo.empty()) return;
|
---|
558 |
|
---|
559 | // Get the format statement for this classifier if there is one, or use the browser's default otherwise
|
---|
560 | text_t formatstring;
|
---|
561 | browserclass *bptr = browsers->getbrowser (classifier_nodes_type);
|
---|
562 | ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
|
---|
563 | if (!get_formatstring (args["dcl"], classifier_nodes_type, cinfo->format, formatstring))
|
---|
564 | {
|
---|
565 | formatstring = bptr->get_default_formatstring();
|
---|
566 | }
|
---|
567 | format_t *formatlistptr = new format_t();
|
---|
568 | text_tset metadata;
|
---|
569 | bool getParents = false;
|
---|
570 | parse_formatstring (formatstring, formatlistptr, metadata, getParents);
|
---|
571 | bool use_table = is_table_content (formatlistptr);
|
---|
572 |
|
---|
573 | // Display the classifier nodes
|
---|
574 | bptr->output_section_group (classifier_nodes_response, args, args["c"], classifier_nodes_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
|
---|
575 | }
|
---|
576 |
|
---|
577 |
|
---|
578 | void dynamicclassifieraction::display_document_nodes (FilterResponse_t documents_response,
|
---|
579 | int document_nodes_indent,
|
---|
580 | cgiargsclass &args, recptproto *collectproto,
|
---|
581 | browsermapclass *browsers, displayclass &disp,
|
---|
582 | outconvertclass &outconvert, ostream &textout,
|
---|
583 | ostream &logout)
|
---|
584 | {
|
---|
585 | // Check there are some documents to display
|
---|
586 | if (documents_response.docInfo.empty()) return;
|
---|
587 |
|
---|
588 | // Get the format statement for the document nodes if there is one, or use the browser's default otherwise
|
---|
589 | text_t formatstring;
|
---|
590 | browserclass *bptr = browsers->getbrowser ("VList");
|
---|
591 | ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
|
---|
592 | if (!get_formatstring (args["dcl"], "DocumentNodes", cinfo->format, formatstring))
|
---|
593 | {
|
---|
594 | if (!get_formatstring (args["dcl"], "VList", cinfo->format, formatstring))
|
---|
595 | {
|
---|
596 | formatstring = bptr->get_default_formatstring();
|
---|
597 | }
|
---|
598 | }
|
---|
599 | format_t *formatlistptr = new format_t();
|
---|
600 | text_tset metadata;
|
---|
601 | bool getParents = false;
|
---|
602 | parse_formatstring (formatstring, formatlistptr, metadata, getParents);
|
---|
603 | bool use_table = is_table_content (formatlistptr);
|
---|
604 |
|
---|
605 | // Request the necessary metadata for displaying the documents
|
---|
606 | text_tarray document_OIDs;
|
---|
607 | ResultDocInfo_tarray::iterator document_iterator = documents_response.docInfo.begin();
|
---|
608 | while (document_iterator != documents_response.docInfo.end())
|
---|
609 | {
|
---|
610 | document_OIDs.push_back ((*document_iterator).OID);
|
---|
611 | document_iterator++;
|
---|
612 | }
|
---|
613 | FilterResponse_t document_nodes_response;
|
---|
614 | get_info (document_OIDs, args["c"], args["l"], metadata, getParents, collectproto, document_nodes_response, logout);
|
---|
615 |
|
---|
616 | // Display the document nodes
|
---|
617 | bptr->output_section_group (document_nodes_response, args, args["c"], document_nodes_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
|
---|
618 | }
|
---|