1 | /**********************************************************************
|
---|
2 | *
|
---|
3 | * dynamicclassifieraction.cpp --
|
---|
4 | * Copyright (C) 2008 DL Consulting Ltd
|
---|
5 | *
|
---|
6 | * By Michael Dewsnip
|
---|
7 | * Please do not edit this file without checking with Michael first!
|
---|
8 | *
|
---|
9 | * A component of the Greenstone digital library software
|
---|
10 | * from the New Zealand Digital Library Project at the
|
---|
11 | * University of Waikato, New Zealand.
|
---|
12 | *
|
---|
13 | * This program is free software; you can redistribute it and/or modify
|
---|
14 | * it under the terms of the GNU General Public License as published by
|
---|
15 | * the Free Software Foundation; either version 2 of the License, or
|
---|
16 | * (at your option) any later version.
|
---|
17 | *
|
---|
18 | * This program is distributed in the hope that it will be useful,
|
---|
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
21 | * GNU General Public License for more details.
|
---|
22 | *
|
---|
23 | * You should have received a copy of the GNU General Public License
|
---|
24 | * along with this program; if not, write to the Free Software
|
---|
25 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
26 | *
|
---|
27 | *********************************************************************/
|
---|
28 |
|
---|
29 | #include "dynamicclassifieraction.h"
|
---|
30 | #include "recptprototools.h"
|
---|
31 |
|
---|
32 |
|
---|
33 | dynamicclassifieraction::dynamicclassifieraction ()
|
---|
34 | {
|
---|
35 | recpt = NULL;
|
---|
36 |
|
---|
37 | cgiarginfo arg_ainfo;
|
---|
38 | arg_ainfo.shortname = "dcl";
|
---|
39 | arg_ainfo.longname = "dynamic classifier ID";
|
---|
40 | arg_ainfo.multiplechar = true;
|
---|
41 | arg_ainfo.multiplevalue = false;
|
---|
42 | arg_ainfo.defaultstatus = cgiarginfo::weak;
|
---|
43 | arg_ainfo.argdefault = "";
|
---|
44 | arg_ainfo.savedarginfo = cgiarginfo::must;
|
---|
45 | argsinfo.addarginfo (NULL, arg_ainfo);
|
---|
46 |
|
---|
47 | arg_ainfo.shortname = "dcn";
|
---|
48 | arg_ainfo.longname = "dynamic classifier node";
|
---|
49 | arg_ainfo.multiplechar = true;
|
---|
50 | arg_ainfo.multiplevalue = false;
|
---|
51 | arg_ainfo.defaultstatus = cgiarginfo::weak;
|
---|
52 | arg_ainfo.argdefault = "";
|
---|
53 | arg_ainfo.savedarginfo = cgiarginfo::must;
|
---|
54 | argsinfo.addarginfo (NULL, arg_ainfo);
|
---|
55 | }
|
---|
56 |
|
---|
57 |
|
---|
58 | dynamicclassifieraction::~dynamicclassifieraction()
|
---|
59 | {
|
---|
60 | }
|
---|
61 |
|
---|
62 |
|
---|
63 | bool dynamicclassifieraction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args,
|
---|
64 | recptprotolistclass *protos, ostream &logout)
|
---|
65 | {
|
---|
66 | return true;
|
---|
67 | }
|
---|
68 |
|
---|
69 |
|
---|
70 | void dynamicclassifieraction::get_cgihead_info (cgiargsclass &args, recptprotolistclass *protos,
|
---|
71 | response_t &response,text_t &response_data,
|
---|
72 | ostream &logout)
|
---|
73 | {
|
---|
74 | response = content;
|
---|
75 | response_data = "text/html";
|
---|
76 | }
|
---|
77 |
|
---|
78 |
|
---|
79 | void dynamicclassifieraction::define_external_macros (displayclass &disp, cgiargsclass &args,
|
---|
80 | recptprotolistclass *protos, ostream &logout)
|
---|
81 | {
|
---|
82 | // Some pages (e.g. the library home page) are not collection-specific
|
---|
83 | if (args["c"].empty())
|
---|
84 | {
|
---|
85 | return;
|
---|
86 | }
|
---|
87 |
|
---|
88 | // A valid collection server is vital
|
---|
89 | recptproto *collectproto = protos->getrecptproto (args["c"], logout);
|
---|
90 | if (collectproto == NULL)
|
---|
91 | {
|
---|
92 | logout << "dynamicclassifieraction::define_external_macros called with NULL collectproto\n";
|
---|
93 | return;
|
---|
94 | }
|
---|
95 |
|
---|
96 | // Define _dynamicclassifiernavbarentries_ to add buttons to the navigation bar for the dynamic classifiers
|
---|
97 | text_t navigation_bar_entries = "";
|
---|
98 | ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
|
---|
99 | text_tmap::iterator dynamic_classifier_iterator = cinfo->dynamic_classifiers.begin();
|
---|
100 | while (dynamic_classifier_iterator != cinfo->dynamic_classifiers.end())
|
---|
101 | {
|
---|
102 | text_t dynamic_classifier_id = (*dynamic_classifier_iterator).first;
|
---|
103 | navigation_bar_entries += "_navbarspacer_";
|
---|
104 | navigation_bar_entries += "_navtab_(_gwcgi_?c=" + args["c"] + "&a=dc&dcl=" + dynamic_classifier_id + "," + dynamic_classifier_id;
|
---|
105 | if (args["a"] == "dc" && args["dcl"] == dynamic_classifier_id)
|
---|
106 | {
|
---|
107 | navigation_bar_entries += ",selected";
|
---|
108 | }
|
---|
109 | navigation_bar_entries += ")";
|
---|
110 | dynamic_classifier_iterator++;
|
---|
111 | }
|
---|
112 |
|
---|
113 | disp.setmacro("dynamicclassifiernavbarentries", displayclass::defaultpackage, navigation_bar_entries);
|
---|
114 | }
|
---|
115 |
|
---|
116 |
|
---|
117 | void dynamicclassifieraction::define_internal_macros (displayclass &disp, cgiargsclass &args,
|
---|
118 | recptprotolistclass *protos, ostream &logout)
|
---|
119 | {
|
---|
120 | // No internal macros set
|
---|
121 | }
|
---|
122 |
|
---|
123 |
|
---|
124 | bool dynamicclassifieraction::do_action(cgiargsclass &args, recptprotolistclass *protos,
|
---|
125 | browsermapclass *browsers, displayclass &disp,
|
---|
126 | outconvertclass &outconvert, ostream &textout,
|
---|
127 | ostream &logout)
|
---|
128 | {
|
---|
129 | // A valid collection server is vital
|
---|
130 | recptproto *collectproto = protos->getrecptproto (args["c"], logout);
|
---|
131 | if (collectproto == NULL)
|
---|
132 | {
|
---|
133 | logout << "dynamicclassifieraction::do_action called with NULL collectproto\n";
|
---|
134 | return false;
|
---|
135 | }
|
---|
136 |
|
---|
137 | textout << outconvert << disp << "_dynamicclassifier:header_\n";
|
---|
138 | textout << outconvert << disp << "_dynamicclassifier:content_\n";
|
---|
139 |
|
---|
140 | // Check a dynamic classifier ID has been specified
|
---|
141 | text_t arg_dcl = args["dcl"];
|
---|
142 | if (arg_dcl.empty())
|
---|
143 | {
|
---|
144 | textout << outconvert << disp << "Error: Missing dcl argument.\n";
|
---|
145 | textout << outconvert << disp << "_dynamicclassifier:footer_\n";
|
---|
146 | return true;
|
---|
147 | }
|
---|
148 |
|
---|
149 | // Check the dynamic classifier ID is valid (ie. there is an entry in the collect.cfg file for it)
|
---|
150 | ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
|
---|
151 | if (cinfo->dynamic_classifiers.find(arg_dcl) == cinfo->dynamic_classifiers.end())
|
---|
152 | {
|
---|
153 | textout << outconvert << disp << "Error: Invalid dcl value \"" << encodeForHTML(arg_dcl) << "\".\n";
|
---|
154 | textout << outconvert << disp << "_dynamicclassifier:footer_\n";
|
---|
155 | return true;
|
---|
156 | }
|
---|
157 |
|
---|
158 | // Parse the classifier options from the specification
|
---|
159 | text_t classifier_specification = cinfo->dynamic_classifiers[arg_dcl];
|
---|
160 | text_tmap classifier_options = parse_classifier_options (classifier_specification, args);
|
---|
161 |
|
---|
162 | // Output the "<ID>Header" format statement if there is one
|
---|
163 | text_t classifier_header_format_statement = "";
|
---|
164 | get_formatstring (arg_dcl + "Header", cinfo->format, classifier_header_format_statement);
|
---|
165 | textout << outconvert << disp << classifier_header_format_statement << "\n";
|
---|
166 |
|
---|
167 | // Resolve any ".pr" bits at the end of the "dcn" argument
|
---|
168 | text_t classifier_node_separator = classifier_options["-split_using_hierarchy_separator"];
|
---|
169 | if (ends_with (args["dcn"], ".pr"))
|
---|
170 | {
|
---|
171 | // Change the "dcn" argument to be the OID of the parent of the specified classifier node
|
---|
172 | text_tlist args_dcn_parts;
|
---|
173 | splitword (args["dcn"].begin(), args["dcn"].end(), classifier_node_separator, args_dcn_parts);
|
---|
174 | args_dcn_parts.pop_back(); // Remove the last part
|
---|
175 | joinchar (args_dcn_parts, classifier_node_separator, args["dcn"]);
|
---|
176 | }
|
---|
177 |
|
---|
178 | // Prepare to output the dynamic classifier
|
---|
179 | text_t current_classifier_node_OID = "";
|
---|
180 | text_t current_metadata_value_filter = "";
|
---|
181 | int classifier_node_indent = 0;
|
---|
182 |
|
---|
183 | // (Optional) Output an hlist to group the classifier nodes into buckets based on the first character
|
---|
184 | if (classifier_options["-group_by_first_character"] == "1")
|
---|
185 | {
|
---|
186 | text_t selected_grouping_node_OID = output_hlist_classifier_nodes (current_classifier_node_OID, current_metadata_value_filter, "substr(value,1,1)", classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
187 |
|
---|
188 | current_classifier_node_OID = selected_grouping_node_OID + ".";
|
---|
189 | current_metadata_value_filter = selected_grouping_node_OID + "*";
|
---|
190 | }
|
---|
191 |
|
---|
192 | // (Optional) Output an hlist instead of a vlist at the top level
|
---|
193 | if (classifier_options["-use_hlist_at_top"] == "1")
|
---|
194 | {
|
---|
195 | text_t selected_hlist_node_OID = output_hlist_classifier_nodes (current_classifier_node_OID, current_metadata_value_filter, "", classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
196 |
|
---|
197 | current_classifier_node_OID = selected_hlist_node_OID + classifier_node_separator;
|
---|
198 | }
|
---|
199 |
|
---|
200 | // args["dcn"] may have been modified by output_hlist_classifier_nodes() above
|
---|
201 | text_t classifier_node_OID = args["dcn"];
|
---|
202 |
|
---|
203 | // If grouping is enabled remove the grouping node bit from the start of the OID
|
---|
204 | text_t classifier_node_OID_sans_grouping = classifier_node_OID;
|
---|
205 | if (classifier_options["-group_by_first_character"] == "1")
|
---|
206 | {
|
---|
207 | text_t::iterator grouping_node_separator = findchar (classifier_node_OID.begin(), classifier_node_OID.end(), '.');
|
---|
208 | if (grouping_node_separator != classifier_node_OID.end())
|
---|
209 | {
|
---|
210 | classifier_node_OID_sans_grouping = substr (grouping_node_separator + 1, classifier_node_OID.end());
|
---|
211 | }
|
---|
212 | else
|
---|
213 | {
|
---|
214 | classifier_node_OID_sans_grouping = "";
|
---|
215 | }
|
---|
216 | }
|
---|
217 |
|
---|
218 | // Split the classifier node OID into its hierarchical parts, then remove any we've already dealt with (HLists)
|
---|
219 | text_tlist classifier_node_OID_parts_remaining;
|
---|
220 | splitword (classifier_node_OID_sans_grouping.begin(), classifier_node_OID_sans_grouping.end(), classifier_node_separator, classifier_node_OID_parts_remaining);
|
---|
221 | if (classifier_options["-use_hlist_at_top"] == "1")
|
---|
222 | {
|
---|
223 | classifier_node_OID_parts_remaining.pop_front();
|
---|
224 | }
|
---|
225 |
|
---|
226 | text_t classifier_node_metadata_value = classifier_node_OID_sans_grouping;
|
---|
227 | text_t classifier_node_metadata_value_filter = classifier_node_metadata_value + classifier_node_separator + "*";
|
---|
228 |
|
---|
229 | // If "-split_greenstone_dates" is on the metadata value and filter doesn't quite match the OID -- fix this
|
---|
230 | if (classifier_options["-split_greenstone_dates"] == "1")
|
---|
231 | {
|
---|
232 | classifier_node_metadata_value.replace (classifier_node_separator, "");
|
---|
233 | classifier_node_metadata_value_filter = classifier_node_metadata_value;
|
---|
234 | if (classifier_node_metadata_value_filter.size() == 8)
|
---|
235 | {
|
---|
236 | classifier_node_metadata_value_filter += classifier_node_separator;
|
---|
237 | }
|
---|
238 | classifier_node_metadata_value_filter += "*";
|
---|
239 | }
|
---|
240 |
|
---|
241 | // Simple case at the top level: just output the child classifier nodes
|
---|
242 | if (classifier_node_metadata_value == "")
|
---|
243 | {
|
---|
244 | output_child_classifier_nodes (current_classifier_node_OID, "", current_metadata_value_filter, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
245 | }
|
---|
246 |
|
---|
247 | // More complex case below the top level
|
---|
248 | else
|
---|
249 | {
|
---|
250 | // Output the parent classifier nodes and the current classifier node
|
---|
251 | output_upper_classifier_nodes (current_classifier_node_OID, classifier_node_OID_parts_remaining, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
252 |
|
---|
253 | // Output the child classifier nodes
|
---|
254 | current_classifier_node_OID = classifier_node_OID + classifier_node_separator;
|
---|
255 | output_child_classifier_nodes (current_classifier_node_OID, classifier_node_metadata_value, classifier_node_metadata_value_filter, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
256 |
|
---|
257 | // Get the document nodes at this level
|
---|
258 | text_t metadata_elements = classifier_options["metadata_elements"];
|
---|
259 | text_t sort_documents_by = classifier_options["-sort_documents_by"];
|
---|
260 | FilterResponse_t documents_response;
|
---|
261 | get_documents_with_metadata_value (metadata_elements, classifier_node_metadata_value, sort_documents_by, args["c"], collectproto, documents_response, logout);
|
---|
262 |
|
---|
263 | // Display the document nodes
|
---|
264 | display_document_nodes (documents_response, classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
265 | }
|
---|
266 |
|
---|
267 | // Output the "<ID>Footer" format statement if there is one
|
---|
268 | text_t classifier_footer_format_statement = "";
|
---|
269 | get_formatstring (arg_dcl + "Footer", cinfo->format, classifier_footer_format_statement);
|
---|
270 | textout << outconvert << disp << classifier_footer_format_statement << "\n";
|
---|
271 |
|
---|
272 | textout << outconvert << disp << "_dynamicclassifier:footer_\n";
|
---|
273 | return true;
|
---|
274 | }
|
---|
275 |
|
---|
276 |
|
---|
277 | text_tmap dynamicclassifieraction::parse_classifier_options (text_t classifier_specification, cgiargsclass &args)
|
---|
278 | {
|
---|
279 | text_tmap classifier_options;
|
---|
280 |
|
---|
281 | // Default values
|
---|
282 | classifier_options["-split_using_hierarchy_separator"] = "|";
|
---|
283 |
|
---|
284 | // Split the classifier specification string by spaces
|
---|
285 | text_tlist classifier_specification_parts;
|
---|
286 | splitchar (classifier_specification.begin(), classifier_specification.end(), ' ', classifier_specification_parts);
|
---|
287 |
|
---|
288 | // The metadata element(s) to classify by should be the first value
|
---|
289 | classifier_options["metadata_elements"] = classifier_specification_parts.front();
|
---|
290 | classifier_specification_parts.pop_front();
|
---|
291 |
|
---|
292 | // Parse options from the remainder of the classifier specification
|
---|
293 | while (!classifier_specification_parts.empty())
|
---|
294 | {
|
---|
295 | // Parse the option name
|
---|
296 | text_t classifier_option_name = classifier_specification_parts.front();
|
---|
297 | classifier_specification_parts.pop_front();
|
---|
298 |
|
---|
299 | // Check if the option has a value (it may just be a flag, in which case we use "1" as the value)
|
---|
300 | text_t classifier_option_value = "1";
|
---|
301 | if (!classifier_specification_parts.empty() && !starts_with(classifier_specification_parts.front(), "-"))
|
---|
302 | {
|
---|
303 | classifier_option_value = classifier_specification_parts.front();
|
---|
304 | classifier_specification_parts.pop_front();
|
---|
305 | }
|
---|
306 |
|
---|
307 | // Record the option
|
---|
308 | classifier_options[classifier_option_name] = classifier_option_value;
|
---|
309 | }
|
---|
310 |
|
---|
311 | return classifier_options;
|
---|
312 | }
|
---|
313 |
|
---|
314 |
|
---|
315 | text_t dynamicclassifieraction::output_hlist_classifier_nodes (text_t parent_classifier_node_OID,
|
---|
316 | text_t metadata_value_filter,
|
---|
317 | text_t metadata_value_grouping_expression,
|
---|
318 | text_tmap classifier_options, cgiargsclass &args,
|
---|
319 | recptproto *collectproto, browsermapclass *browsers,
|
---|
320 | displayclass &disp, outconvertclass &outconvert,
|
---|
321 | ostream &textout, ostream &logout)
|
---|
322 | {
|
---|
323 | // Get all the metadata values for the specified element(s) that match the filter
|
---|
324 | text_t metadata_elements = classifier_options["metadata_elements"];
|
---|
325 | FilterResponse_t metadata_values_response;
|
---|
326 | bool request_success = get_metadata_values (metadata_elements, metadata_value_filter, metadata_value_grouping_expression, args["c"], collectproto, metadata_values_response, logout);
|
---|
327 |
|
---|
328 | // If the request failed then it's probably because the collection isn't using an SQL infodbtype
|
---|
329 | if (request_success == false)
|
---|
330 | {
|
---|
331 | textout << outconvert << disp << "Error: Dynamic classifier functionality is not available. Please check you are using an SQL infodbtype and the collection has been rebuilt.\n";
|
---|
332 | return "";
|
---|
333 | }
|
---|
334 |
|
---|
335 | // Check some metadata values were returned
|
---|
336 | if (metadata_values_response.docInfo.empty())
|
---|
337 | {
|
---|
338 | return "";
|
---|
339 | }
|
---|
340 |
|
---|
341 | // After processing any hierarchical metadata values we're left with the hlist classifer nodes
|
---|
342 | map<text_t, int, lttext_t> hlist_classifier_nodes;
|
---|
343 | ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values_response.docInfo.begin();
|
---|
344 | while (metadata_value_iterator != metadata_values_response.docInfo.end())
|
---|
345 | {
|
---|
346 | text_t hierarchical_metadata_value = split_metadata_value ((*metadata_value_iterator).OID, classifier_options);
|
---|
347 |
|
---|
348 | // Assume for now we're always at the top
|
---|
349 | text_t hlist_metadata_value = hierarchical_metadata_value;
|
---|
350 |
|
---|
351 | // Determine the label for the hlist classifier node from the metadata value
|
---|
352 | text_tlist hlist_metadata_value_parts;
|
---|
353 | text_t hlist_node_separator = classifier_options["-split_using_hierarchy_separator"];
|
---|
354 | splitword (hlist_metadata_value.begin(), hlist_metadata_value.end(), hlist_node_separator, hlist_metadata_value_parts);
|
---|
355 | text_t hlist_classifier_node_label = hlist_metadata_value_parts.front();
|
---|
356 |
|
---|
357 | // Create a node for this value if we haven't seen it before
|
---|
358 | if (hlist_classifier_nodes.find(hlist_classifier_node_label) == hlist_classifier_nodes.end())
|
---|
359 | {
|
---|
360 | hlist_classifier_nodes[hlist_classifier_node_label] = 0;
|
---|
361 | }
|
---|
362 |
|
---|
363 | // Increment the occurrence count
|
---|
364 | hlist_classifier_nodes[hlist_classifier_node_label] += (*metadata_value_iterator).result_num;
|
---|
365 |
|
---|
366 | metadata_value_iterator++;
|
---|
367 | }
|
---|
368 |
|
---|
369 | // Add the necessary metadata to the hlist classifier nodes
|
---|
370 | text_t selected_hlist_node_OID = "";
|
---|
371 | FilterResponse_t hlist_classifier_nodes_response;
|
---|
372 | map<text_t, int, lttext_t>::iterator hlist_classifier_nodes_iterator = hlist_classifier_nodes.begin();
|
---|
373 | while (hlist_classifier_nodes_iterator != hlist_classifier_nodes.end())
|
---|
374 | {
|
---|
375 | text_t hlist_classifier_node_OID = parent_classifier_node_OID + (*hlist_classifier_nodes_iterator).first;
|
---|
376 |
|
---|
377 | // Is this the hlist node that is currently selected?
|
---|
378 | if (starts_with (args["dcn"], hlist_classifier_node_OID))
|
---|
379 | {
|
---|
380 | selected_hlist_node_OID = hlist_classifier_node_OID;
|
---|
381 | }
|
---|
382 |
|
---|
383 | // Add the necessary metadata required to display the hlist nodes correctly
|
---|
384 | ResultDocInfo_t hlist_classifier_node;
|
---|
385 | hlist_classifier_node.OID = hlist_classifier_node_OID;
|
---|
386 | hlist_classifier_node.metadata["doctype"].values.push_back ("classify");
|
---|
387 | hlist_classifier_node.metadata["haschildren"].values.push_back ("1");
|
---|
388 | hlist_classifier_node.metadata["numleafdocs"].values.push_back ("?"); // We can't determine this without more database requests
|
---|
389 | hlist_classifier_node.metadata["Title"].values.push_back ((*hlist_classifier_nodes_iterator).first);
|
---|
390 | hlist_classifier_nodes_response.docInfo.push_back (hlist_classifier_node);
|
---|
391 |
|
---|
392 | hlist_classifier_nodes_iterator++;
|
---|
393 | }
|
---|
394 |
|
---|
395 | // Automatically select the first hlist node if necessary
|
---|
396 | if (selected_hlist_node_OID == "")
|
---|
397 | {
|
---|
398 | selected_hlist_node_OID = (*hlist_classifier_nodes_response.docInfo.begin()).OID;
|
---|
399 |
|
---|
400 | // Don't really like messing with this here, but it needs to be done before display_classifier_nodes() below
|
---|
401 | if (starts_with (parent_classifier_node_OID, args["dcn"]))
|
---|
402 | {
|
---|
403 | args["dcn"] = selected_hlist_node_OID;
|
---|
404 | }
|
---|
405 | }
|
---|
406 |
|
---|
407 | // Display the hlist nodes
|
---|
408 | display_classifier_nodes (hlist_classifier_nodes_response, "HList", 0, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
409 |
|
---|
410 | return selected_hlist_node_OID;
|
---|
411 | }
|
---|
412 |
|
---|
413 |
|
---|
414 | void dynamicclassifieraction::output_upper_classifier_nodes (text_t root_classifier_node_OID,
|
---|
415 | text_tlist upper_classifier_node_labels,
|
---|
416 | int& classifier_node_indent,
|
---|
417 | text_tmap classifier_options, cgiargsclass &args,
|
---|
418 | recptproto *collectproto, browsermapclass *browsers,
|
---|
419 | displayclass &disp, outconvertclass &outconvert,
|
---|
420 | ostream &textout, ostream &logout)
|
---|
421 | {
|
---|
422 | // Display the upper classifier nodes
|
---|
423 | text_t upper_classifier_node_OID = root_classifier_node_OID;
|
---|
424 | text_tlist::iterator upper_classifier_node_labels_iterator = upper_classifier_node_labels.begin();
|
---|
425 | while (upper_classifier_node_labels_iterator != upper_classifier_node_labels.end())
|
---|
426 | {
|
---|
427 | upper_classifier_node_OID += *upper_classifier_node_labels_iterator;
|
---|
428 |
|
---|
429 | ResultDocInfo_t upper_classifier_node;
|
---|
430 | upper_classifier_node.OID = upper_classifier_node_OID;
|
---|
431 | upper_classifier_node.metadata["doctype"].values.push_back ("classify");
|
---|
432 | upper_classifier_node.metadata["haschildren"].values.push_back ("1");
|
---|
433 | upper_classifier_node.metadata["numleafdocs"].values.push_back ("?"); // We can't determine this without more database requests
|
---|
434 | upper_classifier_node.metadata["Title"].values.push_back (*upper_classifier_node_labels_iterator);
|
---|
435 |
|
---|
436 | FilterResponse_t upper_classifier_node_response;
|
---|
437 | upper_classifier_node_response.docInfo.push_back(upper_classifier_node);
|
---|
438 | display_classifier_nodes (upper_classifier_node_response, "VList", classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
439 | classifier_node_indent++;
|
---|
440 |
|
---|
441 | upper_classifier_node_OID += classifier_options["-split_using_hierarchy_separator"];
|
---|
442 | upper_classifier_node_labels_iterator++;
|
---|
443 | }
|
---|
444 | }
|
---|
445 |
|
---|
446 |
|
---|
447 | void dynamicclassifieraction::output_child_classifier_nodes (text_t classifier_node_OID,
|
---|
448 | text_t classifier_node_metadata_value,
|
---|
449 | text_t metadata_value_filter,
|
---|
450 | int& classifier_node_indent,
|
---|
451 | text_tmap classifier_options, cgiargsclass &args,
|
---|
452 | recptproto *collectproto, browsermapclass *browsers,
|
---|
453 | displayclass &disp, outconvertclass &outconvert,
|
---|
454 | ostream &textout, ostream &logout)
|
---|
455 | {
|
---|
456 | // Get all the metadata values for the specified element(s) that match the filter
|
---|
457 | text_t metadata_elements = classifier_options["metadata_elements"];
|
---|
458 | FilterResponse_t metadata_values_response;
|
---|
459 | bool request_success = get_metadata_values (metadata_elements, metadata_value_filter, "", args["c"], collectproto, metadata_values_response, logout);
|
---|
460 |
|
---|
461 | // If the request failed then it's probably because the collection isn't using an SQL infodbtype
|
---|
462 | if (request_success == false)
|
---|
463 | {
|
---|
464 | textout << outconvert << disp << "Error: Dynamic classifier functionality is not available. Please check you are using an SQL infodbtype and the collection has been rebuilt.\n";
|
---|
465 | return;
|
---|
466 | }
|
---|
467 |
|
---|
468 | // After processing any hierarchical metadata values we're left with the child classifer nodes
|
---|
469 | map<text_t, int, lttext_t> child_classifier_nodes;
|
---|
470 | ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values_response.docInfo.begin();
|
---|
471 | while (metadata_value_iterator != metadata_values_response.docInfo.end())
|
---|
472 | {
|
---|
473 | text_t hierarchical_metadata_value = split_metadata_value ((*metadata_value_iterator).OID, classifier_options);
|
---|
474 | text_t classifier_node_hierarchical_metadata_value = split_metadata_value (classifier_node_metadata_value, classifier_options);
|
---|
475 |
|
---|
476 | // We need to remove the current position from the metadata value to leave the child metadata value
|
---|
477 | text_t child_metadata_value = hierarchical_metadata_value;
|
---|
478 | text_t child_node_separator = classifier_options["-split_using_hierarchy_separator"];
|
---|
479 | if (starts_with(hierarchical_metadata_value, classifier_node_hierarchical_metadata_value + child_node_separator))
|
---|
480 | {
|
---|
481 | child_metadata_value = substr(hierarchical_metadata_value.begin() + (classifier_node_hierarchical_metadata_value + child_node_separator).size(), hierarchical_metadata_value.end());
|
---|
482 | }
|
---|
483 |
|
---|
484 | // Determine the label for the child classifier node from the metadata value
|
---|
485 | text_tlist child_metadata_value_parts;
|
---|
486 | splitword (child_metadata_value.begin(), child_metadata_value.end(), child_node_separator, child_metadata_value_parts);
|
---|
487 | text_t child_classifier_node_label = child_metadata_value_parts.front();
|
---|
488 |
|
---|
489 | // Create a node for this value if we haven't seen it before
|
---|
490 | if (child_classifier_nodes.find(child_classifier_node_label) == child_classifier_nodes.end())
|
---|
491 | {
|
---|
492 | child_classifier_nodes[child_classifier_node_label] = 0;
|
---|
493 | }
|
---|
494 |
|
---|
495 | // Increment the occurrence count
|
---|
496 | child_classifier_nodes[child_classifier_node_label] += (*metadata_value_iterator).result_num;
|
---|
497 |
|
---|
498 | metadata_value_iterator++;
|
---|
499 | }
|
---|
500 |
|
---|
501 | // Add the necessary metadata to the child classifier nodes
|
---|
502 | FilterResponse_t child_classifier_nodes_response;
|
---|
503 | map<text_t, int, lttext_t>::iterator child_classifier_nodes_iterator = child_classifier_nodes.begin();
|
---|
504 | while (child_classifier_nodes_iterator != child_classifier_nodes.end())
|
---|
505 | {
|
---|
506 | text_t child_classifier_node_OID = classifier_node_OID + (*child_classifier_nodes_iterator).first;
|
---|
507 |
|
---|
508 | ResultDocInfo_t child_classifier_node;
|
---|
509 | child_classifier_node.OID = child_classifier_node_OID;
|
---|
510 | child_classifier_node.metadata["doctype"].values.push_back ("classify");
|
---|
511 | child_classifier_node.metadata["haschildren"].values.push_back ("1");
|
---|
512 | child_classifier_node.metadata["numleafdocs"].values.push_back ((*child_classifier_nodes_iterator).second);
|
---|
513 | child_classifier_node.metadata["Title"].values.push_back ((*child_classifier_nodes_iterator).first);
|
---|
514 | child_classifier_nodes_response.docInfo.push_back (child_classifier_node);
|
---|
515 |
|
---|
516 | child_classifier_nodes_iterator++;
|
---|
517 | }
|
---|
518 |
|
---|
519 | // Display the child classifier nodes
|
---|
520 | display_classifier_nodes (child_classifier_nodes_response, "VList", classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
521 | }
|
---|
522 |
|
---|
523 |
|
---|
524 | text_t dynamicclassifieraction::split_metadata_value (text_t metadata_value, text_tmap classifier_options)
|
---|
525 | {
|
---|
526 | text_t hierarchical_metadata_value = metadata_value;
|
---|
527 | text_t hierarchy_separator = classifier_options["-split_using_hierarchy_separator"];
|
---|
528 |
|
---|
529 | // Add hierarchy separators into Greenstone date values (e.g. YYYYMMDD -> YYYY|MM|DD)
|
---|
530 | if (classifier_options["-split_greenstone_dates"] == "1")
|
---|
531 | {
|
---|
532 | if (metadata_value.size() == 4 || metadata_value.size() == 6 || metadata_value.size() == 8)
|
---|
533 | {
|
---|
534 | // Add year
|
---|
535 | hierarchical_metadata_value = substr (metadata_value.begin(), metadata_value.begin() + 4);
|
---|
536 | if (metadata_value.size() == 6 || metadata_value.size() == 8)
|
---|
537 | {
|
---|
538 | // Add month
|
---|
539 | hierarchical_metadata_value += hierarchy_separator;
|
---|
540 | hierarchical_metadata_value += substr (metadata_value.begin() + 4, metadata_value.begin() + 6);
|
---|
541 | if (metadata_value.size() == 8)
|
---|
542 | {
|
---|
543 | // Add day
|
---|
544 | hierarchical_metadata_value += hierarchy_separator;
|
---|
545 | hierarchical_metadata_value += substr (metadata_value.begin() + 6, metadata_value.begin() + 8);
|
---|
546 | }
|
---|
547 | }
|
---|
548 | }
|
---|
549 | }
|
---|
550 |
|
---|
551 | return hierarchical_metadata_value;
|
---|
552 | }
|
---|
553 |
|
---|
554 |
|
---|
555 | void dynamicclassifieraction::display_classifier_nodes (FilterResponse_t classifier_nodes_response,
|
---|
556 | text_t classifier_nodes_type,
|
---|
557 | int classifier_nodes_indent,
|
---|
558 | cgiargsclass &args, recptproto *collectproto,
|
---|
559 | browsermapclass *browsers, displayclass &disp,
|
---|
560 | outconvertclass &outconvert, ostream &textout,
|
---|
561 | ostream &logout)
|
---|
562 | {
|
---|
563 | // Check there are some classifier nodes to display
|
---|
564 | if (classifier_nodes_response.docInfo.empty()) return;
|
---|
565 |
|
---|
566 | // Get the format statement for this classifier if there is one, or use the browser's default otherwise
|
---|
567 | text_t formatstring;
|
---|
568 | browserclass *bptr = browsers->getbrowser (classifier_nodes_type);
|
---|
569 | ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
|
---|
570 | if (!get_formatstring (args["dcl"], classifier_nodes_type, cinfo->format, formatstring))
|
---|
571 | {
|
---|
572 | formatstring = bptr->get_default_formatstring();
|
---|
573 | }
|
---|
574 | format_t *formatlistptr = new format_t();
|
---|
575 | text_tset metadata;
|
---|
576 | bool getParents = false;
|
---|
577 | parse_formatstring (formatstring, formatlistptr, metadata, getParents);
|
---|
578 | bool use_table = is_table_content (formatlistptr);
|
---|
579 |
|
---|
580 | // Display the classifier nodes
|
---|
581 | bptr->output_section_group (classifier_nodes_response, args, args["c"], classifier_nodes_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
|
---|
582 | }
|
---|
583 |
|
---|
584 |
|
---|
585 | void dynamicclassifieraction::display_document_nodes (FilterResponse_t documents_response,
|
---|
586 | int document_nodes_indent,
|
---|
587 | cgiargsclass &args, recptproto *collectproto,
|
---|
588 | browsermapclass *browsers, displayclass &disp,
|
---|
589 | outconvertclass &outconvert, ostream &textout,
|
---|
590 | ostream &logout)
|
---|
591 | {
|
---|
592 | // Check there are some documents to display
|
---|
593 | if (documents_response.docInfo.empty()) return;
|
---|
594 |
|
---|
595 | // Get the format statement for the document nodes if there is one, or use the browser's default otherwise
|
---|
596 | text_t formatstring;
|
---|
597 | browserclass *bptr = browsers->getbrowser ("VList");
|
---|
598 | ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
|
---|
599 | if (!get_formatstring (args["dcl"], "DocumentNodes", cinfo->format, formatstring))
|
---|
600 | {
|
---|
601 | if (!get_formatstring (args["dcl"], "VList", cinfo->format, formatstring))
|
---|
602 | {
|
---|
603 | formatstring = bptr->get_default_formatstring();
|
---|
604 | }
|
---|
605 | }
|
---|
606 | format_t *formatlistptr = new format_t();
|
---|
607 | text_tset metadata;
|
---|
608 | bool getParents = false;
|
---|
609 | parse_formatstring (formatstring, formatlistptr, metadata, getParents);
|
---|
610 | bool use_table = is_table_content (formatlistptr);
|
---|
611 |
|
---|
612 | // Request the necessary metadata for displaying the documents
|
---|
613 | text_tarray document_OIDs;
|
---|
614 | ResultDocInfo_tarray::iterator document_iterator = documents_response.docInfo.begin();
|
---|
615 | while (document_iterator != documents_response.docInfo.end())
|
---|
616 | {
|
---|
617 | document_OIDs.push_back ((*document_iterator).OID);
|
---|
618 | document_iterator++;
|
---|
619 | }
|
---|
620 | FilterResponse_t document_nodes_response;
|
---|
621 | get_info (document_OIDs, args["c"], args["l"], metadata, getParents, collectproto, document_nodes_response, logout);
|
---|
622 |
|
---|
623 | // Display the document nodes
|
---|
624 | bptr->output_section_group (document_nodes_response, args, args["c"], document_nodes_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
|
---|
625 | }
|
---|