1 | /**********************************************************************
|
---|
2 | *
|
---|
3 | * dynamicclassifieraction.cpp --
|
---|
4 | * Copyright (C) 2008 DL Consulting Ltd
|
---|
5 | *
|
---|
6 | * A component of the Greenstone digital library software
|
---|
7 | * from the New Zealand Digital Library Project at the
|
---|
8 | * University of Waikato, New Zealand.
|
---|
9 | *
|
---|
10 | * This program is free software; you can redistribute it and/or modify
|
---|
11 | * it under the terms of the GNU General Public License as published by
|
---|
12 | * the Free Software Foundation; either version 2 of the License, or
|
---|
13 | * (at your option) any later version.
|
---|
14 | *
|
---|
15 | * This program is distributed in the hope that it will be useful,
|
---|
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
18 | * GNU General Public License for more details.
|
---|
19 | *
|
---|
20 | * You should have received a copy of the GNU General Public License
|
---|
21 | * along with this program; if not, write to the Free Software
|
---|
22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
23 | *
|
---|
24 | *********************************************************************/
|
---|
25 |
|
---|
26 | #include "dynamicclassifieraction.h"
|
---|
27 | #include "recptprototools.h"
|
---|
28 |
|
---|
29 |
|
---|
30 | dynamicclassifieraction::dynamicclassifieraction ()
|
---|
31 | {
|
---|
32 | recpt = NULL;
|
---|
33 |
|
---|
34 | cgiarginfo arg_ainfo;
|
---|
35 | arg_ainfo.shortname = "dcl";
|
---|
36 | arg_ainfo.longname = "dynamic classifier ID";
|
---|
37 | arg_ainfo.multiplechar = true;
|
---|
38 | arg_ainfo.defaultstatus = cgiarginfo::weak;
|
---|
39 | arg_ainfo.argdefault = "";
|
---|
40 | arg_ainfo.savedarginfo = cgiarginfo::must;
|
---|
41 | argsinfo.addarginfo (NULL, arg_ainfo);
|
---|
42 |
|
---|
43 | arg_ainfo.shortname = "dcn";
|
---|
44 | arg_ainfo.longname = "dynamic classifier node";
|
---|
45 | arg_ainfo.multiplechar = true;
|
---|
46 | arg_ainfo.defaultstatus = cgiarginfo::weak;
|
---|
47 | arg_ainfo.argdefault = "";
|
---|
48 | arg_ainfo.savedarginfo = cgiarginfo::must;
|
---|
49 | argsinfo.addarginfo (NULL, arg_ainfo);
|
---|
50 | }
|
---|
51 |
|
---|
52 |
|
---|
53 | dynamicclassifieraction::~dynamicclassifieraction()
|
---|
54 | {
|
---|
55 | }
|
---|
56 |
|
---|
57 |
|
---|
58 | bool dynamicclassifieraction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args,
|
---|
59 | recptprotolistclass *protos, ostream &logout)
|
---|
60 | {
|
---|
61 | return true;
|
---|
62 | }
|
---|
63 |
|
---|
64 |
|
---|
65 | void dynamicclassifieraction::get_cgihead_info (cgiargsclass &args, recptprotolistclass *protos,
|
---|
66 | response_t &response,text_t &response_data,
|
---|
67 | ostream &logout)
|
---|
68 | {
|
---|
69 | response = content;
|
---|
70 | response_data = "text/html";
|
---|
71 | }
|
---|
72 |
|
---|
73 |
|
---|
74 | // define all the macros which might be used by other actions to produce pages.
|
---|
75 | void dynamicclassifieraction::define_external_macros (displayclass &disp, cgiargsclass &args,
|
---|
76 | recptprotolistclass *protos, ostream &logout)
|
---|
77 | {
|
---|
78 | // Some pages (e.g. the library home page) are not collection-specific
|
---|
79 | if (args["c"].empty())
|
---|
80 | {
|
---|
81 | return;
|
---|
82 | }
|
---|
83 |
|
---|
84 | // A valid collection server is vital
|
---|
85 | recptproto *collectproto = protos->getrecptproto (args["c"], logout);
|
---|
86 | if (collectproto == NULL)
|
---|
87 | {
|
---|
88 | logout << "dynamicclassifieraction::define_external_macros called with NULL collectproto\n";
|
---|
89 | return;
|
---|
90 | }
|
---|
91 |
|
---|
92 | // Define _dynamicclassifiernavbarentries_ to add buttons to the navigation bar for the dynamic classifiers
|
---|
93 | text_t navigation_bar_entries = "";
|
---|
94 | ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
|
---|
95 | text_tmap::iterator dynamic_classifier_iterator = cinfo->dynamic_classifiers.begin();
|
---|
96 | while (dynamic_classifier_iterator != cinfo->dynamic_classifiers.end())
|
---|
97 | {
|
---|
98 | text_t dynamic_classifier_id = (*dynamic_classifier_iterator).first;
|
---|
99 | navigation_bar_entries += "_navbarspacer_";
|
---|
100 | navigation_bar_entries += "_navtab_(_gwcgi_?c=" + args["c"] + "&a=dc&dcl=" + dynamic_classifier_id + "," + dynamic_classifier_id;
|
---|
101 | if (args["a"] == "dc" && args["dcl"] == dynamic_classifier_id)
|
---|
102 | {
|
---|
103 | navigation_bar_entries += ",selected";
|
---|
104 | }
|
---|
105 | navigation_bar_entries += ")";
|
---|
106 | dynamic_classifier_iterator++;
|
---|
107 | }
|
---|
108 |
|
---|
109 | disp.setmacro("dynamicclassifiernavbarentries", displayclass::defaultpackage, navigation_bar_entries);
|
---|
110 | }
|
---|
111 |
|
---|
112 |
|
---|
113 | // define all the macros which are related to pages generated
|
---|
114 | // by this action. we also load up the formatinfo structure
|
---|
115 | // here (it's used in do_action as well as here)
|
---|
116 | void dynamicclassifieraction::define_internal_macros (displayclass &disp, cgiargsclass &args,
|
---|
117 | recptprotolistclass *protos, ostream &logout)
|
---|
118 | {
|
---|
119 | // define_internal_macros sets the following macros:
|
---|
120 | }
|
---|
121 |
|
---|
122 |
|
---|
123 | bool dynamicclassifieraction::do_action(cgiargsclass &args, recptprotolistclass *protos,
|
---|
124 | browsermapclass *browsers, displayclass &disp,
|
---|
125 | outconvertclass &outconvert, ostream &textout,
|
---|
126 | ostream &logout)
|
---|
127 | {
|
---|
128 | // A valid collection server is vital
|
---|
129 | recptproto *collectproto = protos->getrecptproto (args["c"], logout);
|
---|
130 | if (collectproto == NULL)
|
---|
131 | {
|
---|
132 | logout << "dynamicclassifieraction::do_action called with NULL collectproto\n";
|
---|
133 | return false;
|
---|
134 | }
|
---|
135 |
|
---|
136 | textout << outconvert << disp << "_dynamicclassifier:header_\n";
|
---|
137 | textout << outconvert << disp << "_dynamicclassifier:content_\n";
|
---|
138 |
|
---|
139 | // Check a dynamic classifier ID has been specified
|
---|
140 | text_t arg_dcl = args["dcl"];
|
---|
141 | if (arg_dcl.empty())
|
---|
142 | {
|
---|
143 | textout << outconvert << disp << "Error: Missing dcl argument.\n";
|
---|
144 | textout << outconvert << disp << "_dynamicclassifier:footer_\n";
|
---|
145 | return true;
|
---|
146 | }
|
---|
147 |
|
---|
148 | // Check the dynamic classifier ID is valid (ie. there is an entry in the collect.cfg file for it)
|
---|
149 | ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
|
---|
150 | if (cinfo->dynamic_classifiers.find(arg_dcl) == cinfo->dynamic_classifiers.end())
|
---|
151 | {
|
---|
152 | textout << outconvert << disp << "Error: Invalid dcl value \"" << arg_dcl << "\".\n";
|
---|
153 | textout << outconvert << disp << "_dynamicclassifier:footer_\n";
|
---|
154 | return true;
|
---|
155 | }
|
---|
156 |
|
---|
157 | // Parse the classifier options from the specification
|
---|
158 | text_t classifier_specification = cinfo->dynamic_classifiers[arg_dcl];
|
---|
159 | text_tmap classifier_options = parse_classifier_options (classifier_specification, args);
|
---|
160 |
|
---|
161 | // Output the "<ID>Header" format statement if there is one
|
---|
162 | text_t classifier_header_format_statement = "";
|
---|
163 | get_formatstring (arg_dcl + "Header", cinfo->format, classifier_header_format_statement);
|
---|
164 | textout << outconvert << disp << classifier_header_format_statement << "\n";
|
---|
165 |
|
---|
166 | // Resolve any ".pr" bits at the end of the "dcn" argument
|
---|
167 | text_t classifier_node_separator = classifier_options["-split_using_hierarchy_separator"];
|
---|
168 | if (ends_with (args["dcn"], ".pr"))
|
---|
169 | {
|
---|
170 | // Change the "dcn" argument to be the OID of the parent of the specified classifier node
|
---|
171 | text_tlist args_dcn_parts;
|
---|
172 | splitword (args["dcn"].begin(), args["dcn"].end(), classifier_node_separator, args_dcn_parts);
|
---|
173 | args_dcn_parts.pop_back(); // Remove the last part
|
---|
174 | joinchar (args_dcn_parts, classifier_node_separator, args["dcn"]);
|
---|
175 | }
|
---|
176 |
|
---|
177 | // Prepare to output the dynamic classifier
|
---|
178 | text_t current_classifier_node_OID = "";
|
---|
179 | text_t current_metadata_value_filter = "";
|
---|
180 | int classifier_node_indent = 0;
|
---|
181 |
|
---|
182 | // (Optional) Output an hlist to group the classifier nodes into buckets based on the first character
|
---|
183 | if (classifier_options["-group_by_first_character"] == "1")
|
---|
184 | {
|
---|
185 | text_t selected_grouping_node_OID = output_hlist_classifier_nodes (current_classifier_node_OID, current_metadata_value_filter, "substr(value,1,1)", classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
186 |
|
---|
187 | current_classifier_node_OID = selected_grouping_node_OID + ".";
|
---|
188 | current_metadata_value_filter = selected_grouping_node_OID + "*";
|
---|
189 | }
|
---|
190 |
|
---|
191 | // (Optional) Output an hlist instead of a vlist at the top level
|
---|
192 | if (classifier_options["-use_hlist_at_top"] == "1")
|
---|
193 | {
|
---|
194 | text_t selected_hlist_node_OID = output_hlist_classifier_nodes (current_classifier_node_OID, current_metadata_value_filter, "", classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
195 |
|
---|
196 | current_classifier_node_OID = selected_hlist_node_OID + classifier_node_separator;
|
---|
197 | }
|
---|
198 |
|
---|
199 | // args["dcn"] may have been modified by output_hlist_classifier_nodes() above
|
---|
200 | text_t classifier_node_OID = args["dcn"];
|
---|
201 |
|
---|
202 | // If grouping is enabled remove the grouping node bit from the start of the OID
|
---|
203 | text_t classifier_node_OID_sans_grouping = classifier_node_OID;
|
---|
204 | if (classifier_options["-group_by_first_character"] == "1")
|
---|
205 | {
|
---|
206 | text_t::iterator grouping_node_separator = findchar (classifier_node_OID.begin(), classifier_node_OID.end(), '.');
|
---|
207 | if (grouping_node_separator != classifier_node_OID.end())
|
---|
208 | {
|
---|
209 | classifier_node_OID_sans_grouping = substr (grouping_node_separator + 1, classifier_node_OID.end());
|
---|
210 | }
|
---|
211 | else
|
---|
212 | {
|
---|
213 | classifier_node_OID_sans_grouping = "";
|
---|
214 | }
|
---|
215 | }
|
---|
216 |
|
---|
217 | // Split the classifier node OID into its hierarchical parts, then remove any we've already dealt with (HLists)
|
---|
218 | text_tlist classifier_node_OID_parts_remaining;
|
---|
219 | splitword (classifier_node_OID_sans_grouping.begin(), classifier_node_OID_sans_grouping.end(), classifier_node_separator, classifier_node_OID_parts_remaining);
|
---|
220 | if (classifier_options["-use_hlist_at_top"] == "1")
|
---|
221 | {
|
---|
222 | classifier_node_OID_parts_remaining.pop_front();
|
---|
223 | }
|
---|
224 |
|
---|
225 | text_t classifier_node_metadata_value = classifier_node_OID_sans_grouping;
|
---|
226 | text_t classifier_node_metadata_value_filter = classifier_node_metadata_value + classifier_node_separator + "*";
|
---|
227 |
|
---|
228 | // If "-split_greenstone_dates" is on the metadata value and filter doesn't quite match the OID -- fix this
|
---|
229 | if (classifier_options["-split_greenstone_dates"] == "1")
|
---|
230 | {
|
---|
231 | classifier_node_metadata_value.replace (classifier_node_separator, "");
|
---|
232 | classifier_node_metadata_value_filter = classifier_node_metadata_value;
|
---|
233 | if (classifier_node_metadata_value_filter.size() == 8)
|
---|
234 | {
|
---|
235 | classifier_node_metadata_value_filter += classifier_node_separator;
|
---|
236 | }
|
---|
237 | classifier_node_metadata_value_filter += "*";
|
---|
238 | }
|
---|
239 |
|
---|
240 | // Simple case at the top level: just output the child classifier nodes
|
---|
241 | if (classifier_node_metadata_value == "")
|
---|
242 | {
|
---|
243 | output_child_classifier_nodes (current_classifier_node_OID, "", current_metadata_value_filter, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
244 | }
|
---|
245 |
|
---|
246 | // More complex case below the top level
|
---|
247 | else
|
---|
248 | {
|
---|
249 | // Output the parent classifier nodes and the current classifier node
|
---|
250 | output_upper_classifier_nodes (current_classifier_node_OID, classifier_node_OID_parts_remaining, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
251 |
|
---|
252 | // Output the child classifier nodes
|
---|
253 | current_classifier_node_OID = classifier_node_OID + classifier_node_separator;
|
---|
254 | output_child_classifier_nodes (current_classifier_node_OID, classifier_node_metadata_value, classifier_node_metadata_value_filter, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
255 |
|
---|
256 | // Get the document nodes at this level
|
---|
257 | text_t metadata_elements = classifier_options["metadata_elements"];
|
---|
258 | text_t sort_documents_by = classifier_options["-sort_documents_by"];
|
---|
259 | FilterResponse_t documents_response;
|
---|
260 | get_documents_with_metadata_value (metadata_elements, classifier_node_metadata_value, sort_documents_by, args["c"], collectproto, documents_response, logout);
|
---|
261 |
|
---|
262 | // Display the document nodes
|
---|
263 | display_document_nodes (documents_response, classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
264 | }
|
---|
265 |
|
---|
266 | // Output the "<ID>Footer" format statement if there is one
|
---|
267 | text_t classifier_footer_format_statement = "";
|
---|
268 | get_formatstring (arg_dcl + "Footer", cinfo->format, classifier_footer_format_statement);
|
---|
269 | textout << outconvert << disp << classifier_footer_format_statement << "\n";
|
---|
270 |
|
---|
271 | textout << outconvert << disp << "_dynamicclassifier:footer_\n";
|
---|
272 | return true;
|
---|
273 | }
|
---|
274 |
|
---|
275 |
|
---|
276 | text_tmap dynamicclassifieraction::parse_classifier_options (text_t classifier_specification, cgiargsclass &args)
|
---|
277 | {
|
---|
278 | text_tmap classifier_options;
|
---|
279 |
|
---|
280 | // Default values
|
---|
281 | classifier_options["-split_using_hierarchy_separator"] = "|";
|
---|
282 |
|
---|
283 | // Split the classifier specification string by spaces
|
---|
284 | text_tlist classifier_specification_parts;
|
---|
285 | splitchar (classifier_specification.begin(), classifier_specification.end(), ' ', classifier_specification_parts);
|
---|
286 |
|
---|
287 | // The metadata element(s) to classify by should be the first value
|
---|
288 | classifier_options["metadata_elements"] = classifier_specification_parts.front();
|
---|
289 | classifier_specification_parts.pop_front();
|
---|
290 |
|
---|
291 | // Parse options from the remainder of the classifier specification
|
---|
292 | while (!classifier_specification_parts.empty())
|
---|
293 | {
|
---|
294 | // Parse the option name
|
---|
295 | text_t classifier_option_name = classifier_specification_parts.front();
|
---|
296 | classifier_specification_parts.pop_front();
|
---|
297 |
|
---|
298 | // Check if the option has a value (it may just be a flag, in which case we use "1" as the value)
|
---|
299 | text_t classifier_option_value = "1";
|
---|
300 | if (!classifier_specification_parts.empty() && !starts_with(classifier_specification_parts.front(), "-"))
|
---|
301 | {
|
---|
302 | classifier_option_value = classifier_specification_parts.front();
|
---|
303 | classifier_specification_parts.pop_front();
|
---|
304 | }
|
---|
305 |
|
---|
306 | // Record the option
|
---|
307 | classifier_options[classifier_option_name] = classifier_option_value;
|
---|
308 | }
|
---|
309 |
|
---|
310 | return classifier_options;
|
---|
311 | }
|
---|
312 |
|
---|
313 |
|
---|
314 | text_t dynamicclassifieraction::output_hlist_classifier_nodes (text_t parent_classifier_node_OID,
|
---|
315 | text_t metadata_value_filter,
|
---|
316 | text_t metadata_value_grouping_expression,
|
---|
317 | text_tmap classifier_options, cgiargsclass &args,
|
---|
318 | recptproto *collectproto, browsermapclass *browsers,
|
---|
319 | displayclass &disp, outconvertclass &outconvert,
|
---|
320 | ostream &textout, ostream &logout)
|
---|
321 | {
|
---|
322 | // Get all the metadata values for the specified element(s) that match the filter
|
---|
323 | text_t metadata_elements = classifier_options["metadata_elements"];
|
---|
324 | FilterResponse_t metadata_values_response;
|
---|
325 | bool request_success = get_metadata_values (metadata_elements, metadata_value_filter, metadata_value_grouping_expression, args["c"], collectproto, metadata_values_response, logout);
|
---|
326 |
|
---|
327 | // If the request failed then it's probably because the collection isn't using an SQL infodbtype
|
---|
328 | if (request_success == false)
|
---|
329 | {
|
---|
330 | textout << outconvert << disp << "Error: Dynamic classifier functionality is not available. Please check you are using an SQL infodbtype and the collection has been rebuilt.\n";
|
---|
331 | return "";
|
---|
332 | }
|
---|
333 |
|
---|
334 | // Check some metadata values were returned
|
---|
335 | if (metadata_values_response.docInfo.empty())
|
---|
336 | {
|
---|
337 | return "";
|
---|
338 | }
|
---|
339 |
|
---|
340 | // After processing any hierarchical metadata values we're left with the hlist classifer nodes
|
---|
341 | map<text_t, int, lttext_t> hlist_classifier_nodes;
|
---|
342 | ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values_response.docInfo.begin();
|
---|
343 | while (metadata_value_iterator != metadata_values_response.docInfo.end())
|
---|
344 | {
|
---|
345 | text_t hierarchical_metadata_value = split_metadata_value ((*metadata_value_iterator).OID, classifier_options);
|
---|
346 |
|
---|
347 | // Assume for now we're always at the top
|
---|
348 | text_t hlist_metadata_value = hierarchical_metadata_value;
|
---|
349 |
|
---|
350 | // Determine the label for the hlist classifier node from the metadata value
|
---|
351 | text_tlist hlist_metadata_value_parts;
|
---|
352 | text_t hlist_node_separator = classifier_options["-split_using_hierarchy_separator"];
|
---|
353 | splitword (hlist_metadata_value.begin(), hlist_metadata_value.end(), hlist_node_separator, hlist_metadata_value_parts);
|
---|
354 | text_t hlist_classifier_node_label = hlist_metadata_value_parts.front();
|
---|
355 |
|
---|
356 | // Create a node for this value if we haven't seen it before
|
---|
357 | if (hlist_classifier_nodes.find(hlist_classifier_node_label) == hlist_classifier_nodes.end())
|
---|
358 | {
|
---|
359 | hlist_classifier_nodes[hlist_classifier_node_label] = 0;
|
---|
360 | }
|
---|
361 |
|
---|
362 | // Increment the occurrence count
|
---|
363 | hlist_classifier_nodes[hlist_classifier_node_label] += (*metadata_value_iterator).result_num;
|
---|
364 |
|
---|
365 | metadata_value_iterator++;
|
---|
366 | }
|
---|
367 |
|
---|
368 | // Add the necessary metadata to the hlist classifier nodes
|
---|
369 | text_t selected_hlist_node_OID = "";
|
---|
370 | FilterResponse_t hlist_classifier_nodes_response;
|
---|
371 | map<text_t, int, lttext_t>::iterator hlist_classifier_nodes_iterator = hlist_classifier_nodes.begin();
|
---|
372 | while (hlist_classifier_nodes_iterator != hlist_classifier_nodes.end())
|
---|
373 | {
|
---|
374 | text_t hlist_classifier_node_OID = parent_classifier_node_OID + (*hlist_classifier_nodes_iterator).first;
|
---|
375 |
|
---|
376 | // Is this the hlist node that is currently selected?
|
---|
377 | if (starts_with (args["dcn"], hlist_classifier_node_OID))
|
---|
378 | {
|
---|
379 | selected_hlist_node_OID = hlist_classifier_node_OID;
|
---|
380 | }
|
---|
381 |
|
---|
382 | // Add the necessary metadata required to display the hlist nodes correctly
|
---|
383 | ResultDocInfo_t hlist_classifier_node;
|
---|
384 | hlist_classifier_node.OID = hlist_classifier_node_OID;
|
---|
385 | hlist_classifier_node.metadata["doctype"].values.push_back ("classify");
|
---|
386 | hlist_classifier_node.metadata["haschildren"].values.push_back ("1");
|
---|
387 | hlist_classifier_node.metadata["numleafdocs"].values.push_back ("?"); // We can't determine this without more database requests
|
---|
388 | hlist_classifier_node.metadata["Title"].values.push_back ((*hlist_classifier_nodes_iterator).first);
|
---|
389 | hlist_classifier_nodes_response.docInfo.push_back (hlist_classifier_node);
|
---|
390 |
|
---|
391 | hlist_classifier_nodes_iterator++;
|
---|
392 | }
|
---|
393 |
|
---|
394 | // Automatically select the first hlist node if necessary
|
---|
395 | if (selected_hlist_node_OID == "")
|
---|
396 | {
|
---|
397 | selected_hlist_node_OID = (*hlist_classifier_nodes_response.docInfo.begin()).OID;
|
---|
398 |
|
---|
399 | // Don't really like messing with this here, but it needs to be done before display_classifier_nodes() below
|
---|
400 | if (starts_with (parent_classifier_node_OID, args["dcn"]))
|
---|
401 | {
|
---|
402 | args["dcn"] = selected_hlist_node_OID;
|
---|
403 | }
|
---|
404 | }
|
---|
405 |
|
---|
406 | // Display the hlist nodes
|
---|
407 | display_classifier_nodes (hlist_classifier_nodes_response, "HList", 0, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
408 |
|
---|
409 | return selected_hlist_node_OID;
|
---|
410 | }
|
---|
411 |
|
---|
412 |
|
---|
413 | void dynamicclassifieraction::output_upper_classifier_nodes (text_t root_classifier_node_OID,
|
---|
414 | text_tlist upper_classifier_node_labels,
|
---|
415 | int& classifier_node_indent,
|
---|
416 | text_tmap classifier_options, cgiargsclass &args,
|
---|
417 | recptproto *collectproto, browsermapclass *browsers,
|
---|
418 | displayclass &disp, outconvertclass &outconvert,
|
---|
419 | ostream &textout, ostream &logout)
|
---|
420 | {
|
---|
421 | // Display the upper classifier nodes
|
---|
422 | text_t upper_classifier_node_OID = root_classifier_node_OID;
|
---|
423 | text_tlist::iterator upper_classifier_node_labels_iterator = upper_classifier_node_labels.begin();
|
---|
424 | while (upper_classifier_node_labels_iterator != upper_classifier_node_labels.end())
|
---|
425 | {
|
---|
426 | upper_classifier_node_OID += *upper_classifier_node_labels_iterator;
|
---|
427 |
|
---|
428 | ResultDocInfo_t upper_classifier_node;
|
---|
429 | upper_classifier_node.OID = upper_classifier_node_OID;
|
---|
430 | upper_classifier_node.metadata["doctype"].values.push_back ("classify");
|
---|
431 | upper_classifier_node.metadata["haschildren"].values.push_back ("1");
|
---|
432 | upper_classifier_node.metadata["numleafdocs"].values.push_back ("?"); // We can't determine this without more database requests
|
---|
433 | upper_classifier_node.metadata["Title"].values.push_back (*upper_classifier_node_labels_iterator);
|
---|
434 |
|
---|
435 | FilterResponse_t upper_classifier_node_response;
|
---|
436 | upper_classifier_node_response.docInfo.push_back(upper_classifier_node);
|
---|
437 | display_classifier_nodes (upper_classifier_node_response, "VList", classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
438 | classifier_node_indent++;
|
---|
439 |
|
---|
440 | upper_classifier_node_OID += classifier_options["-split_using_hierarchy_separator"];
|
---|
441 | upper_classifier_node_labels_iterator++;
|
---|
442 | }
|
---|
443 | }
|
---|
444 |
|
---|
445 |
|
---|
446 | void dynamicclassifieraction::output_child_classifier_nodes (text_t classifier_node_OID,
|
---|
447 | text_t classifier_node_metadata_value,
|
---|
448 | text_t metadata_value_filter,
|
---|
449 | int& classifier_node_indent,
|
---|
450 | text_tmap classifier_options, cgiargsclass &args,
|
---|
451 | recptproto *collectproto, browsermapclass *browsers,
|
---|
452 | displayclass &disp, outconvertclass &outconvert,
|
---|
453 | ostream &textout, ostream &logout)
|
---|
454 | {
|
---|
455 | // Get all the metadata values for the specified element(s) that match the filter
|
---|
456 | text_t metadata_elements = classifier_options["metadata_elements"];
|
---|
457 | FilterResponse_t metadata_values_response;
|
---|
458 | bool request_success = get_metadata_values (metadata_elements, metadata_value_filter, "", args["c"], collectproto, metadata_values_response, logout);
|
---|
459 |
|
---|
460 | // If the request failed then it's probably because the collection isn't using an SQL infodbtype
|
---|
461 | if (request_success == false)
|
---|
462 | {
|
---|
463 | textout << outconvert << disp << "Error: Dynamic classifier functionality is not available. Please check you are using an SQL infodbtype and the collection has been rebuilt.\n";
|
---|
464 | return;
|
---|
465 | }
|
---|
466 |
|
---|
467 | // After processing any hierarchical metadata values we're left with the child classifer nodes
|
---|
468 | map<text_t, int, lttext_t> child_classifier_nodes;
|
---|
469 | ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values_response.docInfo.begin();
|
---|
470 | while (metadata_value_iterator != metadata_values_response.docInfo.end())
|
---|
471 | {
|
---|
472 | text_t hierarchical_metadata_value = split_metadata_value ((*metadata_value_iterator).OID, classifier_options);
|
---|
473 | text_t classifier_node_hierarchical_metadata_value = split_metadata_value (classifier_node_metadata_value, classifier_options);
|
---|
474 |
|
---|
475 | // We need to remove the current position from the metadata value to leave the child metadata value
|
---|
476 | text_t child_metadata_value = hierarchical_metadata_value;
|
---|
477 | text_t child_node_separator = classifier_options["-split_using_hierarchy_separator"];
|
---|
478 | if (starts_with(hierarchical_metadata_value, classifier_node_hierarchical_metadata_value + child_node_separator))
|
---|
479 | {
|
---|
480 | child_metadata_value = substr(hierarchical_metadata_value.begin() + (classifier_node_hierarchical_metadata_value + child_node_separator).size(), hierarchical_metadata_value.end());
|
---|
481 | }
|
---|
482 |
|
---|
483 | // Determine the label for the child classifier node from the metadata value
|
---|
484 | text_tlist child_metadata_value_parts;
|
---|
485 | splitword (child_metadata_value.begin(), child_metadata_value.end(), child_node_separator, child_metadata_value_parts);
|
---|
486 | text_t child_classifier_node_label = child_metadata_value_parts.front();
|
---|
487 |
|
---|
488 | // Create a node for this value if we haven't seen it before
|
---|
489 | if (child_classifier_nodes.find(child_classifier_node_label) == child_classifier_nodes.end())
|
---|
490 | {
|
---|
491 | child_classifier_nodes[child_classifier_node_label] = 0;
|
---|
492 | }
|
---|
493 |
|
---|
494 | // Increment the occurrence count
|
---|
495 | child_classifier_nodes[child_classifier_node_label] += (*metadata_value_iterator).result_num;
|
---|
496 |
|
---|
497 | metadata_value_iterator++;
|
---|
498 | }
|
---|
499 |
|
---|
500 | // Add the necessary metadata to the child classifier nodes
|
---|
501 | FilterResponse_t child_classifier_nodes_response;
|
---|
502 | map<text_t, int, lttext_t>::iterator child_classifier_nodes_iterator = child_classifier_nodes.begin();
|
---|
503 | while (child_classifier_nodes_iterator != child_classifier_nodes.end())
|
---|
504 | {
|
---|
505 | text_t child_classifier_node_OID = classifier_node_OID + (*child_classifier_nodes_iterator).first;
|
---|
506 |
|
---|
507 | ResultDocInfo_t child_classifier_node;
|
---|
508 | child_classifier_node.OID = child_classifier_node_OID;
|
---|
509 | child_classifier_node.metadata["doctype"].values.push_back ("classify");
|
---|
510 | child_classifier_node.metadata["haschildren"].values.push_back ("1");
|
---|
511 | child_classifier_node.metadata["numleafdocs"].values.push_back ((*child_classifier_nodes_iterator).second);
|
---|
512 | child_classifier_node.metadata["Title"].values.push_back ((*child_classifier_nodes_iterator).first);
|
---|
513 | child_classifier_nodes_response.docInfo.push_back (child_classifier_node);
|
---|
514 |
|
---|
515 | child_classifier_nodes_iterator++;
|
---|
516 | }
|
---|
517 |
|
---|
518 | // Display the child classifier nodes
|
---|
519 | display_classifier_nodes (child_classifier_nodes_response, "VList", classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
520 | }
|
---|
521 |
|
---|
522 |
|
---|
523 | text_t dynamicclassifieraction::split_metadata_value (text_t metadata_value, text_tmap classifier_options)
|
---|
524 | {
|
---|
525 | text_t hierarchical_metadata_value = metadata_value;
|
---|
526 | text_t hierarchy_separator = classifier_options["-split_using_hierarchy_separator"];
|
---|
527 |
|
---|
528 | // Add hierarchy separators into Greenstone date values (e.g. YYYYMMDD -> YYYY|MM|DD)
|
---|
529 | if (classifier_options["-split_greenstone_dates"] == "1")
|
---|
530 | {
|
---|
531 | if (metadata_value.size() == 4 || metadata_value.size() == 6 || metadata_value.size() == 8)
|
---|
532 | {
|
---|
533 | // Add year
|
---|
534 | hierarchical_metadata_value = substr (metadata_value.begin(), metadata_value.begin() + 4);
|
---|
535 | if (metadata_value.size() == 6 || metadata_value.size() == 8)
|
---|
536 | {
|
---|
537 | // Add month
|
---|
538 | hierarchical_metadata_value += hierarchy_separator;
|
---|
539 | hierarchical_metadata_value += substr (metadata_value.begin() + 4, metadata_value.begin() + 6);
|
---|
540 | if (metadata_value.size() == 8)
|
---|
541 | {
|
---|
542 | // Add day
|
---|
543 | hierarchical_metadata_value += hierarchy_separator;
|
---|
544 | hierarchical_metadata_value += substr (metadata_value.begin() + 6, metadata_value.begin() + 8);
|
---|
545 | }
|
---|
546 | }
|
---|
547 | }
|
---|
548 | }
|
---|
549 |
|
---|
550 | return hierarchical_metadata_value;
|
---|
551 | }
|
---|
552 |
|
---|
553 |
|
---|
554 | void dynamicclassifieraction::display_classifier_nodes (FilterResponse_t classifier_nodes_response,
|
---|
555 | text_t classifier_nodes_type,
|
---|
556 | int classifier_nodes_indent,
|
---|
557 | cgiargsclass &args, recptproto *collectproto,
|
---|
558 | browsermapclass *browsers, displayclass &disp,
|
---|
559 | outconvertclass &outconvert, ostream &textout,
|
---|
560 | ostream &logout)
|
---|
561 | {
|
---|
562 | // Check there are some classifier nodes to display
|
---|
563 | if (classifier_nodes_response.docInfo.empty()) return;
|
---|
564 |
|
---|
565 | // Get the format statement for this classifier if there is one, or use the browser's default otherwise
|
---|
566 | text_t formatstring;
|
---|
567 | browserclass *bptr = browsers->getbrowser (classifier_nodes_type);
|
---|
568 | ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
|
---|
569 | if (!get_formatstring (args["dcl"], classifier_nodes_type, cinfo->format, formatstring))
|
---|
570 | {
|
---|
571 | formatstring = bptr->get_default_formatstring();
|
---|
572 | }
|
---|
573 | format_t *formatlistptr = new format_t();
|
---|
574 | text_tset metadata;
|
---|
575 | bool getParents = false;
|
---|
576 | parse_formatstring (formatstring, formatlistptr, metadata, getParents);
|
---|
577 | bool use_table = is_table_content (formatlistptr);
|
---|
578 |
|
---|
579 | // Display the classifier nodes
|
---|
580 | bptr->output_section_group (classifier_nodes_response, args, args["c"], classifier_nodes_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
|
---|
581 | }
|
---|
582 |
|
---|
583 |
|
---|
584 | void dynamicclassifieraction::display_document_nodes (FilterResponse_t documents_response,
|
---|
585 | int document_nodes_indent,
|
---|
586 | cgiargsclass &args, recptproto *collectproto,
|
---|
587 | browsermapclass *browsers, displayclass &disp,
|
---|
588 | outconvertclass &outconvert, ostream &textout,
|
---|
589 | ostream &logout)
|
---|
590 | {
|
---|
591 | // Check there are some documents to display
|
---|
592 | if (documents_response.docInfo.empty()) return;
|
---|
593 |
|
---|
594 | // Get the format statement for the document nodes if there is one, or use the browser's default otherwise
|
---|
595 | text_t formatstring;
|
---|
596 | browserclass *bptr = browsers->getbrowser ("VList");
|
---|
597 | ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
|
---|
598 | if (!get_formatstring (args["dcl"], "DocumentNodes", cinfo->format, formatstring))
|
---|
599 | {
|
---|
600 | if (!get_formatstring (args["dcl"], "VList", cinfo->format, formatstring))
|
---|
601 | {
|
---|
602 | formatstring = bptr->get_default_formatstring();
|
---|
603 | }
|
---|
604 | }
|
---|
605 | format_t *formatlistptr = new format_t();
|
---|
606 | text_tset metadata;
|
---|
607 | bool getParents = false;
|
---|
608 | parse_formatstring (formatstring, formatlistptr, metadata, getParents);
|
---|
609 | bool use_table = is_table_content (formatlistptr);
|
---|
610 |
|
---|
611 | // Request the necessary metadata for displaying the documents
|
---|
612 | text_tarray document_OIDs;
|
---|
613 | ResultDocInfo_tarray::iterator document_iterator = documents_response.docInfo.begin();
|
---|
614 | while (document_iterator != documents_response.docInfo.end())
|
---|
615 | {
|
---|
616 | document_OIDs.push_back ((*document_iterator).OID);
|
---|
617 | document_iterator++;
|
---|
618 | }
|
---|
619 | FilterResponse_t document_nodes_response;
|
---|
620 | get_info (document_OIDs, args["c"], args["l"], metadata, getParents, collectproto, document_nodes_response, logout);
|
---|
621 |
|
---|
622 | // Display the document nodes
|
---|
623 | bptr->output_section_group (document_nodes_response, args, args["c"], document_nodes_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
|
---|
624 | }
|
---|