[15744] | 1 | /**********************************************************************
|
---|
| 2 | *
|
---|
| 3 | * dynamicclassifieraction.cpp --
|
---|
| 4 | * Copyright (C) 2008 DL Consulting Ltd
|
---|
| 5 | *
|
---|
[19297] | 6 | * By Michael Dewsnip
|
---|
| 7 | * Please do not edit this file without checking with Michael first!
|
---|
| 8 | *
|
---|
[15744] | 9 | * A component of the Greenstone digital library software
|
---|
| 10 | * from the New Zealand Digital Library Project at the
|
---|
| 11 | * University of Waikato, New Zealand.
|
---|
| 12 | *
|
---|
| 13 | * This program is free software; you can redistribute it and/or modify
|
---|
| 14 | * it under the terms of the GNU General Public License as published by
|
---|
| 15 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 16 | * (at your option) any later version.
|
---|
| 17 | *
|
---|
| 18 | * This program is distributed in the hope that it will be useful,
|
---|
| 19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 21 | * GNU General Public License for more details.
|
---|
| 22 | *
|
---|
| 23 | * You should have received a copy of the GNU General Public License
|
---|
| 24 | * along with this program; if not, write to the Free Software
|
---|
| 25 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 26 | *
|
---|
| 27 | *********************************************************************/
|
---|
| 28 |
|
---|
| 29 | #include "dynamicclassifieraction.h"
|
---|
[15768] | 30 | #include "recptprototools.h"
|
---|
[15744] | 31 |
|
---|
| 32 |
|
---|
| 33 | dynamicclassifieraction::dynamicclassifieraction ()
|
---|
| 34 | {
|
---|
| 35 | recpt = NULL;
|
---|
[15772] | 36 |
|
---|
| 37 | cgiarginfo arg_ainfo;
|
---|
[15795] | 38 | arg_ainfo.shortname = "dcl";
|
---|
| 39 | arg_ainfo.longname = "dynamic classifier ID";
|
---|
[15772] | 40 | arg_ainfo.multiplechar = true;
|
---|
| 41 | arg_ainfo.defaultstatus = cgiarginfo::weak;
|
---|
| 42 | arg_ainfo.argdefault = "";
|
---|
| 43 | arg_ainfo.savedarginfo = cgiarginfo::must;
|
---|
| 44 | argsinfo.addarginfo (NULL, arg_ainfo);
|
---|
[15807] | 45 |
|
---|
| 46 | arg_ainfo.shortname = "dcn";
|
---|
| 47 | arg_ainfo.longname = "dynamic classifier node";
|
---|
| 48 | arg_ainfo.multiplechar = true;
|
---|
| 49 | arg_ainfo.defaultstatus = cgiarginfo::weak;
|
---|
| 50 | arg_ainfo.argdefault = "";
|
---|
| 51 | arg_ainfo.savedarginfo = cgiarginfo::must;
|
---|
| 52 | argsinfo.addarginfo (NULL, arg_ainfo);
|
---|
[15744] | 53 | }
|
---|
| 54 |
|
---|
| 55 |
|
---|
| 56 | dynamicclassifieraction::~dynamicclassifieraction()
|
---|
| 57 | {
|
---|
| 58 | }
|
---|
| 59 |
|
---|
| 60 |
|
---|
| 61 | bool dynamicclassifieraction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args,
|
---|
| 62 | recptprotolistclass *protos, ostream &logout)
|
---|
| 63 | {
|
---|
| 64 | return true;
|
---|
| 65 | }
|
---|
| 66 |
|
---|
| 67 |
|
---|
| 68 | void dynamicclassifieraction::get_cgihead_info (cgiargsclass &args, recptprotolistclass *protos,
|
---|
| 69 | response_t &response,text_t &response_data,
|
---|
| 70 | ostream &logout)
|
---|
| 71 | {
|
---|
| 72 | response = content;
|
---|
| 73 | response_data = "text/html";
|
---|
| 74 | }
|
---|
| 75 |
|
---|
| 76 |
|
---|
| 77 | void dynamicclassifieraction::define_external_macros (displayclass &disp, cgiargsclass &args,
|
---|
| 78 | recptprotolistclass *protos, ostream &logout)
|
---|
| 79 | {
|
---|
[17931] | 80 | // Some pages (e.g. the library home page) are not collection-specific
|
---|
| 81 | if (args["c"].empty())
|
---|
| 82 | {
|
---|
| 83 | return;
|
---|
| 84 | }
|
---|
| 85 |
|
---|
[15847] | 86 | // A valid collection server is vital
|
---|
| 87 | recptproto *collectproto = protos->getrecptproto (args["c"], logout);
|
---|
| 88 | if (collectproto == NULL)
|
---|
| 89 | {
|
---|
| 90 | logout << "dynamicclassifieraction::define_external_macros called with NULL collectproto\n";
|
---|
| 91 | return;
|
---|
| 92 | }
|
---|
| 93 |
|
---|
| 94 | // Define _dynamicclassifiernavbarentries_ to add buttons to the navigation bar for the dynamic classifiers
|
---|
| 95 | text_t navigation_bar_entries = "";
|
---|
| 96 | ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
|
---|
| 97 | text_tmap::iterator dynamic_classifier_iterator = cinfo->dynamic_classifiers.begin();
|
---|
| 98 | while (dynamic_classifier_iterator != cinfo->dynamic_classifiers.end())
|
---|
| 99 | {
|
---|
| 100 | text_t dynamic_classifier_id = (*dynamic_classifier_iterator).first;
|
---|
| 101 | navigation_bar_entries += "_navbarspacer_";
|
---|
| 102 | navigation_bar_entries += "_navtab_(_gwcgi_?c=" + args["c"] + "&a=dc&dcl=" + dynamic_classifier_id + "," + dynamic_classifier_id;
|
---|
| 103 | if (args["a"] == "dc" && args["dcl"] == dynamic_classifier_id)
|
---|
| 104 | {
|
---|
| 105 | navigation_bar_entries += ",selected";
|
---|
| 106 | }
|
---|
| 107 | navigation_bar_entries += ")";
|
---|
| 108 | dynamic_classifier_iterator++;
|
---|
| 109 | }
|
---|
| 110 |
|
---|
| 111 | disp.setmacro("dynamicclassifiernavbarentries", displayclass::defaultpackage, navigation_bar_entries);
|
---|
[15744] | 112 | }
|
---|
| 113 |
|
---|
| 114 |
|
---|
| 115 | void dynamicclassifieraction::define_internal_macros (displayclass &disp, cgiargsclass &args,
|
---|
| 116 | recptprotolistclass *protos, ostream &logout)
|
---|
| 117 | {
|
---|
[19297] | 118 | // No internal macros set
|
---|
[15744] | 119 | }
|
---|
| 120 |
|
---|
| 121 |
|
---|
| 122 | bool dynamicclassifieraction::do_action(cgiargsclass &args, recptprotolistclass *protos,
|
---|
| 123 | browsermapclass *browsers, displayclass &disp,
|
---|
| 124 | outconvertclass &outconvert, ostream &textout,
|
---|
| 125 | ostream &logout)
|
---|
| 126 | {
|
---|
[15795] | 127 | // A valid collection server is vital
|
---|
[15744] | 128 | recptproto *collectproto = protos->getrecptproto (args["c"], logout);
|
---|
| 129 | if (collectproto == NULL)
|
---|
| 130 | {
|
---|
| 131 | logout << "dynamicclassifieraction::do_action called with NULL collectproto\n";
|
---|
| 132 | return false;
|
---|
| 133 | }
|
---|
| 134 |
|
---|
[15988] | 135 | textout << outconvert << disp << "_dynamicclassifier:header_\n";
|
---|
| 136 | textout << outconvert << disp << "_dynamicclassifier:content_\n";
|
---|
[15768] | 137 |
|
---|
[15795] | 138 | // Check a dynamic classifier ID has been specified
|
---|
| 139 | text_t arg_dcl = args["dcl"];
|
---|
| 140 | if (arg_dcl.empty())
|
---|
[15772] | 141 | {
|
---|
[15834] | 142 | textout << outconvert << disp << "Error: Missing dcl argument.\n";
|
---|
[15988] | 143 | textout << outconvert << disp << "_dynamicclassifier:footer_\n";
|
---|
[15795] | 144 | return true;
|
---|
[15772] | 145 | }
|
---|
| 146 |
|
---|
[15795] | 147 | // Check the dynamic classifier ID is valid (ie. there is an entry in the collect.cfg file for it)
|
---|
[15772] | 148 | ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
|
---|
[15795] | 149 | if (cinfo->dynamic_classifiers.find(arg_dcl) == cinfo->dynamic_classifiers.end())
|
---|
[15772] | 150 | {
|
---|
[15834] | 151 | textout << outconvert << disp << "Error: Invalid dcl value \"" << arg_dcl << "\".\n";
|
---|
[15988] | 152 | textout << outconvert << disp << "_dynamicclassifier:footer_\n";
|
---|
[15795] | 153 | return true;
|
---|
[15772] | 154 | }
|
---|
| 155 |
|
---|
[15999] | 156 | // Parse the classifier options from the specification
|
---|
| 157 | text_t classifier_specification = cinfo->dynamic_classifiers[arg_dcl];
|
---|
[16032] | 158 | text_tmap classifier_options = parse_classifier_options (classifier_specification, args);
|
---|
[15999] | 159 |
|
---|
[15993] | 160 | // Output the "<ID>Header" format statement if there is one
|
---|
| 161 | text_t classifier_header_format_statement = "";
|
---|
[15999] | 162 | get_formatstring (arg_dcl + "Header", cinfo->format, classifier_header_format_statement);
|
---|
[15993] | 163 | textout << outconvert << disp << classifier_header_format_statement << "\n";
|
---|
| 164 |
|
---|
[16065] | 165 | // Resolve any ".pr" bits at the end of the "dcn" argument
|
---|
[16219] | 166 | text_t classifier_node_separator = classifier_options["-split_using_hierarchy_separator"];
|
---|
[16065] | 167 | if (ends_with (args["dcn"], ".pr"))
|
---|
| 168 | {
|
---|
| 169 | // Change the "dcn" argument to be the OID of the parent of the specified classifier node
|
---|
[16219] | 170 | text_tlist args_dcn_parts;
|
---|
| 171 | splitword (args["dcn"].begin(), args["dcn"].end(), classifier_node_separator, args_dcn_parts);
|
---|
| 172 | args_dcn_parts.pop_back(); // Remove the last part
|
---|
| 173 | joinchar (args_dcn_parts, classifier_node_separator, args["dcn"]);
|
---|
[16065] | 174 | }
|
---|
| 175 |
|
---|
[16114] | 176 | // Prepare to output the dynamic classifier
|
---|
[16118] | 177 | text_t current_classifier_node_OID = "";
|
---|
| 178 | text_t current_metadata_value_filter = "";
|
---|
[16114] | 179 | int classifier_node_indent = 0;
|
---|
| 180 |
|
---|
[16119] | 181 | // (Optional) Output an hlist to group the classifier nodes into buckets based on the first character
|
---|
[16120] | 182 | if (classifier_options["-group_by_first_character"] == "1")
|
---|
[15795] | 183 | {
|
---|
[16131] | 184 | text_t selected_grouping_node_OID = output_hlist_classifier_nodes (current_classifier_node_OID, current_metadata_value_filter, "substr(value,1,1)", classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
[16115] | 185 |
|
---|
[16215] | 186 | current_classifier_node_OID = selected_grouping_node_OID + ".";
|
---|
[16118] | 187 | current_metadata_value_filter = selected_grouping_node_OID + "*";
|
---|
[16046] | 188 | }
|
---|
[16057] | 189 |
|
---|
[16119] | 190 | // (Optional) Output an hlist instead of a vlist at the top level
|
---|
[16114] | 191 | if (classifier_options["-use_hlist_at_top"] == "1")
|
---|
| 192 | {
|
---|
[16131] | 193 | text_t selected_hlist_node_OID = output_hlist_classifier_nodes (current_classifier_node_OID, current_metadata_value_filter, "", classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
[16118] | 194 |
|
---|
[16216] | 195 | current_classifier_node_OID = selected_hlist_node_OID + classifier_node_separator;
|
---|
[16114] | 196 | }
|
---|
| 197 |
|
---|
[16118] | 198 | // args["dcn"] may have been modified by output_hlist_classifier_nodes() above
|
---|
| 199 | text_t classifier_node_OID = args["dcn"];
|
---|
| 200 |
|
---|
[16215] | 201 | // If grouping is enabled remove the grouping node bit from the start of the OID
|
---|
| 202 | text_t classifier_node_OID_sans_grouping = classifier_node_OID;
|
---|
[16126] | 203 | if (classifier_options["-group_by_first_character"] == "1")
|
---|
| 204 | {
|
---|
[16215] | 205 | text_t::iterator grouping_node_separator = findchar (classifier_node_OID.begin(), classifier_node_OID.end(), '.');
|
---|
| 206 | if (grouping_node_separator != classifier_node_OID.end())
|
---|
| 207 | {
|
---|
| 208 | classifier_node_OID_sans_grouping = substr (grouping_node_separator + 1, classifier_node_OID.end());
|
---|
| 209 | }
|
---|
| 210 | else
|
---|
| 211 | {
|
---|
| 212 | classifier_node_OID_sans_grouping = "";
|
---|
| 213 | }
|
---|
| 214 | }
|
---|
[16128] | 215 |
|
---|
[16215] | 216 | // Split the classifier node OID into its hierarchical parts, then remove any we've already dealt with (HLists)
|
---|
| 217 | text_tlist classifier_node_OID_parts_remaining;
|
---|
[16216] | 218 | splitword (classifier_node_OID_sans_grouping.begin(), classifier_node_OID_sans_grouping.end(), classifier_node_separator, classifier_node_OID_parts_remaining);
|
---|
[16126] | 219 | if (classifier_options["-use_hlist_at_top"] == "1")
|
---|
| 220 | {
|
---|
| 221 | classifier_node_OID_parts_remaining.pop_front();
|
---|
| 222 | }
|
---|
| 223 |
|
---|
[16215] | 224 | text_t classifier_node_metadata_value = classifier_node_OID_sans_grouping;
|
---|
[16216] | 225 | text_t classifier_node_metadata_value_filter = classifier_node_metadata_value + classifier_node_separator + "*";
|
---|
[16215] | 226 |
|
---|
[16206] | 227 | // If "-split_greenstone_dates" is on the metadata value and filter doesn't quite match the OID -- fix this
|
---|
| 228 | if (classifier_options["-split_greenstone_dates"] == "1")
|
---|
| 229 | {
|
---|
[16220] | 230 | classifier_node_metadata_value.replace (classifier_node_separator, "");
|
---|
[16206] | 231 | classifier_node_metadata_value_filter = classifier_node_metadata_value;
|
---|
[16216] | 232 | if (classifier_node_metadata_value_filter.size() == 8)
|
---|
| 233 | {
|
---|
| 234 | classifier_node_metadata_value_filter += classifier_node_separator;
|
---|
| 235 | }
|
---|
| 236 | classifier_node_metadata_value_filter += "*";
|
---|
[16206] | 237 | }
|
---|
| 238 |
|
---|
[16057] | 239 | // Simple case at the top level: just output the child classifier nodes
|
---|
[16131] | 240 | if (classifier_node_metadata_value == "")
|
---|
[16046] | 241 | {
|
---|
[16212] | 242 | output_child_classifier_nodes (current_classifier_node_OID, "", current_metadata_value_filter, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
[15795] | 243 | }
|
---|
[16057] | 244 |
|
---|
| 245 | // More complex case below the top level
|
---|
[15949] | 246 | else
|
---|
| 247 | {
|
---|
[16061] | 248 | // Output the parent classifier nodes and the current classifier node
|
---|
[16126] | 249 | output_upper_classifier_nodes (current_classifier_node_OID, classifier_node_OID_parts_remaining, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
[16061] | 250 |
|
---|
[16057] | 251 | // Output the child classifier nodes
|
---|
[16216] | 252 | current_classifier_node_OID = classifier_node_OID + classifier_node_separator;
|
---|
| 253 | output_child_classifier_nodes (current_classifier_node_OID, classifier_node_metadata_value, classifier_node_metadata_value_filter, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
[16057] | 254 |
|
---|
| 255 | // Get the document nodes at this level
|
---|
[16181] | 256 | text_t metadata_elements = classifier_options["metadata_elements"];
|
---|
[16057] | 257 | text_t sort_documents_by = classifier_options["-sort_documents_by"];
|
---|
| 258 | FilterResponse_t documents_response;
|
---|
[16181] | 259 | get_documents_with_metadata_value (metadata_elements, classifier_node_metadata_value, sort_documents_by, args["c"], collectproto, documents_response, logout);
|
---|
[16057] | 260 |
|
---|
| 261 | // Display the document nodes
|
---|
| 262 | display_document_nodes (documents_response, classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
[15949] | 263 | }
|
---|
[15795] | 264 |
|
---|
[15993] | 265 | // Output the "<ID>Footer" format statement if there is one
|
---|
| 266 | text_t classifier_footer_format_statement = "";
|
---|
[15999] | 267 | get_formatstring (arg_dcl + "Footer", cinfo->format, classifier_footer_format_statement);
|
---|
[15993] | 268 | textout << outconvert << disp << classifier_footer_format_statement << "\n";
|
---|
| 269 |
|
---|
[15988] | 270 | textout << outconvert << disp << "_dynamicclassifier:footer_\n";
|
---|
[15949] | 271 | return true;
|
---|
| 272 | }
|
---|
| 273 |
|
---|
| 274 |
|
---|
[16032] | 275 | text_tmap dynamicclassifieraction::parse_classifier_options (text_t classifier_specification, cgiargsclass &args)
|
---|
| 276 | {
|
---|
| 277 | text_tmap classifier_options;
|
---|
| 278 |
|
---|
[16216] | 279 | // Default values
|
---|
| 280 | classifier_options["-split_using_hierarchy_separator"] = "|";
|
---|
| 281 |
|
---|
[16033] | 282 | // Split the classifier specification string by spaces
|
---|
| 283 | text_tlist classifier_specification_parts;
|
---|
| 284 | splitchar (classifier_specification.begin(), classifier_specification.end(), ' ', classifier_specification_parts);
|
---|
| 285 |
|
---|
[16181] | 286 | // The metadata element(s) to classify by should be the first value
|
---|
| 287 | classifier_options["metadata_elements"] = classifier_specification_parts.front();
|
---|
[16033] | 288 | classifier_specification_parts.pop_front();
|
---|
| 289 |
|
---|
| 290 | // Parse options from the remainder of the classifier specification
|
---|
| 291 | while (!classifier_specification_parts.empty())
|
---|
| 292 | {
|
---|
| 293 | // Parse the option name
|
---|
| 294 | text_t classifier_option_name = classifier_specification_parts.front();
|
---|
| 295 | classifier_specification_parts.pop_front();
|
---|
| 296 |
|
---|
| 297 | // Check if the option has a value (it may just be a flag, in which case we use "1" as the value)
|
---|
| 298 | text_t classifier_option_value = "1";
|
---|
| 299 | if (!classifier_specification_parts.empty() && !starts_with(classifier_specification_parts.front(), "-"))
|
---|
| 300 | {
|
---|
| 301 | classifier_option_value = classifier_specification_parts.front();
|
---|
| 302 | classifier_specification_parts.pop_front();
|
---|
| 303 | }
|
---|
| 304 |
|
---|
| 305 | // Record the option
|
---|
| 306 | classifier_options[classifier_option_name] = classifier_option_value;
|
---|
| 307 | }
|
---|
| 308 |
|
---|
[16032] | 309 | return classifier_options;
|
---|
| 310 | }
|
---|
| 311 |
|
---|
| 312 |
|
---|
[16114] | 313 | text_t dynamicclassifieraction::output_hlist_classifier_nodes (text_t parent_classifier_node_OID,
|
---|
| 314 | text_t metadata_value_filter,
|
---|
[16115] | 315 | text_t metadata_value_grouping_expression,
|
---|
[16114] | 316 | text_tmap classifier_options, cgiargsclass &args,
|
---|
| 317 | recptproto *collectproto, browsermapclass *browsers,
|
---|
| 318 | displayclass &disp, outconvertclass &outconvert,
|
---|
| 319 | ostream &textout, ostream &logout)
|
---|
| 320 | {
|
---|
[16181] | 321 | // Get all the metadata values for the specified element(s) that match the filter
|
---|
| 322 | text_t metadata_elements = classifier_options["metadata_elements"];
|
---|
[16114] | 323 | FilterResponse_t metadata_values_response;
|
---|
[16181] | 324 | bool request_success = get_metadata_values (metadata_elements, metadata_value_filter, metadata_value_grouping_expression, args["c"], collectproto, metadata_values_response, logout);
|
---|
[16114] | 325 |
|
---|
| 326 | // If the request failed then it's probably because the collection isn't using an SQL infodbtype
|
---|
| 327 | if (request_success == false)
|
---|
| 328 | {
|
---|
| 329 | textout << outconvert << disp << "Error: Dynamic classifier functionality is not available. Please check you are using an SQL infodbtype and the collection has been rebuilt.\n";
|
---|
| 330 | return "";
|
---|
| 331 | }
|
---|
| 332 |
|
---|
| 333 | // Check some metadata values were returned
|
---|
| 334 | if (metadata_values_response.docInfo.empty())
|
---|
| 335 | {
|
---|
| 336 | return "";
|
---|
| 337 | }
|
---|
| 338 |
|
---|
| 339 | // After processing any hierarchical metadata values we're left with the hlist classifer nodes
|
---|
| 340 | map<text_t, int, lttext_t> hlist_classifier_nodes;
|
---|
| 341 | ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values_response.docInfo.begin();
|
---|
| 342 | while (metadata_value_iterator != metadata_values_response.docInfo.end())
|
---|
| 343 | {
|
---|
[16206] | 344 | text_t hierarchical_metadata_value = split_metadata_value ((*metadata_value_iterator).OID, classifier_options);
|
---|
[16114] | 345 |
|
---|
[16206] | 346 | // Assume for now we're always at the top
|
---|
| 347 | text_t hlist_metadata_value = hierarchical_metadata_value;
|
---|
[16114] | 348 |
|
---|
[16206] | 349 | // Determine the label for the hlist classifier node from the metadata value
|
---|
[16216] | 350 | text_tlist hlist_metadata_value_parts;
|
---|
| 351 | text_t hlist_node_separator = classifier_options["-split_using_hierarchy_separator"];
|
---|
| 352 | splitword (hlist_metadata_value.begin(), hlist_metadata_value.end(), hlist_node_separator, hlist_metadata_value_parts);
|
---|
[16206] | 353 | text_t hlist_classifier_node_label = hlist_metadata_value_parts.front();
|
---|
| 354 |
|
---|
| 355 | // Create a node for this value if we haven't seen it before
|
---|
| 356 | if (hlist_classifier_nodes.find(hlist_classifier_node_label) == hlist_classifier_nodes.end())
|
---|
[16114] | 357 | {
|
---|
[16206] | 358 | hlist_classifier_nodes[hlist_classifier_node_label] = 0;
|
---|
[16114] | 359 | }
|
---|
| 360 |
|
---|
| 361 | // Increment the occurrence count
|
---|
[16206] | 362 | hlist_classifier_nodes[hlist_classifier_node_label] += (*metadata_value_iterator).result_num;
|
---|
[16114] | 363 |
|
---|
| 364 | metadata_value_iterator++;
|
---|
| 365 | }
|
---|
| 366 |
|
---|
| 367 | // Add the necessary metadata to the hlist classifier nodes
|
---|
| 368 | text_t selected_hlist_node_OID = "";
|
---|
| 369 | FilterResponse_t hlist_classifier_nodes_response;
|
---|
| 370 | map<text_t, int, lttext_t>::iterator hlist_classifier_nodes_iterator = hlist_classifier_nodes.begin();
|
---|
| 371 | while (hlist_classifier_nodes_iterator != hlist_classifier_nodes.end())
|
---|
| 372 | {
|
---|
[16214] | 373 | text_t hlist_classifier_node_OID = parent_classifier_node_OID + (*hlist_classifier_nodes_iterator).first;
|
---|
[16114] | 374 |
|
---|
| 375 | // Is this the hlist node that is currently selected?
|
---|
| 376 | if (starts_with (args["dcn"], hlist_classifier_node_OID))
|
---|
| 377 | {
|
---|
| 378 | selected_hlist_node_OID = hlist_classifier_node_OID;
|
---|
| 379 | }
|
---|
| 380 |
|
---|
| 381 | // Add the necessary metadata required to display the hlist nodes correctly
|
---|
| 382 | ResultDocInfo_t hlist_classifier_node;
|
---|
| 383 | hlist_classifier_node.OID = hlist_classifier_node_OID;
|
---|
| 384 | hlist_classifier_node.metadata["doctype"].values.push_back ("classify");
|
---|
| 385 | hlist_classifier_node.metadata["haschildren"].values.push_back ("1");
|
---|
| 386 | hlist_classifier_node.metadata["numleafdocs"].values.push_back ("?"); // We can't determine this without more database requests
|
---|
[16123] | 387 | hlist_classifier_node.metadata["Title"].values.push_back ((*hlist_classifier_nodes_iterator).first);
|
---|
[16114] | 388 | hlist_classifier_nodes_response.docInfo.push_back (hlist_classifier_node);
|
---|
| 389 |
|
---|
| 390 | hlist_classifier_nodes_iterator++;
|
---|
| 391 | }
|
---|
| 392 |
|
---|
[16123] | 393 | // Automatically select the first hlist node if necessary
|
---|
| 394 | if (selected_hlist_node_OID == "")
|
---|
| 395 | {
|
---|
| 396 | selected_hlist_node_OID = (*hlist_classifier_nodes_response.docInfo.begin()).OID;
|
---|
| 397 |
|
---|
[16179] | 398 | // Don't really like messing with this here, but it needs to be done before display_classifier_nodes() below
|
---|
[16214] | 399 | if (starts_with (parent_classifier_node_OID, args["dcn"]))
|
---|
[16123] | 400 | {
|
---|
| 401 | args["dcn"] = selected_hlist_node_OID;
|
---|
| 402 | }
|
---|
| 403 | }
|
---|
| 404 |
|
---|
[16114] | 405 | // Display the hlist nodes
|
---|
| 406 | display_classifier_nodes (hlist_classifier_nodes_response, "HList", 0, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
| 407 |
|
---|
| 408 | return selected_hlist_node_OID;
|
---|
| 409 | }
|
---|
| 410 |
|
---|
| 411 |
|
---|
[16061] | 412 | void dynamicclassifieraction::output_upper_classifier_nodes (text_t root_classifier_node_OID,
|
---|
[16095] | 413 | text_tlist upper_classifier_node_labels,
|
---|
[16061] | 414 | int& classifier_node_indent,
|
---|
| 415 | text_tmap classifier_options, cgiargsclass &args,
|
---|
| 416 | recptproto *collectproto, browsermapclass *browsers,
|
---|
| 417 | displayclass &disp, outconvertclass &outconvert,
|
---|
| 418 | ostream &textout, ostream &logout)
|
---|
[15949] | 419 | {
|
---|
[16095] | 420 | // Display the upper classifier nodes
|
---|
| 421 | text_t upper_classifier_node_OID = root_classifier_node_OID;
|
---|
| 422 | text_tlist::iterator upper_classifier_node_labels_iterator = upper_classifier_node_labels.begin();
|
---|
| 423 | while (upper_classifier_node_labels_iterator != upper_classifier_node_labels.end())
|
---|
[15768] | 424 | {
|
---|
[16095] | 425 | upper_classifier_node_OID += *upper_classifier_node_labels_iterator;
|
---|
[16096] | 426 |
|
---|
| 427 | ResultDocInfo_t upper_classifier_node;
|
---|
| 428 | upper_classifier_node.OID = upper_classifier_node_OID;
|
---|
| 429 | upper_classifier_node.metadata["doctype"].values.push_back ("classify");
|
---|
| 430 | upper_classifier_node.metadata["haschildren"].values.push_back ("1");
|
---|
| 431 | upper_classifier_node.metadata["numleafdocs"].values.push_back ("?"); // We can't determine this without more database requests
|
---|
| 432 | upper_classifier_node.metadata["Title"].values.push_back (*upper_classifier_node_labels_iterator);
|
---|
| 433 |
|
---|
| 434 | FilterResponse_t upper_classifier_node_response;
|
---|
| 435 | upper_classifier_node_response.docInfo.push_back(upper_classifier_node);
|
---|
[16107] | 436 | display_classifier_nodes (upper_classifier_node_response, "VList", classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
[16055] | 437 | classifier_node_indent++;
|
---|
[15807] | 438 |
|
---|
[16216] | 439 | upper_classifier_node_OID += classifier_options["-split_using_hierarchy_separator"];
|
---|
[16095] | 440 | upper_classifier_node_labels_iterator++;
|
---|
[15949] | 441 | }
|
---|
[16057] | 442 | }
|
---|
[15949] | 443 |
|
---|
[15953] | 444 |
|
---|
[16057] | 445 | void dynamicclassifieraction::output_child_classifier_nodes (text_t classifier_node_OID,
|
---|
[16061] | 446 | text_t classifier_node_metadata_value,
|
---|
[16057] | 447 | text_t metadata_value_filter,
|
---|
| 448 | int& classifier_node_indent,
|
---|
| 449 | text_tmap classifier_options, cgiargsclass &args,
|
---|
| 450 | recptproto *collectproto, browsermapclass *browsers,
|
---|
| 451 | displayclass &disp, outconvertclass &outconvert,
|
---|
| 452 | ostream &textout, ostream &logout)
|
---|
| 453 | {
|
---|
[16181] | 454 | // Get all the metadata values for the specified element(s) that match the filter
|
---|
| 455 | text_t metadata_elements = classifier_options["metadata_elements"];
|
---|
[16057] | 456 | FilterResponse_t metadata_values_response;
|
---|
[16181] | 457 | bool request_success = get_metadata_values (metadata_elements, metadata_value_filter, "", args["c"], collectproto, metadata_values_response, logout);
|
---|
[16057] | 458 |
|
---|
| 459 | // If the request failed then it's probably because the collection isn't using an SQL infodbtype
|
---|
| 460 | if (request_success == false)
|
---|
| 461 | {
|
---|
| 462 | textout << outconvert << disp << "Error: Dynamic classifier functionality is not available. Please check you are using an SQL infodbtype and the collection has been rebuilt.\n";
|
---|
| 463 | return;
|
---|
| 464 | }
|
---|
| 465 |
|
---|
[16058] | 466 | // After processing any hierarchical metadata values we're left with the child classifer nodes
|
---|
| 467 | map<text_t, int, lttext_t> child_classifier_nodes;
|
---|
| 468 | ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values_response.docInfo.begin();
|
---|
| 469 | while (metadata_value_iterator != metadata_values_response.docInfo.end())
|
---|
[15949] | 470 | {
|
---|
[16206] | 471 | text_t hierarchical_metadata_value = split_metadata_value ((*metadata_value_iterator).OID, classifier_options);
|
---|
| 472 | text_t classifier_node_hierarchical_metadata_value = split_metadata_value (classifier_node_metadata_value, classifier_options);
|
---|
[16004] | 473 |
|
---|
[16196] | 474 | // We need to remove the current position from the metadata value to leave the child metadata value
|
---|
[16206] | 475 | text_t child_metadata_value = hierarchical_metadata_value;
|
---|
[16216] | 476 | text_t child_node_separator = classifier_options["-split_using_hierarchy_separator"];
|
---|
| 477 | if (starts_with(hierarchical_metadata_value, classifier_node_hierarchical_metadata_value + child_node_separator))
|
---|
[16004] | 478 | {
|
---|
[16216] | 479 | child_metadata_value = substr(hierarchical_metadata_value.begin() + (classifier_node_hierarchical_metadata_value + child_node_separator).size(), hierarchical_metadata_value.end());
|
---|
[16004] | 480 | }
|
---|
| 481 |
|
---|
[16196] | 482 | // Determine the label for the child classifier node from the metadata value
|
---|
[16216] | 483 | text_tlist child_metadata_value_parts;
|
---|
| 484 | splitword (child_metadata_value.begin(), child_metadata_value.end(), child_node_separator, child_metadata_value_parts);
|
---|
[16196] | 485 | text_t child_classifier_node_label = child_metadata_value_parts.front();
|
---|
[16004] | 486 |
|
---|
[16196] | 487 | // Create a node for this value if we haven't seen it before
|
---|
| 488 | if (child_classifier_nodes.find(child_classifier_node_label) == child_classifier_nodes.end())
|
---|
[16004] | 489 | {
|
---|
[16196] | 490 | child_classifier_nodes[child_classifier_node_label] = 0;
|
---|
[16004] | 491 | }
|
---|
| 492 |
|
---|
| 493 | // Increment the occurrence count
|
---|
[16196] | 494 | child_classifier_nodes[child_classifier_node_label] += (*metadata_value_iterator).result_num;
|
---|
[16004] | 495 |
|
---|
| 496 | metadata_value_iterator++;
|
---|
| 497 | }
|
---|
| 498 |
|
---|
[16112] | 499 | // Add the necessary metadata to the child classifier nodes
|
---|
[16097] | 500 | FilterResponse_t child_classifier_nodes_response;
|
---|
[16058] | 501 | map<text_t, int, lttext_t>::iterator child_classifier_nodes_iterator = child_classifier_nodes.begin();
|
---|
| 502 | while (child_classifier_nodes_iterator != child_classifier_nodes.end())
|
---|
| 503 | {
|
---|
[16214] | 504 | text_t child_classifier_node_OID = classifier_node_OID + (*child_classifier_nodes_iterator).first;
|
---|
[16004] | 505 |
|
---|
[16097] | 506 | ResultDocInfo_t child_classifier_node;
|
---|
| 507 | child_classifier_node.OID = child_classifier_node_OID;
|
---|
| 508 | child_classifier_node.metadata["doctype"].values.push_back ("classify");
|
---|
| 509 | child_classifier_node.metadata["haschildren"].values.push_back ("1");
|
---|
| 510 | child_classifier_node.metadata["numleafdocs"].values.push_back ((*child_classifier_nodes_iterator).second);
|
---|
| 511 | child_classifier_node.metadata["Title"].values.push_back ((*child_classifier_nodes_iterator).first);
|
---|
| 512 | child_classifier_nodes_response.docInfo.push_back (child_classifier_node);
|
---|
[16004] | 513 |
|
---|
[16097] | 514 | child_classifier_nodes_iterator++;
|
---|
[15949] | 515 | }
|
---|
| 516 |
|
---|
[16112] | 517 | // Display the child classifier nodes
|
---|
[16107] | 518 | display_classifier_nodes (child_classifier_nodes_response, "VList", classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
|
---|
[15949] | 519 | }
|
---|
| 520 |
|
---|
| 521 |
|
---|
[16206] | 522 | text_t dynamicclassifieraction::split_metadata_value (text_t metadata_value, text_tmap classifier_options)
|
---|
| 523 | {
|
---|
| 524 | text_t hierarchical_metadata_value = metadata_value;
|
---|
[16220] | 525 | text_t hierarchy_separator = classifier_options["-split_using_hierarchy_separator"];
|
---|
[16206] | 526 |
|
---|
[16220] | 527 | // Add hierarchy separators into Greenstone date values (e.g. YYYYMMDD -> YYYY|MM|DD)
|
---|
[16206] | 528 | if (classifier_options["-split_greenstone_dates"] == "1")
|
---|
| 529 | {
|
---|
| 530 | if (metadata_value.size() == 4 || metadata_value.size() == 6 || metadata_value.size() == 8)
|
---|
| 531 | {
|
---|
| 532 | // Add year
|
---|
[16220] | 533 | hierarchical_metadata_value = substr (metadata_value.begin(), metadata_value.begin() + 4);
|
---|
[16206] | 534 | if (metadata_value.size() == 6 || metadata_value.size() == 8)
|
---|
| 535 | {
|
---|
| 536 | // Add month
|
---|
[16220] | 537 | hierarchical_metadata_value += hierarchy_separator;
|
---|
| 538 | hierarchical_metadata_value += substr (metadata_value.begin() + 4, metadata_value.begin() + 6);
|
---|
[16206] | 539 | if (metadata_value.size() == 8)
|
---|
| 540 | {
|
---|
| 541 | // Add day
|
---|
[16220] | 542 | hierarchical_metadata_value += hierarchy_separator;
|
---|
| 543 | hierarchical_metadata_value += substr (metadata_value.begin() + 6, metadata_value.begin() + 8);
|
---|
[16206] | 544 | }
|
---|
| 545 | }
|
---|
| 546 | }
|
---|
| 547 | }
|
---|
| 548 |
|
---|
| 549 | return hierarchical_metadata_value;
|
---|
| 550 | }
|
---|
| 551 |
|
---|
| 552 |
|
---|
[16096] | 553 | void dynamicclassifieraction::display_classifier_nodes (FilterResponse_t classifier_nodes_response,
|
---|
[16107] | 554 | text_t classifier_nodes_type,
|
---|
| 555 | int classifier_nodes_indent,
|
---|
[16096] | 556 | cgiargsclass &args, recptproto *collectproto,
|
---|
| 557 | browsermapclass *browsers, displayclass &disp,
|
---|
| 558 | outconvertclass &outconvert, ostream &textout,
|
---|
| 559 | ostream &logout)
|
---|
| 560 | {
|
---|
[16097] | 561 | // Check there are some classifier nodes to display
|
---|
| 562 | if (classifier_nodes_response.docInfo.empty()) return;
|
---|
| 563 |
|
---|
[16096] | 564 | // Get the format statement for this classifier if there is one, or use the browser's default otherwise
|
---|
| 565 | text_t formatstring;
|
---|
[16107] | 566 | browserclass *bptr = browsers->getbrowser (classifier_nodes_type);
|
---|
[16096] | 567 | ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
|
---|
[16107] | 568 | if (!get_formatstring (args["dcl"], classifier_nodes_type, cinfo->format, formatstring))
|
---|
[16096] | 569 | {
|
---|
| 570 | formatstring = bptr->get_default_formatstring();
|
---|
| 571 | }
|
---|
| 572 | format_t *formatlistptr = new format_t();
|
---|
| 573 | text_tset metadata;
|
---|
| 574 | bool getParents = false;
|
---|
| 575 | parse_formatstring (formatstring, formatlistptr, metadata, getParents);
|
---|
| 576 | bool use_table = is_table_content (formatlistptr);
|
---|
| 577 |
|
---|
| 578 | // Display the classifier nodes
|
---|
[16107] | 579 | bptr->output_section_group (classifier_nodes_response, args, args["c"], classifier_nodes_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
|
---|
[16096] | 580 | }
|
---|
| 581 |
|
---|
| 582 |
|
---|
[16107] | 583 | void dynamicclassifieraction::display_document_nodes (FilterResponse_t documents_response,
|
---|
| 584 | int document_nodes_indent,
|
---|
[16056] | 585 | cgiargsclass &args, recptproto *collectproto,
|
---|
| 586 | browsermapclass *browsers, displayclass &disp,
|
---|
| 587 | outconvertclass &outconvert, ostream &textout,
|
---|
| 588 | ostream &logout)
|
---|
[15949] | 589 | {
|
---|
[16097] | 590 | // Check there are some documents to display
|
---|
| 591 | if (documents_response.docInfo.empty()) return;
|
---|
| 592 |
|
---|
[16108] | 593 | // Get the format statement for the document nodes if there is one, or use the browser's default otherwise
|
---|
[15949] | 594 | text_t formatstring;
|
---|
[16108] | 595 | browserclass *bptr = browsers->getbrowser ("VList");
|
---|
[15949] | 596 | ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
|
---|
[16108] | 597 | if (!get_formatstring (args["dcl"], "DocumentNodes", cinfo->format, formatstring))
|
---|
[15949] | 598 | {
|
---|
[16209] | 599 | if (!get_formatstring (args["dcl"], "VList", cinfo->format, formatstring))
|
---|
| 600 | {
|
---|
| 601 | formatstring = bptr->get_default_formatstring();
|
---|
| 602 | }
|
---|
[15949] | 603 | }
|
---|
| 604 | format_t *formatlistptr = new format_t();
|
---|
| 605 | text_tset metadata;
|
---|
| 606 | bool getParents = false;
|
---|
| 607 | parse_formatstring (formatstring, formatlistptr, metadata, getParents);
|
---|
| 608 | bool use_table = is_table_content (formatlistptr);
|
---|
| 609 |
|
---|
[16007] | 610 | // Request the necessary metadata for displaying the documents
|
---|
| 611 | text_tarray document_OIDs;
|
---|
| 612 | ResultDocInfo_tarray::iterator document_iterator = documents_response.docInfo.begin();
|
---|
| 613 | while (document_iterator != documents_response.docInfo.end())
|
---|
| 614 | {
|
---|
| 615 | document_OIDs.push_back ((*document_iterator).OID);
|
---|
| 616 | document_iterator++;
|
---|
| 617 | }
|
---|
| 618 | FilterResponse_t document_nodes_response;
|
---|
| 619 | get_info (document_OIDs, args["c"], args["l"], metadata, getParents, collectproto, document_nodes_response, logout);
|
---|
[15949] | 620 |
|
---|
| 621 | // Display the document nodes
|
---|
[16007] | 622 | bptr->output_section_group (document_nodes_response, args, args["c"], document_nodes_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
|
---|
[15949] | 623 | }
|
---|