/**********************************************************************
 *
 * dynamicclassifieraction.cpp --
 * Copyright (C) 2008 DL Consulting Ltd
 *
 * By Michael Dewsnip
 * Please do not edit this file without checking with Michael first!
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "dynamicclassifieraction.h"
#include "recptprototools.h"


// Registers the two CGI arguments used by this action:
//   "dcl" -- the dynamic classifier ID
//   "dcn" -- the dynamic classifier node
dynamicclassifieraction::dynamicclassifieraction ()
{
  recpt = NULL;

  cgiarginfo arg_ainfo;
  arg_ainfo.shortname = "dcl";
  arg_ainfo.longname = "dynamic classifier ID";
  arg_ainfo.multiplechar = true;
  arg_ainfo.multiplevalue = false;
  arg_ainfo.defaultstatus = cgiarginfo::weak;
  arg_ainfo.argdefault = "";
  arg_ainfo.savedarginfo = cgiarginfo::must;
  argsinfo.addarginfo (NULL, arg_ainfo);

  arg_ainfo.shortname = "dcn";
  arg_ainfo.longname = "dynamic classifier node";
  arg_ainfo.multiplechar = true;
  arg_ainfo.multiplevalue = false;
  arg_ainfo.defaultstatus = cgiarginfo::weak;
  arg_ainfo.argdefault = "";
  arg_ainfo.savedarginfo = cgiarginfo::must;
  argsinfo.addarginfo (NULL, arg_ainfo);
}


dynamicclassifieraction::~dynamicclassifieraction()
{
}


// No argument validation is performed by this action; always returns true.
bool dynamicclassifieraction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args,
                                             recptprotolistclass *protos, ostream &logout)
{
  return true;
}


// The response of this action is always an HTML page.
void dynamicclassifieraction::get_cgihead_info (cgiargsclass &args, recptprotolistclass *protos,
                                                response_t &response, text_t &response_data,
                                                ostream &logout)
{
  response = content;
  response_data = "text/html";
}


// Defines _dynamicclassifiernavbarentries_: one navigation bar tab per dynamic
// classifier of the current collection. Nothing is defined when no collection is
// specified or when the collection cannot be resolved.
void dynamicclassifieraction::define_external_macros (displayclass &disp, cgiargsclass &args,
                                                      recptprotolistclass *protos, ostream &logout)
{
  // Some pages (e.g. the library home page) are not collection-specific
  if (args["c"].empty())
  {
    return;
  }

  // A valid collection server is vital
  recptproto *collectproto = protos->getrecptproto (args["c"], logout);
  if (collectproto == NULL)
  {
    logout << "dynamicclassifieraction::define_external_macros called with NULL collectproto\n";
    return;
  }

  // Defensive: the collection information is dereferenced below
  ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
  if (cinfo == NULL)
  {
    logout << "dynamicclassifieraction::define_external_macros called with NULL cinfo\n";
    return;
  }

  // Define _dynamicclassifiernavbarentries_ to add buttons to the navigation bar for the dynamic classifiers
  text_t navigation_bar_entries = "";
  text_tmap::iterator dynamic_classifier_iterator = cinfo->dynamic_classifiers.begin();
  while (dynamic_classifier_iterator != cinfo->dynamic_classifiers.end())
  {
    text_t dynamic_classifier_id = (*dynamic_classifier_iterator).first;
    navigation_bar_entries += "_navbarspacer_";
    navigation_bar_entries += "_navtab_(_gwcgi_?c=" + args["c"] + "&a=dc&dcl=" + dynamic_classifier_id + "," + dynamic_classifier_id;

    // Mark the tab of the classifier that is currently being displayed
    if (args["a"] == "dc" && args["dcl"] == dynamic_classifier_id)
    {
      navigation_bar_entries += ",selected";
    }
    navigation_bar_entries += ")";

    ++dynamic_classifier_iterator;
  }

  disp.setmacro("dynamicclassifiernavbarentries", displayclass::defaultpackage, navigation_bar_entries);
}


void dynamicclassifieraction::define_internal_macros (displayclass &disp, cgiargsclass &args,
                                                      recptprotolistclass *protos, ostream &logout)
{
  // No internal macros set
}


// Outputs the page for the dynamic classifier named by the "dcl" argument, positioned at
// the classifier node named by the "dcn" argument (which may be modified along the way).
// Returns false only when the collection server cannot be resolved; problems found after
// the page header has been written are reported on the page and true is returned.
bool dynamicclassifieraction::do_action(cgiargsclass &args, recptprotolistclass *protos,
                                        browsermapclass *browsers, displayclass &disp,
                                        outconvertclass &outconvert, ostream &textout,
                                        ostream &logout)
{
  // A valid collection server is vital
  recptproto *collectproto = protos->getrecptproto (args["c"], logout);
  if (collectproto == NULL)
  {
    logout << "dynamicclassifieraction::do_action called with NULL collectproto\n";
    return false;
  }

  textout << outconvert << disp << "_dynamicclassifier:header_\n";
  textout << outconvert << disp << "_dynamicclassifier:content_\n";

  // Check a dynamic classifier ID has been specified
  text_t arg_dcl = args["dcl"];
  if (arg_dcl.empty())
  {
    textout << outconvert << disp << "Error: Missing dcl argument.\n";
    textout << outconvert << disp << "_dynamicclassifier:footer_\n";
    return true;
  }

  // Defensive: the collection information is dereferenced below. The header has already
  // been written, so finish the page instead of bailing out half way through.
  ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
  if (cinfo == NULL)
  {
    logout << "dynamicclassifieraction::do_action called with NULL cinfo\n";
    textout << outconvert << disp << "Error: Collection information is not available.\n";
    textout << outconvert << disp << "_dynamicclassifier:footer_\n";
    return true;
  }

  // Check the dynamic classifier ID is valid (ie. there is an entry in the collect.cfg file for it)
  if (cinfo->dynamic_classifiers.find(arg_dcl) == cinfo->dynamic_classifiers.end())
  {
    textout << outconvert << disp << "Error: Invalid dcl value \"" << encodeForHTML(arg_dcl) << "\".\n";
    textout << outconvert << disp << "_dynamicclassifier:footer_\n";
    return true;
  }

  // Parse the classifier options from the specification
  text_t classifier_specification = cinfo->dynamic_classifiers[arg_dcl];
  text_tmap classifier_options = parse_classifier_options (classifier_specification, args);

  // Output the "Header" format statement if there is one
  text_t classifier_header_format_statement = "";
  get_formatstring (arg_dcl + "Header", cinfo->format, classifier_header_format_statement);
  textout << outconvert << disp << classifier_header_format_statement << "\n";

  // Resolve any ".pr" bits at the end of the "dcn" argument
  text_t classifier_node_separator = classifier_options["-split_using_hierarchy_separator"];
  if (ends_with (args["dcn"], ".pr"))
  {
    // Change the "dcn" argument to be the OID of the parent of the specified classifier node
    text_tlist args_dcn_parts;
    splitword (args["dcn"].begin(), args["dcn"].end(), classifier_node_separator, args_dcn_parts);
    if (!args_dcn_parts.empty())
    {
      args_dcn_parts.pop_back();  // Remove the last part
    }
    joinchar (args_dcn_parts, classifier_node_separator, args["dcn"]);
  }

  // Prepare to output the dynamic classifier
  text_t current_classifier_node_OID = "";
  text_t current_metadata_value_filter = "";
  int classifier_node_indent = 0;

  // (Optional) Output an hlist to group the classifier nodes into buckets based on the first character
  if (classifier_options["-group_by_first_character"] == "1")
  {
    text_t selected_grouping_node_OID = output_hlist_classifier_nodes (current_classifier_node_OID, current_metadata_value_filter, "substr(value,1,1)", classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
    current_classifier_node_OID = selected_grouping_node_OID + ".";
    current_metadata_value_filter = selected_grouping_node_OID + "*";
  }

  // (Optional) Output an hlist instead of a vlist at the top level
  if (classifier_options["-use_hlist_at_top"] == "1")
  {
    text_t selected_hlist_node_OID = output_hlist_classifier_nodes (current_classifier_node_OID, current_metadata_value_filter, "", classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
    current_classifier_node_OID = selected_hlist_node_OID + classifier_node_separator;
  }

  // args["dcn"] may have been modified by output_hlist_classifier_nodes() above
  text_t classifier_node_OID = args["dcn"];

  // If grouping is enabled remove the grouping node bit from the start of the OID
  text_t classifier_node_OID_sans_grouping = classifier_node_OID;
  if (classifier_options["-group_by_first_character"] == "1")
  {
    text_t::iterator grouping_node_separator = findchar (classifier_node_OID.begin(), classifier_node_OID.end(), '.');
    if (grouping_node_separator != classifier_node_OID.end())
    {
      classifier_node_OID_sans_grouping = substr (grouping_node_separator + 1, classifier_node_OID.end());
    }
    else
    {
      classifier_node_OID_sans_grouping = "";
    }
  }

  // Split the classifier node OID into its hierarchical parts, then remove any we've already dealt with (HLists)
  text_tlist classifier_node_OID_parts_remaining;
  splitword (classifier_node_OID_sans_grouping.begin(), classifier_node_OID_sans_grouping.end(), classifier_node_separator, classifier_node_OID_parts_remaining);
  if (classifier_options["-use_hlist_at_top"] == "1" && !classifier_node_OID_parts_remaining.empty())
  {
    // The emptiness check matters: there may be no parts at all (e.g. no node selected),
    // and popping from an empty list is undefined behaviour
    classifier_node_OID_parts_remaining.pop_front();
  }

  text_t classifier_node_metadata_value = classifier_node_OID_sans_grouping;
  text_t classifier_node_metadata_value_filter = classifier_node_metadata_value + classifier_node_separator + "*";

  // If "-split_greenstone_dates" is on the metadata value and filter doesn't quite match the OID -- fix this
  if (classifier_options["-split_greenstone_dates"] == "1")
  {
    classifier_node_metadata_value.replace (classifier_node_separator, "");
    classifier_node_metadata_value_filter = classifier_node_metadata_value;
    if (classifier_node_metadata_value_filter.size() == 8)
    {
      classifier_node_metadata_value_filter += classifier_node_separator;
    }
    classifier_node_metadata_value_filter += "*";
  }

  // Simple case at the top level: just output the child classifier nodes
  if (classifier_node_metadata_value == "")
  {
    output_child_classifier_nodes (current_classifier_node_OID, "", current_metadata_value_filter, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);
  }

  // More complex case below the top level
  else
  {
    // Output the parent classifier nodes and the current classifier node
    output_upper_classifier_nodes (current_classifier_node_OID, classifier_node_OID_parts_remaining, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);

    // Output the child classifier nodes
    current_classifier_node_OID = classifier_node_OID + classifier_node_separator;
    output_child_classifier_nodes (current_classifier_node_OID, classifier_node_metadata_value, classifier_node_metadata_value_filter, classifier_node_indent, classifier_options, args, collectproto, browsers, disp, outconvert, textout, logout);

    // Get the document nodes at this level
    text_t metadata_elements = classifier_options["metadata_elements"];
    text_t sort_documents_by = classifier_options["-sort_documents_by"];
    FilterResponse_t documents_response;
    get_documents_with_metadata_value (metadata_elements, classifier_node_metadata_value, sort_documents_by, args["c"], collectproto, documents_response, logout);

    // Display the document nodes
    display_document_nodes (documents_response, classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
  }

  // Output the "Footer" format statement if there is one
  text_t classifier_footer_format_statement = "";
  get_formatstring (arg_dcl + "Footer", cinfo->format, classifier_footer_format_statement);
  textout << outconvert << disp << classifier_footer_format_statement << "\n";

  textout << outconvert << disp << "_dynamicclassifier:footer_\n";
  return true;
}


// Parses a classifier specification of the form
//   <metadata element(s)> [-option [value]] ...
// into a map of option name -> value. The first space-separated token is stored under
// "metadata_elements"; an option that is not followed by a value (i.e. is followed by
// another "-..." token, or by nothing) is a flag and gets the value "1".
// "-split_using_hierarchy_separator" defaults to "|".
text_tmap dynamicclassifieraction::parse_classifier_options (text_t classifier_specification,
                                                             cgiargsclass &args)
{
  text_tmap classifier_options;

  // Default values
  classifier_options["-split_using_hierarchy_separator"] = "|";

  // Split the classifier specification string by spaces
  text_tlist classifier_specification_parts;
  splitchar (classifier_specification.begin(), classifier_specification.end(), ' ', classifier_specification_parts);

  // Nothing to parse (calling front() on an empty list would be undefined behaviour)
  if (classifier_specification_parts.empty())
  {
    return classifier_options;
  }

  // The metadata element(s) to classify by should be the first value
  classifier_options["metadata_elements"] = classifier_specification_parts.front();
  classifier_specification_parts.pop_front();

  // Parse options from the remainder of the classifier specification
  while (!classifier_specification_parts.empty())
  {
    // Parse the option name
    text_t classifier_option_name = classifier_specification_parts.front();
    classifier_specification_parts.pop_front();

    // Check if the option has a value (it may just be a flag, in which case we use "1" as the value)
    text_t classifier_option_value = "1";
    if (!classifier_specification_parts.empty() && !starts_with(classifier_specification_parts.front(), "-"))
    {
      classifier_option_value = classifier_specification_parts.front();
      classifier_specification_parts.pop_front();
    }

    // Record the option
    classifier_options[classifier_option_name] = classifier_option_value;
  }

  return classifier_options;
}


// Outputs a horizontal list of classifier nodes built from the first hierarchy part of
// every metadata value matching metadata_value_filter, and returns the OID of the hlist
// node that is selected. When args["dcn"] does not select any of the nodes the first one
// is selected, and args["dcn"] is updated if it is a prefix of parent_classifier_node_OID.
// Returns "" when the metadata request fails or there are no nodes to display.
text_t dynamicclassifieraction::output_hlist_classifier_nodes (text_t parent_classifier_node_OID,
                                                               text_t metadata_value_filter,
                                                               text_t metadata_value_grouping_expression,
                                                               text_tmap classifier_options,
                                                               cgiargsclass &args,
                                                               recptproto *collectproto,
                                                               browsermapclass *browsers,
                                                               displayclass &disp,
                                                               outconvertclass &outconvert,
                                                               ostream &textout,
                                                               ostream &logout)
{
  // Get all the metadata values for the specified element(s) that match the filter
  text_t metadata_elements = classifier_options["metadata_elements"];
  FilterResponse_t metadata_values_response;
  bool request_success = get_metadata_values (metadata_elements, metadata_value_filter, metadata_value_grouping_expression, args["c"], collectproto, metadata_values_response, logout);

  // If the request failed then it's probably because the collection isn't using an SQL infodbtype
  if (request_success == false)
  {
    textout << outconvert << disp << "Error: Dynamic classifier functionality is not available. Please check you are using an SQL infodbtype and the collection has been rebuilt.\n";
    return "";
  }

  // Check some metadata values were returned
  if (metadata_values_response.docInfo.empty())
  {
    return "";
  }

  // After processing any hierarchical metadata values we're left with the hlist classifier nodes
  // (node label -> occurrence count)
  // NOTE(review): comparator follows the project's text_tmap convention -- confirm against text_t.h
  map<text_t, int, lttext_t> hlist_classifier_nodes;
  text_t hlist_node_separator = classifier_options["-split_using_hierarchy_separator"];
  ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values_response.docInfo.begin();
  while (metadata_value_iterator != metadata_values_response.docInfo.end())
  {
    // Assume for now we're always at the top
    text_t hlist_metadata_value = split_metadata_value ((*metadata_value_iterator).OID, classifier_options);

    // Determine the label for the hlist classifier node from the metadata value
    text_tlist hlist_metadata_value_parts;
    splitword (hlist_metadata_value.begin(), hlist_metadata_value.end(), hlist_node_separator, hlist_metadata_value_parts);

    // A value that yields no parts has no label, so it cannot become a node
    if (!hlist_metadata_value_parts.empty())
    {
      // operator[] creates a node with a count of 0 the first time a label is seen
      hlist_classifier_nodes[hlist_metadata_value_parts.front()] += (*metadata_value_iterator).result_num;
    }

    ++metadata_value_iterator;
  }

  // Add the necessary metadata to the hlist classifier nodes
  text_t selected_hlist_node_OID = "";
  FilterResponse_t hlist_classifier_nodes_response;
  map<text_t, int, lttext_t>::iterator hlist_classifier_nodes_iterator = hlist_classifier_nodes.begin();
  while (hlist_classifier_nodes_iterator != hlist_classifier_nodes.end())
  {
    text_t hlist_classifier_node_OID = parent_classifier_node_OID + (*hlist_classifier_nodes_iterator).first;

    // Is this the hlist node that is currently selected?
    if (starts_with (args["dcn"], hlist_classifier_node_OID))
    {
      selected_hlist_node_OID = hlist_classifier_node_OID;
    }

    // Add the necessary metadata required to display the hlist nodes correctly
    ResultDocInfo_t hlist_classifier_node;
    hlist_classifier_node.OID = hlist_classifier_node_OID;
    hlist_classifier_node.metadata["doctype"].values.push_back ("classify");
    hlist_classifier_node.metadata["haschildren"].values.push_back ("1");
    hlist_classifier_node.metadata["numleafdocs"].values.push_back ("?");  // We can't determine this without more database requests
    hlist_classifier_node.metadata["Title"].values.push_back ((*hlist_classifier_nodes_iterator).first);
    hlist_classifier_nodes_response.docInfo.push_back (hlist_classifier_node);

    ++hlist_classifier_nodes_iterator;
  }

  // Every value may have been skipped above; there is then nothing to select or display
  if (hlist_classifier_nodes_response.docInfo.empty())
  {
    return "";
  }

  // Automatically select the first hlist node if necessary
  if (selected_hlist_node_OID == "")
  {
    selected_hlist_node_OID = (*hlist_classifier_nodes_response.docInfo.begin()).OID;

    // Don't really like messing with this here, but it needs to be done before display_classifier_nodes() below
    if (starts_with (parent_classifier_node_OID, args["dcn"]))
    {
      args["dcn"] = selected_hlist_node_OID;
    }
  }

  // Display the hlist nodes
  display_classifier_nodes (hlist_classifier_nodes_response, "HList", 0, args, collectproto, browsers, disp, outconvert, textout, logout);

  return selected_hlist_node_OID;
}


// Outputs one vlist classifier node per label in upper_classifier_node_labels, each one
// indented a level further than the previous. classifier_node_indent is incremented once
// per node output, so on return it holds the indent for whatever is displayed next.
void dynamicclassifieraction::output_upper_classifier_nodes (text_t root_classifier_node_OID,
                                                             text_tlist upper_classifier_node_labels,
                                                             int& classifier_node_indent,
                                                             text_tmap classifier_options,
                                                             cgiargsclass &args,
                                                             recptproto *collectproto,
                                                             browsermapclass *browsers,
                                                             displayclass &disp,
                                                             outconvertclass &outconvert,
                                                             ostream &textout,
                                                             ostream &logout)
{
  text_t upper_node_separator = classifier_options["-split_using_hierarchy_separator"];

  // Display the upper classifier nodes
  text_t upper_classifier_node_OID = root_classifier_node_OID;
  text_tlist::iterator upper_classifier_node_labels_iterator = upper_classifier_node_labels.begin();
  while (upper_classifier_node_labels_iterator != upper_classifier_node_labels.end())
  {
    // The OID of a node is the OID of its parent, the separator, then its own label
    upper_classifier_node_OID += *upper_classifier_node_labels_iterator;

    ResultDocInfo_t upper_classifier_node;
    upper_classifier_node.OID = upper_classifier_node_OID;
    upper_classifier_node.metadata["doctype"].values.push_back ("classify");
    upper_classifier_node.metadata["haschildren"].values.push_back ("1");
    upper_classifier_node.metadata["numleafdocs"].values.push_back ("?");  // We can't determine this without more database requests
    upper_classifier_node.metadata["Title"].values.push_back (*upper_classifier_node_labels_iterator);

    FilterResponse_t upper_classifier_node_response;
    upper_classifier_node_response.docInfo.push_back(upper_classifier_node);
    display_classifier_nodes (upper_classifier_node_response, "VList", classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
    classifier_node_indent++;

    upper_classifier_node_OID += upper_node_separator;
    ++upper_classifier_node_labels_iterator;
  }
}


// Outputs a vlist of the classifier nodes directly below the node whose metadata value is
// classifier_node_metadata_value. The nodes are derived from the metadata values matching
// metadata_value_filter; each node's "numleafdocs" is the sum of the result counts of the
// values that fall under it. classifier_node_OID is prefixed to each child label to form
// the child OIDs.
void dynamicclassifieraction::output_child_classifier_nodes (text_t classifier_node_OID,
                                                             text_t classifier_node_metadata_value,
                                                             text_t metadata_value_filter,
                                                             int& classifier_node_indent,
                                                             text_tmap classifier_options,
                                                             cgiargsclass &args,
                                                             recptproto *collectproto,
                                                             browsermapclass *browsers,
                                                             displayclass &disp,
                                                             outconvertclass &outconvert,
                                                             ostream &textout,
                                                             ostream &logout)
{
  // Get all the metadata values for the specified element(s) that match the filter
  text_t metadata_elements = classifier_options["metadata_elements"];
  FilterResponse_t metadata_values_response;
  bool request_success = get_metadata_values (metadata_elements, metadata_value_filter, "", args["c"], collectproto, metadata_values_response, logout);

  // If the request failed then it's probably because the collection isn't using an SQL infodbtype
  if (request_success == false)
  {
    textout << outconvert << disp << "Error: Dynamic classifier functionality is not available. Please check you are using an SQL infodbtype and the collection has been rebuilt.\n";
    return;
  }

  // These do not depend on the individual metadata values, so work them out once
  text_t child_node_separator = classifier_options["-split_using_hierarchy_separator"];
  text_t classifier_node_hierarchical_metadata_value = split_metadata_value (classifier_node_metadata_value, classifier_options);
  text_t current_position_prefix = classifier_node_hierarchical_metadata_value + child_node_separator;

  // After processing any hierarchical metadata values we're left with the child classifier nodes
  // (node label -> occurrence count)
  // NOTE(review): comparator follows the project's text_tmap convention -- confirm against text_t.h
  map<text_t, int, lttext_t> child_classifier_nodes;
  ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values_response.docInfo.begin();
  while (metadata_value_iterator != metadata_values_response.docInfo.end())
  {
    text_t hierarchical_metadata_value = split_metadata_value ((*metadata_value_iterator).OID, classifier_options);

    // We need to remove the current position from the metadata value to leave the child metadata value
    text_t child_metadata_value = hierarchical_metadata_value;
    if (starts_with(hierarchical_metadata_value, current_position_prefix))
    {
      child_metadata_value = substr(hierarchical_metadata_value.begin() + current_position_prefix.size(), hierarchical_metadata_value.end());
    }

    // Determine the label for the child classifier node from the metadata value
    text_tlist child_metadata_value_parts;
    splitword (child_metadata_value.begin(), child_metadata_value.end(), child_node_separator, child_metadata_value_parts);

    // A value that yields no parts has no label, so it cannot become a node
    if (!child_metadata_value_parts.empty())
    {
      // operator[] creates a node with a count of 0 the first time a label is seen
      child_classifier_nodes[child_metadata_value_parts.front()] += (*metadata_value_iterator).result_num;
    }

    ++metadata_value_iterator;
  }

  // Add the necessary metadata to the child classifier nodes
  FilterResponse_t child_classifier_nodes_response;
  map<text_t, int, lttext_t>::iterator child_classifier_nodes_iterator = child_classifier_nodes.begin();
  while (child_classifier_nodes_iterator != child_classifier_nodes.end())
  {
    text_t child_classifier_node_OID = classifier_node_OID + (*child_classifier_nodes_iterator).first;

    ResultDocInfo_t child_classifier_node;
    child_classifier_node.OID = child_classifier_node_OID;
    child_classifier_node.metadata["doctype"].values.push_back ("classify");
    child_classifier_node.metadata["haschildren"].values.push_back ("1");
    child_classifier_node.metadata["numleafdocs"].values.push_back ((*child_classifier_nodes_iterator).second);
    child_classifier_node.metadata["Title"].values.push_back ((*child_classifier_nodes_iterator).first);
    child_classifier_nodes_response.docInfo.push_back (child_classifier_node);

    ++child_classifier_nodes_iterator;
  }

  // Display the child classifier nodes
  display_classifier_nodes (child_classifier_nodes_response, "VList", classifier_node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
}


// Returns the hierarchical form of a metadata value. With "-split_greenstone_dates" on,
// values of length 4, 6 or 8 have the hierarchy separator inserted after the year and
// after the month (e.g. YYYYMMDD -> YYYY|MM|DD). Every other value is returned unchanged.
text_t dynamicclassifieraction::split_metadata_value (text_t metadata_value,
                                                      text_tmap classifier_options)
{
  text_t hierarchical_metadata_value = metadata_value;

  text_t hierarchy_separator = classifier_options["-split_using_hierarchy_separator"];

  // Add hierarchy separators into Greenstone date values (e.g. YYYYMMDD -> YYYY|MM|DD)
  if (classifier_options["-split_greenstone_dates"] == "1")
  {
    if (metadata_value.size() == 4 || metadata_value.size() == 6 || metadata_value.size() == 8)
    {
      // Add year
      hierarchical_metadata_value = substr (metadata_value.begin(), metadata_value.begin() + 4);
      if (metadata_value.size() == 6 || metadata_value.size() == 8)
      {
        // Add month
        hierarchical_metadata_value += hierarchy_separator;
        hierarchical_metadata_value += substr (metadata_value.begin() + 4, metadata_value.begin() + 6);
        if (metadata_value.size() == 8)
        {
          // Add day
          hierarchical_metadata_value += hierarchy_separator;
          hierarchical_metadata_value += substr (metadata_value.begin() + 6, metadata_value.begin() + 8);
        }
      }
    }
  }

  return hierarchical_metadata_value;
}


// Displays the given classifier nodes using the browser registered for
// classifier_nodes_type ("HList" or "VList" in this file). The format statement comes
// from the collection's format information for the current "dcl" classifier when there
// is one, and from the browser's default otherwise. Does nothing for an empty response.
void dynamicclassifieraction::display_classifier_nodes (FilterResponse_t classifier_nodes_response,
                                                        text_t classifier_nodes_type,
                                                        int classifier_nodes_indent,
                                                        cgiargsclass &args,
                                                        recptproto *collectproto,
                                                        browsermapclass *browsers,
                                                        displayclass &disp,
                                                        outconvertclass &outconvert,
                                                        ostream &textout,
                                                        ostream &logout)
{
  // Check there are some classifier nodes to display
  if (classifier_nodes_response.docInfo.empty()) return;

  // Defensive: both of these are dereferenced below
  browserclass *bptr = browsers->getbrowser (classifier_nodes_type);
  ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
  if (bptr == NULL || cinfo == NULL)
  {
    logout << "dynamicclassifieraction::display_classifier_nodes called with NULL browser or cinfo\n";
    return;
  }

  // Get the format statement for this classifier if there is one, or use the browser's default otherwise
  text_t formatstring;
  if (!get_formatstring (args["dcl"], classifier_nodes_type, cinfo->format, formatstring))
  {
    formatstring = bptr->get_default_formatstring();
  }

  format_t *formatlistptr = new format_t();
  text_tset metadata;
  bool getParents = false;
  parse_formatstring (formatstring, formatlistptr, metadata, getParents);
  bool use_table = is_table_content (formatlistptr);

  // Display the classifier nodes
  bptr->output_section_group (classifier_nodes_response, args, args["c"], classifier_nodes_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);

  // The parsed format statement is only needed for the output above
  // NOTE(review): assumes output_section_group() does not keep the pointer -- confirm
  delete formatlistptr;
}


// Displays the given documents as a vlist. The format statement is the "DocumentNodes"
// one of the current "dcl" classifier, falling back to its "VList" one and then to the
// browser's default. The metadata named by the format statement is requested for the
// documents before they are output. Does nothing for an empty response.
void dynamicclassifieraction::display_document_nodes (FilterResponse_t documents_response,
                                                      int document_nodes_indent,
                                                      cgiargsclass &args,
                                                      recptproto *collectproto,
                                                      browsermapclass *browsers,
                                                      displayclass &disp,
                                                      outconvertclass &outconvert,
                                                      ostream &textout,
                                                      ostream &logout)
{
  // Check there are some documents to display
  if (documents_response.docInfo.empty()) return;

  // Defensive: both of these are dereferenced below
  browserclass *bptr = browsers->getbrowser ("VList");
  ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
  if (bptr == NULL || cinfo == NULL)
  {
    logout << "dynamicclassifieraction::display_document_nodes called with NULL browser or cinfo\n";
    return;
  }

  // Get the format statement for the document nodes if there is one, or use the browser's default otherwise
  text_t formatstring;
  if (!get_formatstring (args["dcl"], "DocumentNodes", cinfo->format, formatstring))
  {
    if (!get_formatstring (args["dcl"], "VList", cinfo->format, formatstring))
    {
      formatstring = bptr->get_default_formatstring();
    }
  }

  format_t *formatlistptr = new format_t();
  text_tset metadata;
  bool getParents = false;
  parse_formatstring (formatstring, formatlistptr, metadata, getParents);
  bool use_table = is_table_content (formatlistptr);

  // Request the necessary metadata for displaying the documents
  text_tarray document_OIDs;
  ResultDocInfo_tarray::iterator document_iterator = documents_response.docInfo.begin();
  while (document_iterator != documents_response.docInfo.end())
  {
    document_OIDs.push_back ((*document_iterator).OID);
    ++document_iterator;
  }
  FilterResponse_t document_nodes_response;
  get_info (document_OIDs, args["c"], args["l"], metadata, getParents, collectproto, document_nodes_response, logout);

  // Display the document nodes
  bptr->output_section_group (document_nodes_response, args, args["c"], document_nodes_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);

  // The parsed format statement is only needed for the output above
  // NOTE(review): assumes output_section_group() does not keep the pointer -- confirm
  delete formatlistptr;
}