Changeset 15949


Ignore:
Timestamp:
2008-06-10T15:55:54+12:00 (16 years ago)
Author:
mdewsnip
Message:

(Adding dynamic classifiers) Added initial support for multi-level hierarchical classifiers.

Location:
gsdl/trunk/src/recpt
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/src/recpt/dynamicclassifieraction.cpp

    r15940 r15949  
    149149  }
    150150
     151  // Produce the page
    151152  text_t metadata_element_name = cinfo->dynamic_classifiers[arg_dcl];
     153  if (args["dcn"].empty())
     154  {
     155    // Simple case for the top-level page
     156    output_top_level_page (metadata_element_name, args, collectproto, browsers, disp, outconvert, textout, logout);
     157  }
     158  else
     159  {
     160    // More complex case for an internal page
     161    output_internal_page (metadata_element_name, args, collectproto, browsers, disp, outconvert, textout, logout);
     162  }
     163
     164  textout << outconvert << disp << "_document:footer_\n";
     165  return true;
     166}
     167
     168
     169void dynamicclassifieraction::output_top_level_page (text_t metadata_element_name, cgiargsclass &args,
     170                             recptproto *collectproto, browsermapclass *browsers,
     171                             displayclass &disp, outconvertclass &outconvert,
     172                             ostream &textout, ostream &logout)
     173{
     174  // Get all the metadata values for the specified element (these become the classifier nodes at the top level)
     175  FilterResponse_t metadata_values_response;
     176  get_metadata_values (metadata_element_name, "", args["c"], collectproto, metadata_values_response, logout);
     177
     178  // Deal with any hierarchical metadata values
     179  map<text_t, int, lttext_t> classifier_nodes;
     180  ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values_response.docInfo.begin();
     181  while (metadata_value_iterator != metadata_values_response.docInfo.end())
     182  {
     183    // Is this metadata value hierarchical?
     184    text_t metadata_value = (*metadata_value_iterator).OID;
     185    text_t::iterator hierarchy_split_position = findchar(metadata_value.begin(), metadata_value.end(), '|');
     186    if (hierarchy_split_position != metadata_value.end())
     187    {
     188      // Yes, so use the first part of the hierarchy only
     189      metadata_value = substr(metadata_value.begin(), hierarchy_split_position);
     190    }
     191
     192    // Create a node for this metadata value if we haven't seen it before
     193    if (classifier_nodes.find(metadata_value) == classifier_nodes.end())
     194    {
     195      classifier_nodes[metadata_value] = 0;
     196    }
     197
     198    // Increment the occurrence count
     199    classifier_nodes[metadata_value] += (*metadata_value_iterator).result_num;
     200
     201    metadata_value_iterator++;
     202  }
     203
     204  // Display the top-level classifier nodes
     205  map<text_t, int, lttext_t>::iterator classifier_nodes_iterator = classifier_nodes.begin();
     206  while (classifier_nodes_iterator != classifier_nodes.end())
     207  {
     208    text_t classifier_node_OID = (*classifier_nodes_iterator).first;
     209    text_t classifier_node_label = (*classifier_nodes_iterator).first;
     210    int classifier_node_numleafdocs = (*classifier_nodes_iterator).second;
     211    output_classifier_node (classifier_node_OID, classifier_node_label, classifier_node_numleafdocs, 0, args, collectproto, browsers, disp, outconvert, textout, logout);
     212    classifier_nodes_iterator++;
     213  }
     214}
     215
     216
     217void dynamicclassifieraction::output_internal_page (text_t metadata_element_name, cgiargsclass &args,
     218                            recptproto *collectproto, browsermapclass *browsers,
     219                            displayclass &disp, outconvertclass &outconvert,
     220                            ostream &textout, ostream &logout)
     221{
     222  text_t arg_dcn = args["dcn"];
     223  text_t classifier_node_metadata_value = arg_dcn;
     224  int node_indent = 0;
     225
     226  // Get all the classifier nodes at this level
     227  text_t metadata_value_filter = classifier_node_metadata_value + "|*";
     228  FilterResponse_t metadata_values_response;
     229  get_metadata_values (metadata_element_name, metadata_value_filter, args["c"], collectproto, metadata_values_response, logout);
     230
     231  // Get all the documents at this level
     232  FilterResponse_t document_OIDs_response;
     233  get_documents_with_metadata_value (metadata_element_name, classifier_node_metadata_value, "dls.Title", args["c"], collectproto, document_OIDs_response, logout);
     234
     235  // Check there are some classifier nodes or some documents at this level, otherwise the "dcn" argument was invalid
     236  if (metadata_values_response.docInfo.empty() && document_OIDs_response.docInfo.empty())
     237  {
     238    textout << outconvert << disp << "Error: Invalid dcn value \"" << arg_dcn << "\".\n";
     239    return;
     240  }
     241
     242  // Determine the parent classifier nodes
     243  text_tarray parent_classifier_node_labels;
     244  splitchar(classifier_node_metadata_value.begin(), classifier_node_metadata_value.end(), '|', parent_classifier_node_labels);
     245
     246  // Display the parent classifier nodes
     247  text_t parent_classifier_node_OID = "";
     248  text_tarray::iterator parent_classifier_node_labels_iterator = parent_classifier_node_labels.begin();
     249  while (parent_classifier_node_labels_iterator != parent_classifier_node_labels.end())
     250  {
     251    parent_classifier_node_OID += *parent_classifier_node_labels_iterator;
     252    text_t parent_classifier_node_label = *parent_classifier_node_labels_iterator;
     253    text_t parent_classifier_node_numleafdocs = "?";
     254    output_classifier_node (parent_classifier_node_OID, parent_classifier_node_label, parent_classifier_node_numleafdocs, node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
     255    node_indent++;
     256
     257    parent_classifier_node_labels_iterator++;
     258  }
     259
     260  // Collate the classifier nodes at this level (strip the parent prefix and total the occurrence counts)
     261  map<text_t, int, lttext_t> classifier_nodes;
     262  ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values_response.docInfo.begin();
     263  while (metadata_value_iterator != metadata_values_response.docInfo.end())
     264  {
     265    text_t metadata_value = (*metadata_value_iterator).OID;
     266
     267    if (starts_with(metadata_value, arg_dcn + "|"))
     268    {
     269      metadata_value = substr(metadata_value.begin() + (arg_dcn + "|").size(), metadata_value.end());
     270    }
     271
     272    // Is this metadata value hierarchical?
     273    text_t::iterator hierarchy_split_position = findchar(metadata_value.begin(), metadata_value.end(), '|');
     274    if (hierarchy_split_position != metadata_value.end())
     275    {
     276      // Yes, so split off the first part of the hierarchy for the classifier node
     277      metadata_value = substr(metadata_value.begin(), hierarchy_split_position);
     278    }
     279
     280    // Create a node for this metadata value if we haven't seen it before
     281    if (classifier_nodes.find(metadata_value) == classifier_nodes.end())
     282    {
     283      classifier_nodes[metadata_value] = 0;
     284    }
     285
     286    // Increment the occurrence count
     287    classifier_nodes[metadata_value] += (*metadata_value_iterator).result_num;
     288
     289    metadata_value_iterator++;
     290  }
     291
     292  // Display the classifier nodes at this level
     293  map<text_t, int, lttext_t>::iterator classifier_nodes_iterator = classifier_nodes.begin();
     294  while (classifier_nodes_iterator != classifier_nodes.end())
     295  {
     296    text_t classifier_node_OID = parent_classifier_node_OID + "|" + (*classifier_nodes_iterator).first;
     297    text_t classifier_node_label = (*classifier_nodes_iterator).first;
     298    int classifier_node_numleafdocs = (*classifier_nodes_iterator).second;
     299    output_classifier_node (classifier_node_OID, classifier_node_label, classifier_node_numleafdocs, node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
     300    classifier_nodes_iterator++;
     301  }
     302
     303  // Display the documents at this level
     304  text_tarray document_OIDs;
     305  ResultDocInfo_tarray::iterator document_OID_iterator = document_OIDs_response.docInfo.begin();
     306  while (document_OID_iterator != document_OIDs_response.docInfo.end())
     307  {
     308    document_OIDs.push_back ((*document_OID_iterator).OID);
     309    document_OID_iterator++;
     310  }
     311
     312  output_document_nodes (document_OIDs, node_indent, args, collectproto, browsers, disp, outconvert, textout, logout);
     313}
     314
     315
     316void dynamicclassifieraction::output_classifier_node (text_t classifier_node_OID, text_t classifier_node_label,
     317                              text_t classifier_node_numleafdocs, int classifier_node_indent,
     318                              cgiargsclass &args, recptproto *collectproto,
     319                              browsermapclass *browsers, displayclass &disp,
     320                              outconvertclass &outconvert, ostream &textout,
     321                              ostream &logout)
     322{
     323  // Generate the ResultDocInfo_t containing the information for the classifier node
     324  ResultDocInfo_t classifier_node;
     325  classifier_node.OID = classifier_node_OID;
     326  classifier_node.metadata["doctype"].values.push_back ("classify");
     327  classifier_node.metadata["haschildren"].values.push_back ("1");
     328  classifier_node.metadata["numleafdocs"].values.push_back (classifier_node_numleafdocs);
     329  classifier_node.metadata["Title"].values.push_back (classifier_node_label);
     330
     331  // Get the format statement for this classifier if there is one, or use the browser's default otherwise
     332  text_t formatstring;
    152333  text_t classifier_type = "VList";
    153334  browserclass *bptr = browsers->getbrowser (classifier_type);
    154 
    155   // Get the formatstring if there is one, or use the browser's default otherwise
    156   text_t formatstring;
    157   if (!get_formatstring (arg_dcl, classifier_type, cinfo->format, formatstring))
     335  ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
     336  if (!get_formatstring (args["dcl"], classifier_type, cinfo->format, formatstring))
    158337  {
    159338    formatstring = bptr->get_default_formatstring();
     
    165344  bool use_table = is_table_content (formatlistptr);
    166345
    167   // Check if a dynamic classifier node has been specified
    168   text_t arg_dcn = args["dcn"];
    169   if (arg_dcn.empty())
    170   {
    171     // No, so display the top-level classifier page, containing all the metadata values for the specified element
    172     FilterResponse_t metadata_values_response;
    173     get_metadata_values (metadata_element_name, args["c"], collectproto, metadata_values_response, logout);
    174 
    175     // Deal with hierarchical metadata values
    176     map<text_t, int, lttext_t> classifier_nodes;
    177     ResultDocInfo_tarray::iterator metadata_value_iterator = metadata_values_response.docInfo.begin();
    178     while (metadata_value_iterator != metadata_values_response.docInfo.end())
    179     {
    180       text_t metadata_value = (*metadata_value_iterator).OID;
    181 
    182       // Is this metadata value hierarchical?
    183       text_t::iterator hierarchy_split_position = findchar(metadata_value.begin(), metadata_value.end(), '|');
    184       if (hierarchy_split_position != metadata_value.end())
    185       {
    186     // Yes, so split off the first part of the hierarchy for the classifier node
    187     metadata_value = substr(metadata_value.begin(), hierarchy_split_position);
    188       }
    189 
    190       // Create a node for this metadata value if we haven't seen it before
    191       if (classifier_nodes.find(metadata_value) == classifier_nodes.end())
    192       {
    193     classifier_nodes[metadata_value] = 0;
    194       }
    195 
    196       // Increment the occurrence count
    197       classifier_nodes[metadata_value] += (*metadata_value_iterator).result_num;
    198 
    199       metadata_value_iterator++;
    200     }
    201 
    202     // Create the structure containing the classifier nodes to pass to output_section_group(), with the right info
    203     FilterResponse_t classifier_nodes_response;
    204     map<text_t, int, lttext_t>::iterator classifier_nodes_iterator = classifier_nodes.begin();
    205     while (classifier_nodes_iterator != classifier_nodes.end())
    206     {
    207       ResultDocInfo_t classifier_node;
    208       classifier_node.OID = (*classifier_nodes_iterator).first;
    209 
    210       // Add metadata necessary for output_section_group() to display the results as classifier nodes
    211       classifier_node.metadata["doctype"].values.push_back ("classify");
    212       classifier_node.metadata["haschildren"].values.push_back ("1");
    213       classifier_node.metadata["numleafdocs"].values.push_back ((*classifier_nodes_iterator).second);
    214       classifier_node.metadata["Title"].values.push_back ((*classifier_nodes_iterator).first);
    215 
    216       classifier_nodes_response.docInfo.push_back (classifier_node);
    217       classifier_nodes_iterator++;
    218     }
    219 
    220     // Display the classifier nodes
    221     bptr->output_section_group (classifier_nodes_response, args, args["c"], 0, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
    222   }
    223   else
    224   {
    225     // Yes, so get all the documents that fall under this node
    226     text_t metadata_value = arg_dcn;
    227     FilterResponse_t document_OIDs_response;
    228     get_documents_with_metadata_value (metadata_element_name, metadata_value, "dls.Title", args["c"], collectproto, document_OIDs_response, logout);
    229 
    230     // Check the metadata value is valid
    231     if (document_OIDs_response.docInfo.empty())
    232     {
    233       textout << outconvert << disp << "Error: No documents have metadata value \"" << arg_dcn << "\".\n";
    234       textout << outconvert << disp << "_document:footer_\n";
    235       return true;
    236     }
    237 
    238     // Make an array of matching document OIDs
    239     text_tarray document_OIDs;
    240     ResultDocInfo_tarray::iterator document_OID_iterator = document_OIDs_response.docInfo.begin();
    241     while (document_OID_iterator != document_OIDs_response.docInfo.end())
    242     {
    243       document_OIDs.push_back ((*document_OID_iterator).OID);
    244       document_OID_iterator++;
    245     }
    246 
    247     // Display the classifier node
    248     ResultDocInfo_t classifier_node;
    249     classifier_node.OID = arg_dcn;
    250     classifier_node.metadata["doctype"].values.push_back("classify");
    251     classifier_node.metadata["haschildren"].values.push_back("1");
    252     classifier_node.metadata["numleafdocs"].values.push_back(document_OIDs.size());
    253     classifier_node.metadata["Title"].values.push_back(classifier_node.OID);
    254     bptr->output_section_group (classifier_node, args, args["c"], 0, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
    255 
    256     // Request the necessary metadata for the documents under this node
    257     FilterResponse_t document_info_response;
    258     get_info (document_OIDs, args["c"], args["l"], metadata, getParents, collectproto, document_info_response, logout);
    259 
    260     // Display the document nodes
    261     bptr->output_section_group (document_info_response, args, args["c"], 1, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
    262   }
    263 
    264   textout << outconvert << disp << "_document:footer_\n";
    265   return true;
    266 }
     346  // Display the classifier node
     347  bptr->output_section_group (classifier_node, args, args["c"], classifier_node_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
     348}
     349
     350
     351void dynamicclassifieraction::output_document_nodes (text_tarray document_OIDs, int document_nodes_indent,
     352                             cgiargsclass &args, recptproto *collectproto,
     353                             browsermapclass *browsers, displayclass &disp,
     354                             outconvertclass &outconvert, ostream &textout,
     355                             ostream &logout)
     356{
     357  // Get the format statement for this classifier if there is one, or use the browser's default otherwise
     358  text_t formatstring;
     359  text_t classifier_type = "VList";
     360  browserclass *bptr = browsers->getbrowser (classifier_type);
     361  ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, args["c"], logout);
     362  if (!get_formatstring (args["dcl"], classifier_type, cinfo->format, formatstring))
     363  {
     364    formatstring = bptr->get_default_formatstring();
     365  }
     366  format_t *formatlistptr = new format_t();
     367  text_tset metadata;
     368  bool getParents = false;
     369  parse_formatstring (formatstring, formatlistptr, metadata, getParents);
     370  bool use_table = is_table_content (formatlistptr);
     371
     372  // Request the necessary metadata for the documents
     373  FilterResponse_t document_OIDs_response;
     374  get_info (document_OIDs, args["c"], args["l"], metadata, getParents, collectproto, document_OIDs_response, logout);
     375
     376  // Display the document nodes
     377  bptr->output_section_group (document_OIDs_response, args, args["c"], document_nodes_indent, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout);
     378}
  • gsdl/trunk/src/recpt/dynamicclassifieraction.h

    r15744 r15949  
    6262          outconvertclass &outconvert, ostream &textout,
    6363          ostream &logout);
     64
     65  void output_top_level_page (text_t metadata_element_name, cgiargsclass &args,
     66                  recptproto *collectproto, browsermapclass *browsers,
     67                  displayclass &disp, outconvertclass &outconvert,
     68                  ostream &textout, ostream &logout);
     69
     70  void output_internal_page (text_t metadata_element_name, cgiargsclass &args,
     71                 recptproto *collectproto, browsermapclass *browsers,
     72                 displayclass &disp, outconvertclass &outconvert,
     73                 ostream &textout, ostream &logout);
     74
     75  void output_classifier_node (text_t classifier_node_OID, text_t classifier_node_label,
     76                   text_t classifier_node_numleafdocs, int classifier_node_indent,
     77                   cgiargsclass &args, recptproto *collectproto,
     78                   browsermapclass *browsers, displayclass &disp,
     79                   outconvertclass &outconvert, ostream &textout,
     80                   ostream &logout);
     81
     82  void output_document_nodes (text_tarray document_OIDs, int document_nodes_indent,
     83                  cgiargsclass &args, recptproto *collectproto,
     84                  browsermapclass *browsers, displayclass &disp,
     85                  outconvertclass &outconvert, ostream &textout,
     86                  ostream &logout);
    6487};
    6588
Note: See TracChangeset for help on using the changeset viewer.