source: gs2-extensions/ngramj/src/wiki/wiki2xml/php/xml2tree.php@ 25141

Last change on this file since 25141 was 25141, checked in by papitha, 12 years ago

NGRAMJ PERL MODULE ADDED /MAORI LANGUAGE GUESSING WORKING WELL!!

File size: 2.9 KB
Line 
1<?php
2
3/**
4 * This class converts an XML string to a tree structure
5 * based on the "element" class that must be defined outside
6 * prior to including this file
7*/
8
// Stack of the elements that are currently open (the ancestors of the
// node being parsed); shared by the parser callbacks below via "global".
$ancStack = array();
10
11// START Three global functions needed for parsing, sorry guys
/**
 * Start-tag handler (registered through xml_set_element_handler() by the
 * xml2php class). Creates a fresh "element" object for the tag that was
 * just opened and pushes it onto the global ancestor stack, where it stays
 * until the matching end tag is seen.
 *
 * @param mixed  $parser the XML parser invoking the handler (not used)
 * @param string $name   tag name as reported by the parser
 * @param array  $attrs  attributes of the tag as reported by the parser
 */
function wgXMLstartElement($parser, $name, $attrs) {
    global $ancStack;

    // The "element" class is not declared in this file; it has to be
    // defined before this file is included.
    $openedNode = new element;
    $openedNode->attrs = $attrs;
    $openedNode->name = $name;

    array_push($ancStack, $openedNode);
}
22
/**
 * End-tag handler (registered through xml_set_element_handler() by the
 * xml2php class). Removes the element that was just closed from the global
 * ancestor stack. If nothing is left on the stack, that element is stored
 * in the global $rootElem; otherwise it is appended to the children of the
 * element that is now on top of the stack (its parent).
 *
 * @param mixed  $parser the XML parser invoking the handler (not used)
 * @param string $name   name of the tag being closed (not used)
 */
function wgXMLendElement($parser, $name) {
    global $ancStack, $rootElem;

    $closedNode = array_pop($ancStack);
    $remainingDepth = count($ancStack);

    if ($remainingDepth != 0) {
        // Still inside another element: attach the closed node to its parent.
        array_push($ancStack[$remainingDepth - 1]->children, $closedNode);
        return;
    }

    // The stack is empty, so the node just closed is the document root.
    $rootElem = $closedNode;
}
34
/**
 * Character-data handler (registered through
 * xml_set_character_data_handler() by the xml2php class). Appends the text
 * as a plain string to the children of the element on top of the global
 * ancestor stack. Text is ignored when no element is open or when it
 * consists only of characters removed by trim().
 *
 * @param mixed  $parser the XML parser invoking the handler (not used)
 * @param string $data   the chunk of character data
 */
function wgXMLcharacterData($parser, $data) {
    global $ancStack;

    // No open element: there is no parent to attach the text to.
    if (!$ancStack) {
        return;
    }

    // Skip chunks that are empty once surrounding whitespace is trimmed;
    // the untrimmed text is what gets stored otherwise.
    if (trim($data) == "") {
        return;
    }

    $topIndex = count($ancStack) - 1;
    array_push($ancStack[$topIndex]->children, $data);
}
43// END Three global functions needed for parsing, sorry guys
44
/**
 * Here's the class that generates a nice tree.
 *
 * It feeds XML to PHP's event-based XML parser and lets the three global
 * handlers wgXMLstartElement(), wgXMLendElement() and wgXMLcharacterData()
 * assemble the tree. The handlers communicate through the globals
 * $ancStack (open elements) and $rootElem (finished root element), so the
 * two scan methods are not re-entrant.
 *
 * @package MediaWiki
 * @subpackage Experimental
 */
class xml2php {

    /**
     * Parse the XML file $filename and return the root of the resulting tree.
     *
     * The script is terminated with die() when the file cannot be opened or
     * when the parser reports an error.
     *
     * @param string $filename path of the XML file to read
     * @return mixed reference to the global $rootElem: the root element, or
     *               null when no root element was completed (e.g. empty file)
     */
    function & scanFile($filename) {
        global $ancStack, $rootElem;
        $ancStack = array ();
        // Forget the result of any earlier scan, so that an input without a
        // complete root element cannot hand back a stale tree.
        $rootElem = null;

        $xml_parser = xml_parser_create();
        xml_set_element_handler($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
        xml_set_character_data_handler($xml_parser, 'wgXMLcharacterData');
        if (!($fp = fopen($filename, 'r'))) {
            die('could not open XML input');
        }
        // Compare explicitly instead of relying on truthiness: a chunk that
        // is exactly "0" is falsy and would otherwise end the loop early.
        while (($data = fread($fp, 4096)) !== false && $data !== '') {
            if (!xml_parse($xml_parser, $data, feof($fp))) {
                die(sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($xml_parser)), xml_get_current_line_number($xml_parser)));
            }
        }
        // Release the file handle; it was previously left open.
        fclose($fp);
        xml_parser_free($xml_parser);

        // return the root element stored by wgXMLendElement()
        return $rootElem;
    }

    /**
     * Parse XML given as a string, or as an array consumed piece by piece,
     * and return the root of the resulting tree.
     *
     * In both modes the input is wrapped: the parser is first fed the result
     * of xml_articles_header() and finally the closing tag '</articles>'.
     * xml_articles_header() and xml_shift() are defined outside this file.
     *
     * In string mode a parse error inside $input terminates the script with
     * die(). In array mode, and for the header and the closing tag, the
     * result of xml_parse() is not checked; if parsing fails there, null is
     * returned because no root element was completed.
     *
     * @param mixed $input XML string, or an array from which pieces are taken
     *                     with xml_shift() until it returns a falsy value
     * @return mixed the root element, or null when none was completed
     */
    function scanString($input) {
        global $ancStack, $rootElem;
        $ancStack = array ();
        // Forget the result of any earlier scan (see scanFile()).
        $rootElem = null;

        $xml_parser = xml_parser_create();
        xml_set_element_handler($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
        xml_set_character_data_handler($xml_parser, 'wgXMLcharacterData');

        if ( is_array ( $input ) ) {
            xml_parse($xml_parser, xml_articles_header() , false) ;
            // NOTE(review): presumably xml_shift() removes and returns the
            // next piece of $input - confirm against its definition.
            while ( $x = xml_shift ( $input ) ) {
                xml_parse($xml_parser, $x, false) ;
            }

            xml_parse($xml_parser, '</articles>', true) ;
        } else {
            xml_parse($xml_parser, xml_articles_header() , false) ;
            if (!xml_parse($xml_parser, $input, false)) {
                die(sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($xml_parser)), xml_get_current_line_number($xml_parser)));
            }
            xml_parse($xml_parser, '</articles>', true) ;
        }

        xml_parser_free($xml_parser);

        // return the root element stored by wgXMLendElement()
        return $rootElem;
    }

}
105
106?>
Note: See TracBrowser for help on using the repository browser.