source: gs2-extensions/ngramj/src/wiki/wikipedia2text/wiki2xml/php/browse_texts.php@ 25141

Last change on this file since 25141 was 25141, checked in by papitha, 12 years ago

NGRAMJ PERL MODULE ADDED /MAORI LANGUAGE GUESSING WORKING WELL!!

File size: 1.8 KB
Line 
1<?php
2
3require_once ( "default.php" ) ;
4require_once ( "global_functions.php" ) ;
5require_once ( "filter_named_entities.php" ) ;
6require_once ( "content_provider.php" ) ;
7require_once ( "wiki2xml.php" ) ;
8require_once ( "xml2xhtml.php" ) ;
9require_once ( "mediawiki_converter.php" ) ;
10
11# FUNCTIONS
12
# Look up a request parameter in $_REQUEST.
# Returns the stored value when $key is set (isset() treats a null value as
# not set), otherwise returns $default.
function get_param ( $key , $default = "" ) {
	return isset ( $_REQUEST[$key] ) ? $_REQUEST[$key] : $default ;
}
17
18# MAIN
19
@set_time_limit ( 0 ) ; # No time limit

# Conversion settings passed to every converter call below.
$xmlg = array (
	'site_base_url' => "SBU" ,
	'resolvetemplates' => true ,
	'templates' => array () ,
	'namespace_template' => 'Vorlage' ,
) ;

$content_provider = new ContentProviderTextFile ;
$converter = new MediaWikiConverter ;

# Requested article title; falls back to 'Main Page' when no title is given.
# NOTE(review): PHP already URL-decodes $_REQUEST values, so this urldecode()
# decodes a second time (a literal '+' becomes a space, '%xx' is decoded
# again). Left unchanged in case existing links double-encode - confirm.
$title = urldecode ( get_param ( 'title' , urlencode ( 'Main Page' ) ) ) ;
$xmlg['page_title'] = $title ;

# Output format selector, case-insensitive: "xml", "text", anything else = XHTML.
$format = strtolower ( get_param ( 'format' , 'xhtml' ) ) ;

# $base_text_dir is not assigned in this file; presumably it is defined by
# one of the required files (e.g. default.php) - verify.
$content_provider->basedir = $base_text_dir ;

# NOTE(review): $title is user-supplied and is handed to a file-based content
# provider; confirm get_wiki_text() rejects path components such as "../".
$text = $content_provider->get_wiki_text ( $title ) ;
$xml = $converter->article2xml ( $title , $text , $xmlg ) ;

if ( $format == "xml" ) {
	# XML: emit the intermediate XML document as-is
	header('Content-type: text/xml; charset=utf-8');
	print "<?xml version='1.0' encoding='UTF-8' ?>\n" ;
	print $xml ;
} else if ( $format == "text" ) {
	# Plain text, delivered as HTML with line breaks turned into <br/>
	$xmlg['plaintext_markup'] = true ;
	$xmlg['plaintext_prelink'] = true ;
	$out = $converter->articles2text ( $xml , $xmlg ) ;
	$out = str_replace ( "\n" , "<br/>" , $out ) ;
	header('Content-type: text/html; charset=utf-8');
	print $out ;
} else {
	# XHTML
	# The Accept header is optional. Fall back to an empty string so a client
	# that omits it does not raise an undefined-index notice, which could be
	# printed before the header() call below and break it.
	$accept = isset ( $_SERVER["HTTP_ACCEPT"] ) ? $_SERVER["HTTP_ACCEPT"] : "" ;
	if ( stristr ( $accept , "application/xhtml+xml" ) ) {
		# Skipping the "strict" part ;-)
		# (both branches currently send text/html; the strict XHTML type is disabled)
		header("Content-type: text/html; charset=utf-8");
#		header("Content-type: application/xhtml+xml");
	} else {
		# Header hack for IE
		header("Content-type: text/html; charset=utf-8");
	}
	print $converter->articles2xhtml ( $xml , $xmlg ) ;
}
66
67?>
Note: See TracBrowser for help on using the repository browser.