root/gsdl/trunk/perllib/muread.pm @ 17110

Revision 15894, 4.0 KB (checked in by mdewsnip, 11 years ago)

Added "use strict" to the files missing it.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# muread.pm -- read a marked-up file
4#
5# Copyright (C) 1999 DigiLib Systems Limited, NZ
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program; if not, write to the Free Software
19# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20#
21###########################################################################
22
23
24package muread;
25
26use strict;
27use unicode;
28use multiread;
29
# Constructor. Returns a muread object whose fields (filename, encoding,
# handle, reader and buffer) all start out as empty strings.
sub new {
    my $class = shift (@_);

    # every piece of state starts out as the empty string
    my %state = map { $_ => "" } qw(filename encoding handle reader buffer);

    return bless \%state, $class;
}
41
# Parses the text found between "<" and ">" and returns a new tag.
#
# The result is a hash reference holding the tag name under '_tagname'
# (when the text starts with a word) plus one entry for every
# name="value" option. Only double-quoted values are recognised; anything
# else is skipped and reported on STDERR, at most once per tag.
sub parse_tag {
    my ($self, $orgtagtext) = @_;
    my $remaining = $orgtagtext;
    my %tag;
    my $reported = 0;

    # peel the tag name (if there is one) off the front
    if ($remaining =~ s/^(\w+)//) {
        $tag{'_tagname'} = $1;
    } else {
        print STDERR "muread::parse_tag error - no tag name found\n";
    }

    # consume the options one at a time until only whitespace is left
    while ($remaining =~ /\S/) {
        $remaining =~ s/^\s+//s;

        # a well-formed name="value" pair is removed and recorded in one step
        if ($remaining =~ s/^(\w+)\s*=\s*\"([^\"]*)\"//s) {
            $tag{$1} = $2;
            next;
        }

        # complain only once per tag, however many bad pieces it has
        unless ($reported) {
            print STDERR "muread::parse_tag error - miss-formed tag <$orgtagtext>\n";
            $reported = 1;
        }

        # drop the offending run of non-whitespace so the loop makes progress
        $remaining =~ s/^\S+//s;
    }

    return \%tag;
}
78
# Reads the content of $tag from the buffer/reader and stores it in
# $tag->{'_contains'}.
#
# $tag is a hash reference. All tags contain a '_tagname' except the tag
# for the document. Items are appended to $tag->{'_contains'} in the order
# they are met: text as {'_text' => ...}, nested tags as the hash returned
# by parse_tag (their own content is read recursively).
#
# Returns nothing. The sub comes back either when the closing tag matching
# $tag has been consumed or when the reader has no more lines. Problems
# (mismatched closing tags, unparsable left-over text, end of file inside
# an open tag) are reported on STDERR and reading carries on where possible.
sub read_tag_content {
    my $self = shift (@_);
    my ($tag) = @_;

    while (1) {
        # move any text in front of the next "<" into the tag
        if ($self->{'buffer'} =~ s/^([^<]+)</</s) {
            $tag->{'_contains'} = [] unless defined $tag->{'_contains'};
            push (@{$tag->{'_contains'}}, {'_text'=>$1});
        }

        my $line;
        if ($self->{'buffer'} =~ s/^<([^>\/]+)>//s) {
            # opening tag: copy the capture before any other regex can run
            my $tagtext = $1;
            my $newtag = $self->parse_tag ($tagtext);
            push (@{$tag->{'_contains'}}, $newtag);

            # deal with the contents of this tag
            $self->read_tag_content ($newtag);

        } elsif ($self->{'buffer'} =~ s/^<\/([^>\/]+)>//s) {
            # closing tag: it has already been removed from the buffer
            my $tagname = $1;

            # the right closing tag ends this tag's content
            return if defined $tag->{'_tagname'} && $tag->{'_tagname'} eq $tagname;

            if (defined $tag->{'_tagname'}) {
                print STDERR "muread::read_tag_content error - mismatched tag </$tagname>, " .
                    "expected </$tag->{'_tagname'}>\n";
            } else {
                # document level: there is no tag name to report, so do not
                # interpolate an undefined value into the message
                print STDERR "muread::read_tag_content error - mismatched tag </$tagname>, " .
                    "no tag is currently open\n";
            }

        } elsif (defined ($line = $self->{'reader'}->read_line())) {
            # nothing recognisable at the front of the buffer yet: read more
            $self->{'buffer'} .= $line;

        } else {
            # end of input; anything other than whitespace could not be parsed
            if ($self->{'buffer'} =~ /\S/) {
                print STDERR "muread::read_tag_content error - can't parse text \"$self->{'buffer'}\"\n";
            }
            last;
        }
    }

    # only reached at end of input; a named tag was therefore never closed
    if (defined $tag->{'_tagname'}) {
        print STDERR "muread::read_tag_content error - eof reached before closing " .
            "tag \"$tag->{'_tagname'}\" found\n";
    }
}
132
# Reads a whole marked-up file and returns its content as a document tag.
#
# Arguments (after the object): the input handle, the file name and the
# encoding. The encoding falls back to "utf8" when it is not defined. All
# three are stored in the object; the handle is cleared again once reading
# has finished.
#
# Returns a hash reference for the document. It carries no '_tagname';
# its content is whatever read_tag_content stores in it.
sub read_file {
    my $self = shift (@_);
    ($self->{'handle'}, $self->{'filename'}, $self->{'encoding'}) = @_;
    $self->{'encoding'} = "utf8" unless defined $self->{'encoding'};

    # start from an empty buffer: unparsed text left over from a previous
    # file read with this object would otherwise be treated as the start
    # of this one
    $self->{'buffer'} = "";

    my $doc = {};

    # get reader set up (explicit Class->method call: this package defines
    # its own new(), which makes the indirect "new multiread" form fragile)
    $self->{'reader'} = multiread->new();
    $self->{'reader'}->set_handle ($self->{'handle'});
    $self->{'reader'}->set_encoding ($self->{'encoding'});

    # read in the file
    $self->read_tag_content ($doc);

    $self->{'handle'} = "";
    return $doc;
}
151
1521;
Note: See TracBrowser for help on using the browser.