source: gsdl/trunk/perllib/muread.pm@ 18430

Last change on this file since 18430 was 15894, checked in by mdewsnip, 16 years ago

Added "use strict" to the files missing it.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.0 KB
Line 
1###########################################################################
2#
3# muread.pm -- read a marked-up file
4#
5# Copyright (C) 1999 DigiLib Systems Limited, NZ
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program; if not, write to the Free Software
19# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20#
21###########################################################################
22
23
24package muread;
25
26use strict;
27use unicode;
28use multiread;
29
# Constructor. Returns a muread object whose fields (filename, encoding,
# handle, reader and buffer) all start out as the empty string.
sub new {
    my $class = shift (@_);

    # every field begins empty
    my %fields = map { $_ => "" } qw(filename encoding handle reader buffer);

    return bless \%fields, $class;
}
41
# Builds a tag hash from the text found inside an opening tag (the part
# between "<" and ">").
#
# The leading run of word characters is stored under '_tagname'. Each
# following name="value" pair becomes an entry keyed by the name. Anything
# that is not such a pair is skipped, and an error is printed to STDERR
# once per tag. Returns a reference to the new tag hash.
sub parse_tag {
    my ($self, $orgtagtext) = @_;

    my %tag;
    my $remaining = $orgtagtext;
    my $reported  = 0;

#    print STDERR "parsing \"$remaining\"\n";

    # strip the tag name off the front (if there is one)
    if ($remaining =~ s/^(\w+)//) {
        $tag{'_tagname'} = $1;
    } else {
        print STDERR "muread::parse_tag error - no tag name found\n";
    }

    # work through the arguments until only whitespace is left
    while ($remaining =~ /\S/) {
        $remaining =~ s/^\s+//s;

        # a well formed name="value" argument
        if ($remaining =~ s/^(\w+)\s*=\s*\"([^\"]*)\"//s) {
            $tag{$1} = $2;
            next;
        }

        # anything else: complain once, then drop the offending item
        unless ($reported) {
            print STDERR "muread::parse_tag error - miss-formed tag <$orgtagtext>\n";
            $reported = 1;
        }
        $remaining =~ s/^\S+//s;
    }

    return \%tag;
}
78
# Reads the content of $tag, consuming $self->{'buffer'} and pulling more
# lines from $self->{'reader'} whenever the buffer cannot be parsed further.
#
# Text found before a tag is appended to $tag->{'_contains'} as
# {'_text'=>...}. Each opening tag is parsed with parse_tag, appended to
# $tag->{'_contains'}, and its own content is read recursively. Returns
# when the closing tag matching $tag->{'_tagname'} is found, or when the
# reader has no more lines. Problems are reported on STDERR.
sub read_tag_content {
    my $self = shift (@_);
    my ($tag) = @_;

    # all tags contain a _tagname except the tag for the document

    my $line = "";
    while (1) {
        # deal with preceding text (only once the "<" that ends it is in
        # the buffer; the "<" itself is left in place)
        if ($self->{'buffer'} =~ s/^([^<]+)(?=<)//s) {
            push (@{$tag->{'_contains'}}, {'_text'=>$1});
        }

        if ($self->{'buffer'} =~ s/^<([^>\/]+)>//s) {
            # add info from this tag
            my $tagtext = $1;
            my $newtag = $self->parse_tag ($tagtext);
            push (@{$tag->{'_contains'}}, $newtag);

            # deal with the contents of this tag
            $self->read_tag_content ($newtag);

        } elsif ($self->{'buffer'} =~ s/^<\/([^>\/]+)>//s) {
            my $tagname = $1;

            # check that this tag is the right tag
            if (!defined $tag->{'_tagname'}) {
                # document level: there is no tag name to compare against,
                # so do not interpolate the undefined value into the message
                print STDERR "muread::read_tag_content error - mismatched tag </$tagname>, " .
                    "no tag is currently open\n";
            } elsif ($tag->{'_tagname'} ne $tagname) {
                print STDERR "muread::read_tag_content error - mismatched tag </$tagname>, " .
                    "expected </$tag->{'_tagname'}>\n";
            } else {
                return;
            }
        } elsif (defined ($line = $self->{'reader'}->read_line())) {
            # nothing parseable yet, get more input
            $self->{'buffer'} .= $line;
        } else {
            # end of input; anything other than whitespace left is an error
            if ($self->{'buffer'} =~ /\S/) {
                print STDERR "muread::read_tag_content error - can't parse text \"$self->{'buffer'}\"\n";
            }
            last;
        }
    }

    # only the document tag (which has no _tagname) may end at eof
    if (defined $tag->{'_tagname'}) {
        print STDERR "muread::read_tag_content error - eof reached before closing " .
            "tag \"$tag->{'_tagname'}\" found\n";
    }
}
132
# Reads a whole marked-up file and returns it as a document hash.
#
# Arguments (after $self): the handle to read from, the filename, and the
# encoding, which defaults to "utf8" when not defined. The three values are
# stored on the object. A multiread reader is set up with the handle and
# encoding, and the content is parsed by read_tag_content into a fresh
# document hash (a tag hash without a '_tagname').
#
# Returns a reference to the document hash. The stored handle is cleared
# before returning.
sub read_file {
    my $self = shift (@_);
    ($self->{'handle'}, $self->{'filename'}, $self->{'encoding'}) = @_;
    $self->{'encoding'} = "utf8" unless defined $self->{'encoding'};

    # start from an empty buffer so that text left unparsed by an earlier
    # read_file call on this object cannot leak into this document
    $self->{'buffer'} = "";

    my $doc = {};

    # get reader set up
    $self->{'reader'} = multiread->new ();
    $self->{'reader'}->set_handle ($self->{'handle'});
    $self->{'reader'}->set_encoding ($self->{'encoding'});

    # read in the file
    $self->read_tag_content ($doc);

    $self->{'handle'} = "";
    return $doc;
}
151
1521;
Note: See TracBrowser for help on using the repository browser.