1 | # Assembler.pm
|
---|
2 | #
|
---|
3 | # Copyright (c) 1996 Malcolm Beattie
|
---|
4 | #
|
---|
5 | # You may distribute under the terms of either the GNU General Public
|
---|
6 | # License or the Artistic License, as specified in the README file.
|
---|
7 |
|
---|
8 | package B::Assembler;
|
---|
9 | use Exporter;
|
---|
10 | use B qw(ppname);
|
---|
11 | use B::Asmdata qw(%insn_data @insn_name);
|
---|
12 | use Config qw(%Config);
|
---|
13 | require ByteLoader; # we just need its $VERSION
|
---|
14 |
|
---|
15 | no warnings; # XXX
|
---|
16 |
|
---|
17 | @ISA = qw(Exporter);
|
---|
18 | @EXPORT_OK = qw(assemble_fh newasm endasm assemble asm);
|
---|
19 | $VERSION = 0.07;
|
---|
20 |
|
---|
21 | use strict;
|
---|
22 | my %opnumber;
|
---|
23 | my ($i, $opname);
|
---|
24 | for ($i = 0; defined($opname = ppname($i)); $i++) {
|
---|
25 | $opnumber{$opname} = $i;
|
---|
26 | }
|
---|
27 |
|
---|
28 | my($linenum, $errors, $out); # global state, set up by newasm
|
---|
29 |
|
---|
30 | sub error {
|
---|
31 | my $str = shift;
|
---|
32 | warn "$linenum: $str\n";
|
---|
33 | $errors++;
|
---|
34 | }
|
---|
35 |
|
---|
36 | my $debug = 0;
|
---|
37 | sub debug { $debug = shift }
|
---|
38 |
|
---|
39 | sub limcheck($$$$){
|
---|
40 | my( $val, $lo, $hi, $loc ) = @_;
|
---|
41 | if( $val < $lo || $hi < $val ){
|
---|
42 | error "argument for $loc outside [$lo, $hi]: $val";
|
---|
43 | $val = $hi;
|
---|
44 | }
|
---|
45 | return $val;
|
---|
46 | }
|
---|
47 |
|
---|
48 | #
|
---|
49 | # First define all the data conversion subs to which Asmdata will refer
|
---|
50 | #
|
---|
51 |
|
---|
52 | sub B::Asmdata::PUT_U8 {
|
---|
53 | my $arg = shift;
|
---|
54 | my $c = uncstring($arg);
|
---|
55 | if (defined($c)) {
|
---|
56 | if (length($c) != 1) {
|
---|
57 | error "argument for U8 is too long: $c";
|
---|
58 | $c = substr($c, 0, 1);
|
---|
59 | }
|
---|
60 | } else {
|
---|
61 | $arg = limcheck( $arg, 0, 0xff, 'U8' );
|
---|
62 | $c = chr($arg);
|
---|
63 | }
|
---|
64 | return $c;
|
---|
65 | }
|
---|
66 |
|
---|
67 | sub B::Asmdata::PUT_U16 {
|
---|
68 | my $arg = limcheck( $_[0], 0, 0xffff, 'U16' );
|
---|
69 | pack("S", $arg);
|
---|
70 | }
|
---|
71 | sub B::Asmdata::PUT_U32 {
|
---|
72 | my $arg = limcheck( $_[0], 0, 0xffffffff, 'U32' );
|
---|
73 | pack("L", $arg);
|
---|
74 | }
|
---|
75 | sub B::Asmdata::PUT_I32 {
|
---|
76 | my $arg = limcheck( $_[0], -0x80000000, 0x7fffffff, 'I32' );
|
---|
77 | pack("l", $arg);
|
---|
78 | }
|
---|
79 | sub B::Asmdata::PUT_NV { sprintf("%s\0", $_[0]) } # "%lf" looses precision and pack('d',...)
|
---|
80 | # may not even be portable between compilers
|
---|
81 | sub B::Asmdata::PUT_objindex { # could allow names here
|
---|
82 | my $arg = limcheck( $_[0], 0, 0xffffffff, '*index' );
|
---|
83 | pack("L", $arg);
|
---|
84 | }
|
---|
85 | sub B::Asmdata::PUT_svindex { &B::Asmdata::PUT_objindex }
|
---|
86 | sub B::Asmdata::PUT_opindex { &B::Asmdata::PUT_objindex }
|
---|
87 | sub B::Asmdata::PUT_pvindex { &B::Asmdata::PUT_objindex }
|
---|
88 |
|
---|
89 | sub B::Asmdata::PUT_strconst {
|
---|
90 | my $arg = shift;
|
---|
91 | my $str = uncstring($arg);
|
---|
92 | if (!defined($str)) {
|
---|
93 | error "bad string constant: $arg";
|
---|
94 | $str = '';
|
---|
95 | }
|
---|
96 | if ($str =~ s/\0//g) {
|
---|
97 | error "string constant argument contains NUL: $arg";
|
---|
98 | $str = '';
|
---|
99 | }
|
---|
100 | return $str . "\0";
|
---|
101 | }
|
---|
102 |
|
---|
103 | sub B::Asmdata::PUT_pvcontents {
|
---|
104 | my $arg = shift;
|
---|
105 | error "extraneous argument: $arg" if defined $arg;
|
---|
106 | return "";
|
---|
107 | }
|
---|
108 | sub B::Asmdata::PUT_PV {
|
---|
109 | my $arg = shift;
|
---|
110 | my $str = uncstring($arg);
|
---|
111 | if( ! defined($str) ){
|
---|
112 | error "bad string argument: $arg";
|
---|
113 | $str = '';
|
---|
114 | }
|
---|
115 | return pack("L", length($str)) . $str;
|
---|
116 | }
|
---|
117 | sub B::Asmdata::PUT_comment_t {
|
---|
118 | my $arg = shift;
|
---|
119 | $arg = uncstring($arg);
|
---|
120 | error "bad string argument: $arg" unless defined($arg);
|
---|
121 | if ($arg =~ s/\n//g) {
|
---|
122 | error "comment argument contains linefeed: $arg";
|
---|
123 | }
|
---|
124 | return $arg . "\n";
|
---|
125 | }
|
---|
126 | sub B::Asmdata::PUT_double { sprintf("%s\0", $_[0]) } # see PUT_NV above
|
---|
127 | sub B::Asmdata::PUT_none {
|
---|
128 | my $arg = shift;
|
---|
129 | error "extraneous argument: $arg" if defined $arg;
|
---|
130 | return "";
|
---|
131 | }
|
---|
132 | sub B::Asmdata::PUT_op_tr_array {
|
---|
133 | my @ary = split /\s*,\s*/, shift;
|
---|
134 | return pack "S*", @ary;
|
---|
135 | }
|
---|
136 |
|
---|
137 | sub B::Asmdata::PUT_IV64 {
|
---|
138 | return pack "Q", shift;
|
---|
139 | }
|
---|
140 |
|
---|
141 | sub B::Asmdata::PUT_IV {
|
---|
142 | $Config{ivsize} == 4 ? &B::Asmdata::PUT_I32 : &B::Asmdata::PUT_IV64;
|
---|
143 | }
|
---|
144 |
|
---|
145 | sub B::Asmdata::PUT_PADOFFSET {
|
---|
146 | $Config{ptrsize} == 8 ? &B::Asmdata::PUT_IV64 : &B::Asmdata::PUT_U32;
|
---|
147 | }
|
---|
148 |
|
---|
149 | sub B::Asmdata::PUT_long {
|
---|
150 | $Config{longsize} == 8 ? &B::Asmdata::PUT_IV64 : &B::Asmdata::PUT_U32;
|
---|
151 | }
|
---|
152 |
|
---|
153 | my %unesc = (n => "\n", r => "\r", t => "\t", a => "\a",
|
---|
154 | b => "\b", f => "\f", v => "\013");
|
---|
155 |
|
---|
156 | sub uncstring {
|
---|
157 | my $s = shift;
|
---|
158 | $s =~ s/^"// and $s =~ s/"$// or return undef;
|
---|
159 | $s =~ s/\\(\d\d\d|.)/length($1) == 3 ? chr(oct($1)) : ($unesc{$1}||$1)/eg;
|
---|
160 | return $s;
|
---|
161 | }
|
---|
162 |
|
---|
163 | sub strip_comments {
|
---|
164 | my $stmt = shift;
|
---|
165 | # Comments only allowed in instructions which don't take string arguments
|
---|
166 | # Treat string as a single line so .* eats \n characters.
|
---|
167 | $stmt =~ s{
|
---|
168 | ^\s* # Ignore leading whitespace
|
---|
169 | (
|
---|
170 | [^"]* # A double quote '"' indicates a string argument. If we
|
---|
171 | # find a double quote, the match fails and we strip nothing.
|
---|
172 | )
|
---|
173 | \s*\# # Any amount of whitespace plus the comment marker...
|
---|
174 | .*$ # ...which carries on to end-of-string.
|
---|
175 | }{$1}sx; # Keep only the instruction and optional argument.
|
---|
176 | return $stmt;
|
---|
177 | }
|
---|
178 |
|
---|
179 | # create the ByteCode header: magic, archname, ByteLoader $VERSION, ivsize,
|
---|
180 | # ptrsize, byteorder
|
---|
181 | # nvtype is irrelevant (floats are stored as strings)
|
---|
182 | # byteorder is strconst not U32 because of varying size issues
|
---|
183 |
|
---|
184 | sub gen_header {
|
---|
185 | my $header = "";
|
---|
186 |
|
---|
187 | $header .= B::Asmdata::PUT_U32(0x43424c50); # 'PLBC'
|
---|
188 | $header .= B::Asmdata::PUT_strconst('"' . $Config{archname}. '"');
|
---|
189 | $header .= B::Asmdata::PUT_strconst(qq["$ByteLoader::VERSION"]);
|
---|
190 | $header .= B::Asmdata::PUT_U32($Config{ivsize});
|
---|
191 | $header .= B::Asmdata::PUT_U32($Config{ptrsize});
|
---|
192 | $header;
|
---|
193 | }
|
---|
194 |
|
---|
195 | sub parse_statement {
|
---|
196 | my $stmt = shift;
|
---|
197 | my ($insn, $arg) = $stmt =~ m{
|
---|
198 | ^\s* # allow (but ignore) leading whitespace
|
---|
199 | (.*?) # Instruction continues up until...
|
---|
200 | (?: # ...an optional whitespace+argument group
|
---|
201 | \s+ # first whitespace.
|
---|
202 | (.*) # The argument is all the rest (newlines included).
|
---|
203 | )?$ # anchor at end-of-line
|
---|
204 | }sx;
|
---|
205 | if (defined($arg)) {
|
---|
206 | if ($arg =~ s/^0x(?=[0-9a-fA-F]+$)//) {
|
---|
207 | $arg = hex($arg);
|
---|
208 | } elsif ($arg =~ s/^0(?=[0-7]+$)//) {
|
---|
209 | $arg = oct($arg);
|
---|
210 | } elsif ($arg =~ /^pp_/) {
|
---|
211 | $arg =~ s/\s*$//; # strip trailing whitespace
|
---|
212 | my $opnum = $opnumber{$arg};
|
---|
213 | if (defined($opnum)) {
|
---|
214 | $arg = $opnum;
|
---|
215 | } else {
|
---|
216 | error qq(No such op type "$arg");
|
---|
217 | $arg = 0;
|
---|
218 | }
|
---|
219 | }
|
---|
220 | }
|
---|
221 | return ($insn, $arg);
|
---|
222 | }
|
---|
223 |
|
---|
224 | sub assemble_insn {
|
---|
225 | my ($insn, $arg) = @_;
|
---|
226 | my $data = $insn_data{$insn};
|
---|
227 | if (defined($data)) {
|
---|
228 | my ($bytecode, $putsub) = @{$data}[0, 1];
|
---|
229 | my $argcode = &$putsub($arg);
|
---|
230 | return chr($bytecode).$argcode;
|
---|
231 | } else {
|
---|
232 | error qq(no such instruction "$insn");
|
---|
233 | return "";
|
---|
234 | }
|
---|
235 | }
|
---|
236 |
|
---|
237 | sub assemble_fh {
|
---|
238 | my ($fh, $out) = @_;
|
---|
239 | my $line;
|
---|
240 | my $asm = newasm($out);
|
---|
241 | while ($line = <$fh>) {
|
---|
242 | assemble($line);
|
---|
243 | }
|
---|
244 | endasm();
|
---|
245 | }
|
---|
246 |
|
---|
247 | sub newasm {
|
---|
248 | my($outsub) = @_;
|
---|
249 |
|
---|
250 | die "Invalid printing routine for B::Assembler\n" unless ref $outsub eq 'CODE';
|
---|
251 | die <<EOD if ref $out;
|
---|
252 | Can't have multiple byteassembly sessions at once!
|
---|
253 | (perhaps you forgot an endasm()?)
|
---|
254 | EOD
|
---|
255 |
|
---|
256 | $linenum = $errors = 0;
|
---|
257 | $out = $outsub;
|
---|
258 |
|
---|
259 | $out->(gen_header());
|
---|
260 | }
|
---|
261 |
|
---|
262 | sub endasm {
|
---|
263 | if ($errors) {
|
---|
264 | die "There were $errors assembly errors\n";
|
---|
265 | }
|
---|
266 | $linenum = $errors = $out = 0;
|
---|
267 | }
|
---|
268 |
|
---|
269 | sub assemble {
|
---|
270 | my($line) = @_;
|
---|
271 | my ($insn, $arg);
|
---|
272 | $linenum++;
|
---|
273 | chomp $line;
|
---|
274 | if ($debug) {
|
---|
275 | my $quotedline = $line;
|
---|
276 | $quotedline =~ s/\\/\\\\/g;
|
---|
277 | $quotedline =~ s/"/\\"/g;
|
---|
278 | $out->(assemble_insn("comment", qq("$quotedline")));
|
---|
279 | }
|
---|
280 | if( $line = strip_comments($line) ){
|
---|
281 | ($insn, $arg) = parse_statement($line);
|
---|
282 | $out->(assemble_insn($insn, $arg));
|
---|
283 | if ($debug) {
|
---|
284 | $out->(assemble_insn("nop", undef));
|
---|
285 | }
|
---|
286 | }
|
---|
287 | }
|
---|
288 |
|
---|
289 | ### temporary workaround
|
---|
290 |
|
---|
291 | sub asm {
|
---|
292 | return if $_[0] =~ /\s*\W/;
|
---|
293 | if (defined $_[1]) {
|
---|
294 | return if $_[1] eq "0" and
|
---|
295 | $_[0] !~ /^(?:newsvx?|av_pushx?|av_extend|xav_flags)$/;
|
---|
296 | return if $_[1] eq "1" and $_[0] =~ /^(?:sv_refcnt)$/;
|
---|
297 | }
|
---|
298 | assemble "@_";
|
---|
299 | }
|
---|
300 |
|
---|
301 | 1;
|
---|
302 |
|
---|
303 | __END__
|
---|
304 |
|
---|
305 | =head1 NAME
|
---|
306 |
|
---|
307 | B::Assembler - Assemble Perl bytecode
|
---|
308 |
|
---|
309 | =head1 SYNOPSIS
|
---|
310 |
|
---|
311 | use B::Assembler qw(newasm endasm assemble);
|
---|
312 | newasm(\&printsub); # sets up for assembly
|
---|
313 | assemble($buf); # assembles one line
|
---|
314 | endasm(); # closes down
|
---|
315 |
|
---|
316 | use B::Assembler qw(assemble_fh);
|
---|
317 | assemble_fh($fh, \&printsub); # assemble everything in $fh
|
---|
318 |
|
---|
319 | =head1 DESCRIPTION
|
---|
320 |
|
---|
321 | See F<ext/B/B/Assembler.pm>.
|
---|
322 |
|
---|
323 | =head1 AUTHORS
|
---|
324 |
|
---|
325 | Malcolm Beattie, C<[email protected]>
|
---|
326 | Per-statement interface by Benjamin Stuhl, C<[email protected]>
|
---|
327 |
|
---|
328 | =cut
|
---|