1 | #include <stdio.h>
|
---|
2 | #include <stdlib.h>
|
---|
3 | #include <ctype.h>
|
---|
4 | #include <string.h>
|
---|
5 | #include <time.h>
|
---|
6 | #include "wv.h"
|
---|
7 |
|
---|
8 | extern FILE *erroroutput;
|
---|
9 | extern FILE *outputfile;
|
---|
10 |
|
---|
11 | extern long int cp;
|
---|
12 | extern long int realcp;
|
---|
13 |
|
---|
14 | /*
|
---|
15 | this attempts to parse the HYPERLINK field that ms uses to encode
|
---|
16 | hyperlink information
|
---|
17 | */
|
---|
18 | U16 *decode_hyperlink(int letter, unsigned long int *swallowcp1, unsigned long int *swallowcp2, U16 **deleteme)
|
---|
19 | {
|
---|
20 | /* a little state machine then */
|
---|
21 | /*
|
---|
22 | 1) a hyperlink may or may not start with a space,
|
---|
23 | 2) then a word HYPERLINK, though sometimes in another language i believe,
|
---|
24 | 3) then another space,
|
---|
25 | 4) after this usually theres a link
|
---|
26 | 5) followed by an optional final space and an optional 0x01
|
---|
27 | but 4 might start with a space then \l then a space then a string then a
|
---|
28 | space.
|
---|
29 | --
|
---|
30 | so we need to get the full len of what we're dealing with, read it all in
|
---|
31 | then strip out the HYPERLINK portion, scan for a " \l ", remove it, also
|
---|
32 | remove any "0x01" and output the remainder.
|
---|
33 |
|
---|
34 | as this eats the 0x14 we have to manually do the > in the calling function.
|
---|
35 | */
|
---|
36 |
|
---|
37 | /*if these are zero initialize them from the input*/
|
---|
38 | static long int from=-1;
|
---|
39 | static long to=-1;
|
---|
40 | static int no;
|
---|
41 | static U16 *array;
|
---|
42 | U16 *begin,*begin2;
|
---|
43 | static int state;
|
---|
44 |
|
---|
45 |
|
---|
46 | error(erroroutput,"incoming, letter is %c %x\n",letter,letter);
|
---|
47 |
|
---|
48 | if (from == -1)
|
---|
49 | {
|
---|
50 | from = *swallowcp1;
|
---|
51 | to = *swallowcp2;
|
---|
52 | array = (U16 *) malloc (sizeof(U16) * ((to+1)-from));
|
---|
53 | if (array==NULL)
|
---|
54 | {
|
---|
55 | error(erroroutput,"no mem for hyperlink\n");
|
---|
56 | exit(-1);
|
---|
57 | }
|
---|
58 | }
|
---|
59 |
|
---|
60 | switch(state)
|
---|
61 | {
|
---|
62 | case 0:
|
---|
63 | state=1;
|
---|
64 | break;
|
---|
65 | case 1:
|
---|
66 | if (letter == 0x20)
|
---|
67 | state=2;
|
---|
68 | break;
|
---|
69 | case 2:
|
---|
70 | if (letter != 0x01)
|
---|
71 | array[no++] = letter;
|
---|
72 | else
|
---|
73 | {
|
---|
74 | state=3;
|
---|
75 | if (letter != 0x14)
|
---|
76 | break;
|
---|
77 | else
|
---|
78 | no--;
|
---|
79 | }
|
---|
80 | /*let 0x14 fall into the next state*/
|
---|
81 | case 3:
|
---|
82 | if ((letter == 0x14) || (letter == 0x15))
|
---|
83 | {
|
---|
84 | state=0;
|
---|
85 | no--;
|
---|
86 | if (array[no] == ' ')
|
---|
87 | no--;
|
---|
88 | array[no+1] = '\0';
|
---|
89 | /*
|
---|
90 | error(erroroutput,"the current string is <!--%s-->\n",array);
|
---|
91 | */
|
---|
92 | begin = array;
|
---|
93 | /*analyse this string looking for a backslash l, which for now
|
---|
94 | we will assume is always found as a space a blackslask an l and a space*/
|
---|
95 | if (array[0] == ' ')
|
---|
96 | if (array[1] == '\\')
|
---|
97 | if (array[2] == 'l')
|
---|
98 | if (array[3] == ' ')
|
---|
99 | {
|
---|
100 | begin = array+3;
|
---|
101 | if (array[4] == '\"')
|
---|
102 | {
|
---|
103 | *begin='\"';
|
---|
104 | *(begin+1)='#';
|
---|
105 | }
|
---|
106 | else
|
---|
107 | *begin='#';
|
---|
108 | }
|
---|
109 | error(erroroutput,"the current string is <!--");
|
---|
110 | begin2 = begin;
|
---|
111 | while (*begin2 != '\0')
|
---|
112 | error(erroroutput,"%c",*begin2++);
|
---|
113 | error(erroroutput,"-->\n");
|
---|
114 |
|
---|
115 | from=-1;
|
---|
116 | to=-1;
|
---|
117 | no=0;
|
---|
118 | *deleteme = array;
|
---|
119 | return(begin);
|
---|
120 | }
|
---|
121 | break;
|
---|
122 | }
|
---|
123 |
|
---|
124 | return(NULL);
|
---|
125 | }
|
---|
126 |
|
---|
127 | /*
|
---|
128 | this attempts to parse the REF field that ms uses to encode
|
---|
129 | crosslink information
|
---|
130 | */
|
---|
131 | U16 *decode_crosslink(int letter,unsigned long int *swallowcp1, unsigned long int *swallowcp2)
|
---|
132 | {
|
---|
133 | /* a little state machine then */
|
---|
134 | /*
|
---|
135 | 1) a reference may or may not start with a space,
|
---|
136 | 2) then a word PAGEREF, though ill not assume this
|
---|
137 | 3) then another space
|
---|
138 | 4) after this theres the name of the bookmark
|
---|
139 | 5) followed by a space
|
---|
140 | 6) there may be a few flags here e.g [\h] [\p] followed by
|
---|
141 | a space.
|
---|
142 | 7) probably terminating 0x01
|
---|
143 | --
|
---|
144 | so we need to get the full len of what we're dealing with, read it all in
|
---|
145 | then strip out the HYPERLINK portion, scan for a " \l ", remove it, also
|
---|
146 | remove any "0x01" and output the remainder.
|
---|
147 |
|
---|
148 | as this eats the 0x14 we have to manually do the > in the calling function.
|
---|
149 | */
|
---|
150 |
|
---|
151 | static int no=0;
|
---|
152 | static int state;
|
---|
153 |
|
---|
154 | static long int from=-1;
|
---|
155 | static long int to=-1;
|
---|
156 | static U16 *array;
|
---|
157 |
|
---|
158 | if (from == -1)
|
---|
159 | {
|
---|
160 | from = *swallowcp1;
|
---|
161 | to = *swallowcp2;
|
---|
162 | error(erroroutput,"a mallocing %d\n",(to+1)-from);
|
---|
163 | array = (U16 *) malloc (sizeof(U16) * ((to+1)-from));
|
---|
164 | if (array==NULL)
|
---|
165 | {
|
---|
166 | error(erroroutput,"no mem for hyperlink\n");
|
---|
167 | exit(-1);
|
---|
168 | }
|
---|
169 | }
|
---|
170 |
|
---|
171 | switch(state)
|
---|
172 | {
|
---|
173 | case 0:
|
---|
174 | state=1;
|
---|
175 | break;
|
---|
176 | case 1:
|
---|
177 | if (letter == 0x20)
|
---|
178 | state=2;
|
---|
179 | break;
|
---|
180 | case 2:
|
---|
181 | if ((letter != 0x20) && (letter != 0x01) && (letter != 0x14))
|
---|
182 | array[no++] = letter;
|
---|
183 | else
|
---|
184 | {
|
---|
185 | state=0;
|
---|
186 | from=-1;
|
---|
187 | to=-1;
|
---|
188 | array[no] = '\0';
|
---|
189 | no=0;
|
---|
190 | return(array);
|
---|
191 | }
|
---|
192 | break;
|
---|
193 | }
|
---|
194 | return(NULL);
|
---|
195 | }
|
---|