source: trunk/gsdl/packages/wv-0.5.44-gs/src/hyperlink.c@ 1468

Last change on this file since 1468 was 1468, checked in by paynter, 24 years ago

The wv package from www.wvware.com is used to convert Word documents into
HTML. This is an adaptation of wv version 0.5.44 for Greenstone; it is
called by the gsConvert.pl script.

  • Property svn:keywords set to Author Date Id Revision
File size: 4.2 KB
Line 
1#include <stdio.h>
2#include <stdlib.h>
3#include <ctype.h>
4#include <string.h>
5#include <time.h>
6#include "wv.h"
7
8extern FILE *erroroutput;
9extern FILE *outputfile;
10
11extern long int cp;
12extern long int realcp;
13
14/*
15this attempts to parse the HYPERLINK field that ms uses to encode
16hyperlink information
17*/
18U16 *decode_hyperlink(int letter, unsigned long int *swallowcp1, unsigned long int *swallowcp2, U16 **deleteme)
19 {
20 /* a little state machine then */
21 /*
22 1) a hyperlink may or may not start with a space,
23 2) then a word HYPERLINK, though sometimes in another language i believe,
24 3) then another space,
25 4) after this usually theres a link
26 5) followed by an optional final space and an optional 0x01
27 but 4 might start with a space then \l then a space then a string then a
28 space.
29 --
30 so we need to get the full len of what we're dealing with, read it all in
31 then strip out the HYPERLINK portion, scan for a " \l ", remove it, also
32 remove any "0x01" and output the remainder.
33
34 as this eats the 0x14 we have to manually do the > in the calling function.
35 */
36
37 /*if these are zero initialize them from the input*/
38 static long int from=-1;
39 static long to=-1;
40 static int no;
41 static U16 *array;
42 U16 *begin,*begin2;
43 static int state;
44
45
46 error(erroroutput,"incoming, letter is %c %x\n",letter,letter);
47
48 if (from == -1)
49 {
50 from = *swallowcp1;
51 to = *swallowcp2;
52 array = (U16 *) malloc (sizeof(U16) * ((to+1)-from));
53 if (array==NULL)
54 {
55 error(erroroutput,"no mem for hyperlink\n");
56 exit(-1);
57 }
58 }
59
60 switch(state)
61 {
62 case 0:
63 state=1;
64 break;
65 case 1:
66 if (letter == 0x20)
67 state=2;
68 break;
69 case 2:
70 if (letter != 0x01)
71 array[no++] = letter;
72 else
73 {
74 state=3;
75 if (letter != 0x14)
76 break;
77 else
78 no--;
79 }
80 /*let 0x14 fall into the next state*/
81 case 3:
82 if ((letter == 0x14) || (letter == 0x15))
83 {
84 state=0;
85 no--;
86 if (array[no] == ' ')
87 no--;
88 array[no+1] = '\0';
89 /*
90 error(erroroutput,"the current string is <!--%s-->\n",array);
91 */
92 begin = array;
93 /*analyse this string looking for a backslash l, which for now
94 we will assume is always found as a space a blackslask an l and a space*/
95 if (array[0] == ' ')
96 if (array[1] == '\\')
97 if (array[2] == 'l')
98 if (array[3] == ' ')
99 {
100 begin = array+3;
101 if (array[4] == '\"')
102 {
103 *begin='\"';
104 *(begin+1)='#';
105 }
106 else
107 *begin='#';
108 }
109 error(erroroutput,"the current string is <!--");
110 begin2 = begin;
111 while (*begin2 != '\0')
112 error(erroroutput,"%c",*begin2++);
113 error(erroroutput,"-->\n");
114
115 from=-1;
116 to=-1;
117 no=0;
118 *deleteme = array;
119 return(begin);
120 }
121 break;
122 }
123
124 return(NULL);
125 }
126
127/*
128this attempts to parse the REF field that ms uses to encode
129crosslink information
130*/
131U16 *decode_crosslink(int letter,unsigned long int *swallowcp1, unsigned long int *swallowcp2)
132 {
133 /* a little state machine then */
134 /*
135 1) a reference may or may not start with a space,
136 2) then a word PAGEREF, though ill not assume this
137 3) then another space
138 4) after this theres the name of the bookmark
139 5) followed by a space
140 6) there may be a few flags here e.g [\h] [\p] followed by
141 a space.
142 7) probably terminating 0x01
143 --
144 so we need to get the full len of what we're dealing with, read it all in
145 then strip out the HYPERLINK portion, scan for a " \l ", remove it, also
146 remove any "0x01" and output the remainder.
147
148 as this eats the 0x14 we have to manually do the > in the calling function.
149 */
150
151 static int no=0;
152 static int state;
153
154 static long int from=-1;
155 static long int to=-1;
156 static U16 *array;
157
158 if (from == -1)
159 {
160 from = *swallowcp1;
161 to = *swallowcp2;
162 error(erroroutput,"a mallocing %d\n",(to+1)-from);
163 array = (U16 *) malloc (sizeof(U16) * ((to+1)-from));
164 if (array==NULL)
165 {
166 error(erroroutput,"no mem for hyperlink\n");
167 exit(-1);
168 }
169 }
170
171 switch(state)
172 {
173 case 0:
174 state=1;
175 break;
176 case 1:
177 if (letter == 0x20)
178 state=2;
179 break;
180 case 2:
181 if ((letter != 0x20) && (letter != 0x01) && (letter != 0x14))
182 array[no++] = letter;
183 else
184 {
185 state=0;
186 from=-1;
187 to=-1;
188 array[no] = '\0';
189 no=0;
190 return(array);
191 }
192 break;
193 }
194 return(NULL);
195 }
Note: See TracBrowser for help on using the repository browser.