source: trunk/indexers/mg/src/images/extractor.c@ 3745

Last change on this file since 3745 was 3745, checked in by mdewsnip, 21 years ago

Addition of MG package for search and retrieval

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.9 KB
Line 
1/**************************************************************************
2 *
3 * extractor.c -- Functions related to extracting marks
4 * Copyright (C) 1994 Stuart Inglis
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: extractor.c 3745 2003-02-20 21:20:24Z mdewsnip $
21 *
22 **************************************************************************/
23
24#include "sysfuncs.h"
25
26#include "extractor.h"
27#include "pbmtools.h"
28#include "marklist.h"
29#include "utils.h"
30
31
32/*
33 'splitjoined' defines whether the extractor modules tries to break
34 "seemingly" merged/joined marks.
35 */
36int splitjoined = 1;
37
38#define MAX_FILL_STACK_SIZE 10000
39
40/* RunRegionFill uses line conherence to implement a better fill
41 routine. The stack has currently got 10000 elements, although the
42 biggest I have encountered used a maximum stack size of 1294. usage:
43 RunRegionFill(bitmap,mark,i,j,xpos,ypos); xpos and ypos are the top
44 left corners of the mark. */
45
46int maxst = 0, st = 0;
47struct
48 {
49 int x, y;
50 }
51a[MAX_FILL_STACK_SIZE];
52
53#define push(i,j) a[st].x=i;a[st].y=j;st++;if(st>MAX_FILL_STACK_SIZE){error_msg ("extractor/flood_fill","woooo! Fill stack overflow!","");}
54#define pop(i,j) st--;i=a[st].x;j=a[st].y
55
56void
57RunRegionFill (Pixel ** bitmap, marktype * mark, int i, int j, int xl, int yt, int cols, int rows)
58{
59 int l, r, t, start_up, start_down;
60
61 push (i, j);
62 while (st > 0)
63 {
64 if (st > maxst)
65 maxst = st;
66
67 pop (l, j);
68 r = l;
69
70 while (pbm_getpixel (bitmap, l - 1, j))
71 l--;
72 while (pbm_getpixel (bitmap, r + 1, j))
73 r++;
74 start_up = start_down = 1;
75 for (t = min (r + 1, cols - 1); t >= max (l - 1, 0); t--)
76 {
77 if ((t >= l) && (t <= r))
78 {
79 pbm_putpixel (bitmap, t, j, 0);
80
81 if (mark)
82 pbm_putpixel (mark->bitmap, t - xl, j - yt, 1);
83 }
84
85 if ((j - 1 >= 0) && (pbm_getpixel (bitmap, t, j - 1)))
86 {
87 if (start_up)
88 {
89 push (t, j - 1);
90 start_up = 0;
91 }
92 }
93 else
94 start_up = 1;
95 if ((j + 1 < rows) && (pbm_getpixel (bitmap, t, j + 1)))
96 {
97 if (start_down)
98 {
99 push (t, j + 1);
100 start_down = 0;
101 }
102 }
103 else
104 start_down = 1;
105 }
106 }
107}
108
109
110
111
112/*
113
114 ExtractNextMark starts at (tx,ty) and looks for a set (==1) pixel.
115 The mark is then boundary traced to give an outline {The process is a
116 4-pixel exterior check -- giving 8 way connectivity}, and then
117 RunRegionFill is used to remove the mark and set the pixels in the
118 mark.
119
120 usage: ExtractNextMark(bitmap,&marklist,&startx,&starty,int cols,int rows);
121 Defines:
122 gp - gets a pixel, but returns 0 for outside regions.
123 boundary_{x,y}comp - returns the {x,y} component of the direction.
124 dir 0=right, 1=down, 2=left, 3=up.
125 */
126
127#define gp(i,j) ( (((i)<0)||((j)<0)||((i)>=cols)||((j)>=rows))? 0 : pbm_getpixel(bitmap,(i),(j)))
128
129#define boundary_xcomp(x) (x==0?1:(x==2?-1 : 0))
130#define boundary_ycomp(x) (x==1?1:(x==3?-1 : 0))
131
132int
133ExtractNextMark (Pixel ** bitmap, marklistptr * marklist, int *tx, int *ty, int cols, int rows)
134{
135 int i, j, xl, xr, yt, yb;
136 register int dir, tempdir;
137 marktype d;
138
139 xl = INT_MAX;
140 xr = (-1);
141 yt = INT_MAX;
142 yb = (-1);
143
144 i = *tx;
145 j = *ty;
146 if ((i < 0) || (j < 0))
147 error_msg ("ExtractNextMark()", "Stupid values for x,y in findnextmark()", "");
148 while (!pbm_getpixel (bitmap, i, j))
149 {
150 i++;
151 if (i >= cols)
152 {
153 i = 0;
154 j++;
155 if (j >= rows)
156 return 0;
157 }
158 }
159
160 *tx = i;
161 *ty = j;
162 dir = 0;
163 j = j - 1; /* start above */
164 do
165 {
166 if (i > xr)
167 xr = i - 1;
168 if (i < xl)
169 xl = i + 1;
170 if (j < yt)
171 yt = j + 1;
172 if (j > yb)
173 yb = j - 1;
174 tempdir = (dir + 1) % 4;
175 if (gp (i + boundary_xcomp (tempdir), j + boundary_ycomp (tempdir)) == 0)
176 dir = tempdir;
177 else
178 {
179 tempdir = dir;
180 if (gp (i + boundary_xcomp (tempdir), j + boundary_ycomp (tempdir)) == 0)
181 dir = tempdir;
182 else
183 {
184 tempdir = (dir + 3) % 4;
185 if (gp (i + boundary_xcomp (tempdir), j + boundary_ycomp (tempdir)) == 0)
186 dir = tempdir;
187 else
188 dir = (dir + 2) % 4;
189 }
190 }
191 i += boundary_xcomp (dir);
192 j += boundary_ycomp (dir);
193 }
194 while ((i != (*tx)) || (j != (*ty - 1)));
195
196 d.w = (xr - xl + 1);
197 d.h = (yb - yt + 1);
198 d.xpos = xl;
199 d.ypos = yt;
200 d.bitmap = pbm_allocarray (d.w, d.h);
201
202 RunRegionFill (bitmap, &d, *tx, *ty, xl, yt, cols, rows);
203
204 marktype_calc_features (&d, "?");
205 marklist_add (marklist, d);
206
207 *tx = *tx + 1;
208 return 1;
209}
210
211
212
213
214static void
215normalise (float *p, int w)
216{
217 int x;
218 float mx;
219
220 mx = p[0];
221 for (x = 0; x < w; x++)
222 if (p[x] > mx)
223 mx = p[x];
224 if (mx != 0)
225 {
226 for (x = 0; x < w; x++)
227 p[x] = p[x] / mx;
228 }
229}
230
231
232
233#define MAXPROFILE 1000
234float profile[MAXPROFILE], p2[MAXPROFILE];
235
236
237static int
238possibly_joined (marktype * d)
239{
240 float aspect;
241 int i, n, x, y;
242 int leftable, l2;
243
244
245 aspect = d->w / (float) d->h;
246
247 if ((aspect >= 1.1) || (d->w < MAXPROFILE))
248 {
249 /* examine it more closely */
250
251
252 /* set up profile[] with a vertical profile */
253 for (x = 0; x < d->w; x++)
254 {
255 profile[x] = 0;
256 for (y = 0; y < d->h; y++)
257 if (pbm_getpixel (d->bitmap, x, y))
258 profile[x]++;
259 }
260
261 /* average the profile from (up to) 3 either way */
262 for (x = 0; x < d->w; x++)
263 {
264 n = 0;
265 p2[x] = 0;
266 for (i = x - 3; i <= x + 3; i++)
267 if ((i >= 0) && (i < d->w))
268 {
269 n++;
270 p2[x] += profile[x];
271 }
272 p2[x] = p2[x] / (float) n;
273 }
274 for (x = 0; x < d->w; x++)
275 profile[x] = p2[x];
276
277 /* normalise profile[] */
278 normalise (profile, d->w);
279
280 /* find a little local minimum, with profile <0.25, make p2[] either 0 or 1 */
281 for (x = 0; x < d->w; x++)
282 p2[x] = 0;
283 for (x = 3; x < d->w - 3; x++)
284 {
285 if (((profile[x - 2] - profile[x]) > 0.00) &&
286 ((profile[x + 2] - profile[x]) > 0.00) &&
287 (profile[x] < .25))
288 p2[x] = 1;
289 }
290
291 /* travel rightwards, checking aspect ratio, looking at p2 now */
292 leftable = l2 = 0;
293 for (x = 0; x < d->w; x++)
294 if (p2[x] == 1)
295 {
296 if ((fabs ((abs (x - leftable) / (float) d->h) - 0.7) > 0.3) &&
297 (fabs ((abs (x - l2) / (float) d->h) - 0.7) > 0.3))
298 p2[x] = 0;
299 if (p2[x] == 1)
300 {
301 leftable = l2;
302 l2 = x;
303 }
304 }
305
306 /* travel inwards, checking widths of chars, looking at p2 */
307 /* if a char has an aspect ratio of <= ? kill it */
308 l2 = 0;
309 leftable = d->w - 1;
310 x = 0;
311 y = d->w - 1;
312 while (x <= y)
313 {
314 if (p2[x])
315 {
316 if (abs (x - l2) < max (d->h * 0.6, 5))
317 p2[x] = 0;
318 if (p2[x])
319 l2 = x;
320 }
321 if (p2[y])
322 {
323 if (abs (y - leftable) < max (d->h * 0.6, 5))
324 p2[y] = 0;
325 if (p2[y])
326 leftable = y;
327 }
328 x++;
329 y--;
330 }
331
332
333 for (x = 0; x < d->w; x++)
334 if (p2[x])
335 return 1;
336 }
337 return 0;
338}
339
340
341static void
342split_mark (marklistptr * list, marktype * joinedmark)
343{
344 marktype new, old;
345 int x, found = 0, left = 0;
346 int i, j;
347 int oldw, oldh;
348 int overwrite = 1;
349
350 old = marktype_copy (*joinedmark);
351 oldw = joinedmark->w;
352 oldh = joinedmark->h;
353 for (x = 0; x < old.w; x++)
354 if (p2[x])
355 {
356 found = x;
357 break;
358 }
359 do
360 {
361 /* split at (found) position */
362 new.w = found + 1 - left;
363 new.h = old.h;
364 new.xpos = old.xpos + left;
365 new.ypos = old.ypos;
366 new.bitmap = pbm_allocarray (new.w, new.h);
367
368 for (i = 0; i < new.w; i++)
369 for (j = 0; j < new.h; j++)
370 pbm_putpixel (new.bitmap, i, j, pbm_getpixel (old.bitmap, left + i, j));
371
372 marktype_adj_bound (&new);
373
374 if (overwrite)
375 {
376 overwrite = 0;
377 pbm_freearray (&(joinedmark->bitmap), oldh);
378 *joinedmark = new;
379 }
380 else
381 marklist_add (list, new);
382
383
384 p2[found] = 0;
385 left = found;
386 found = 0;
387 for (x = left; x < old.w; x++)
388 if (p2[x])
389 {
390 found = x;
391 break;
392 }
393 if (left == old.w - 1)
394 break; /* if we've got to the right */
395 if (found == 0)
396 found = old.w - 1;
397 }
398 while (found);
399 pbm_freearray (&(old.bitmap), oldh);
400}
401
402
403
404
405
406void
407ExtractAllMarks (Pixel ** bitmap, marklistptr * list, int cols, int rows)
408{
409 int startx, starty, count;
410 marklistptr step;
411
412 startx = starty = 0;
413 while (ExtractNextMark (bitmap, list, &startx, &starty, cols, rows))
414 ;
415
416 if (splitjoined)
417 for (step = (*list), count = 0; step; step = step->next, count++)
418 if (possibly_joined (&(step->data)))
419 split_mark (list, &(step->data));
420
421 for (step = (*list), count = 0; step; step = step->next, count++)
422 step->data.symnum = count;
423}
Note: See TracBrowser for help on using the repository browser.