source: other-projects/trunk/gs3-release-maker/apache-ant-1.6.5/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java@ 14627

Last change on this file since 14627 was 14627, checked in by oranfry, 17 years ago

initial import of the gs3-release-maker

File size: 46.5 KB
Line 
1/*
2 * Copyright 2001-2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17
18/*
19 * This package is based on the work done by Keiron Liddle, Aftex Software
20 * <[email protected]> to whom the Ant project is very grateful for his
21 * great code.
22 */
23
24package org.apache.tools.bzip2;
25
26import java.io.OutputStream;
27import java.io.IOException;
28
29/**
30 * An output stream that compresses into the BZip2 format (without the file
31 * header chars) into another stream.
32 *
33 *
34 * TODO: Update to BZip2 1.0.1
35 */
36public class CBZip2OutputStream extends OutputStream implements BZip2Constants {
37 protected static final int SETMASK = (1 << 21);
38 protected static final int CLEARMASK = (~SETMASK);
39 protected static final int GREATER_ICOST = 15;
40 protected static final int LESSER_ICOST = 0;
41 protected static final int SMALL_THRESH = 20;
42 protected static final int DEPTH_THRESH = 10;
43
44 /*
45 If you are ever unlucky/improbable enough
46 to get a stack overflow whilst sorting,
47 increase the following constant and try
48 again. In practice I have never seen the
49 stack go above 27 elems, so the following
50 limit seems very generous.
51 */
52 protected static final int QSORT_STACK_SIZE = 1000;
53
54 private static void panic() {
55 System.out.println("panic");
56 //throw new CError();
57 }
58
59 private void makeMaps() {
60 int i;
61 nInUse = 0;
62 for (i = 0; i < 256; i++) {
63 if (inUse[i]) {
64 seqToUnseq[nInUse] = (char) i;
65 unseqToSeq[i] = (char) nInUse;
66 nInUse++;
67 }
68 }
69 }
70
71 protected static void hbMakeCodeLengths(char[] len, int[] freq,
72 int alphaSize, int maxLen) {
73 /*
74 Nodes and heap entries run from 1. Entry 0
75 for both the heap and nodes is a sentinel.
76 */
77 int nNodes, nHeap, n1, n2, i, j, k;
78 boolean tooLong;
79
80 int[] heap = new int[MAX_ALPHA_SIZE + 2];
81 int[] weight = new int[MAX_ALPHA_SIZE * 2];
82 int[] parent = new int[MAX_ALPHA_SIZE * 2];
83
84 for (i = 0; i < alphaSize; i++) {
85 weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
86 }
87
88 while (true) {
89 nNodes = alphaSize;
90 nHeap = 0;
91
92 heap[0] = 0;
93 weight[0] = 0;
94 parent[0] = -2;
95
96 for (i = 1; i <= alphaSize; i++) {
97 parent[i] = -1;
98 nHeap++;
99 heap[nHeap] = i;
100 {
101 int zz, tmp;
102 zz = nHeap;
103 tmp = heap[zz];
104 while (weight[tmp] < weight[heap[zz >> 1]]) {
105 heap[zz] = heap[zz >> 1];
106 zz >>= 1;
107 }
108 heap[zz] = tmp;
109 }
110 }
111 if (!(nHeap < (MAX_ALPHA_SIZE + 2))) {
112 panic();
113 }
114
115 while (nHeap > 1) {
116 n1 = heap[1];
117 heap[1] = heap[nHeap];
118 nHeap--;
119 {
120 int zz = 0, yy = 0, tmp = 0;
121 zz = 1;
122 tmp = heap[zz];
123 while (true) {
124 yy = zz << 1;
125 if (yy > nHeap) {
126 break;
127 }
128 if (yy < nHeap
129 && weight[heap[yy + 1]] < weight[heap[yy]]) {
130 yy++;
131 }
132 if (weight[tmp] < weight[heap[yy]]) {
133 break;
134 }
135 heap[zz] = heap[yy];
136 zz = yy;
137 }
138 heap[zz] = tmp;
139 }
140 n2 = heap[1];
141 heap[1] = heap[nHeap];
142 nHeap--;
143 {
144 int zz = 0, yy = 0, tmp = 0;
145 zz = 1;
146 tmp = heap[zz];
147 while (true) {
148 yy = zz << 1;
149 if (yy > nHeap) {
150 break;
151 }
152 if (yy < nHeap
153 && weight[heap[yy + 1]] < weight[heap[yy]]) {
154 yy++;
155 }
156 if (weight[tmp] < weight[heap[yy]]) {
157 break;
158 }
159 heap[zz] = heap[yy];
160 zz = yy;
161 }
162 heap[zz] = tmp;
163 }
164 nNodes++;
165 parent[n1] = parent[n2] = nNodes;
166
167 weight[nNodes] = ((weight[n1] & 0xffffff00)
168 + (weight[n2] & 0xffffff00))
169 | (1 + (((weight[n1] & 0x000000ff)
170 > (weight[n2] & 0x000000ff))
171 ? (weight[n1] & 0x000000ff)
172 : (weight[n2] & 0x000000ff)));
173
174 parent[nNodes] = -1;
175 nHeap++;
176 heap[nHeap] = nNodes;
177 {
178 int zz = 0, tmp = 0;
179 zz = nHeap;
180 tmp = heap[zz];
181 while (weight[tmp] < weight[heap[zz >> 1]]) {
182 heap[zz] = heap[zz >> 1];
183 zz >>= 1;
184 }
185 heap[zz] = tmp;
186 }
187 }
188 if (!(nNodes < (MAX_ALPHA_SIZE * 2))) {
189 panic();
190 }
191
192 tooLong = false;
193 for (i = 1; i <= alphaSize; i++) {
194 j = 0;
195 k = i;
196 while (parent[k] >= 0) {
197 k = parent[k];
198 j++;
199 }
200 len[i - 1] = (char) j;
201 if (j > maxLen) {
202 tooLong = true;
203 }
204 }
205
206 if (!tooLong) {
207 break;
208 }
209
210 for (i = 1; i < alphaSize; i++) {
211 j = weight[i] >> 8;
212 j = 1 + (j / 2);
213 weight[i] = j << 8;
214 }
215 }
216 }
217
218 /*
219 index of the last char in the block, so
220 the block size == last + 1.
221 */
222 int last;
223
224 /*
225 index in zptr[] of original string after sorting.
226 */
227 int origPtr;
228
229 /*
230 always: in the range 0 .. 9.
231 The current block size is 100000 * this number.
232 */
233 int blockSize100k;
234
235 boolean blockRandomised;
236
237 int bytesOut;
238 int bsBuff;
239 int bsLive;
240 CRC mCrc = new CRC();
241
242 private boolean[] inUse = new boolean[256];
243 private int nInUse;
244
245 private char[] seqToUnseq = new char[256];
246 private char[] unseqToSeq = new char[256];
247
248 private char[] selector = new char[MAX_SELECTORS];
249 private char[] selectorMtf = new char[MAX_SELECTORS];
250
251 private char[] block;
252 private int[] quadrant;
253 private int[] zptr;
254 private short[] szptr;
255 private int[] ftab;
256
257 private int nMTF;
258
259 private int[] mtfFreq = new int[MAX_ALPHA_SIZE];
260
261 /*
262 * Used when sorting. If too many long comparisons
263 * happen, we stop sorting, randomise the block
264 * slightly, and try again.
265 */
266 private int workFactor;
267 private int workDone;
268 private int workLimit;
269 private boolean firstAttempt;
270 private int nBlocksRandomised;
271
272 private int currentChar = -1;
273 private int runLength = 0;
274
275 public CBZip2OutputStream(OutputStream inStream) throws IOException {
276 this(inStream, 9);
277 }
278
279 public CBZip2OutputStream(OutputStream inStream, int inBlockSize)
280 throws IOException {
281 block = null;
282 quadrant = null;
283 zptr = null;
284 ftab = null;
285
286 bsSetStream(inStream);
287
288 workFactor = 50;
289 if (inBlockSize > 9) {
290 inBlockSize = 9;
291 }
292 if (inBlockSize < 1) {
293 inBlockSize = 1;
294 }
295 blockSize100k = inBlockSize;
296 allocateCompressStructures();
297 initialize();
298 initBlock();
299 }
300
301 /**
302 *
303 * modified by Oliver Merkel, 010128
304 *
305 */
306 public void write(int bv) throws IOException {
307 int b = (256 + bv) % 256;
308 if (currentChar != -1) {
309 if (currentChar == b) {
310 runLength++;
311 if (runLength > 254) {
312 writeRun();
313 currentChar = -1;
314 runLength = 0;
315 }
316 } else {
317 writeRun();
318 runLength = 1;
319 currentChar = b;
320 }
321 } else {
322 currentChar = b;
323 runLength++;
324 }
325 }
326
327 private void writeRun() throws IOException {
328 if (last < allowableBlockSize) {
329 inUse[currentChar] = true;
330 for (int i = 0; i < runLength; i++) {
331 mCrc.updateCRC((char) currentChar);
332 }
333 switch (runLength) {
334 case 1:
335 last++;
336 block[last + 1] = (char) currentChar;
337 break;
338 case 2:
339 last++;
340 block[last + 1] = (char) currentChar;
341 last++;
342 block[last + 1] = (char) currentChar;
343 break;
344 case 3:
345 last++;
346 block[last + 1] = (char) currentChar;
347 last++;
348 block[last + 1] = (char) currentChar;
349 last++;
350 block[last + 1] = (char) currentChar;
351 break;
352 default:
353 inUse[runLength - 4] = true;
354 last++;
355 block[last + 1] = (char) currentChar;
356 last++;
357 block[last + 1] = (char) currentChar;
358 last++;
359 block[last + 1] = (char) currentChar;
360 last++;
361 block[last + 1] = (char) currentChar;
362 last++;
363 block[last + 1] = (char) (runLength - 4);
364 break;
365 }
366 } else {
367 endBlock();
368 initBlock();
369 writeRun();
370 }
371 }
372
373 boolean closed = false;
374
375 protected void finalize() throws Throwable {
376 close();
377 super.finalize();
378 }
379
380 public void close() throws IOException {
381 if (closed) {
382 return;
383 }
384
385 if (runLength > 0) {
386 writeRun();
387 }
388 currentChar = -1;
389 endBlock();
390 endCompression();
391 closed = true;
392 super.close();
393 bsStream.close();
394 }
395
396 public void flush() throws IOException {
397 super.flush();
398 bsStream.flush();
399 }
400
401 private int blockCRC, combinedCRC;
402
403 private void initialize() throws IOException {
404 bytesOut = 0;
405 nBlocksRandomised = 0;
406
407 /* Write `magic' bytes h indicating file-format == huffmanised,
408 followed by a digit indicating blockSize100k.
409 */
410 bsPutUChar('h');
411 bsPutUChar('0' + blockSize100k);
412
413 combinedCRC = 0;
414 }
415
416 private int allowableBlockSize;
417
418 private void initBlock() {
419 // blockNo++;
420 mCrc.initialiseCRC();
421 last = -1;
422 // ch = 0;
423
424 for (int i = 0; i < 256; i++) {
425 inUse[i] = false;
426 }
427
428 /* 20 is just a paranoia constant */
429 allowableBlockSize = baseBlockSize * blockSize100k - 20;
430 }
431
432 private void endBlock() throws IOException {
433 blockCRC = mCrc.getFinalCRC();
434 combinedCRC = (combinedCRC << 1) | (combinedCRC >>> 31);
435 combinedCRC ^= blockCRC;
436
437 /* sort the block and establish posn of original string */
438 doReversibleTransformation();
439
440 /*
441 A 6-byte block header, the value chosen arbitrarily
442 as 0x314159265359 :-). A 32 bit value does not really
443 give a strong enough guarantee that the value will not
444 appear by chance in the compressed datastream. Worst-case
445 probability of this event, for a 900k block, is about
446 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits.
447 For a compressed file of size 100Gb -- about 100000 blocks --
448 only a 48-bit marker will do. NB: normal compression/
449 decompression do *not* rely on these statistical properties.
450 They are only important when trying to recover blocks from
451 damaged files.
452 */
453 bsPutUChar(0x31);
454 bsPutUChar(0x41);
455 bsPutUChar(0x59);
456 bsPutUChar(0x26);
457 bsPutUChar(0x53);
458 bsPutUChar(0x59);
459
460 /* Now the block's CRC, so it is in a known place. */
461 bsPutint(blockCRC);
462
463 /* Now a single bit indicating randomisation. */
464 if (blockRandomised) {
465 bsW(1, 1);
466 nBlocksRandomised++;
467 } else {
468 bsW(1, 0);
469 }
470
471 /* Finally, block's contents proper. */
472 moveToFrontCodeAndSend();
473 }
474
475 private void endCompression() throws IOException {
476 /*
477 Now another magic 48-bit number, 0x177245385090, to
478 indicate the end of the last block. (sqrt(pi), if
479 you want to know. I did want to use e, but it contains
480 too much repetition -- 27 18 28 18 28 46 -- for me
481 to feel statistically comfortable. Call me paranoid.)
482 */
483 bsPutUChar(0x17);
484 bsPutUChar(0x72);
485 bsPutUChar(0x45);
486 bsPutUChar(0x38);
487 bsPutUChar(0x50);
488 bsPutUChar(0x90);
489
490 bsPutint(combinedCRC);
491
492 bsFinishedWithStream();
493 }
494
495 private void hbAssignCodes (int[] code, char[] length, int minLen,
496 int maxLen, int alphaSize) {
497 int n, vec, i;
498
499 vec = 0;
500 for (n = minLen; n <= maxLen; n++) {
501 for (i = 0; i < alphaSize; i++) {
502 if (length[i] == n) {
503 code[i] = vec;
504 vec++;
505 }
506 };
507 vec <<= 1;
508 }
509 }
510
511 private void bsSetStream(OutputStream f) {
512 bsStream = f;
513 bsLive = 0;
514 bsBuff = 0;
515 bytesOut = 0;
516 }
517
518 private void bsFinishedWithStream() throws IOException {
519 while (bsLive > 0) {
520 int ch = (bsBuff >> 24);
521 try {
522 bsStream.write(ch); // write 8-bit
523 } catch (IOException e) {
524 throw e;
525 }
526 bsBuff <<= 8;
527 bsLive -= 8;
528 bytesOut++;
529 }
530 }
531
532 private void bsW(int n, int v) throws IOException {
533 while (bsLive >= 8) {
534 int ch = (bsBuff >> 24);
535 try {
536 bsStream.write(ch); // write 8-bit
537 } catch (IOException e) {
538 throw e;
539 }
540 bsBuff <<= 8;
541 bsLive -= 8;
542 bytesOut++;
543 }
544 bsBuff |= (v << (32 - bsLive - n));
545 bsLive += n;
546 }
547
548 private void bsPutUChar(int c) throws IOException {
549 bsW(8, c);
550 }
551
552 private void bsPutint(int u) throws IOException {
553 bsW(8, (u >> 24) & 0xff);
554 bsW(8, (u >> 16) & 0xff);
555 bsW(8, (u >> 8) & 0xff);
556 bsW(8, u & 0xff);
557 }
558
559 private void bsPutIntVS(int numBits, int c) throws IOException {
560 bsW(numBits, c);
561 }
562
563 private void sendMTFValues() throws IOException {
564 char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE];
565
566 int v, t, i, j, gs, ge, totc, bt, bc, iter;
567 int nSelectors = 0, alphaSize, minLen, maxLen, selCtr;
568 int nGroups, nBytes;
569
570 alphaSize = nInUse + 2;
571 for (t = 0; t < N_GROUPS; t++) {
572 for (v = 0; v < alphaSize; v++) {
573 len[t][v] = (char) GREATER_ICOST;
574 }
575 }
576
577 /* Decide how many coding tables to use */
578 if (nMTF <= 0) {
579 panic();
580 }
581
582 if (nMTF < 200) {
583 nGroups = 2;
584 } else if (nMTF < 600) {
585 nGroups = 3;
586 } else if (nMTF < 1200) {
587 nGroups = 4;
588 } else if (nMTF < 2400) {
589 nGroups = 5;
590 } else {
591 nGroups = 6;
592 }
593
594 /* Generate an initial set of coding tables */ {
595 int nPart, remF, tFreq, aFreq;
596
597 nPart = nGroups;
598 remF = nMTF;
599 gs = 0;
600 while (nPart > 0) {
601 tFreq = remF / nPart;
602 ge = gs - 1;
603 aFreq = 0;
604 while (aFreq < tFreq && ge < alphaSize - 1) {
605 ge++;
606 aFreq += mtfFreq[ge];
607 }
608
609 if (ge > gs && nPart != nGroups && nPart != 1
610 && ((nGroups - nPart) % 2 == 1)) {
611 aFreq -= mtfFreq[ge];
612 ge--;
613 }
614
615 for (v = 0; v < alphaSize; v++) {
616 if (v >= gs && v <= ge) {
617 len[nPart - 1][v] = (char) LESSER_ICOST;
618 } else {
619 len[nPart - 1][v] = (char) GREATER_ICOST;
620 }
621 }
622
623 nPart--;
624 gs = ge + 1;
625 remF -= aFreq;
626 }
627 }
628
629 int[][] rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE];
630 int[] fave = new int[N_GROUPS];
631 short[] cost = new short[N_GROUPS];
632 /*
633 Iterate up to N_ITERS times to improve the tables.
634 */
635 for (iter = 0; iter < N_ITERS; iter++) {
636 for (t = 0; t < nGroups; t++) {
637 fave[t] = 0;
638 }
639
640 for (t = 0; t < nGroups; t++) {
641 for (v = 0; v < alphaSize; v++) {
642 rfreq[t][v] = 0;
643 }
644 }
645
646 nSelectors = 0;
647 totc = 0;
648 gs = 0;
649 while (true) {
650
651 /* Set group start & end marks. */
652 if (gs >= nMTF) {
653 break;
654 }
655 ge = gs + G_SIZE - 1;
656 if (ge >= nMTF) {
657 ge = nMTF - 1;
658 }
659
660 /*
661 Calculate the cost of this group as coded
662 by each of the coding tables.
663 */
664 for (t = 0; t < nGroups; t++) {
665 cost[t] = 0;
666 }
667
668 if (nGroups == 6) {
669 short cost0, cost1, cost2, cost3, cost4, cost5;
670 cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
671 for (i = gs; i <= ge; i++) {
672 short icv = szptr[i];
673 cost0 += len[0][icv];
674 cost1 += len[1][icv];
675 cost2 += len[2][icv];
676 cost3 += len[3][icv];
677 cost4 += len[4][icv];
678 cost5 += len[5][icv];
679 }
680 cost[0] = cost0;
681 cost[1] = cost1;
682 cost[2] = cost2;
683 cost[3] = cost3;
684 cost[4] = cost4;
685 cost[5] = cost5;
686 } else {
687 for (i = gs; i <= ge; i++) {
688 short icv = szptr[i];
689 for (t = 0; t < nGroups; t++) {
690 cost[t] += len[t][icv];
691 }
692 }
693 }
694
695 /*
696 Find the coding table which is best for this group,
697 and record its identity in the selector table.
698 */
699 bc = 999999999;
700 bt = -1;
701 for (t = 0; t < nGroups; t++) {
702 if (cost[t] < bc) {
703 bc = cost[t];
704 bt = t;
705 }
706 };
707 totc += bc;
708 fave[bt]++;
709 selector[nSelectors] = (char) bt;
710 nSelectors++;
711
712 /*
713 Increment the symbol frequencies for the selected table.
714 */
715 for (i = gs; i <= ge; i++) {
716 rfreq[bt][szptr[i]]++;
717 }
718
719 gs = ge + 1;
720 }
721
722 /*
723 Recompute the tables based on the accumulated frequencies.
724 */
725 for (t = 0; t < nGroups; t++) {
726 hbMakeCodeLengths(len[t], rfreq[t], alphaSize, 20);
727 }
728 }
729
730 rfreq = null;
731 fave = null;
732 cost = null;
733
734 if (!(nGroups < 8)) {
735 panic();
736 }
737 if (!(nSelectors < 32768 && nSelectors <= (2 + (900000 / G_SIZE)))) {
738 panic();
739 }
740
741
742 /* Compute MTF values for the selectors. */
743 {
744 char[] pos = new char[N_GROUPS];
745 char ll_i, tmp2, tmp;
746 for (i = 0; i < nGroups; i++) {
747 pos[i] = (char) i;
748 }
749 for (i = 0; i < nSelectors; i++) {
750 ll_i = selector[i];
751 j = 0;
752 tmp = pos[j];
753 while (ll_i != tmp) {
754 j++;
755 tmp2 = tmp;
756 tmp = pos[j];
757 pos[j] = tmp2;
758 }
759 pos[0] = tmp;
760 selectorMtf[i] = (char) j;
761 }
762 }
763
764 int[][] code = new int[N_GROUPS][MAX_ALPHA_SIZE];
765
766 /* Assign actual codes for the tables. */
767 for (t = 0; t < nGroups; t++) {
768 minLen = 32;
769 maxLen = 0;
770 for (i = 0; i < alphaSize; i++) {
771 if (len[t][i] > maxLen) {
772 maxLen = len[t][i];
773 }
774 if (len[t][i] < minLen) {
775 minLen = len[t][i];
776 }
777 }
778 if (maxLen > 20) {
779 panic();
780 }
781 if (minLen < 1) {
782 panic();
783 }
784 hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
785 }
786
787 /* Transmit the mapping table. */
788 {
789 boolean[] inUse16 = new boolean[16];
790 for (i = 0; i < 16; i++) {
791 inUse16[i] = false;
792 for (j = 0; j < 16; j++) {
793 if (inUse[i * 16 + j]) {
794 inUse16[i] = true;
795 }
796 }
797 }
798
799 nBytes = bytesOut;
800 for (i = 0; i < 16; i++) {
801 if (inUse16[i]) {
802 bsW(1, 1);
803 } else {
804 bsW(1, 0);
805 }
806 }
807
808 for (i = 0; i < 16; i++) {
809 if (inUse16[i]) {
810 for (j = 0; j < 16; j++) {
811 if (inUse[i * 16 + j]) {
812 bsW(1, 1);
813 } else {
814 bsW(1, 0);
815 }
816 }
817 }
818 }
819
820 }
821
822 /* Now the selectors. */
823 nBytes = bytesOut;
824 bsW (3, nGroups);
825 bsW (15, nSelectors);
826 for (i = 0; i < nSelectors; i++) {
827 for (j = 0; j < selectorMtf[i]; j++) {
828 bsW(1, 1);
829 }
830 bsW(1, 0);
831 }
832
833 /* Now the coding tables. */
834 nBytes = bytesOut;
835
836 for (t = 0; t < nGroups; t++) {
837 int curr = len[t][0];
838 bsW(5, curr);
839 for (i = 0; i < alphaSize; i++) {
840 while (curr < len[t][i]) {
841 bsW(2, 2);
842 curr++; /* 10 */
843 }
844 while (curr > len[t][i]) {
845 bsW(2, 3);
846 curr--; /* 11 */
847 }
848 bsW (1, 0);
849 }
850 }
851
852 /* And finally, the block data proper */
853 nBytes = bytesOut;
854 selCtr = 0;
855 gs = 0;
856 while (true) {
857 if (gs >= nMTF) {
858 break;
859 }
860 ge = gs + G_SIZE - 1;
861 if (ge >= nMTF) {
862 ge = nMTF - 1;
863 }
864 for (i = gs; i <= ge; i++) {
865 bsW(len[selector[selCtr]][szptr[i]],
866 code[selector[selCtr]][szptr[i]]);
867 }
868
869 gs = ge + 1;
870 selCtr++;
871 }
872 if (!(selCtr == nSelectors)) {
873 panic();
874 }
875 }
876
877 private void moveToFrontCodeAndSend () throws IOException {
878 bsPutIntVS(24, origPtr);
879 generateMTFValues();
880 sendMTFValues();
881 }
882
883 private OutputStream bsStream;
884
885 private void simpleSort(int lo, int hi, int d) {
886 int i, j, h, bigN, hp;
887 int v;
888
889 bigN = hi - lo + 1;
890 if (bigN < 2) {
891 return;
892 }
893
894 hp = 0;
895 while (incs[hp] < bigN) {
896 hp++;
897 }
898 hp--;
899
900 for (; hp >= 0; hp--) {
901 h = incs[hp];
902
903 i = lo + h;
904 while (true) {
905 /* copy 1 */
906 if (i > hi) {
907 break;
908 }
909 v = zptr[i];
910 j = i;
911 while (fullGtU(zptr[j - h] + d, v + d)) {
912 zptr[j] = zptr[j - h];
913 j = j - h;
914 if (j <= (lo + h - 1)) {
915 break;
916 }
917 }
918 zptr[j] = v;
919 i++;
920
921 /* copy 2 */
922 if (i > hi) {
923 break;
924 }
925 v = zptr[i];
926 j = i;
927 while (fullGtU(zptr[j - h] + d, v + d)) {
928 zptr[j] = zptr[j - h];
929 j = j - h;
930 if (j <= (lo + h - 1)) {
931 break;
932 }
933 }
934 zptr[j] = v;
935 i++;
936
937 /* copy 3 */
938 if (i > hi) {
939 break;
940 }
941 v = zptr[i];
942 j = i;
943 while (fullGtU(zptr[j - h] + d, v + d)) {
944 zptr[j] = zptr[j - h];
945 j = j - h;
946 if (j <= (lo + h - 1)) {
947 break;
948 }
949 }
950 zptr[j] = v;
951 i++;
952
953 if (workDone > workLimit && firstAttempt) {
954 return;
955 }
956 }
957 }
958 }
959
960 private void vswap(int p1, int p2, int n) {
961 int temp = 0;
962 while (n > 0) {
963 temp = zptr[p1];
964 zptr[p1] = zptr[p2];
965 zptr[p2] = temp;
966 p1++;
967 p2++;
968 n--;
969 }
970 }
971
972 private char med3(char a, char b, char c) {
973 char t;
974 if (a > b) {
975 t = a;
976 a = b;
977 b = t;
978 }
979 if (b > c) {
980 t = b;
981 b = c;
982 c = t;
983 }
984 if (a > b) {
985 b = a;
986 }
987 return b;
988 }
989
990 private static class StackElem {
991 int ll;
992 int hh;
993 int dd;
994 }
995
996 private void qSort3(int loSt, int hiSt, int dSt) {
997 int unLo, unHi, ltLo, gtHi, med, n, m;
998 int sp, lo, hi, d;
999 StackElem[] stack = new StackElem[QSORT_STACK_SIZE];
1000 for (int count = 0; count < QSORT_STACK_SIZE; count++) {
1001 stack[count] = new StackElem();
1002 }
1003
1004 sp = 0;
1005
1006 stack[sp].ll = loSt;
1007 stack[sp].hh = hiSt;
1008 stack[sp].dd = dSt;
1009 sp++;
1010
1011 while (sp > 0) {
1012 if (sp >= QSORT_STACK_SIZE) {
1013 panic();
1014 }
1015
1016 sp--;
1017 lo = stack[sp].ll;
1018 hi = stack[sp].hh;
1019 d = stack[sp].dd;
1020
1021 if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) {
1022 simpleSort(lo, hi, d);
1023 if (workDone > workLimit && firstAttempt) {
1024 return;
1025 }
1026 continue;
1027 }
1028
1029 med = med3(block[zptr[lo] + d + 1],
1030 block[zptr[hi ] + d + 1],
1031 block[zptr[(lo + hi) >> 1] + d + 1]);
1032
1033 unLo = ltLo = lo;
1034 unHi = gtHi = hi;
1035
1036 while (true) {
1037 while (true) {
1038 if (unLo > unHi) {
1039 break;
1040 }
1041 n = ((int) block[zptr[unLo] + d + 1]) - med;
1042 if (n == 0) {
1043 int temp = 0;
1044 temp = zptr[unLo];
1045 zptr[unLo] = zptr[ltLo];
1046 zptr[ltLo] = temp;
1047 ltLo++;
1048 unLo++;
1049 continue;
1050 };
1051 if (n > 0) {
1052 break;
1053 }
1054 unLo++;
1055 }
1056 while (true) {
1057 if (unLo > unHi) {
1058 break;
1059 }
1060 n = ((int) block[zptr[unHi] + d + 1]) - med;
1061 if (n == 0) {
1062 int temp = 0;
1063 temp = zptr[unHi];
1064 zptr[unHi] = zptr[gtHi];
1065 zptr[gtHi] = temp;
1066 gtHi--;
1067 unHi--;
1068 continue;
1069 };
1070 if (n < 0) {
1071 break;
1072 }
1073 unHi--;
1074 }
1075 if (unLo > unHi) {
1076 break;
1077 }
1078 int temp = 0;
1079 temp = zptr[unLo];
1080 zptr[unLo] = zptr[unHi];
1081 zptr[unHi] = temp;
1082 unLo++;
1083 unHi--;
1084 }
1085
1086 if (gtHi < ltLo) {
1087 stack[sp].ll = lo;
1088 stack[sp].hh = hi;
1089 stack[sp].dd = d + 1;
1090 sp++;
1091 continue;
1092 }
1093
1094 n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) : (unLo - ltLo);
1095 vswap(lo, unLo - n, n);
1096 m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi) : (gtHi - unHi);
1097 vswap(unLo, hi - m + 1, m);
1098
1099 n = lo + unLo - ltLo - 1;
1100 m = hi - (gtHi - unHi) + 1;
1101
1102 stack[sp].ll = lo;
1103 stack[sp].hh = n;
1104 stack[sp].dd = d;
1105 sp++;
1106
1107 stack[sp].ll = n + 1;
1108 stack[sp].hh = m - 1;
1109 stack[sp].dd = d + 1;
1110 sp++;
1111
1112 stack[sp].ll = m;
1113 stack[sp].hh = hi;
1114 stack[sp].dd = d;
1115 sp++;
1116 }
1117 }
1118
1119 private void mainSort() {
1120 int i, j, ss, sb;
1121 int[] runningOrder = new int[256];
1122 int[] copy = new int[256];
1123 boolean[] bigDone = new boolean[256];
1124 int c1, c2;
1125 int numQSorted;
1126
1127 /*
1128 In the various block-sized structures, live data runs
1129 from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First,
1130 set up the overshoot area for block.
1131 */
1132
1133 // if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" );
1134 for (i = 0; i < NUM_OVERSHOOT_BYTES; i++) {
1135 block[last + i + 2] = block[(i % (last + 1)) + 1];
1136 }
1137 for (i = 0; i <= last + NUM_OVERSHOOT_BYTES; i++) {
1138 quadrant[i] = 0;
1139 }
1140
1141 block[0] = (char) (block[last + 1]);
1142
1143 if (last < 4000) {
1144 /*
1145 Use simpleSort(), since the full sorting mechanism
1146 has quite a large constant overhead.
1147 */
1148 for (i = 0; i <= last; i++) {
1149 zptr[i] = i;
1150 }
1151 firstAttempt = false;
1152 workDone = workLimit = 0;
1153 simpleSort(0, last, 0);
1154 } else {
1155 numQSorted = 0;
1156 for (i = 0; i <= 255; i++) {
1157 bigDone[i] = false;
1158 }
1159
1160 for (i = 0; i <= 65536; i++) {
1161 ftab[i] = 0;
1162 }
1163
1164 c1 = block[0];
1165 for (i = 0; i <= last; i++) {
1166 c2 = block[i + 1];
1167 ftab[(c1 << 8) + c2]++;
1168 c1 = c2;
1169 }
1170
1171 for (i = 1; i <= 65536; i++) {
1172 ftab[i] += ftab[i - 1];
1173 }
1174
1175 c1 = block[1];
1176 for (i = 0; i < last; i++) {
1177 c2 = block[i + 2];
1178 j = (c1 << 8) + c2;
1179 c1 = c2;
1180 ftab[j]--;
1181 zptr[ftab[j]] = i;
1182 }
1183
1184 j = ((block[last + 1]) << 8) + (block[1]);
1185 ftab[j]--;
1186 zptr[ftab[j]] = last;
1187
1188 /*
1189 Now ftab contains the first loc of every small bucket.
1190 Calculate the running order, from smallest to largest
1191 big bucket.
1192 */
1193
1194 for (i = 0; i <= 255; i++) {
1195 runningOrder[i] = i;
1196 }
1197
1198 {
1199 int vv;
1200 int h = 1;
1201 do {
1202 h = 3 * h + 1;
1203 }
1204 while (h <= 256);
1205 do {
1206 h = h / 3;
1207 for (i = h; i <= 255; i++) {
1208 vv = runningOrder[i];
1209 j = i;
1210 while ((ftab[((runningOrder[j - h]) + 1) << 8]
1211 - ftab[(runningOrder[j - h]) << 8])
1212 > (ftab[((vv) + 1) << 8] - ftab[(vv) << 8])) {
1213 runningOrder[j] = runningOrder[j - h];
1214 j = j - h;
1215 if (j <= (h - 1)) {
1216 break;
1217 }
1218 }
1219 runningOrder[j] = vv;
1220 }
1221 } while (h != 1);
1222 }
1223
1224 /*
1225 The main sorting loop.
1226 */
1227 for (i = 0; i <= 255; i++) {
1228
1229 /*
1230 Process big buckets, starting with the least full.
1231 */
1232 ss = runningOrder[i];
1233
1234 /*
1235 Complete the big bucket [ss] by quicksorting
1236 any unsorted small buckets [ss, j]. Hopefully
1237 previous pointer-scanning phases have already
1238 completed many of the small buckets [ss, j], so
1239 we don't have to sort them at all.
1240 */
1241 for (j = 0; j <= 255; j++) {
1242 sb = (ss << 8) + j;
1243 if (!((ftab[sb] & SETMASK) == SETMASK)) {
1244 int lo = ftab[sb] & CLEARMASK;
1245 int hi = (ftab[sb + 1] & CLEARMASK) - 1;
1246 if (hi > lo) {
1247 qSort3(lo, hi, 2);
1248 numQSorted += (hi - lo + 1);
1249 if (workDone > workLimit && firstAttempt) {
1250 return;
1251 }
1252 }
1253 ftab[sb] |= SETMASK;
1254 }
1255 }
1256
1257 /*
1258 The ss big bucket is now done. Record this fact,
1259 and update the quadrant descriptors. Remember to
1260 update quadrants in the overshoot area too, if
1261 necessary. The "if (i < 255)" test merely skips
1262 this updating for the last bucket processed, since
1263 updating for the last bucket is pointless.
1264 */
1265 bigDone[ss] = true;
1266
1267 if (i < 255) {
1268 int bbStart = ftab[ss << 8] & CLEARMASK;
1269 int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart;
1270 int shifts = 0;
1271
1272 while ((bbSize >> shifts) > 65534) {
1273 shifts++;
1274 }
1275
1276 for (j = 0; j < bbSize; j++) {
1277 int a2update = zptr[bbStart + j];
1278 int qVal = (j >> shifts);
1279 quadrant[a2update] = qVal;
1280 if (a2update < NUM_OVERSHOOT_BYTES) {
1281 quadrant[a2update + last + 1] = qVal;
1282 }
1283 }
1284
1285 if (!(((bbSize - 1) >> shifts) <= 65535)) {
1286 panic();
1287 }
1288 }
1289
1290 /*
1291 Now scan this big bucket so as to synthesise the
1292 sorted order for small buckets [t, ss] for all t != ss.
1293 */
1294 for (j = 0; j <= 255; j++) {
1295 copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
1296 }
1297
1298 for (j = ftab[ss << 8] & CLEARMASK;
1299 j < (ftab[(ss + 1) << 8] & CLEARMASK); j++) {
1300 c1 = block[zptr[j]];
1301 if (!bigDone[c1]) {
1302 zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1;
1303 copy[c1]++;
1304 }
1305 }
1306
1307 for (j = 0; j <= 255; j++) {
1308 ftab[(j << 8) + ss] |= SETMASK;
1309 }
1310 }
1311 }
1312 }
1313
1314 private void randomiseBlock() {
1315 int i;
1316 int rNToGo = 0;
1317 int rTPos = 0;
1318 for (i = 0; i < 256; i++) {
1319 inUse[i] = false;
1320 }
1321
1322 for (i = 0; i <= last; i++) {
1323 if (rNToGo == 0) {
1324 rNToGo = (char) rNums[rTPos];
1325 rTPos++;
1326 if (rTPos == 512) {
1327 rTPos = 0;
1328 }
1329 }
1330 rNToGo--;
1331 block[i + 1] ^= ((rNToGo == 1) ? 1 : 0);
1332 // handle 16 bit signed numbers
1333 block[i + 1] &= 0xFF;
1334
1335 inUse[block[i + 1]] = true;
1336 }
1337 }
1338
1339 private void doReversibleTransformation() {
1340 int i;
1341
1342 workLimit = workFactor * last;
1343 workDone = 0;
1344 blockRandomised = false;
1345 firstAttempt = true;
1346
1347 mainSort();
1348
1349 if (workDone > workLimit && firstAttempt) {
1350 randomiseBlock();
1351 workLimit = workDone = 0;
1352 blockRandomised = true;
1353 firstAttempt = false;
1354 mainSort();
1355 }
1356
1357 origPtr = -1;
1358 for (i = 0; i <= last; i++) {
1359 if (zptr[i] == 0) {
1360 origPtr = i;
1361 break;
1362 }
1363 };
1364
1365 if (origPtr == -1) {
1366 panic();
1367 }
1368 }
1369
1370 private boolean fullGtU(int i1, int i2) {
1371 int k;
1372 char c1, c2;
1373 int s1, s2;
1374
1375 c1 = block[i1 + 1];
1376 c2 = block[i2 + 1];
1377 if (c1 != c2) {
1378 return (c1 > c2);
1379 }
1380 i1++;
1381 i2++;
1382
1383 c1 = block[i1 + 1];
1384 c2 = block[i2 + 1];
1385 if (c1 != c2) {
1386 return (c1 > c2);
1387 }
1388 i1++;
1389 i2++;
1390
1391 c1 = block[i1 + 1];
1392 c2 = block[i2 + 1];
1393 if (c1 != c2) {
1394 return (c1 > c2);
1395 }
1396 i1++;
1397 i2++;
1398
1399 c1 = block[i1 + 1];
1400 c2 = block[i2 + 1];
1401 if (c1 != c2) {
1402 return (c1 > c2);
1403 }
1404 i1++;
1405 i2++;
1406
1407 c1 = block[i1 + 1];
1408 c2 = block[i2 + 1];
1409 if (c1 != c2) {
1410 return (c1 > c2);
1411 }
1412 i1++;
1413 i2++;
1414
1415 c1 = block[i1 + 1];
1416 c2 = block[i2 + 1];
1417 if (c1 != c2) {
1418 return (c1 > c2);
1419 }
1420 i1++;
1421 i2++;
1422
1423 k = last + 1;
1424
1425 do {
1426 c1 = block[i1 + 1];
1427 c2 = block[i2 + 1];
1428 if (c1 != c2) {
1429 return (c1 > c2);
1430 }
1431 s1 = quadrant[i1];
1432 s2 = quadrant[i2];
1433 if (s1 != s2) {
1434 return (s1 > s2);
1435 }
1436 i1++;
1437 i2++;
1438
1439 c1 = block[i1 + 1];
1440 c2 = block[i2 + 1];
1441 if (c1 != c2) {
1442 return (c1 > c2);
1443 }
1444 s1 = quadrant[i1];
1445 s2 = quadrant[i2];
1446 if (s1 != s2) {
1447 return (s1 > s2);
1448 }
1449 i1++;
1450 i2++;
1451
1452 c1 = block[i1 + 1];
1453 c2 = block[i2 + 1];
1454 if (c1 != c2) {
1455 return (c1 > c2);
1456 }
1457 s1 = quadrant[i1];
1458 s2 = quadrant[i2];
1459 if (s1 != s2) {
1460 return (s1 > s2);
1461 }
1462 i1++;
1463 i2++;
1464
1465 c1 = block[i1 + 1];
1466 c2 = block[i2 + 1];
1467 if (c1 != c2) {
1468 return (c1 > c2);
1469 }
1470 s1 = quadrant[i1];
1471 s2 = quadrant[i2];
1472 if (s1 != s2) {
1473 return (s1 > s2);
1474 }
1475 i1++;
1476 i2++;
1477
1478 if (i1 > last) {
1479 i1 -= last;
1480 i1--;
1481 };
1482 if (i2 > last) {
1483 i2 -= last;
1484 i2--;
1485 };
1486
1487 k -= 4;
1488 workDone++;
1489 } while (k >= 0);
1490
1491 return false;
1492 }
1493
1494 /*
1495 Knuth's increments seem to work better
1496 than Incerpi-Sedgewick here. Possibly
1497 because the number of elems to sort is
1498 usually small, typically <= 20.
1499 */
1500 private int[] incs = {1, 4, 13, 40, 121, 364, 1093, 3280,
1501 9841, 29524, 88573, 265720,
1502 797161, 2391484};
1503
1504 private void allocateCompressStructures () {
1505 int n = baseBlockSize * blockSize100k;
1506 block = new char[(n + 1 + NUM_OVERSHOOT_BYTES)];
1507 quadrant = new int[(n + NUM_OVERSHOOT_BYTES)];
1508 zptr = new int[n];
1509 ftab = new int[65537];
1510
1511 if (block == null || quadrant == null || zptr == null
1512 || ftab == null) {
1513 //int totalDraw = (n + 1 + NUM_OVERSHOOT_BYTES) + (n + NUM_OVERSHOOT_BYTES) + n + 65537;
1514 //compressOutOfMemory ( totalDraw, n );
1515 }
1516
1517 /*
1518 The back end needs a place to store the MTF values
1519 whilst it calculates the coding tables. We could
1520 put them in the zptr array. However, these values
1521 will fit in a short, so we overlay szptr at the
1522 start of zptr, in the hope of reducing the number
1523 of cache misses induced by the multiple traversals
1524 of the MTF values when calculating coding tables.
1525 Seems to improve compression speed by about 1%.
1526 */
1527 // szptr = zptr;
1528
1529
1530 szptr = new short[2 * n];
1531 }
1532
1533 private void generateMTFValues() {
1534 char[] yy = new char[256];
1535 int i, j;
1536 char tmp;
1537 char tmp2;
1538 int zPend;
1539 int wr;
1540 int EOB;
1541
1542 makeMaps();
1543 EOB = nInUse + 1;
1544
1545 for (i = 0; i <= EOB; i++) {
1546 mtfFreq[i] = 0;
1547 }
1548
1549 wr = 0;
1550 zPend = 0;
1551 for (i = 0; i < nInUse; i++) {
1552 yy[i] = (char) i;
1553 }
1554
1555
1556 for (i = 0; i <= last; i++) {
1557 char ll_i;
1558
1559 ll_i = unseqToSeq[block[zptr[i]]];
1560
1561 j = 0;
1562 tmp = yy[j];
1563 while (ll_i != tmp) {
1564 j++;
1565 tmp2 = tmp;
1566 tmp = yy[j];
1567 yy[j] = tmp2;
1568 };
1569 yy[0] = tmp;
1570
1571 if (j == 0) {
1572 zPend++;
1573 } else {
1574 if (zPend > 0) {
1575 zPend--;
1576 while (true) {
1577 switch (zPend % 2) {
1578 case 0:
1579 szptr[wr] = (short) RUNA;
1580 wr++;
1581 mtfFreq[RUNA]++;
1582 break;
1583 case 1:
1584 szptr[wr] = (short) RUNB;
1585 wr++;
1586 mtfFreq[RUNB]++;
1587 break;
1588 };
1589 if (zPend < 2) {
1590 break;
1591 }
1592 zPend = (zPend - 2) / 2;
1593 };
1594 zPend = 0;
1595 }
1596 szptr[wr] = (short) (j + 1);
1597 wr++;
1598 mtfFreq[j + 1]++;
1599 }
1600 }
1601
1602 if (zPend > 0) {
1603 zPend--;
1604 while (true) {
1605 switch (zPend % 2) {
1606 case 0:
1607 szptr[wr] = (short) RUNA;
1608 wr++;
1609 mtfFreq[RUNA]++;
1610 break;
1611 case 1:
1612 szptr[wr] = (short) RUNB;
1613 wr++;
1614 mtfFreq[RUNB]++;
1615 break;
1616 }
1617 if (zPend < 2) {
1618 break;
1619 }
1620 zPend = (zPend - 2) / 2;
1621 }
1622 }
1623
1624 szptr[wr] = (short) EOB;
1625 wr++;
1626 mtfFreq[EOB]++;
1627
1628 nMTF = wr;
1629 }
1630}
1631
1632
Note: See TracBrowser for help on using the repository browser.