File: /usr/src/linux/arch/arm/nwfpe/softfloat-macros

1     
2     /*
3     ===============================================================================
4     
5     This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
6     Arithmetic Package, Release 2.
7     
8     Written by John R. Hauser.  This work was made possible in part by the
9     International Computer Science Institute, located at Suite 600, 1947 Center
10     Street, Berkeley, California 94704.  Funding was partially provided by the
11     National Science Foundation under grant MIP-9311980.  The original version
12     of this code was written as part of a project to build a fixed-point vector
13     processor in collaboration with the University of California at Berkeley,
14     overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
15     is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
16     arithmetic/softfloat.html'.
17     
18     THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
19     has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
20     TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
21     PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
22     AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
23     
24     Derivative works are acceptable, even for commercial purposes, so long as
25     (1) they include prominent notice that the work is derivative, and (2) they
26     include prominent notice akin to these three paragraphs for those parts of
27     this code that are retained.
28     
29     ===============================================================================
30     */
31     
32     /*
33     -------------------------------------------------------------------------------
34     Shifts `a' right by the number of bits given in `count'.  If any nonzero
35     bits are shifted off, they are ``jammed'' into the least significant bit of
36     the result by setting the least significant bit to 1.  The value of `count'
37     can be arbitrarily large; in particular, if `count' is greater than 32, the
38     result will be either 0 or 1, depending on whether `a' is zero or nonzero.
39     The result is stored in the location pointed to by `zPtr'.
40     -------------------------------------------------------------------------------
41     */
42     INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
43     {
44         bits32 z;
45         if ( count == 0 ) {
46             z = a;
47         }
48         else if ( count < 32 ) {
49             z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
50         }
51         else {
52             z = ( a != 0 );
53         }
54         *zPtr = z;
55     }
56     
57     /*
58     -------------------------------------------------------------------------------
59     Shifts `a' right by the number of bits given in `count'.  If any nonzero
60     bits are shifted off, they are ``jammed'' into the least significant bit of
61     the result by setting the least significant bit to 1.  The value of `count'
62     can be arbitrarily large; in particular, if `count' is greater than 64, the
63     result will be either 0 or 1, depending on whether `a' is zero or nonzero.
64     The result is stored in the location pointed to by `zPtr'.
65     -------------------------------------------------------------------------------
66     */
67     INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
68     {
69         bits64 z;
70     
71      __asm__("@shift64RightJamming -- start");   
72         if ( count == 0 ) {
73             z = a;
74         }
75         else if ( count < 64 ) {
76             z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
77         }
78         else {
79             z = ( a != 0 );
80         }
81      __asm__("@shift64RightJamming -- end");   
82         *zPtr = z;
83     }
84     
85     /*
86     -------------------------------------------------------------------------------
87     Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
88     _plus_ the number of bits given in `count'.  The shifted result is at most
89     64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
90     bits shifted off form a second 64-bit result as follows:  The _last_ bit
91     shifted off is the most-significant bit of the extra result, and the other
92     63 bits of the extra result are all zero if and only if _all_but_the_last_
93     bits shifted off were all zero.  This extra result is stored in the location
94     pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
95         (This routine makes more sense if `a0' and `a1' are considered to form a
96     fixed-point value with binary point between `a0' and `a1'.  This fixed-point
97     value is shifted right by the number of bits given in `count', and the
98     integer part of the result is returned at the location pointed to by
99     `z0Ptr'.  The fractional part of the result may be slightly corrupted as
100     described above, and is returned at the location pointed to by `z1Ptr'.)
101     -------------------------------------------------------------------------------
102     */
103     INLINE void
104      shift64ExtraRightJamming(
105          bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
106     {
107         bits64 z0, z1;
108         int8 negCount = ( - count ) & 63;
109     
110         if ( count == 0 ) {
111             z1 = a1;
112             z0 = a0;
113         }
114         else if ( count < 64 ) {
115             z1 = ( a0<<negCount ) | ( a1 != 0 );
116             z0 = a0>>count;
117         }
118         else {
119             if ( count == 64 ) {
120                 z1 = a0 | ( a1 != 0 );
121             }
122             else {
123                 z1 = ( ( a0 | a1 ) != 0 );
124             }
125             z0 = 0;
126         }
127         *z1Ptr = z1;
128         *z0Ptr = z0;
129     
130     }
131     
132     /*
133     -------------------------------------------------------------------------------
134     Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
135     number of bits given in `count'.  Any bits shifted off are lost.  The value
136     of `count' can be arbitrarily large; in particular, if `count' is greater
137     than 128, the result will be 0.  The result is broken into two 64-bit pieces
138     which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
139     -------------------------------------------------------------------------------
140     */
141     INLINE void
142      shift128Right(
143          bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
144     {
145         bits64 z0, z1;
146         int8 negCount = ( - count ) & 63;
147     
148         if ( count == 0 ) {
149             z1 = a1;
150             z0 = a0;
151         }
152         else if ( count < 64 ) {
153             z1 = ( a0<<negCount ) | ( a1>>count );
154             z0 = a0>>count;
155         }
156         else {
157             z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
158             z0 = 0;
159         }
160         *z1Ptr = z1;
161         *z0Ptr = z0;
162     
163     }
164     
165     /*
166     -------------------------------------------------------------------------------
167     Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
168     number of bits given in `count'.  If any nonzero bits are shifted off, they
169     are ``jammed'' into the least significant bit of the result by setting the
170     least significant bit to 1.  The value of `count' can be arbitrarily large;
171     in particular, if `count' is greater than 128, the result will be either 0
172     or 1, depending on whether the concatenation of `a0' and `a1' is zero or
173     nonzero.  The result is broken into two 64-bit pieces which are stored at
174     the locations pointed to by `z0Ptr' and `z1Ptr'.
175     -------------------------------------------------------------------------------
176     */
177     INLINE void
178      shift128RightJamming(
179          bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
180     {
181         bits64 z0, z1;
182         int8 negCount = ( - count ) & 63;
183     
184         if ( count == 0 ) {
185             z1 = a1;
186             z0 = a0;
187         }
188         else if ( count < 64 ) {
189             z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
190             z0 = a0>>count;
191         }
192         else {
193             if ( count == 64 ) {
194                 z1 = a0 | ( a1 != 0 );
195             }
196             else if ( count < 128 ) {
197                 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
198             }
199             else {
200                 z1 = ( ( a0 | a1 ) != 0 );
201             }
202             z0 = 0;
203         }
204         *z1Ptr = z1;
205         *z0Ptr = z0;
206     
207     }
208     
209     /*
210     -------------------------------------------------------------------------------
211     Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
212     by 64 _plus_ the number of bits given in `count'.  The shifted result is
213     at most 128 nonzero bits; these are broken into two 64-bit pieces which are
214     stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
215     off form a third 64-bit result as follows:  The _last_ bit shifted off is
216     the most-significant bit of the extra result, and the other 63 bits of the
217     extra result are all zero if and only if _all_but_the_last_ bits shifted off
218     were all zero.  This extra result is stored in the location pointed to by
219     `z2Ptr'.  The value of `count' can be arbitrarily large.
220         (This routine makes more sense if `a0', `a1', and `a2' are considered
221     to form a fixed-point value with binary point between `a1' and `a2'.  This
222     fixed-point value is shifted right by the number of bits given in `count',
223     and the integer part of the result is returned at the locations pointed to
224     by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
225     corrupted as described above, and is returned at the location pointed to by
226     `z2Ptr'.)
227     -------------------------------------------------------------------------------
228     */
229     INLINE void
230      shift128ExtraRightJamming(
231          bits64 a0,
232          bits64 a1,
233          bits64 a2,
234          int16 count,
235          bits64 *z0Ptr,
236          bits64 *z1Ptr,
237          bits64 *z2Ptr
238      )
239     {
240         bits64 z0, z1, z2;
241         int8 negCount = ( - count ) & 63;
242     
243         if ( count == 0 ) {
244             z2 = a2;
245             z1 = a1;
246             z0 = a0;
247         }
248         else {
249             if ( count < 64 ) {
250                 z2 = a1<<negCount;
251                 z1 = ( a0<<negCount ) | ( a1>>count );
252                 z0 = a0>>count;
253             }
254             else {
255                 if ( count == 64 ) {
256                     z2 = a1;
257                     z1 = a0;
258                 }
259                 else {
260                     a2 |= a1;
261                     if ( count < 128 ) {
262                         z2 = a0<<negCount;
263                         z1 = a0>>( count & 63 );
264                     }
265                     else {
266                         z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
267                         z1 = 0;
268                     }
269                 }
270                 z0 = 0;
271             }
272             z2 |= ( a2 != 0 );
273         }
274         *z2Ptr = z2;
275         *z1Ptr = z1;
276         *z0Ptr = z0;
277     
278     }
279     
280     /*
281     -------------------------------------------------------------------------------
282     Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
283     number of bits given in `count'.  Any bits shifted off are lost.  The value
284     of `count' must be less than 64.  The result is broken into two 64-bit
285     pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
286     -------------------------------------------------------------------------------
287     */
288     INLINE void
289      shortShift128Left(
290          bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
291     {
292     
293         *z1Ptr = a1<<count;
294         *z0Ptr =
295             ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
296     
297     }
298     
299     /*
300     -------------------------------------------------------------------------------
301     Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
302     by the number of bits given in `count'.  Any bits shifted off are lost.
303     The value of `count' must be less than 64.  The result is broken into three
304     64-bit pieces which are stored at the locations pointed to by `z0Ptr',
305     `z1Ptr', and `z2Ptr'.
306     -------------------------------------------------------------------------------
307     */
308     INLINE void
309      shortShift192Left(
310          bits64 a0,
311          bits64 a1,
312          bits64 a2,
313          int16 count,
314          bits64 *z0Ptr,
315          bits64 *z1Ptr,
316          bits64 *z2Ptr
317      )
318     {
319         bits64 z0, z1, z2;
320         int8 negCount;
321     
322         z2 = a2<<count;
323         z1 = a1<<count;
324         z0 = a0<<count;
325         if ( 0 < count ) {
326             negCount = ( ( - count ) & 63 );
327             z1 |= a2>>negCount;
328             z0 |= a1>>negCount;
329         }
330         *z2Ptr = z2;
331         *z1Ptr = z1;
332         *z0Ptr = z0;
333     
334     }
335     
336     /*
337     -------------------------------------------------------------------------------
338     Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
339     value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
340     any carry out is lost.  The result is broken into two 64-bit pieces which
341     are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
342     -------------------------------------------------------------------------------
343     */
344     INLINE void
345      add128(
346          bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
347     {
348         bits64 z1;
349     
350         z1 = a1 + b1;
351         *z1Ptr = z1;
352         *z0Ptr = a0 + b0 + ( z1 < a1 );
353     
354     }
355     
356     /*
357     -------------------------------------------------------------------------------
358     Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
359     192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
360     modulo 2^192, so any carry out is lost.  The result is broken into three
361     64-bit pieces which are stored at the locations pointed to by `z0Ptr',
362     `z1Ptr', and `z2Ptr'.
363     -------------------------------------------------------------------------------
364     */
365     INLINE void
366      add192(
367          bits64 a0,
368          bits64 a1,
369          bits64 a2,
370          bits64 b0,
371          bits64 b1,
372          bits64 b2,
373          bits64 *z0Ptr,
374          bits64 *z1Ptr,
375          bits64 *z2Ptr
376      )
377     {
378         bits64 z0, z1, z2;
379         int8 carry0, carry1;
380     
381         z2 = a2 + b2;
382         carry1 = ( z2 < a2 );
383         z1 = a1 + b1;
384         carry0 = ( z1 < a1 );
385         z0 = a0 + b0;
386         z1 += carry1;
387         z0 += ( z1 < carry1 );
388         z0 += carry0;
389         *z2Ptr = z2;
390         *z1Ptr = z1;
391         *z0Ptr = z0;
392     
393     }
394     
395     /*
396     -------------------------------------------------------------------------------
397     Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
398     128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
399     2^128, so any borrow out (carry out) is lost.  The result is broken into two
400     64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
401     `z1Ptr'.
402     -------------------------------------------------------------------------------
403     */
404     INLINE void
405      sub128(
406          bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
407     {
408     
409         *z1Ptr = a1 - b1;
410         *z0Ptr = a0 - b0 - ( a1 < b1 );
411     
412     }
413     
414     /*
415     -------------------------------------------------------------------------------
416     Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
417     from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
418     Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
419     result is broken into three 64-bit pieces which are stored at the locations
420     pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
421     -------------------------------------------------------------------------------
422     */
423     INLINE void
424      sub192(
425          bits64 a0,
426          bits64 a1,
427          bits64 a2,
428          bits64 b0,
429          bits64 b1,
430          bits64 b2,
431          bits64 *z0Ptr,
432          bits64 *z1Ptr,
433          bits64 *z2Ptr
434      )
435     {
436         bits64 z0, z1, z2;
437         int8 borrow0, borrow1;
438     
439         z2 = a2 - b2;
440         borrow1 = ( a2 < b2 );
441         z1 = a1 - b1;
442         borrow0 = ( a1 < b1 );
443         z0 = a0 - b0;
444         z0 -= ( z1 < borrow1 );
445         z1 -= borrow1;
446         z0 -= borrow0;
447         *z2Ptr = z2;
448         *z1Ptr = z1;
449         *z0Ptr = z0;
450     
451     }
452     
453     /*
454     -------------------------------------------------------------------------------
455     Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
456     into two 64-bit pieces which are stored at the locations pointed to by
457     `z0Ptr' and `z1Ptr'.
458     -------------------------------------------------------------------------------
459     */
460     INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
461     {
462         bits32 aHigh, aLow, bHigh, bLow;
463         bits64 z0, zMiddleA, zMiddleB, z1;
464     
465         aLow = a;
466         aHigh = a>>32;
467         bLow = b;
468         bHigh = b>>32;
469         z1 = ( (bits64) aLow ) * bLow;
470         zMiddleA = ( (bits64) aLow ) * bHigh;
471         zMiddleB = ( (bits64) aHigh ) * bLow;
472         z0 = ( (bits64) aHigh ) * bHigh;
473         zMiddleA += zMiddleB;
474         z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
475         zMiddleA <<= 32;
476         z1 += zMiddleA;
477         z0 += ( z1 < zMiddleA );
478         *z1Ptr = z1;
479         *z0Ptr = z0;
480     
481     }
482     
483     /*
484     -------------------------------------------------------------------------------
485     Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to
486     obtain a 192-bit product.  The product is broken into three 64-bit pieces
487     which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
488     `z2Ptr'.
489     -------------------------------------------------------------------------------
490     */
491     INLINE void
492      mul128By64To192(
493          bits64 a0,
494          bits64 a1,
495          bits64 b,
496          bits64 *z0Ptr,
497          bits64 *z1Ptr,
498          bits64 *z2Ptr
499      )
500     {
501         bits64 z0, z1, z2, more1;
502     
503         mul64To128( a1, b, &z1, &z2 );
504         mul64To128( a0, b, &z0, &more1 );
505         add128( z0, more1, 0, z1, &z0, &z1 );
506         *z2Ptr = z2;
507         *z1Ptr = z1;
508         *z0Ptr = z0;
509     
510     }
511     
512     /*
513     -------------------------------------------------------------------------------
514     Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
515     128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
516     product.  The product is broken into four 64-bit pieces which are stored at
517     the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
518     -------------------------------------------------------------------------------
519     */
520     INLINE void
521      mul128To256(
522          bits64 a0,
523          bits64 a1,
524          bits64 b0,
525          bits64 b1,
526          bits64 *z0Ptr,
527          bits64 *z1Ptr,
528          bits64 *z2Ptr,
529          bits64 *z3Ptr
530      )
531     {
532         bits64 z0, z1, z2, z3;
533         bits64 more1, more2;
534     
535         mul64To128( a1, b1, &z2, &z3 );
536         mul64To128( a1, b0, &z1, &more2 );
537         add128( z1, more2, 0, z2, &z1, &z2 );
538         mul64To128( a0, b0, &z0, &more1 );
539         add128( z0, more1, 0, z1, &z0, &z1 );
540         mul64To128( a0, b1, &more1, &more2 );
541         add128( more1, more2, 0, z2, &more1, &z2 );
542         add128( z0, z1, 0, more1, &z0, &z1 );
543         *z3Ptr = z3;
544         *z2Ptr = z2;
545         *z1Ptr = z1;
546         *z0Ptr = z0;
547     
548     }
549     
550     /*
551     -------------------------------------------------------------------------------
552     Returns an approximation to the 64-bit integer quotient obtained by dividing
553     `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
554     divisor `b' must be at least 2^63.  If q is the exact quotient truncated
555     toward zero, the approximation returned lies between q and q + 2 inclusive.
556     If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
557     unsigned integer is returned.
558     -------------------------------------------------------------------------------
559     */
560     static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
561     {
562         bits64 b0, b1;
563         bits64 rem0, rem1, term0, term1;
564         bits64 z;
565         if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
566         b0 = b>>32;
567         z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
568         mul64To128( b, z, &term0, &term1 );
569         sub128( a0, a1, term0, term1, &rem0, &rem1 );
570         while ( ( (sbits64) rem0 ) < 0 ) {
571             z -= LIT64( 0x100000000 );
572             b1 = b<<32;
573             add128( rem0, rem1, b0, b1, &rem0, &rem1 );
574         }
575         rem0 = ( rem0<<32 ) | ( rem1>>32 );
576         z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
577         return z;
578     
579     }
580     
581     /*
582     -------------------------------------------------------------------------------
583     Returns an approximation to the square root of the 32-bit significand given
584     by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
585     `aExp' (the least significant bit) is 1, the integer returned approximates
586     2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
587     is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
588     case, the approximation returned lies strictly within +/-2 of the exact
589     value.
590     -------------------------------------------------------------------------------
591     */
592     static bits32 estimateSqrt32( int16 aExp, bits32 a )
593     {
594         static const bits16 sqrtOddAdjustments[] = {
595             0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
596             0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
597         };
598         static const bits16 sqrtEvenAdjustments[] = {
599             0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
600             0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
601         };
602         int8 index;
603         bits32 z;
604     
605         index = ( a>>27 ) & 15;
606         if ( aExp & 1 ) {
607             z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
608             z = ( ( a / z )<<14 ) + ( z<<15 );
609             a >>= 1;
610         }
611         else {
612             z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
613             z = a / z + z;
614             z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
615             if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
616         }
617         return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
618     
619     }
620     
621     /*
622     -------------------------------------------------------------------------------
623     Returns the number of leading 0 bits before the most-significant 1 bit
624     of `a'.  If `a' is zero, 32 is returned.
625     -------------------------------------------------------------------------------
626     */
627     static int8 countLeadingZeros32( bits32 a )
628     {
629         static const int8 countLeadingZerosHigh[] = {
630             8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
631             3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
632             2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
633             2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
634             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
635             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
636             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
637             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
638             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
639             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
640             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
641             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
642             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
643             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
644             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
645             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
646         };
647         int8 shiftCount;
648     
649         shiftCount = 0;
650         if ( a < 0x10000 ) {
651             shiftCount += 16;
652             a <<= 16;
653         }
654         if ( a < 0x1000000 ) {
655             shiftCount += 8;
656             a <<= 8;
657         }
658         shiftCount += countLeadingZerosHigh[ a>>24 ];
659         return shiftCount;
660     
661     }
662     
663     /*
664     -------------------------------------------------------------------------------
665     Returns the number of leading 0 bits before the most-significant 1 bit
666     of `a'.  If `a' is zero, 64 is returned.
667     -------------------------------------------------------------------------------
668     */
669     static int8 countLeadingZeros64( bits64 a )
670     {
671         int8 shiftCount;
672     
673         shiftCount = 0;
674         if ( a < ( (bits64) 1 )<<32 ) {
675             shiftCount += 32;
676         }
677         else {
678             a >>= 32;
679         }
680         shiftCount += countLeadingZeros32( a );
681         return shiftCount;
682     
683     }
684     
685     /*
686     -------------------------------------------------------------------------------
687     Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
688     is equal to the 128-bit value formed by concatenating `b0' and `b1'.
689     Otherwise, returns 0.
690     -------------------------------------------------------------------------------
691     */
692     INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
693     {
694     
695         return ( a0 == b0 ) && ( a1 == b1 );
696     
697     }
698     
699     /*
700     -------------------------------------------------------------------------------
701     Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
702     than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
703     Otherwise, returns 0.
704     -------------------------------------------------------------------------------
705     */
706     INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
707     {
708     
709         return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
710     
711     }
712     
713     /*
714     -------------------------------------------------------------------------------
715     Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
716     than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
717     returns 0.
718     -------------------------------------------------------------------------------
719     */
720     INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
721     {
722     
723         return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
724     
725     }
726     
727     /*
728     -------------------------------------------------------------------------------
729     Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
730     not equal to the 128-bit value formed by concatenating `b0' and `b1'.
731     Otherwise, returns 0.
732     -------------------------------------------------------------------------------
733     */
734     INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
735     {
736     
737         return ( a0 != b0 ) || ( a1 != b1 );
738     
739     }
740     
741