本文共 5697 字,大约阅读时间需要 18 分钟。
$V = 2^{E} \times M$
$bias = 2^{k-1} - 1$

description | binary | decimal |
---|---|---|
smallest positive denormalized | 0 0000…(15) 0 000…(62)1 | $2^{1-(2^{15-1}-1)-63}$ |
smallest positive normalized | 0 000…(14)1 1 000…(63) | $2^{1-(2^{15-1}-1)}$ |
largest normalized | 0 111…(14)0 1 111…(63) | $2^{2^{15-1}-1} \times (2-2^{-63})$ |
Desc | Hex | $M$ | $E$ | $V$ | $D$ |
---|---|---|---|---|---|
-0 | 0x8000 | 0 | -14 | -0 | -0.0 |
smallest value greater than 2 | 0x4001 | $\frac{1025}{1024}$ | 1 | $\frac{1025}{512}$ | 2.00195312 |
512 | 0x6000 | 1 | 9 | 512 | 512.0 |
largest denormalized | 0x03FF | $\frac{1023}{1024}$ | -14 | $\frac{1023}{2^{24}}$ | 6.09755516e-5 |
$-\infty$ | 0xFC00 | - | - | $-\infty$ | $-\infty$ |
0x3BB0 | 0x3BB0 | $\frac{123}{64}$ | -1 | $\frac{123}{128}$ | 0.9609375 |
A bits | A value | B bits | B value |
---|---|---|---|
1 01110 001 | $-\frac{9}{16}$ | 1 0110 0010 | $-\frac{9}{16}$ |
0 10110 101 | $13 \times 2^{4}$ | 0 1110 1010 | $13 \times 2^{4}$ |
1 00111 110 | $-\frac{7}{2^{10}}$ | 1 0000 0111 | $-\frac{7}{2^{10}}$ |
0 00000 101 | $\frac{5}{2^{11}}$ | 0 0000 0001 | $\frac{1}{2^{10}}$ |
1 11011 000 | $-2^{12}$ | 1 1110 1111 | $-31 \times 2^{3}$ |
0 11000 100 | $3 \times 2^{8}$ | 0 1111 0000 | $+\infty$ |
/*
 * NOTE(review): the two header names after `#include` were lost in the
 * original text; <stdio.h> and <math.h> are presumed. <string.h> is needed
 * for memcpy below.
 */
#include <math.h>
#include <stdio.h>
#include <string.h>

/*
 * Reinterpret the bit pattern of x as a float.
 *
 * The bytes are copied with memcpy instead of dereferencing a casted
 * pointer, because reading an unsigned object through a float lvalue
 * violates the strict-aliasing rule.
 * Assumes sizeof(float) == sizeof(unsigned).
 */
float u2f(unsigned x)
{
    float f;

    memcpy(&f, &x, sizeof f);
    return f;
}

/*
 * Return 2^x as a single-precision value, built directly from the bit
 * fields (8-bit exponent, 23-bit fraction, bias 127).
 *
 *   x < -149          -> 0.0  (too small even for a denormalized value)
 *   -149 <= x < -126  -> denormalized, a single fraction bit set
 *   -126 <= x <= 127  -> normalized, fraction 0
 *   x > 127           -> +infinity
 */
float fpwr2(int x)
{
    enum {
        FRAC_BITS = 23,
        BIAS = 127,
        MIN_NORM_EXP = 1 - BIAS,                   /* -126 */
        MIN_DENORM_EXP = MIN_NORM_EXP - FRAC_BITS, /* -149 */
        MAX_EXP = BIAS                             /*  127 */
    };
    unsigned exp;
    unsigned frac;

    if (x < MIN_DENORM_EXP) {
        exp = 0;
        frac = 0;
    } else if (x < MIN_NORM_EXP) {
        /* Shift count is in [0, 22], so the bit stays inside the fraction. */
        exp = 0;
        frac = 1u << (x - MIN_DENORM_EXP);
    } else if (x <= MAX_EXP) {
        exp = (unsigned)(x + BIAS);
        frac = 0;
    } else {
        exp = 0xFF;
        frac = 0;
    }

    return u2f(exp << FRAC_BITS | frac);
}
#include <stdio.h>

/* Raw bit pattern of a single-precision float. */
typedef unsigned float_bits;

/*
 * Compute -f on the bit-level representation.
 *
 * A NaN (exponent all ones, non-zero fraction) is returned unchanged;
 * every other value has its sign bit flipped.
 */
float_bits float_negate(float_bits f)
{
    unsigned exp = (f >> 23) & 0xFF;
    unsigned frac = f & 0x7FFFFF;

    if (exp == 0xFF && frac != 0) {
        return f;
    }
    /* Only the sign bit changes; exponent and fraction are kept as-is. */
    return f ^ 0x80000000u;
}

int main(void)
{
    printf("%x\n", float_negate(114514));
    return 0;
}
#include <stdio.h>

/* Raw bit pattern of a single-precision float. */
typedef unsigned float_bits;

/*
 * Compute |f| on the bit-level representation.
 *
 * A NaN (exponent all ones, non-zero fraction) is returned unchanged;
 * every other value has its sign bit cleared.
 */
float_bits float_absval(float_bits f)
{
    unsigned exp = (f >> 23) & 0xFF;
    unsigned frac = f & 0x7FFFFF;

    if (exp == 0xFF && frac != 0) {
        return f;
    }
    return exp << 23 | frac;
}

int main(void)
{
    /* The negative int literal is converted to unsigned by the call. */
    printf("%u\n", float_absval(-1919810));
    return 0;
}
#include <stdio.h>

/* Raw bit pattern of a single-precision float. */
typedef unsigned float_bits;

/*
 * Compute 2.0 * f on the bit-level representation.
 *
 * NaN and infinity (exponent all ones) are returned unchanged.
 */
float_bits float_twice(float_bits f)
{
    unsigned sign = f & 0x80000000u;
    unsigned exp = (f >> 23) & 0xFF;
    unsigned frac = f & 0x7FFFFF;

    if (exp == 0xFF) {
        return f;
    }

    if (exp == 0) {
        /*
         * Denormalized: double the fraction. A carry out of bit 22 lands
         * in bit 23, which is the lowest exponent bit, so the value
         * becomes the normalized number with exponent field 1.
         */
        frac <<= 1;
    } else if (exp == 0xFE) {
        /* Largest finite exponent: doubling overflows to infinity. */
        exp = 0xFF;
        frac = 0;
    } else {
        exp += 1;
    }

    return sign | (exp << 23) | frac;
}

int main(void)
{
    /* The result is a bit pattern (unsigned), so print it as hex. */
    printf("%x\n", float_twice(0x11451419));
    return 0;
}
#include <stdio.h>

/* Raw bit pattern of a single-precision float. */
typedef unsigned float_bits;

/*
 * Compute 0.5 * f on the bit-level representation, rounding to even.
 *
 * NaN and infinity (exponent all ones) are returned unchanged.
 */
float_bits float_half(float_bits f)
{
    unsigned sign = f & 0x80000000u;
    unsigned rest = f & 0x7FFFFFFFu; /* exponent and fraction, no sign */
    unsigned exp = rest >> 23;

    if (exp == 0xFF) {
        return f;
    }

    if (exp <= 1) {
        /*
         * Denormalized input, or the smallest normalized exponent whose
         * half is denormalized: shift exponent+fraction right by one.
         * The dropped bit is exactly a half; round up only when the kept
         * low bit is odd, i.e. when the two low bits are 11.
         */
        unsigned round_up = (rest & 0x3) == 0x3;

        rest = (rest >> 1) + round_up;
    } else {
        /* Normalized result: just decrement the exponent field. */
        rest -= 1u << 23;
    }

    return sign | rest;
}

int main(void)
{
    /*
     * The double literal is truncated to the integer 114514 when converted
     * to float_bits; the result is a bit pattern, so print it as hex.
     */
    printf("%x\n", float_half((float_bits)114514.1919810));
    return 0;
}
#include <limits.h>
#include <stdio.h>

/* Raw bit pattern of a single-precision float. */
typedef unsigned float_bits;

/*
 * Compute (int) f on the bit-level representation, truncating toward zero.
 *
 * Returns 0 when |f| < 1, and INT_MIN (0x80000000) when the value is NaN,
 * infinite, or its magnitude is at least 2^31.
 */
int float_f2i(float_bits f)
{
    const unsigned bias = 0x7F;
    unsigned sign = f >> 31;
    unsigned exp = (f >> 23) & 0xFF;
    unsigned frac = f & 0x7FFFFF;
    unsigned shift;     /* unbiased exponent, 0..30 */
    unsigned mantissa;  /* fraction with the implicit leading 1 */
    unsigned magnitude;

    if (exp < bias) {
        return 0;
    }
    if (exp >= bias + 31) {
        /* Returned directly: negating INT_MIN would overflow. */
        return INT_MIN;
    }

    shift = exp - bias;
    mantissa = frac | 0x800000;
    if (shift > 23) {
        magnitude = mantissa << (shift - 23);
    } else {
        magnitude = mantissa >> (23 - shift);
    }

    /* magnitude < 2^31 here, so both conversions are in range. */
    return sign ? -(int)magnitude : (int)magnitude;
}

int main(void)
{
    printf("%d\n", float_f2i(0xffaaffaa));
    return 0;
}
#include <limits.h>
#include <stdio.h>

/* Raw bit pattern of a single-precision float. */
typedef unsigned float_bits;

/*
 * Number of bits needed to represent i when viewed as unsigned:
 * the position of the highest set bit plus one (0 for i == 0,
 * 32 when the sign bit is set).
 */
int bits_length(int i)
{
    unsigned u = (unsigned)i;
    int length = 0;

    while (u != 0) {
        u >>= 1;
        length++;
    }
    return length;
}

/*
 * Mask with the low l bits set. l <= 0 yields 0 and l >= 32 yields all
 * ones, so no shift count ever reaches the width of unsigned.
 */
unsigned bits_mask(int l)
{
    if (l <= 0) {
        return 0;
    }
    if (l >= 32) {
        return ~0u;
    }
    return (1u << l) - 1;
}

/*
 * Compute (float) i on the bit-level representation, rounding to even
 * when i has more than 24 significant bits.
 */
float_bits float_i2f(int i)
{
    const unsigned bias = 0x7F;
    unsigned sign = 0;
    unsigned exp;
    unsigned frac;
    unsigned rest;     /* bits below the leading 1 */
    unsigned exp_frac; /* exponent and fraction fields combined */
    unsigned fbits;    /* number of bits below the leading 1 */

    if (i == 0) {
        return 0;
    }
    if (i == INT_MIN) {
        /* -2^31 cannot be negated in int; encode it directly. */
        return 1u << 31 | (bias + 31) << 23;
    }
    if (i < 0) {
        sign = 1;
        i = -i;
    }

    fbits = (unsigned)bits_length(i) - 1;
    exp = bias + fbits;
    rest = (unsigned)i & bits_mask((int)fbits);

    if (fbits <= 23) {
        /* Everything fits in the fraction: left-align it. */
        frac = rest << (23 - fbits);
        exp_frac = exp << 23 | frac;
    } else {
        unsigned offset = fbits - 23;            /* bits to drop, 1..7 */
        unsigned round_mid = 1u << (offset - 1); /* exactly one half */
        unsigned round_part = rest & bits_mask((int)offset);

        frac = rest >> offset;
        exp_frac = exp << 23 | frac;

        /*
         * Round to nearest, ties to even. Adding 1 to the combined
         * field lets a fraction overflow carry into the exponent.
         */
        if (round_part > round_mid) {
            exp_frac += 1;
        } else if (round_part == round_mid && (frac & 0x1) == 1) {
            exp_frac += 1;
        }
    }

    return sign << 31 | exp_frac;
}

int main(void)
{
    /*
     * The double literal is truncated to 114 when converted to int;
     * the result is a bit pattern, so print it as hex.
     */
    printf("%x\n", float_i2f((int)114.514));
    return 0;
}
转载地址:http://jywai.baihongyu.com/