Float & Double

Double Precision Format

double.png
Exponent Equation
0x000 (−1)^signbit × 2^−1022 × 0.significandbits
0x001, …, 0x7FE (−1)^signbit × 2^(exponentbits−1023) × 1.significandbits
0x7FF Reserved for special values: ±infinity (significand bits all zero) or NaN (any significand bit set)

Example:
3ff0 0000 0000 0000 = 1 Note: every positive value of the form 1.xxx (i.e. in the range [1, 2)) starts with 3ff… in hexadecimal
3ff0 0000 0000 0001 = 1.0000000000000002, the next higher number > 1
3ff0 0000 0000 0002 = 1.0000000000000004
4000 0000 0000 0000 = 2
c000 0000 0000 0000 = −2

int main()
{
    /* Holds 1.1 rounded to single precision. */
    float a = 1.1f;

    /*
     * The unsuffixed literal 1.1 has type double, so `a` is converted to
     * double before the comparison; the message is chosen from the result.
     */
    const char *verdict = (a == 1.1) ? "a is equal to 1.1"
                                     : "a is not equal to 1.1";

    printf("%s", verdict);
    return 0;
}

Ans:
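Output: "a is not equal to 1.1". The value 1.1 cannot be represented exactly in binary floating point. The constant 1.1 is stored in double precision format (the default), while a holds 1.1 rounded to float precision; when a is promoted to double for the comparison, the two values differ in their low-order bits.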

Print It Out

/*
 * Print the low `width` bits of `bits`, most significant bit first, laid out
 * as an IEEE 754 value: "S EEE...E (h).FFF...F" followed by a newline.
 *
 *   bits      raw bit pattern of the floating-point value
 *   width     total number of bits in the format (64 for double, 32 for float)
 *   exp_bits  number of exponent bits (11 for double, 8 for float)
 *
 * (h) is the implied leading significand bit: 0 when the exponent field is
 * all zeros (zero and subnormal values), 1 otherwise. For an all-ones
 * exponent (infinity/NaN) the printed 1 carries no numeric meaning.
 */
static void print_fields(unsigned long long bits, int width, int exp_bits)
{
    int frac_bits = width - 1 - exp_bits;
    unsigned long long exp_mask = (1ULL << exp_bits) - 1;
    int hidden = ((bits >> frac_bits) & exp_mask) != 0;
    int i;

    for (i = 0; i < width; i++) {
        /* separate sign | exponent | significand */
        if (i == 1 || i == 1 + exp_bits)
            printf(" ");
        if (i == 1 + exp_bits)
            printf("(%d).", hidden);
        /* unsigned shift + mask: no reliance on the sign bit, no signed-shift UB */
        printf("%d", (int)((bits >> (width - 1 - i)) & 1ULL));
    }
    printf("\n");
}

/*
 * Dump the bit patterns of -1.25 as a double and as a float.
 *
 * The values are reinterpreted through unions rather than pointer casts, so
 * no object is accessed through an incompatible pointer type. This assumes
 * unsigned long long is 64 bits wide like double and unsigned int is 32 bits
 * wide like float (a plain `long` is 64 bits on LP64 systems and must not be
 * used for the float).
 */
int main()
{
    union { double value; unsigned long long bits; } d = { -1.25 };
    union { float value; unsigned int bits; } f = { -1.25f };

    printf("-1.25 double:   ");
    print_fields(d.bits, 64, 11);

    printf("-1.25f float:   ");
    print_fields(f.bits, 32, 8);

    return 0;
}

Output:
-1.25 double:   1 01111111111 (1).0100000000000000000000000000000000000000000000000000
-1.25f float:   1 01111111 (1).01000000000000000000000

Unless otherwise stated, the content of this page is licensed under Creative Commons Attribution-ShareAlike 3.0 License