-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfloat.h
120 lines (97 loc) · 3.26 KB
/
float.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#pragma once
#include <cmath>
#include "numformat.h"
// Bit layout of an IEEE 754 double-precision value.
#define MANT_DIG 52  // explicit mantissa (fraction) bits
#define EXPO_DIG 11  // exponent bits
#define SIGN_DIG 1   // sign bits
// Added to a stored (biased) exponent to obtain the unbiased exponent.
// Parenthesized so the macro is safe inside any surrounding expression.
#define OFFSET (-1023)
// Maps a sign bit to a multiplier: 0 -> +1, 1 -> -1.
// The argument is parenthesized so that expressions such as SIGN(a + b)
// expand correctly.
#define SIGN(sign) (1 - 2 * (sign))
// Overlay that exposes the sign / exponent / mantissa fields of a double.
//
// All three bit-fields share one 64-bit underlying type:
//  - `unsigned long` is only 32 bits wide on LLP64 targets, so it cannot hold
//    a 52-bit mantissa there;
//  - some ABIs start a new storage unit whenever the underlying type of
//    consecutive bit-fields changes, which would stop the fields from
//    overlaying the double.
//
// NOTE(review): the field order (mantissa first, sign last) assumes the
// compiler allocates bit-fields starting at the least significant bit, as on
// little-endian targets - confirm for any other platform.
typedef union {
    double f;
    struct {
        unsigned long long mant : MANT_DIG;
        unsigned long long expo : EXPO_DIG;
        unsigned long long sign : SIGN_DIG;
    } structure;
} uFloat;
// Software model of a reduced-precision binary floating-point format whose
// values are stored in an ordinary double.
//
// Template parameters:
//   TDebug    - when true the constructor keeps the unreduced double and
//               print() additionally shows the reduced value.
//   TMant_dig - number of explicit mantissa bits that are kept; MANT_DIG or
//               more keeps the full double mantissa.
//   TExpo_dig - number of exponent bits; fixes the exponent range reported by
//               getMinExponent() / getMaxExponent(). Must be at least 1.
template< bool TDebug, unsigned TMant_dig, unsigned TExpo_dig>
class FloatBase : public NumFormat<double>
{
public:
    // Allows declaration without initialization. The stored value is whatever
    // the NumFormat default constructor sets (zero according to the original
    // note; NumFormat is not visible from this file).
    FloatBase() : NumFormat() {}

    // Stores `value` reduced to this format, or unreduced in debug mode.
    // Intentionally not explicit: the cast operator below relies on this
    // implicit conversion from double.
    FloatBase(double value) : NumFormat(value) {
        _value = TDebug ? value : convertTo(value);
    }

    // Largest unbiased exponent of the format: 2^(TExpo_dig-1) - 1.
    inline static int getMaxExponent() {
        return (1 << (TExpo_dig - 1)) - 1;
    }

    // Smallest unbiased exponent of the format: -(2^(TExpo_dig-1) - 2).
    inline static int getMinExponent() {
        return -((1 << (TExpo_dig - 1)) - 2);
    }

    // Returns 2^getMinExponent(), the magnitude convertTo() clamps
    // underflowing values to.
    // Fix: the original passed getMinValue() to pow() and therefore recursed
    // without end.
    static double getMinValue() {
        return std::ldexp(1.0, getMinExponent());
    }

    // Reduces `value` to the precision and range of this format.
    //   - NaN is returned unchanged.
    //   - Signed zero is returned as signed zero.
    //   - An exponent above getMaxExponent() yields signed infinity.
    //   - An exponent below getMinExponent() (this includes double
    //     subnormals) yields +/- 2^getMinExponent().
    //   - Otherwise the low mantissa bits beyond TMant_dig are cleared
    //     (truncation, no rounding).
    double convertTo(double value) const {
        // NaN has an all-ones exponent field; without this guard it would be
        // classified as overflow and silently become infinity.
        if (std::isnan(value)) {
            return value;
        }

        uFloat helper;
        helper.f = value;

        // Explicit casts keep the arithmetic in int regardless of the
        // underlying type of the bit-fields.
        const int sign = SIGN(static_cast<int>(helper.structure.sign));
        const int exponent = static_cast<int>(helper.structure.expo) + OFFSET;

        if (helper.structure.expo == 0 && helper.structure.mant == 0) {
            return sign * 0.0;
        }
        if (exponent > getMaxExponent()) {
            return sign * INFINITY;
        }
        if (exponent < getMinExponent()) {
            // Fix: keep the sign; the original returned the positive minimum
            // even for negative inputs.
            return sign * std::ldexp(1.0, getMinExponent());
        }

        // Keep the TMant_dig most significant mantissa bits. The mask is built
        // in a 64-bit type and the kept width is capped at MANT_DIG so the
        // shift count always stays within [0, MANT_DIG].
        const unsigned fullBits = MANT_DIG;
        const unsigned keptBits = TMant_dig < fullBits ? TMant_dig : fullBits;
        const unsigned long long keepMask =
            ~((1ULL << (fullBits - keptBits)) - 1ULL);
        helper.structure.mant &= keepMask;
        return helper.f;
    }

    // Writes the stored value surrounded by spaces. In debug mode the reduced
    // value and the stored value, both narrowed to float, are appended.
    virtual void print(std::ostream& out) const {
        out << " " << _value << " ";
        if (TDebug)
        {
            const float value = convertTo(_value);
            out << " " << value << " " << static_cast<float>(_value) << " ";
        }
    }

    // Unary minus: returns the negated value, reduced to this format.
    // Fix: the operand is taken by value, so the caller's object is no longer
    // modified and temporaries are accepted as well.
    friend FloatBase operator-(FloatBase negate) {
        negate.setValue(negate.convertTo(-negate.getValue()));
        return negate;
    }

    // CASTING
    // Hands the stored double to the other format; that format's double
    // constructor then performs the reduction to its own precision.
    template< bool TDebug2, unsigned TMant_dig2, unsigned TExpo_dig2>
    operator FloatBase<TDebug2, TMant_dig2, TExpo_dig2>() {
        return _value;
    }
};