Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
228 changes: 228 additions & 0 deletions real48.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
#include "real48.hpp"
#include <limits>
#include <cstring>
#include <stdexcept>

namespace math
{

static inline void pack_bytes(uint8_t out[6], uint64_t word) {
out[0] = static_cast<uint8_t>(word & 0xFF);
out[1] = static_cast<uint8_t>((word >> 8) & 0xFF);
out[2] = static_cast<uint8_t>((word >> 16) & 0xFF);
out[3] = static_cast<uint8_t>((word >> 24) & 0xFF);
out[4] = static_cast<uint8_t>((word >> 32) & 0xFF);
out[5] = static_cast<uint8_t>((word >> 40) & 0xFF);
}

Real48::Real48(const float number)
{
if (std::isnan(number) || std::isinf(number)) {
throw std::overflow_error("Real48: cannot represent NaN or infinity");
}

uint32_t bits = 0;
static_assert(sizeof(bits) == sizeof(number));
std::memcpy(&bits, &number, sizeof(bits));
bool sign = (bits >> 31) != 0;
uint32_t exp_f = (bits >> 23) & 0xFF;
uint32_t mant_f = bits & ((1u << 23) - 1);

if (exp_f == 0) {
std::memset(_b, 0, 6);
return;
}

int32_t e48 = static_cast<int32_t>(exp_f) + 2;

if (e48 <= 0) {
throw std::overflow_error("Real48: too small (underflow) converting from float");
}
if (e48 > 255) {
throw std::overflow_error("Real48: exponent overflow converting from float");
}

uint64_t f48 = uint64_t(mant_f) << (39 - 23);

uint64_t word = 0;
word |= (uint64_t)(e48 & 0xFF);
word |= (f48 & MASK39) << 8;
word |= (uint64_t)(sign ? 1ull : 0ull) << 47;
pack_bytes(_b, word);
}

Real48::Real48(const double number) {
if (std::isnan(number) || std::isinf(number)) {
throw std::overflow_error("Real48: cannot represent NaN or infinity");
}

uint64_t bits = 0;
static_assert(sizeof(bits) == sizeof(number));
std::memcpy(&bits, &number, sizeof(bits));
bool sign = (bits >> 63) != 0;
uint32_t exp_d = static_cast<uint32_t>((bits >> 52) & 0x7FF);
uint64_t mant_d = bits & ((uint64_t(1) << 52) - 1);

if (exp_d == 0) {
std::memset(_b, 0, 6);
return;
}

int32_t e48 = static_cast<int32_t>(exp_d) - 894;

if (e48 <= 0) {
throw std::overflow_error("Real48: too small (underflow) converting from double");
}
if (e48 > 255) {
throw std::overflow_error("Real48: exponent overflow converting from double");
}

const int shift = 52 - 39;
uint64_t add = uint64_t(1) << (shift - 1);
uint64_t f48 = (mant_d + add) >> shift;

if (f48 == (uint64_t(1) << 39)) {
f48 = 0;
e48 += 1;
if (e48 > 255) {
throw std::overflow_error("Real48: exponent overflow after rounding");
}
}

uint64_t word = 0;
word |= (uint64_t)(e48 & 0xFF);
word |= (f48 & MASK39) << 8;
word |= (uint64_t)(sign ? 1ull : 0ull) << 47;
pack_bytes(_b, word);
}

Real48::operator double() const noexcept {
bool sign; uint32_t e48; uint64_t f48;
unpack_parts(sign, e48, f48);
if (e48 == 0) {
return 0.0;
}

uint64_t exp_d = static_cast<uint64_t>(e48) - 129 + 1023;
uint64_t mant_d = (f48 & MASK39) << (52 - 39);


uint64_t bits = 0;
bits |= (uint64_t)(sign ? 1ull : 0ull) << 63;
bits |= (exp_d & 0x7FFull) << 52;
bits |= mant_d & ((uint64_t(1) << 52) - 1);

double out;
std::memcpy(&out, &bits, sizeof(out));
return out;
}

Real48::operator float() const {
bool sign; uint32_t e48; uint64_t f48;
unpack_parts(sign, e48, f48);
if (e48 == 0) {
return 0.0f;
}

int32_t exp_f = static_cast<int32_t>(e48) - 2;
if (exp_f <= 0) {
throw std::overflow_error("Real48: value too small for float (would be denormal/zero)");
}
if (exp_f >= 255) {
throw std::overflow_error("Real48: exponent overflow converting to float");
}

const int shift = 39 - 23;
uint64_t add = uint64_t(1) << (shift - 1);
uint32_t mant_f = static_cast<uint32_t>((f48 + add) >> shift);

if (mant_f == (1u << 23)) {
mant_f = 0;
++exp_f;
if (exp_f >= 255) {
throw std::overflow_error("Real48: exponent overflow converting to float after rounding");
}
}

uint32_t bits = 0;
bits |= (sign ? 1u : 0u) << 31;
bits |= (static_cast<uint32_t>(exp_f) & 0xFFu) << 23;
bits |= mant_f & ((1u << 23) - 1);

float out;
std::memcpy(&out, &bits, sizeof(out));
return out;
}

Real48& Real48::operator+=(const Real48& b) {
double a_d = static_cast<double>(*this);
double b_d = static_cast<double>(b);
double res = a_d + b_d;
Real48 r(res);
*this = r;
return *this;
}
Real48& Real48::operator-=(const Real48& b) {
double a_d = static_cast<double>(*this);
double b_d = static_cast<double>(b);
double res = a_d - b_d;
Real48 r(res);
*this = r;
return *this;
}
Real48& Real48::operator*=(const Real48& b) {
double a_d = static_cast<double>(*this);
double b_d = static_cast<double>(b);
double res = a_d * b_d;
Real48 r(res);
*this = r;
return *this;
}
Real48& Real48::operator/=(const Real48& b) {
double a_d = static_cast<double>(*this);
double b_d = static_cast<double>(b);
double res = a_d / b_d;
Real48 r(res);
*this = r;
return *this;
}

Real48 Real48::operator+() const noexcept {
return *this;
}

Real48 Real48::operator-() const noexcept {
Real48 r = *this;
r._b[5] ^= 0x80;
return r;
}

Real48 Real48::operator+(const Real48& o) const {
Real48 r = *this;
r += o;
return r;
}
Real48 Real48::operator-(const Real48& o) const {
Real48 r = *this;
r -= o;
return r;
}
Real48 Real48::operator*(const Real48& o) const {
Real48 r = *this;
r *= o;
return r;
}
Real48 Real48::operator/(const Real48& o) const {
Real48 r = *this;
r /= o;
return r;
}

bool Real48::operator>(const Real48& o) const noexcept {
return static_cast<double>(*this) > static_cast<double>(o);
}
bool Real48::operator<(const Real48& o) const noexcept {
return static_cast<double>(*this) < static_cast<double>(o);
}

} // namespace math
73 changes: 68 additions & 5 deletions real48.hpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
#include <cstdint>
#include <cstring>
#include <cmath>
#include <stdexcept>

namespace math
{

class Real48
{
public:
// constructors
constexpr Real48(); // TODO: add definition
constexpr Real48();
Real48(const float number);
Real48(const double number);
constexpr Real48(const Real48& o) = default;
Expand Down Expand Up @@ -42,12 +47,70 @@ class Real48
Class Classify() const noexcept;

// limits
consteval static Real48 min(); // TODO: add definition
consteval static Real48 max(); // TODO: add definition
consteval static Real48 epsilon(); // TODO: add definition
static constexpr Real48 min();
static constexpr Real48 max();
static constexpr Real48 epsilon();

private:
// TODO: add members
uint8_t _b[6];

static constexpr uint64_t MASK39 = (uint64_t(1) << 39) - 1;

constexpr explicit Real48(const uint8_t bytes[6]);
static constexpr Real48 fromParts(bool sign, uint32_t exp, uint64_t mantissa);

void unpack_parts(bool &sign, uint32_t &exp, uint64_t &mantissa) const noexcept;
};

constexpr Real48::Real48() : _b{0,0,0,0,0,0} {}

constexpr Real48::Real48(const uint8_t bytes[6]) : _b{bytes[0],bytes[1],bytes[2],bytes[3],bytes[4],bytes[5]} {}

constexpr Real48 Real48::fromParts(bool sign, uint32_t exp, uint64_t mantissa) {
uint64_t word = 0;
word |= (uint64_t)(exp & 0xFF);
word |= (mantissa & MASK39) << 8;
word |= (uint64_t)(sign ? 1ull : 0ull) << 47;
uint8_t bytes[6] = {
static_cast<uint8_t>(word & 0xFF),
static_cast<uint8_t>((word >> 8) & 0xFF),
static_cast<uint8_t>((word >> 16) & 0xFF),
static_cast<uint8_t>((word >> 24) & 0xFF),
static_cast<uint8_t>((word >> 32) & 0xFF),
static_cast<uint8_t>((word >> 40) & 0xFF)
};
return Real48(bytes);
}

constexpr Real48 Real48::min() {
return fromParts(false, 1u, 0u);
}

constexpr Real48 Real48::max() {
return fromParts(false, 255u, MASK39);
}

constexpr Real48 Real48::epsilon() {
return fromParts(false, 90u, 0u);
}

inline void Real48::unpack_parts(bool &sign, uint32_t &exp, uint64_t &mantissa) const noexcept {
uint64_t word = 0;
word |= uint64_t(_b[0]);
word |= uint64_t(_b[1]) << 8;
word |= uint64_t(_b[2]) << 16;
word |= uint64_t(_b[3]) << 24;
word |= uint64_t(_b[4]) << 32;
word |= uint64_t(_b[5]) << 40;
exp = static_cast<uint32_t>(word & 0xFF);
mantissa = (word >> 8) & MASK39;
sign = ((word >> 47) & 0x1) != 0;
}

inline Real48::Class Real48::Classify() const noexcept {
bool s; uint32_t e; uint64_t m;
unpack_parts(s,e,m);
return (e == 0) ? Class::ZERO : Class::NORMAL;
}

} // namespace math