457 lines
18 KiB
HTML
457 lines
18 KiB
HTML
|
<!DOCTYPE html>
|
||
|
<html><head>
|
||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
|
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
|
||
|
<link href="sqlite.css" rel="stylesheet">
|
||
|
<title>Floating Pointer Numbers</title>
|
||
|
<!-- path= -->
|
||
|
</head>
|
||
|
<body>
|
||
|
<div class=nosearch>
|
||
|
<a href="index.html">
|
||
|
<img class="logo" src="images/sqlite370_banner.gif" alt="SQLite" border="0">
|
||
|
</a>
|
||
|
<div><!-- IE hack to prevent disappearing logo --></div>
|
||
|
<div class="tagline desktoponly">
|
||
|
Small. Fast. Reliable.<br>Choose any three.
|
||
|
</div>
|
||
|
<div class="menu mainmenu">
|
||
|
<ul>
|
||
|
<li><a href="index.html">Home</a>
|
||
|
<li class='mobileonly'><a href="javascript:void(0)" onclick='toggle_div("submenu")'>Menu</a>
|
||
|
<li class='wideonly'><a href='about.html'>About</a>
|
||
|
<li class='desktoponly'><a href="docs.html">Documentation</a>
|
||
|
<li class='desktoponly'><a href="download.html">Download</a>
|
||
|
<li class='wideonly'><a href='copyright.html'>License</a>
|
||
|
<li class='desktoponly'><a href="support.html">Support</a>
|
||
|
<li class='desktoponly'><a href="prosupport.html">Purchase</a>
|
||
|
<li class='search' id='search_menubutton'>
|
||
|
<a href="javascript:void(0)" onclick='toggle_search()'>Search</a>
|
||
|
</ul>
|
||
|
</div>
|
||
|
<div class="menu submenu" id="submenu">
|
||
|
<ul>
|
||
|
<li><a href='about.html'>About</a>
|
||
|
<li><a href='docs.html'>Documentation</a>
|
||
|
<li><a href='download.html'>Download</a>
|
||
|
<li><a href='support.html'>Support</a>
|
||
|
<li><a href='prosupport.html'>Purchase</a>
|
||
|
</ul>
|
||
|
</div>
|
||
|
<div class="searchmenu" id="searchmenu">
|
||
|
<form method="GET" action="search">
|
||
|
<select name="s" id="searchtype">
|
||
|
<option value="d">Search Documentation</option>
|
||
|
<option value="c">Search Changelog</option>
|
||
|
</select>
|
||
|
<input type="text" name="q" id="searchbox" value="">
|
||
|
<input type="submit" value="Go">
|
||
|
</form>
|
||
|
</div>
|
||
|
</div>
|
||
|
<script>
|
||
|
function toggle_div(nm) {
|
||
|
var w = document.getElementById(nm);
|
||
|
if( w.style.display=="block" ){
|
||
|
w.style.display = "none";
|
||
|
}else{
|
||
|
w.style.display = "block";
|
||
|
}
|
||
|
}
|
||
|
function toggle_search() {
|
||
|
var w = document.getElementById("searchmenu");
|
||
|
if( w.style.display=="block" ){
|
||
|
w.style.display = "none";
|
||
|
} else {
|
||
|
w.style.display = "block";
|
||
|
setTimeout(function(){
|
||
|
document.getElementById("searchbox").focus()
|
||
|
}, 30);
|
||
|
}
|
||
|
}
|
||
|
function div_off(nm){document.getElementById(nm).style.display="none";}
|
||
|
window.onbeforeunload = function(e){div_off("submenu");}
|
||
|
/* Disable the Search feature if we are not operating from CGI, since */
|
||
|
/* Search is accomplished using CGI and will not work without it. */
|
||
|
if( !location.origin || !location.origin.match || !location.origin.match(/http/) ){
|
||
|
document.getElementById("search_menubutton").style.display = "none";
|
||
|
}
|
||
|
/* Used by the Hide/Show button beside syntax diagrams, to toggle the */
|
||
|
function hideorshow(btn,obj){
|
||
|
var x = document.getElementById(obj);
|
||
|
var b = document.getElementById(btn);
|
||
|
if( x.style.display!='none' ){
|
||
|
x.style.display = 'none';
|
||
|
b.innerHTML='show';
|
||
|
}else{
|
||
|
x.style.display = '';
|
||
|
b.innerHTML='hide';
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
</script>
|
||
|
</div>
|
||
|
<div class=fancy>
|
||
|
<div class=nosearch>
|
||
|
<div class="fancy_title">
|
||
|
Floating Pointer Numbers
|
||
|
</div>
|
||
|
<div class="fancy_toc">
|
||
|
<a onclick="toggle_toc()">
|
||
|
<span class="fancy_toc_mark" id="toc_mk">►</span>
|
||
|
Table Of Contents
|
||
|
</a>
|
||
|
<div id="toc_sub"><div class="fancy-toc1"><a href="#how_sqlite_stores_numbers">1. How SQLite Stores Numbers</a></div>
|
||
|
<div class="fancy-toc2"><a href="#floating_point_accuracy">1.1. Floating-Point Accuracy</a></div>
|
||
|
<div class="fancy-toc2"><a href="#floating_point_numbers">1.2. Floating Point Numbers</a></div>
|
||
|
<div class="fancy-toc3"><a href="#unrepresentable_numbers">1.2.1. Unrepresentable numbers</a></div>
|
||
|
<div class="fancy-toc3"><a href="#is_it_close_enough_">1.2.2. Is it close enough?</a></div>
|
||
|
<div class="fancy-toc1"><a href="#extensions_for_dealing_with_floating_point_numbers">2. Extensions For Dealing With Floating Point Numbers</a></div>
|
||
|
<div class="fancy-toc2"><a href="#the_ieee754_c_extension">2.1. The ieee754.c Extension</a></div>
|
||
|
<div class="fancy-toc3"><a href="#the_ieee754_function">2.1.1. The ieee754() function</a></div>
|
||
|
<div class="fancy-toc3"><a href="#the_ieee754_mantissa_and_ieee754_exponent_functions">2.1.2. The ieee754_mantissa() and ieee754_exponent() functions</a></div>
|
||
|
<div class="fancy-toc3"><a href="#the_ieee754_from_blob_and_ieee754_to_blob_functions">2.1.3. The ieee754_from_blob() and ieee754_to_blob() functions</a></div>
|
||
|
<div class="fancy-toc2"><a href="#the_decimal_c_extension">2.2. The decimal.c Extension</a></div>
|
||
|
<div class="fancy-toc1"><a href="#techniques">3. Techniques</a></div>
|
||
|
</div>
|
||
|
</div>
|
||
|
<script>
|
||
|
function toggle_toc(){
|
||
|
var sub = document.getElementById("toc_sub")
|
||
|
var mk = document.getElementById("toc_mk")
|
||
|
if( sub.style.display!="block" ){
|
||
|
sub.style.display = "block";
|
||
|
mk.innerHTML = "▼";
|
||
|
} else {
|
||
|
sub.style.display = "none";
|
||
|
mk.innerHTML = "►";
|
||
|
}
|
||
|
}
|
||
|
</script>
|
||
|
</div>
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
<h1 id="how_sqlite_stores_numbers"><span>1. </span>How SQLite Stores Numbers</h1>
|
||
|
|
||
|
<p>
|
||
|
SQLite stores integer values in the 64-bit
|
||
|
<a href="https://en.wikipedia.org/wiki/Two%27s_complement">twos-complement</a>
|
||
|
format.
|
||
|
This gives a storage range of -9223372036854775808 to +9223372036854775807,
|
||
|
inclusive. Integers within this range are exact.
|
||
|
|
||
|
</p><p>
|
||
|
So-called "REAL" or floating point values are stored in the
|
||
|
<a href="https://en.wikipedia.org/wiki/IEEE_754">IEEE 754</a>
|
||
|
<a href="https://en.wikipedia.org/wiki/Double-precision_floating-point_format">Binary-64</a>
|
||
|
format.
|
||
|
This gives a range of positive values between approximately
|
||
|
1.7976931348623157e+308 and 4.9406564584124654e-324 with an equivalent
|
||
|
range of negative values. A binary64 can also be 0.0 (and -0.0), positive
|
||
|
and negative infinity and "NaN" or "Not-a-Number". Floating point
|
||
|
values are approximate.
|
||
|
|
||
|
</p><p>
|
||
|
Pay close attention to the last sentence in the previous paragraph:
|
||
|
</p><blockquote><b>
|
||
|
Floating point values are approximate ← <u>Always</u> remember this!
|
||
|
</b></blockquote>
|
||
|
|
||
|
<p>
|
||
|
If you need an exact answer, you should not use binary64 floating-point
|
||
|
values, in SQLite or in any other product. This is not an SQLite limitation.
|
||
|
It is a mathematical limitation inherent in the design of floating-point numbers.
|
||
|
|
||
|
</p><h2 id="floating_point_accuracy"><span>1.1. </span>Floating-Point Accuracy</h2>
|
||
|
|
||
|
<p>
|
||
|
SQLite promises to preserve the 15 most significant digits of a floating
|
||
|
point value. However, it makes no guarantees about the accuracy of
|
||
|
computations on floating point values, as no such guarantees are possible.
|
||
|
Performing math on floating-point values introduces error.
|
||
|
For example, consider what happens if you attempt to subtract two floating-point
|
||
|
numbers of similar magnitude:
|
||
|
|
||
|
</p><blockquote>
|
||
|
<table border="0" cellpadding="0" cellspacing="0">
|
||
|
<tr><td align="right">1152693165.1106291898</td></tr>
|
||
|
<tr><td align="right">-1152693165.1106280772</td></tr>
|
||
|
<tr><td><hr>
|
||
|
</td></tr><tr><td align="right">0.0000011126
|
||
|
</td></tr></table>
|
||
|
</blockquote>
|
||
|
|
||
|
<p>The result shown above (0.0000011126) is the correct answer. But if you
|
||
|
do this computation using binary64 floating-point, the answer you get is
|
||
|
0.00000095367431640625 - an error of about 14%. If you do many similar
|
||
|
computations as part of your program, the errors add up so that your final
|
||
|
result might be completely meaningless.
|
||
|
|
||
|
</p><p>The error arises because only about the first 15 significant digits of
|
||
|
each number are stored accurately, and first the difference between the two numbers
|
||
|
being subtracted is in the 16th digit.
|
||
|
|
||
|
</p><h2 id="floating_point_numbers"><span>1.2. </span>Floating Point Numbers</h2>
|
||
|
|
||
|
<p>
|
||
|
The binary64 floating-point format uses 64 bits per number. Hence there
|
||
|
are 1.845e+19 different possible floating point values. On the other hand
|
||
|
there are infinitely many real numbers in the range of
|
||
|
1.7977e+308 and 4.9407e-324. It follows then that binary64 cannot possibly
|
||
|
represent all possible real numbers within that range. Approximations are
|
||
|
required.
|
||
|
|
||
|
</p><p>
|
||
|
An IEEE 754 floating-point value is an integer multiplied by a power
|
||
|
of two:
|
||
|
|
||
|
</p><blockquote>
|
||
|
<big>M × 2<sup><small>E</small></sup></big>
|
||
|
</blockquote>
|
||
|
|
||
|
<p>The M value is the "mantissa" and E is the "exponent". Both
|
||
|
M and E are integers.
|
||
|
|
||
|
</p><p>For Binary64, M is a 53-bit integer and E is an 11-bit integer that
|
||
|
offset so that represents a range of values between -1074 and +972, inclusive.
|
||
|
|
||
|
</p><p><i>(NB: The usual description of IEEE 754 is more complex, and it is important
|
||
|
to understand the added complexity if you really want to appreciate the details,
|
||
|
merits, and limitations of IEEE 754. However, the integer description shown
|
||
|
here, while not exactly right, is easier to understand and is sufficient for
|
||
|
the purposes of this article.)</i></p>
|
||
|
|
||
|
<h3 id="unrepresentable_numbers"><span>1.2.1. </span>Unrepresentable numbers</h3>
|
||
|
|
||
|
<p>Not ever decimal number with fewer than 16 significant digits can be
|
||
|
represented exactly as a binary64 number. In fact, most decimal numbers
|
||
|
with digits to the right of the decimal point lack an exact binary64
|
||
|
equivalent. For example, if you have a database column that is intended
|
||
|
to hold an item price in dollars and cents, the only cents value that
|
||
|
can be exactly represented are 0.00, 0.25, 0.50, and 0.75. Any other
|
||
|
numbers to the right of the decimal point result in an approximation.
|
||
|
If you provide a "price" value of 47.49, that number will be represented
|
||
|
in binary64 as:
|
||
|
|
||
|
</p><blockquote>
|
||
|
6683623321994527 × 2<sup><small>-47</small></sup>
|
||
|
</blockquote>
|
||
|
|
||
|
<p>Which works out to be:
|
||
|
|
||
|
</p><blockquote>
|
||
|
47.49000000000000198951966012828052043914794921875
|
||
|
</blockquote>
|
||
|
|
||
|
<p>That number is very close to 47.49, but it is not exact. It is a little
|
||
|
too big. If we reduce M by one to 6683623321994526 so that we have the
|
||
|
next smaller possible binary64 value, we get:
|
||
|
|
||
|
</p><blockquote>
|
||
|
47.4899999999999948840923025272786617279052734375
|
||
|
</blockquote>
|
||
|
|
||
|
|
||
|
<p>
|
||
|
This second number is too small.
|
||
|
The first number is closer to the desired value of 47.49, so that is the
|
||
|
one that gets used. But it is not exact. Most decimal values work this
|
||
|
way in IEEE 754. Remember the key point we made above:
|
||
|
|
||
|
</p><blockquote><b>
|
||
|
Floating point values are approximate.
|
||
|
</b></blockquote>
|
||
|
|
||
|
<p>If you remember nothing else about floating-point values,
|
||
|
please don't forget this one key idea.
|
||
|
|
||
|
</p><h3 id="is_it_close_enough_"><span>1.2.2. </span>Is it close enough?</h3>
|
||
|
|
||
|
<p>The precision provided by IEEE 754 Binary64 is sufficient for most computations.
|
||
|
For example, if "47.49" represents a price and inflation is running
|
||
|
at 2% per year, then the price is going up by about 0.0000000301 dollars per
|
||
|
second. The error in the recorded value of 47.49 represents about 66 nanoseconds
|
||
|
worth of inflation. So if the 47.49 price is exact
|
||
|
when you enter it, then the effects of inflation will cause the true value to
|
||
|
exactly equal the value actually stored
|
||
|
(47.4900000000000019895196601282805204391479492187) in less than
|
||
|
one ten-millionth of a second.
|
||
|
Surely that level of precision is sufficient for most purposes?
|
||
|
|
||
|
</p><h1 id="extensions_for_dealing_with_floating_point_numbers"><span>2. </span>Extensions For Dealing With Floating Point Numbers</h1>
|
||
|
|
||
|
<a name="ieee754ext"></a>
|
||
|
|
||
|
<h2 id="the_ieee754_c_extension"><span>2.1. </span>The ieee754.c Extension</h2>
|
||
|
|
||
|
<p>The ieee754 extension converts a floating point number between its
|
||
|
binary64 representation and the M×2<sup><small>E</small></sup> format.
|
||
|
In other words in the expression:
|
||
|
|
||
|
</p><blockquote>
|
||
|
<big>F = M × 2<sup><small>E</small></sup></big>
|
||
|
</blockquote>
|
||
|
|
||
|
<p>The ieee754 extension converts between F and (M,E) and back again.
|
||
|
|
||
|
</p><p>The ieee754 extension is not part of the <a href="amalgamation.html">amalgamation</a>, but it is included
|
||
|
by default in the <a href="cli.html">CLI</a>. If you want to include the ieee754 extension in your
|
||
|
application, you will need to compile and load it separately.
|
||
|
|
||
|
<a name="ieee754"></a>
|
||
|
|
||
|
</p><h3 id="the_ieee754_function"><span>2.1.1. </span>The ieee754() function</h3>
|
||
|
|
||
|
<p>The ieee754(F) SQL function takes a single floating-point argument
|
||
|
as its input and returns a string that looks like this:
|
||
|
|
||
|
</p><blockquote>
|
||
|
'ieee754(M,E)'
|
||
|
</blockquote>
|
||
|
|
||
|
<p>Except that the M and E are replaced by the mantissa and exponent of the
|
||
|
floating point number. For example:
|
||
|
|
||
|
</p><div class="codeblock"><pre>sqlite> .mode box
|
||
|
sqlite> SELECT ieee754(47.49) AS x;
|
||
|
┌───────────────────────────────┐
|
||
|
│ x │
|
||
|
├───────────────────────────────┤
|
||
|
│ ieee754(6683623321994527,-47) │
|
||
|
└───────────────────────────────┘
|
||
|
</pre></div>
|
||
|
|
||
|
<p>
|
||
|
Going in the other direction, the 2-argument version of ieee754() takes
|
||
|
the M and E values and converts them into the corresponding F value:
|
||
|
|
||
|
</p><div class="codeblock"><pre>sqlite> select ieee754(6683623321994527,-47) as x;
|
||
|
┌───────┐
|
||
|
│ x │
|
||
|
├───────┤
|
||
|
│ 47.49 │
|
||
|
└───────┘
|
||
|
</pre></div>
|
||
|
|
||
|
<a name="ieee754m"></a>
|
||
|
|
||
|
<h3 id="the_ieee754_mantissa_and_ieee754_exponent_functions"><span>2.1.2. </span>The ieee754_mantissa() and ieee754_exponent() functions</h3>
|
||
|
|
||
|
<p>The text output of the one-argument form of ieee754() is great for human
|
||
|
readability, but it awkward to use as part of a larger expression. Hence
|
||
|
The ieee754_mantissa() and ieee754_exponent() routines were added to return
|
||
|
the M and E values corresponding to their single argument F
|
||
|
value.
|
||
|
For example:
|
||
|
|
||
|
</p><div class="codeblock"><pre>sqlite> .mode box
|
||
|
sqlite> SELECT ieee754_mantissa(47.49) AS M, ieee754_exponent(47.49) AS E;
|
||
|
┌──────────────────┬─────┐
|
||
|
│ M │ E │
|
||
|
├──────────────────┼─────┤
|
||
|
│ 6683623321994527 │ -47 │
|
||
|
└──────────────────┴─────┘
|
||
|
</pre></div>
|
||
|
|
||
|
<a name="ieee754b"></a>
|
||
|
|
||
|
<h3 id="the_ieee754_from_blob_and_ieee754_to_blob_functions"><span>2.1.3. </span>The ieee754_from_blob() and ieee754_to_blob() functions</h3>
|
||
|
|
||
|
<p>The ieee754_to_blob(F) SQL function converts the floating point number F
|
||
|
into an 8-byte BLOB that is the big-endian binary64 encoding of that number.
|
||
|
The ieee754_from_blob(B) function goes the other way, converting an 8-byte
|
||
|
blob into the floating-point value that the binary64 encoding represents.
|
||
|
|
||
|
</p><p>So, for example, if you read
|
||
|
<a href="'https://en.wikipedia.org/wiki/Double-precision_floating-point_format'">on
|
||
|
Wikipedia</a> that the encoding for the minimum positive binary64 value is
|
||
|
0x0000000000000001, then you can find the corresponding floating point value
|
||
|
like this:
|
||
|
|
||
|
</p><div class="codeblock"><pre>sqlite> .mode box
|
||
|
sqlite> SELECT ieee754_from_blob(x'0000000000000001') AS F;
|
||
|
┌───────────────────────┐
|
||
|
│ F │
|
||
|
├───────────────────────┤
|
||
|
│ 4.94065645841247e-324 │
|
||
|
└───────────────────────┘
|
||
|
</pre></div>
|
||
|
|
||
|
<p>Or go the other way:
|
||
|
|
||
|
</p><div class="codeblock"><pre>sqlite> .mode box
|
||
|
sqlite> SELECT quote(ieee754_to_blob(4.94065645841247e-324)) AS binary64;
|
||
|
┌─────────────────────┐
|
||
|
│ binary64 │
|
||
|
├─────────────────────┤
|
||
|
│ X'0000000000000001' │
|
||
|
└─────────────────────┘
|
||
|
</pre></div>
|
||
|
|
||
|
<a name="decext"></a>
|
||
|
|
||
|
<h2 id="the_decimal_c_extension"><span>2.2. </span>The decimal.c Extension</h2>
|
||
|
|
||
|
<p>The decimal extension provides arbitrary-precision decimal arithmetic on
|
||
|
numbers stored as text strings. Because the numbers are stored to arbitrary
|
||
|
precision and as text, no approximations are needed. Computations can be
|
||
|
done exactly.
|
||
|
|
||
|
</p><p>The decimal extension is not (currently) part of the SQLite <a href="amalgamation.html">amalgamation</a>.
|
||
|
However, it is included in the <a href="cli.html">CLI</a>.
|
||
|
|
||
|
</p><p>There are three math functions available:
|
||
|
|
||
|
</p><p>
|
||
|
</p><ul>
|
||
|
<li> decimal_add(A,B)
|
||
|
</li><li> decimal_sub(A,B)
|
||
|
</li><li> decimal_mul(A,B)
|
||
|
</li></ul>
|
||
|
|
||
|
|
||
|
<p>These functions respectively add, subtract, and multiply their arguments
|
||
|
and return a new text string that is the decimal representation of the result.
|
||
|
There is no division operator at this time.
|
||
|
|
||
|
</p><p>Use the decimal_cmp(A,B) to compare two decimal values. The result will
|
||
|
be negative, zero, or positive if A is less than, equal to, or greater than B,
|
||
|
respectively.
|
||
|
|
||
|
</p><p>The decimal_sum(X) function is an aggregate, like the built-in
|
||
|
<a href="lang_aggfunc.html#sumunc">sum() aggregate function</a>, except that decimal_sum() computes its result
|
||
|
to arbitrary precision and is therefore precise.
|
||
|
|
||
|
</p><p>Finally, the decimal extension provides the "decimal" collating sequences
|
||
|
that compares decimal text strings in numeric order.
|
||
|
|
||
|
</p><h1 id="techniques"><span>3. </span>Techniques</h1>
|
||
|
|
||
|
<p>
|
||
|
The following SQL illustrates how to use the ieee754 and decimal
|
||
|
extensions to compute the exact decimal equivalent
|
||
|
for a binary64 floating-point number.
|
||
|
|
||
|
</p><div class="codeblock"><pre>-- The pow2 table will hold all the necessary powers of two.
|
||
|
CREATE TABLE pow2(x INTEGER PRIMARY KEY, v TEXT);
|
||
|
WITH RECURSIVE c(x,v) AS (
|
||
|
VALUES(0,'1')
|
||
|
UNION ALL
|
||
|
SELECT x+1, decimal_mul(v,'2') FROM c WHERE x+1<=971
|
||
|
) INSERT INTO pow2(x,v) SELECT x, v FROM c;
|
||
|
WITH RECURSIVE c(x,v) AS (
|
||
|
VALUES(-1,'0.5')
|
||
|
UNION ALL
|
||
|
SELECT x-1, decimal_mul(v,'0.5') FROM c WHERE x-1>=-1075
|
||
|
) INSERT INTO pow2(x,v) SELECT x, v FROM c;
|
||
|
|
||
|
-- This query finds the decimal representation of each value in the "c" table.
|
||
|
WITH c(n) AS (VALUES(47.49))
|
||
|
----XXXXX----------- Replace with whatever you want
|
||
|
SELECT decimal_mul(ieee754_mantissa(c.n),pow2.v)
|
||
|
FROM pow2, c WHERE pow2.x=ieee754_exponent(c.n);
|
||
|
</pre></div>
|
||
|
|