620 lines
		
	
	
		
			24 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
			
		
		
	
	
			620 lines
		
	
	
		
			24 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
<!DOCTYPE html>
 | 
						|
<html><head>
 | 
						|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
 | 
						|
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
 | 
						|
<link href="sqlite.css" rel="stylesheet">
 | 
						|
<title>35% Faster Than The Filesystem</title>
 | 
						|
<!-- path= -->
 | 
						|
</head>
 | 
						|
<body>
 | 
						|
<div class=nosearch>
 | 
						|
<a href="index.html">
 | 
						|
<img class="logo" src="images/sqlite370_banner.gif" alt="SQLite" border="0">
 | 
						|
</a>
 | 
						|
<div><!-- IE hack to prevent disappearing logo --></div>
 | 
						|
<div class="tagline desktoponly">
 | 
						|
Small. Fast. Reliable.<br>Choose any three.
 | 
						|
</div>
 | 
						|
<div class="menu mainmenu">
 | 
						|
<ul>
 | 
						|
<li><a href="index.html">Home</a>
 | 
						|
<li class='mobileonly'><a href="javascript:void(0)" onclick='toggle_div("submenu")'>Menu</a>
 | 
						|
<li class='wideonly'><a href='about.html'>About</a>
 | 
						|
<li class='desktoponly'><a href="docs.html">Documentation</a>
 | 
						|
<li class='desktoponly'><a href="download.html">Download</a>
 | 
						|
<li class='wideonly'><a href='copyright.html'>License</a>
 | 
						|
<li class='desktoponly'><a href="support.html">Support</a>
 | 
						|
<li class='desktoponly'><a href="prosupport.html">Purchase</a>
 | 
						|
<li class='search' id='search_menubutton'>
 | 
						|
<a href="javascript:void(0)" onclick='toggle_search()'>Search</a>
 | 
						|
</ul>
 | 
						|
</div>
 | 
						|
<div class="menu submenu" id="submenu">
 | 
						|
<ul>
 | 
						|
<li><a href='about.html'>About</a>
 | 
						|
<li><a href='docs.html'>Documentation</a>
 | 
						|
<li><a href='download.html'>Download</a>
 | 
						|
<li><a href='support.html'>Support</a>
 | 
						|
<li><a href='prosupport.html'>Purchase</a>
 | 
						|
</ul>
 | 
						|
</div>
 | 
						|
<div class="searchmenu" id="searchmenu">
 | 
						|
<form method="GET" action="search">
 | 
						|
<select name="s" id="searchtype">
 | 
						|
<option value="d">Search Documentation</option>
 | 
						|
<option value="c">Search Changelog</option>
 | 
						|
</select>
 | 
						|
<input type="text" name="q" id="searchbox" value="">
 | 
						|
<input type="submit" value="Go">
 | 
						|
</form>
 | 
						|
</div>
 | 
						|
</div>
 | 
						|
<script>
 | 
						|
function toggle_div(nm) {
 | 
						|
var w = document.getElementById(nm);
 | 
						|
if( w.style.display=="block" ){
 | 
						|
w.style.display = "none";
 | 
						|
}else{
 | 
						|
w.style.display = "block";
 | 
						|
}
 | 
						|
}
 | 
						|
function toggle_search() {
 | 
						|
var w = document.getElementById("searchmenu");
 | 
						|
if( w.style.display=="block" ){
 | 
						|
w.style.display = "none";
 | 
						|
} else {
 | 
						|
w.style.display = "block";
 | 
						|
setTimeout(function(){
 | 
						|
document.getElementById("searchbox").focus()
 | 
						|
}, 30);
 | 
						|
}
 | 
						|
}
 | 
						|
function div_off(nm){document.getElementById(nm).style.display="none";}
 | 
						|
window.onbeforeunload = function(e){div_off("submenu");}
 | 
						|
/* Disable the Search feature if we are not operating from CGI, since */
 | 
						|
/* Search is accomplished using CGI and will not work without it. */
 | 
						|
if( !location.origin || !location.origin.match || !location.origin.match(/http/) ){
 | 
						|
document.getElementById("search_menubutton").style.display = "none";
 | 
						|
}
 | 
						|
/* Used by the Hide/Show button beside syntax diagrams, to toggle the */
 | 
						|
function hideorshow(btn,obj){
 | 
						|
var x = document.getElementById(obj);
 | 
						|
var b = document.getElementById(btn);
 | 
						|
if( x.style.display!='none' ){
 | 
						|
x.style.display = 'none';
 | 
						|
b.innerHTML='show';
 | 
						|
}else{
 | 
						|
x.style.display = '';
 | 
						|
b.innerHTML='hide';
 | 
						|
}
 | 
						|
return false;
 | 
						|
}
 | 
						|
</script>
 | 
						|
</div>
 | 
						|
<div class=fancy>
 | 
						|
<div class=nosearch>
 | 
						|
<div class="fancy_title">
 | 
						|
35% Faster Than The Filesystem
 | 
						|
</div>
 | 
						|
<div class="fancy_toc">
 | 
						|
<a onclick="toggle_toc()">
 | 
						|
<span class="fancy_toc_mark" id="toc_mk">►</span>
 | 
						|
Table Of Contents
 | 
						|
</a>
 | 
						|
<div id="toc_sub"><div class="fancy-toc1"><a href="#summary">1. Summary</a></div>
 | 
						|
<div class="fancy-toc2"><a href="#caveats">1.1. Caveats</a></div>
 | 
						|
<div class="fancy-toc2"><a href="#related_studies">1.2. Related Studies</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#how_these_measurements_are_made">2. How These Measurements Are Made</a></div>
 | 
						|
<div class="fancy-toc2"><a href="#read_performance_measurements">2.1. Read Performance Measurements</a></div>
 | 
						|
<div class="fancy-toc2"><a href="#write_performance_measurements">2.2. Write Performance Measurements</a></div>
 | 
						|
<div class="fancy-toc2"><a href="#variations">2.3. Variations</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#general_findings">3. General Findings</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#additional_notes">4. Additional Notes</a></div>
 | 
						|
<div class="fancy-toc2"><a href="#compiling_and_testing_on_android">4.1. Compiling And Testing on Android</a></div>
 | 
						|
</div>
 | 
						|
</div>
 | 
						|
<script>
 | 
						|
function toggle_toc(){
 | 
						|
var sub = document.getElementById("toc_sub")
 | 
						|
var mk = document.getElementById("toc_mk")
 | 
						|
if( sub.style.display!="block" ){
 | 
						|
sub.style.display = "block";
 | 
						|
mk.innerHTML = "▼";
 | 
						|
} else {
 | 
						|
sub.style.display = "none";
 | 
						|
mk.innerHTML = "►";
 | 
						|
}
 | 
						|
}
 | 
						|
</script>
 | 
						|
</div>
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
<h1 id="summary"><span>1. </span>Summary</h1>
 | 
						|
 | 
						|
<p>SQLite reads and writes small blobs (for example, thumbnail images)
 | 
						|
<a href="#approx">35% faster¹</a> than the same blobs
 | 
						|
can be read from or written to individual files on disk using
 | 
						|
fread() or fwrite().
 | 
						|
 | 
						|
</p><p>Furthermore, a single SQLite database holding
 | 
						|
10-kilobyte blobs uses about 20% less disk space than
 | 
						|
storing the blobs in individual files.
 | 
						|
 | 
						|
</p><p>The performance difference arises (we believe) because when
 | 
						|
working from an SQLite database, the open() and close() system calls
 | 
						|
are invoked only once, whereas
 | 
						|
open() and close() are invoked once for each blob
 | 
						|
when using blobs stored in individual files.  It appears that the
 | 
						|
overhead of calling open() and close() is greater than the overhead
 | 
						|
of using the database.  The size reduction arises from the fact that
 | 
						|
individual files are padded out to the next multiple of the filesystem
 | 
						|
block size, whereas the blobs are packed more tightly into an SQLite
 | 
						|
database.
 | 
						|
 | 
						|
</p><p>
 | 
						|
The measurements in this article were made during the week of 2017-06-05
 | 
						|
using a version of SQLite in between 3.19.2 and 3.20.0.  You may expect
 | 
						|
future versions of SQLite to perform even better.
 | 
						|
 | 
						|
</p><h2 id="caveats"><span>1.1. </span>Caveats</h2>
 | 
						|
 | 
						|
<a name="approx"></a>
 | 
						|
<p>
 | 
						|
¹The 35% figure above is approximate.  Actual timings vary
 | 
						|
depending on hardware, operating system, and the
 | 
						|
details of the experiment, and due to random performance fluctuations
 | 
						|
on real-world hardware.  See the text below for more detail.
 | 
						|
Try the experiments yourself.  Report significant deviations to
 | 
						|
the <a href="support.html#mailinglists">mailing lists</a>.
 | 
						|
</p>
 | 
						|
 | 
						|
<p>
 | 
						|
The 35% figure is based on running tests on every machine
 | 
						|
that the author has easily at hand.
 | 
						|
Some reviewers of this article report that SQLite has higher 
 | 
						|
latency than direct I/O on their systems.  We do not yet understand
 | 
						|
the difference.  We also see indications that SQLite does not
 | 
						|
perform as well as direct I/O when experiments are run using
 | 
						|
a cold filesystem cache.
 | 
						|
 | 
						|
</p><p>
 | 
						|
So let your take-away be this: read/write latency for
 | 
						|
SQLite is competitive with read/write latency of individual files on
 | 
						|
disk.  Often SQLite is faster.  Sometimes SQLite is almost
 | 
						|
as fast.  Either way, this article disproves the common
 | 
						|
assumption that a relational database must be slower than direct
 | 
						|
filesystem I/O.
 | 
						|
 | 
						|
</p><h2 id="related_studies"><span>1.2. </span>Related Studies</h2>
 | 
						|
 | 
						|
<p>
 | 
						|
<a href="https://www.microsoft.com/en-us/research/people/gray/">Jim Gray</a>
 | 
						|
and others studied the read performance of BLOBs
 | 
						|
versus file I/O for Microsoft SQL Server and found that reading BLOBs 
 | 
						|
out of the 
 | 
						|
database was faster for BLOB sizes less than between 250KiB and 1MiB.
 | 
						|
(<a href="https://www.microsoft.com/en-us/research/publication/to-blob-or-not-to-blob-large-object-storage-in-a-database-or-a-filesystem/">Paper</a>).
 | 
						|
In that study, the database still stores the filename of the content even
 | 
						|
if the content is held in a separate file.  So the database is consulted
 | 
						|
for every BLOB, even if it is only to extract the filename.  In this
 | 
						|
article, the key for the BLOB is the filename, so no preliminary database
 | 
						|
access is required.  Because the database is never used at all when
 | 
						|
reading content from individual files in this article, the threshold
 | 
						|
at which direct file I/O becomes faster is smaller than it is in Gray's
 | 
						|
paper.
 | 
						|
 | 
						|
</p><p>
 | 
						|
The <a href="intern-v-extern-blob.html">Internal Versus External BLOBs</a> article on this website is an
 | 
						|
earlier investigation (circa 2011) that uses the same approach as the
 | 
						|
Jim Gray paper — storing the blob filenames as entries in the
 | 
						|
database — but for SQLite instead of SQL Server.
 | 
						|
 | 
						|
 | 
						|
 | 
						|
</p><h1 id="how_these_measurements_are_made"><span>2. </span>How These Measurements Are Made</h1>
 | 
						|
 | 
						|
<p>I/O performance is measured using the
 | 
						|
<a href="https://www.sqlite.org/src/file/test/kvtest.c">kvtest.c</a> program
 | 
						|
from the SQLite source tree.
 | 
						|
To compile this test program, first gather the kvtest.c source file
 | 
						|
into a directory with the <a href="amalgamation.html">SQLite amalgamation</a> source
 | 
						|
files "sqlite3.c" and "sqlite3.h".  Then on unix, run a command like
 | 
						|
the following:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>gcc -Os -I. -DSQLITE_DIRECT_OVERFLOW_READ \
 | 
						|
  kvtest.c sqlite3.c -o kvtest -ldl -lpthread
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>Or on Windows with MSVC:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>cl -I. -DSQLITE_DIRECT_OVERFLOW_READ kvtest.c sqlite3.c
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>Instructions for compiling for Android
 | 
						|
are <a href="#compile-android">shown below</a>.
 | 
						|
 | 
						|
</p><p>
 | 
						|
Use the resulting "kvtest" program to
 | 
						|
generate a test database with 100,000 random uncompressible
 | 
						|
blobs, each with a random
 | 
						|
size between 8,000 and 12,000 bytes
 | 
						|
using a command like this:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>./kvtest init test1.db --count 100k --size 10k --variance 2k
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>
 | 
						|
If desired, you can verify the new database by running this command:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>./kvtest stat test1.db
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>
 | 
						|
Next, make copies of all the blobs into individual files in a directory
 | 
						|
using a command like this:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>./kvtest export test1.db test1.dir
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>
 | 
						|
At this point, you can measure the amount of disk space used by
 | 
						|
the test1.db database and the space used by the test1.dir directory
 | 
						|
and all of its content.  On a standard Ubuntu Linux desktop, the
 | 
						|
database file will be 1,024,512,000 bytes in size and the test1.dir
 | 
						|
directory will use 1,228,800,000 bytes of space (according to "du -k"),
 | 
						|
about 20% more than the database.
 | 
						|
 | 
						|
</p><p>
 | 
						|
The "test1.dir" directory created above puts all the blobs into a single
 | 
						|
folder.  It was conjectured that some operating systems would perform 
 | 
						|
poorly when a single directory contains 100,000 objects.  To test this,
 | 
						|
the kvtest program can also store the blobs in a hierarchy of folders with no
 | 
						|
more than 100 files and/or subdirectories per folder.  The alternative
 | 
						|
on-disk representation of the blobs can be created using the --tree
 | 
						|
command-line option to the "export" command, like this:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>./kvtest export test1.db test1.tree --tree
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>
 | 
						|
The test1.dir directory will contain 100,000 files
 | 
						|
with names like "000000", "000001", "000002" and so forth but the
 | 
						|
test1.tree directory will contain the same files in subdirectories like
 | 
						|
"00/00/00", "00/00/01", and so on.  The test1.dir and test1.test
 | 
						|
directories take up approximately the same amount of space, though
 | 
						|
test1.test is very slightly larger due to the extra directory entries.
 | 
						|
 | 
						|
</p><p>
 | 
						|
All of the experiments that follow operate the same with either 
 | 
						|
"test1.dir" or "test1.tree".  Very little performance difference is
 | 
						|
measured in either case, regardless of operating system.
 | 
						|
 | 
						|
</p><p>
 | 
						|
Measure the performance for reading blobs from the database and from
 | 
						|
individual files using these commands:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>./kvtest run test1.db --count 100k --blob-api
 | 
						|
./kvtest run test1.dir --count 100k --blob-api
 | 
						|
./kvtest run test1.tree --count 100k --blob-api
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>
 | 
						|
Depending on your hardware and operating system, you should see that reads 
 | 
						|
from the test1.db database file are about 35% faster than reads from 
 | 
						|
individual files in the test1.dir or test1.tree folders.  Results can vary
 | 
						|
significantly from one run to the next due to caching, so it is advisable
 | 
						|
to run tests multiple times and take an average or a worst case or a best
 | 
						|
case, depending on your requirements.
 | 
						|
 | 
						|
</p><p>The --blob-api option on the database read test causes kvtest to use
 | 
						|
the <a href="c3ref/blob_read.html">sqlite3_blob_read()</a> feature of SQLite to load the content of the
 | 
						|
blobs, rather than running pure SQL statements.  This helps SQLite to run
 | 
						|
a little faster on read tests.  You can omit that option to compare the
 | 
						|
performance of SQLite running SQL statements.
 | 
						|
In that case, the SQLite still out-performs direct reads, though
 | 
						|
by not as much as when using <a href="c3ref/blob_read.html">sqlite3_blob_read()</a>.
 | 
						|
The --blob-api option is ignored for tests that read from individual disk
 | 
						|
files.
 | 
						|
 | 
						|
</p><p>
 | 
						|
Measure write performance by adding the --update option.  This causes
 | 
						|
the blobs are overwritten in place with another random blob of
 | 
						|
exactly the same size.
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>./kvtest run test1.db --count 100k --update
 | 
						|
./kvtest run test1.dir --count 100k --update
 | 
						|
./kvtest run test1.tree --count 100k --update
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>
 | 
						|
The writing test above is not completely fair, since SQLite is doing
 | 
						|
<a href="transactional.html">power-safe transactions</a> whereas the direct-to-disk writing is not.
 | 
						|
To put the tests on a more equal footing, add either the --nosync
 | 
						|
option to the SQLite writes to disable calling fsync() or
 | 
						|
FlushFileBuffers() to force content to disk, or using the --fsync option
 | 
						|
for the direct-to-disk tests to force them to invoke fsync() or
 | 
						|
FlushFileBuffers() when updating disk files.
 | 
						|
 | 
						|
</p><p>
 | 
						|
By default, kvtest runs the database I/O measurements all within
 | 
						|
a single transaction.  Use the --multitrans option to run each blob
 | 
						|
read or write in a separate transaction.  The --multitrans option makes
 | 
						|
SQLite much slower, and uncompetitive with direct disk I/O.  This
 | 
						|
option proves, yet again, that to get the most performance out of
 | 
						|
SQLite, you should group as much database interaction as possible within
 | 
						|
a single transaction.
 | 
						|
 | 
						|
</p><p>
 | 
						|
There are many other testing options, which can be seen by running
 | 
						|
the command:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>./kvtest help
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<h2 id="read_performance_measurements"><span>2.1. </span>Read Performance Measurements</h2>
 | 
						|
 | 
						|
<p>The chart below shows data collected using 
 | 
						|
<a href="https://www.sqlite.org/src/file/test/kvtest.c">kvtest.c</a> on five different
 | 
						|
systems:
 | 
						|
 | 
						|
</p><ul>
 | 
						|
<li><b>Win7</b>: A circa-2009 Dell Inspiron laptop, Pentium dual-core
 | 
						|
    at 2.30GHz, 4GiB RAM, Windows7.
 | 
						|
</li><li><b>Win10</b>: A 2016 Lenovo YOGA 910, Intel i7-7500 at 2.70GHz,
 | 
						|
    16GiB RAM, Windows10.
 | 
						|
</li><li><b>Mac</b>: A 2015 MacBook Pro, 3.1GHz intel Core i7, 16GiB RAM,
 | 
						|
    MacOS 10.12.5
 | 
						|
</li><li><b>Ubuntu</b>: Desktop built from Intel i7-4770K at 3.50GHz, 32GiB RAM,
 | 
						|
    Ubuntu 16.04.2 LTS
 | 
						|
</li><li><b>Android</b>: Galaxy S3, ARMv7, 2GiB RAM
 | 
						|
</li></ul>
 | 
						|
 | 
						|
<p>All machines use SSD except Win7 which has a
 | 
						|
hard-drive. The test database is 100K blobs with sizes uniformly
 | 
						|
distributed between 8K and 12K, for a total of about 1 gigabyte
 | 
						|
of content.  The database page size
 | 
						|
is 4KiB.  The -DSQLITE_DIRECT_OVERFLOW_READ compile-time option was
 | 
						|
used for all of these tests.
 | 
						|
Tests were run multiple times.
 | 
						|
The first run was used to warm up the cache and its timings were discarded.
 | 
						|
 | 
						|
</p><p>
 | 
						|
The chart below shows average time to read a blob directly from the
 | 
						|
filesystem versus the time needed to read the same blob from the SQLite 
 | 
						|
database.
 | 
						|
The actual timings vary considerably from one system to another 
 | 
						|
(the Ubuntu desktop is much
 | 
						|
faster than the Galaxy S3 phone, for example).  
 | 
						|
This chart shows the ratio of the
 | 
						|
times needed to read blobs from a file divided by the time needed to
 | 
						|
from the database.  The left-most column in the chart is the normalized
 | 
						|
time to read from the database, for reference.
 | 
						|
 | 
						|
</p><p>
 | 
						|
In this chart, an SQL statement ("SELECT v FROM kv WHERE k=?1") 
 | 
						|
is prepared once.  Then for each blob, the blob key value is bound 
 | 
						|
to the ?1 parameter and the statement is evaluated to extract the
 | 
						|
blob content.
 | 
						|
 | 
						|
</p><p>
 | 
						|
The chart shows that on Windows10, content can be read from the SQLite
 | 
						|
database about 5 times faster than it can be read directly from disk.
 | 
						|
On Android, SQLite is only about 35% faster than reading from disk.
 | 
						|
 | 
						|
</p><center>
 | 
						|
<div class="'imgcontainer'">
 | 
						|
<img src="images/faster-read-sql.jpg">
 | 
						|
</div>
 | 
						|
<br>
 | 
						|
Chart 1:  SQLite read latency relative to direct filesystem reads.<br>
 | 
						|
100K blobs, avg 10KB each, random order using SQL
 | 
						|
</center>
 | 
						|
 | 
						|
<p>
 | 
						|
The performance can be improved slightly by bypassing the SQL layer
 | 
						|
and reading the blob content directly using the
 | 
						|
<a href="c3ref/blob_read.html">sqlite3_blob_read()</a> interface, as shown in the next chart:
 | 
						|
 | 
						|
</p><center>
 | 
						|
<div class="'imgcontainer'">
 | 
						|
<img src="images/faster-read-blobapi.jpg">
 | 
						|
</div>
 | 
						|
<br>
 | 
						|
Chart 2:  SQLite read latency relative to direct filesystem reads.<br>
 | 
						|
100K blobs, avg size 10KB, random order<br>
 | 
						|
using sqlite3_blob_read().
 | 
						|
</center>
 | 
						|
 | 
						|
<p>
 | 
						|
Further performance improves can be made by using the
 | 
						|
<a href="mmap.html">memory-mapped I/O</a> feature of SQLite.  In the next chart, the
 | 
						|
entire 1GB database file is memory mapped and blobs are read
 | 
						|
(in random order) using the <a href="c3ref/blob_read.html">sqlite3_blob_read()</a> interface.
 | 
						|
With these optimizations, SQLite is twice as fast as Android
 | 
						|
or MacOS-X and over 10 times faster than Windows.
 | 
						|
 | 
						|
</p><center>
 | 
						|
<div class="'imgcontainer'">
 | 
						|
<img src="images/faster-read-mmap.jpg">
 | 
						|
</div>
 | 
						|
<br>
 | 
						|
Chart 3:  SQLite read latency relative to direct filesystem reads.<br>
 | 
						|
100K blobs, avg size 10KB, random order<br>
 | 
						|
using sqlite3_blob_read() from a memory-mapped database.
 | 
						|
</center>
 | 
						|
 | 
						|
<p>
 | 
						|
The third chart shows that reading blob content out of SQLite can be
 | 
						|
twice as fast as reading from individual files on disk for Mac and
 | 
						|
Android, and an amazing ten times faster for Windows.
 | 
						|
 | 
						|
</p><h2 id="write_performance_measurements"><span>2.2. </span>Write Performance Measurements</h2>
 | 
						|
 | 
						|
<p>
 | 
						|
Writes are slower.
 | 
						|
On all systems, using both direct I/O and SQLite, write performance is
 | 
						|
between 5 and 15 times slower than reads.
 | 
						|
 | 
						|
</p><p>
 | 
						|
Write performance measurements were made by replacing (overwriting)
 | 
						|
an entire blob with a different blob.  All of the blobs in these
 | 
						|
experiment are random and incompressible.  Because writes are so much
 | 
						|
slower than reads, only 10,000 of the 100,000 blobs in the database
 | 
						|
are replaced.  The blobs to be replaced are selected at random and
 | 
						|
are in no particular order.
 | 
						|
 | 
						|
</p><p>
 | 
						|
The direct-to-disk writes are accomplished using fopen()/fwrite()/fclose().
 | 
						|
By default, and in all the results shown below, the OS filesystem buffers are
 | 
						|
never flushed to persistent storage using fsync() or
 | 
						|
FlushFileBuffers().  In other words, there is no attempt to make the
 | 
						|
direct-to-disk writes transactional or power-safe.
 | 
						|
We found that invoking fsync() or FlushFileBuffers() on each file
 | 
						|
written causes direct-to-disk storage
 | 
						|
to be about 10 times or more slower than writes to SQLite.
 | 
						|
 | 
						|
</p><p>
 | 
						|
The next chart compares SQLite database updates in <a href="wal.html">WAL mode</a>
 | 
						|
against raw direct-to-disk overwrites of separate files on disk.
 | 
						|
The <a href="pragma.html#pragma_synchronous">PRAGMA synchronous</a> setting is NORMAL.
 | 
						|
All database writes are in a single transaction.
 | 
						|
The timer for the database writes is stopped after the transaction
 | 
						|
commits, but before a <a href="wal.html#ckpt">checkpoint</a> is run.
 | 
						|
Note that the SQLite writes, unlike the direct-to-disk writes,
 | 
						|
are <a href="transactional.html">transactional</a> and <a href="transactional.html">power-safe</a>, though because the synchronous
 | 
						|
setting is NORMAL instead of FULL, the transactions are not durable.
 | 
						|
 | 
						|
</p><center>
 | 
						|
<div class="'imgcontainer'">
 | 
						|
<img src="images/faster-write-safe.jpg">
 | 
						|
</div>
 | 
						|
<br>
 | 
						|
Chart 4:  SQLite write latency relative to direct filesystem writes.<br>
 | 
						|
10K blobs, avg size 10KB, random order,<br>
 | 
						|
WAL mode with synchronous NORMAL,<br>
 | 
						|
exclusive of checkpoint time
 | 
						|
</center>
 | 
						|
 | 
						|
<p>
 | 
						|
The android performance numbers for the write experiments are omitted
 | 
						|
because the performance tests on the Galaxy S3 are so random.  Two
 | 
						|
consecutive runs of the exact same experiment would give wildly different
 | 
						|
times.  And, to be fair, the performance of SQLite on android is slightly
 | 
						|
slower than writing directly to disk.
 | 
						|
 | 
						|
</p><p>
 | 
						|
The next chart shows the performance of SQLite versus direct-to-disk
 | 
						|
when transactions are disabled (<a href="pragma.html#pragma_journal_mode">PRAGMA journal_mode=OFF</a>)
 | 
						|
and <a href="pragma.html#pragma_synchronous">PRAGMA synchronous</a> is set to OFF.  These settings put SQLite on an
 | 
						|
equal footing with direct-to-disk writes, which is to say they make the
 | 
						|
data prone to corruption due to system crashes and power failures.
 | 
						|
 | 
						|
</p><center>
 | 
						|
<div class="'imgcontainer'">
 | 
						|
<img src="images/faster-write-unsafe.jpg">
 | 
						|
</div>
 | 
						|
<br>
 | 
						|
Chart 5:  SQLite write latency relative to direct filesystem writes.<br>
 | 
						|
10K blobs, avg size 10KB, random order,<br>
 | 
						|
journaling disabled, synchronous OFF.
 | 
						|
</center>
 | 
						|
 | 
						|
<p>
 | 
						|
In all of the write tests, it is important to disable anti-virus software
 | 
						|
prior to running the direct-to-disk performance tests.  We found that
 | 
						|
anti-virus software slows down direct-to-disk by an order of magnitude
 | 
						|
whereas it impacts SQLite writes very little.  This is probably due to the
 | 
						|
fact that direct-to-disk changes thousands of separate files which all need
 | 
						|
to be checked by anti-virus, whereas SQLite writes only changes the single
 | 
						|
database file.
 | 
						|
 | 
						|
</p><h2 id="variations"><span>2.3. </span>Variations</h2>
 | 
						|
 | 
						|
<p>The <a href="compile.html#direct_overflow_read">-DSQLITE_DIRECT_OVERFLOW_READ</a> compile-time option causes SQLite
 | 
						|
to bypass its page cache when reading content from overflow pages.  This
 | 
						|
helps database reads of 10K blobs run a little faster, but not all that much
 | 
						|
faster.  SQLite still holds a speed advantage over direct filesystem reads
 | 
						|
without the SQLITE_DIRECT_OVERFLOW_READ compile-time option.
 | 
						|
 | 
						|
</p><p>Other compile-time options such as using -O3 instead of -Os or
 | 
						|
using <a href="compile.html#threadsafe">-DSQLITE_THREADSAFE=0</a> and/or some of the other
 | 
						|
<a href="compile.html#rcmd">recommended compile-time options</a> might help SQLite to run even faster
 | 
						|
relative to direct filesystem reads.
 | 
						|
 | 
						|
</p><p>The size of the blobs in the test data affects performance.
 | 
						|
The filesystem will generally be faster for larger blobs, since
 | 
						|
the overhead of open() and close() is amortized over more bytes of I/O,
 | 
						|
whereas the database will be more efficient in both speed and space
 | 
						|
as the average blob size decreases.
 | 
						|
 | 
						|
 | 
						|
</p><h1 id="general_findings"><span>3. </span>General Findings</h1>
 | 
						|
 | 
						|
<ol type="A">
 | 
						|
<li>
 | 
						|
<p>SQLite is competitive with, and usually faster than, blobs stored in
 | 
						|
separate files on disk, for both reading and writing.
 | 
						|
 | 
						|
</p></li><li>
 | 
						|
<p>SQLite is much faster than direct writes to disk on Windows
 | 
						|
when anti-virus protection is turned on.  Since anti-virus software
 | 
						|
is and should be on by default in Windows, that means that SQLite
 | 
						|
is generally much faster than direct disk writes on Windows.
 | 
						|
 | 
						|
</p></li><li>
 | 
						|
<p>Reading is about an order of magnitude faster than writing, for all
 | 
						|
systems and for both SQLite and direct-to-disk I/O.
 | 
						|
 | 
						|
</p></li><li>
 | 
						|
<p>I/O performance varies widely depending on operating system and hardware.
 | 
						|
Make your own measurements before drawing conclusions.
 | 
						|
 | 
						|
</p></li><li>
 | 
						|
<p>Some other SQL database engines advise developers to store blobs in separate
 | 
						|
files and then store the filename in the database.  In that case, where
 | 
						|
the database must first be consulted to find the filename before opening
 | 
						|
and reading the file, simply storing the entire blob in the database
 | 
						|
gives much faster read and write performance with SQLite.
 | 
						|
See the <a href="intern-v-extern-blob.html">Internal Versus External BLOBs</a> article for more information.
 | 
						|
</p></li></ol>
 | 
						|
 | 
						|
 | 
						|
<h1 id="additional_notes"><span>4. </span>Additional Notes</h1>
 | 
						|
 | 
						|
<a name="compile-android"></a>
 | 
						|
<h2 id="compiling_and_testing_on_android"><span>4.1. </span>Compiling And Testing on Android</h2>
 | 
						|
 | 
						|
<p>
 | 
						|
The kvtest program is compiled and run on Android as follows.
 | 
						|
First install the Android SDK and NDK.  Then prepare a script
 | 
						|
named "android-gcc" that looks approximately like this:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>#!/bin/sh
 | 
						|
#
 | 
						|
NDK=/home/drh/Android/Sdk/ndk-bundle
 | 
						|
SYSROOT=$NDK/platforms/android-16/arch-arm
 | 
						|
ABIN=$NDK/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin
 | 
						|
GCC=$ABIN/arm-linux-androideabi-gcc
 | 
						|
$GCC --sysroot=$SYSROOT -fPIC -pie $*
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>Make that script executable and put it on your $PATH.  Then
 | 
						|
compile the kvtest program as follows:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>android-gcc -Os -I. kvtest.c sqlite3.c -o kvtest-android
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>Next, move the resulting kvtest-android executable to the Android
 | 
						|
device:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>adb push kvtest-android /data/local/tmp
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>Finally use "adb shell" to get a shell prompt on the Android device,
 | 
						|
cd into the /data/local/tmp directory, and begin running the tests
 | 
						|
as with any other unix host.
 | 
						|
</p>
 |