1013 lines
		
	
	
		
			36 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
			
		
		
	
	
			1013 lines
		
	
	
		
			36 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
| <!DOCTYPE html>
 | |
| <html><head>
 | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0">
 | |
| <meta http-equiv="content-type" content="text/html; charset=UTF-8">
 | |
| <link href="sqlite.css" rel="stylesheet">
 | |
| <title>Query Planning</title>
 | |
| <!-- path= -->
 | |
| </head>
 | |
| <body>
 | |
| <div class=nosearch>
 | |
| <a href="index.html">
 | |
| <img class="logo" src="images/sqlite370_banner.gif" alt="SQLite" border="0">
 | |
| </a>
 | |
| <div><!-- IE hack to prevent disappearing logo --></div>
 | |
| <div class="tagline desktoponly">
 | |
| Small. Fast. Reliable.<br>Choose any three.
 | |
| </div>
 | |
| <div class="menu mainmenu">
 | |
| <ul>
 | |
| <li><a href="index.html">Home</a>
 | |
| <li class='mobileonly'><a href="javascript:void(0)" onclick='toggle_div("submenu")'>Menu</a>
 | |
| <li class='wideonly'><a href='about.html'>About</a>
 | |
| <li class='desktoponly'><a href="docs.html">Documentation</a>
 | |
| <li class='desktoponly'><a href="download.html">Download</a>
 | |
| <li class='wideonly'><a href='copyright.html'>License</a>
 | |
| <li class='desktoponly'><a href="support.html">Support</a>
 | |
| <li class='desktoponly'><a href="prosupport.html">Purchase</a>
 | |
| <li class='search' id='search_menubutton'>
 | |
| <a href="javascript:void(0)" onclick='toggle_search()'>Search</a>
 | |
| </ul>
 | |
| </div>
 | |
| <div class="menu submenu" id="submenu">
 | |
| <ul>
 | |
| <li><a href='about.html'>About</a>
 | |
| <li><a href='docs.html'>Documentation</a>
 | |
| <li><a href='download.html'>Download</a>
 | |
| <li><a href='support.html'>Support</a>
 | |
| <li><a href='prosupport.html'>Purchase</a>
 | |
| </ul>
 | |
| </div>
 | |
| <div class="searchmenu" id="searchmenu">
 | |
| <form method="GET" action="search">
 | |
| <select name="s" id="searchtype">
 | |
| <option value="d">Search Documentation</option>
 | |
| <option value="c">Search Changelog</option>
 | |
| </select>
 | |
| <input type="text" name="q" id="searchbox" value="">
 | |
| <input type="submit" value="Go">
 | |
| </form>
 | |
| </div>
 | |
| </div>
 | |
| <script>
 | |
| function toggle_div(nm) {
 | |
| var w = document.getElementById(nm);
 | |
| if( w.style.display=="block" ){
 | |
| w.style.display = "none";
 | |
| }else{
 | |
| w.style.display = "block";
 | |
| }
 | |
| }
 | |
| function toggle_search() {
 | |
| var w = document.getElementById("searchmenu");
 | |
| if( w.style.display=="block" ){
 | |
| w.style.display = "none";
 | |
| } else {
 | |
| w.style.display = "block";
 | |
| setTimeout(function(){
 | |
| document.getElementById("searchbox").focus()
 | |
| }, 30);
 | |
| }
 | |
| }
 | |
| function div_off(nm){document.getElementById(nm).style.display="none";}
 | |
| window.onbeforeunload = function(e){div_off("submenu");}
 | |
| /* Disable the Search feature if we are not operating from CGI, since */
 | |
| /* Search is accomplished using CGI and will not work without it. */
 | |
| if( !location.origin || !location.origin.match || !location.origin.match(/http/) ){
 | |
| document.getElementById("search_menubutton").style.display = "none";
 | |
| }
 | |
| /* Used by the Hide/Show button beside syntax diagrams, to toggle the */
 | |
| function hideorshow(btn,obj){
 | |
| var x = document.getElementById(obj);
 | |
| var b = document.getElementById(btn);
 | |
| if( x.style.display!='none' ){
 | |
| x.style.display = 'none';
 | |
| b.innerHTML='show';
 | |
| }else{
 | |
| x.style.display = '';
 | |
| b.innerHTML='hide';
 | |
| }
 | |
| return false;
 | |
| }
 | |
| </script>
 | |
| </div>
 | |
| <div class=fancy>
 | |
| <div class=nosearch>
 | |
| <div class="fancy_title">
 | |
| Query Planning
 | |
| </div>
 | |
| <div class="fancy_toc">
 | |
| <a onclick="toggle_toc()">
 | |
| <span class="fancy_toc_mark" id="toc_mk">►</span>
 | |
| Table Of Contents
 | |
| </a>
 | |
| <div id="toc_sub"><div class="fancy-toc1"><a href="#_searching">1.  Searching</a></div>
 | |
| <div class="fancy-toc2"><a href="#_tables_without_indices">1.1.  Tables Without Indices</a></div>
 | |
| <div class="fancy-toc2"><a href="#_lookup_by_rowid">1.2.  Lookup By Rowid</a></div>
 | |
| <div class="fancy-toc2"><a href="#_lookup_by_index">1.3.  Lookup By Index</a></div>
 | |
| <div class="fancy-toc2"><a href="#_multiple_result_rows">1.4.  Multiple Result Rows</a></div>
 | |
| <div class="fancy-toc2"><a href="#_multiple_and_connected_where_clause_terms">1.5.  Multiple AND-Connected WHERE-Clause Terms</a></div>
 | |
| <div class="fancy-toc2"><a href="#_multi_column_indices">1.6.  Multi-Column Indices</a></div>
 | |
| <div class="fancy-toc2"><a href="#_covering_indices">1.7.  Covering Indices</a></div>
 | |
| <div class="fancy-toc2"><a href="#_or_connected_terms_in_the_where_clause">1.8.  OR-Connected Terms In The WHERE Clause</a></div>
 | |
| <div class="fancy-toc1"><a href="#_sorting">2.  Sorting</a></div>
 | |
| <div class="fancy-toc2"><a href="#_sorting_by_rowid">2.1.  Sorting By Rowid</a></div>
 | |
| <div class="fancy-toc2"><a href="#_sorting_by_index">2.2.  Sorting By Index</a></div>
 | |
| <div class="fancy-toc2"><a href="#_sorting_by_covering_index">2.3.  Sorting By Covering Index</a></div>
 | |
| <div class="fancy-toc1"><a href="#_searching_and_sorting_at_the_same_time">3.  Searching And Sorting At The Same Time</a></div>
 | |
| <div class="fancy-toc2"><a href="#_searching_and_sorting_with_a_multi_column_index">3.1.  Searching And Sorting With A Multi-Column Index</a></div>
 | |
| <div class="fancy-toc2"><a href="#_searching_and_sorting_with_a_covering_index">3.2.  Searching And Sorting With A Covering Index</a></div>
 | |
| <div class="fancy-toc2"><a href="#_partial_sorting_using_an_index_a_k_a_block_sorting_">3.3.  Partial Sorting Using An Index (a.k.a. Block Sorting)</a></div>
 | |
| <div class="fancy-toc1"><a href="#_without_rowid_tables">4.  WITHOUT ROWID tables</a></div>
 | |
| </div>
 | |
| </div>
 | |
| <script>
 | |
| function toggle_toc(){
 | |
| var sub = document.getElementById("toc_sub")
 | |
| var mk = document.getElementById("toc_mk")
 | |
| if( sub.style.display!="block" ){
 | |
| sub.style.display = "block";
 | |
| mk.innerHTML = "▼";
 | |
| } else {
 | |
| sub.style.display = "none";
 | |
| mk.innerHTML = "►";
 | |
| }
 | |
| }
 | |
| </script>
 | |
| </div>
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| <h2 style="margin-left:1.0em" notoc="1" id="overview"> Overview</h2> 
 | |
| 
 | |
| <p>
 | |
| The best feature of SQL (in <u>all</u> its implementations, not just SQLite)
 | |
| is that it is a <i>declarative</i> language, not a <i>procedural</i>
 | |
| language.  When programming in SQL you tell the system <i>what</i> you
 | |
| want to compute, not <i>how</i> to compute it.  The task of figuring out
 | |
| the <i>how</i> is delegated to the <i>query planner</i> subsystem within 
 | |
| the SQL database engine.</p>
 | |
| 
 | |
| <p>For any given SQL statement, there might be hundreds or thousands or
 | |
| even millions of different algorithms of performing the operation.  All
 | |
| of these algorithms will get the correct answer, though some will run
 | |
| faster than others.
 | |
| The query planner is an 
 | |
| <a href="https://en.wikipedia.org/wiki/Artificial_intelligence">AI</a> that 
 | |
| tries to pick the fastest and most efficient algorithm for each SQL
 | |
| statement.
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| Most of the time, the query planner in SQLite does a good job.
 | |
| However, the query planner needs indices to
 | |
| work with.  
 | |
| These indices must normally be added by programmers.
 | |
| Rarely, the query planner AI will make a suboptimal algorithm
 | |
| choice.
 | |
| In those cases, programmers may want to provide additional
 | |
| hints to help the query planner do a better job.
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| This document provides background information about how the
 | |
| SQLite query planner and query engine work.
 | |
| Programmers can use this information to help create better
 | |
| indexes, and provide hints to help the query planner when
 | |
| needed.
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| Additional information is provided in the
 | |
| <a href="optoverview.html">SQLite query planner</a> and 
 | |
| <a href="queryplanner-ng.html">next generation query planner</a> documents.
 | |
| </p>
 | |
| 
 | |
| <a name="searching"></a>
 | |
| 
 | |
| <h1 id="_searching"><span>1. </span> Searching</h1>
 | |
| 
 | |
| <h2 id="_tables_without_indices"><span>1.1. </span> Tables Without Indices</h2>
 | |
| 
 | |
| <p>
 | |
| Most tables in SQLite consist of zero or more rows with a unique integer
 | |
| key (the <a href="lang_createtable.html#rowid">rowid</a> or <a href="lang_createtable.html#rowid">INTEGER PRIMARY KEY</a>) followed by content.  
 | |
| (The exception is <a href="withoutrowid.html">WITHOUT ROWID</a> tables.)
 | |
| The rows
 | |
| are logically stored in order of increasing rowid.  As an example, this
 | |
| article uses a table named "FruitsForSale" which relates various fruits 
 | |
| to the state
 | |
| where they are grown and their unit price at market.  The schema is this:
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| CREATE TABLE FruitsForSale(
 | |
|   Fruit TEXT,
 | |
|   State TEXT,
 | |
|   Price REAL
 | |
| );
 | |
| </pre></table></center>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| With some (arbitrary) data, such a table might be logically stored on disk
 | |
| as shown in figure 1:
 | |
| </p>
 | |
| 
 | |
| <a name='fig1'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/tab.gif" alt="figure 1"><br>
 | |
| Figure 1: Logical Layout Of Table "FruitsForSale"
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| In this example, the rowids are not
 | |
| consecutive but they are ordered.  SQLite usually creates rowids beginning
 | |
| with one and increasing by one with each added row.  But if rows are 
 | |
| deleted, gaps can appear in the sequence.  And the application can control
 | |
| the rowid assigned if desired, so that rows are not necessarily inserted 
 | |
| at the bottom.  But regardless of what happens, the rowids are always 
 | |
| unique and in strictly ascending order.
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| Suppose you want to look up the price of peaches.  The query would
 | |
| be as follows:
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| SELECT price FROM fruitsforsale WHERE fruit='Peach';
 | |
| </pre></table></center>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| To satisfy this query, SQLite reads every row out of the
 | |
| table, checks to see if the "fruit" column has the value of "Peach" and if
 | |
| so, outputs the "price" column from that row.  The process is illustrated
 | |
| by <a href="#fig2">figure 2</a> below.
 | |
| This is algorithm is called a <i>full table scan</i> 
 | |
| since the entire content of the
 | |
| table must be read and examined in order to find the one row of interest.
 | |
| With a table of only 7 rows, a full table scan is acceptable, 
 | |
| but if the table contained 7 million rows, a full table scan might read 
 | |
| megabytes of content in order to find a single 8-byte number.  
 | |
| For that reason, one normally tries to avoid full table scans.
 | |
| </p>
 | |
| 
 | |
| <a name='fig2'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/fullscan.gif" alt="figure 2"><br>
 | |
| Figure 2: Full Table Scan
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <h2 id="_lookup_by_rowid"><span>1.2. </span> Lookup By Rowid</h2>
 | |
| 
 | |
| <p>
 | |
| One technique for avoiding a full table scan is to do lookups by
 | |
| rowid (or by the equivalent <a href="lang_createtable.html#rowid">INTEGER PRIMARY KEY</a>).   To lookup the
 | |
| price of peaches, one would query for the entry with a rowid of 4:
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| SELECT price FROM fruitsforsale WHERE rowid=4;
 | |
| </pre></table></center>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| Since the information is stored in the table in rowid order, SQLite
 | |
| can find the correct row using a binary search.
 | |
| If the table contains N elements, the time required to look up the
 | |
| desired row is proportional to logN rather than being proportional
 | |
| to N as in a full table scan.  If the table contains 10 million elements,
 | |
| that means the query will be on the order of N/logN or about 1 million
 | |
| times faster.
 | |
| </p>
 | |
| 
 | |
| <a name='fig3'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/rowidlu.gif" alt="figure 3"><br>
 | |
| Figure 3: Lookup By Rowid
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <h2 id="_lookup_by_index"><span>1.3. </span> Lookup By Index</h2>
 | |
| <p>
 | |
| The problem with looking up information by rowid is that you probably
 | |
| do not care what the price of "item 4" is - you want to know the price
 | |
| of peaches.  And so a rowid lookup is not helpful.
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| To make the original query more efficient, we can add an index on the
 | |
| "fruit" column of the "fruitsforsale" table like this:
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| CREATE INDEX Idx1 ON fruitsforsale(fruit);
 | |
| </pre></table></center>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| An index is another table similar to the original "fruitsforsale" table
 | |
| but with the content (the fruit column in this case) stored in front of the
 | |
| rowid and with all rows in content order.
 | |
| <a href="#fig4">Figure 4</a> gives a logical view of the Idx1 index.
 | |
| The "fruit" column is the primary key used to order the elements of the
 | |
| table and the "rowid" is the secondary key used to break the tie when
 | |
| two or more rows have the same "fruit".  In the example, the rowid
 | |
| has to be used as a tie-breaker for the "Orange" rows.
 | |
| Notice that since the rowid
 | |
| is always unique over all elements of the original table, the composite key
 | |
| of "fruit" followed by "rowid" will be unique over all elements of the index.
 | |
| </p>
 | |
| 
 | |
| <a name='fig4'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/idx1.gif" alt="figure 4"><br>
 | |
| Figure 4: An Index On The Fruit Column
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| This new index can be used to implement a faster algorithm for the
 | |
| original "Price of Peaches" query.
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| SELECT price FROM fruitsforsale WHERE fruit='Peach';
 | |
| </pre></table></center>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| The query starts by doing a binary search on the Idx1 index for entries
 | |
| that have fruit='Peach'.  SQLite can do this binary search on the Idx1 index
 | |
| but not on the original FruitsForSale table because the rows in Idx1 are sorted
 | |
| by the "fruit" column.  Having found a row in the Idx1 index that has
 | |
| fruit='Peach', the database engine can extract the rowid for that row.
 | |
| Then the database engines does a second binary search
 | |
| on the original FruitsForSale table to find the
 | |
| original row that contains fruit='Peach'.  
 | |
| From the row in the FruitsForSale table,
 | |
| SQLite can then extract the value of the price column.
 | |
| This procedure is illustrated by <a href="#fig5">figure 5</a>.
 | |
| </p>
 | |
| 
 | |
| <a name='fig5'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/idx1lu1.gif" alt="figure 5"><br>
 | |
| Figure 5: Indexed Lookup For The Price Of Peaches
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| SQLite has to do two binary searches to find the price of peaches using
 | |
| the method show above.  But for a table with a large number of rows, this
 | |
| is still much faster than doing a full table scan.
 | |
| </p>
 | |
| 
 | |
| <h2 id="_multiple_result_rows"><span>1.4. </span> Multiple Result Rows</h2>
 | |
| 
 | |
| <p>
 | |
| In the previous query the fruit='Peach' constraint narrowed the result
 | |
| down to a single row.  But the same technique works even if multiple
 | |
| rows are obtained.  Suppose we looked up the price of Oranges instead of
 | |
| Peaches:
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| SELECT price FROM fruitsforsale WHERE fruit='Orange'
 | |
| </pre></table></center>
 | |
| <a name='fig6'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/idx1lu2.gif" alt="figure 6"><br>
 | |
| Figure 6: Indexed Lookup For The Price Of Oranges
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| In this case, SQLite still does a single binary search to find the first
 | |
| entry of the index where fruit='Orange'.  Then it extracts the rowid from
 | |
| the index and uses that rowid to lookup the original table entry via
 | |
| binary search and output the price from the original table.  But instead
 | |
| of quitting, the database engine then advances to the next row of index
 | |
| to repeat the process for next fruit='Orange' entry.  Advancing to the
 | |
| next row of an index (or table) is much less costly than doing a binary
 | |
| search since the next row is often located on the same database page as
 | |
| the current row.  In fact, the cost of advancing to the next row is so
 | |
| cheap in comparison to a binary search that we usually ignore it.  So
 | |
| our estimate for the total cost of this query is 3 binary searches.
 | |
| If the number of rows of output is K and the number of rows in the table
 | |
| is N, then in general the cost of doing the query is proportional
 | |
| to (K+1)*logN.
 | |
| </p>
 | |
| 
 | |
| <h2 id="_multiple_and_connected_where_clause_terms"><span>1.5. </span> Multiple AND-Connected WHERE-Clause Terms</h2>
 | |
| 
 | |
| <p>
 | |
| Next, suppose that you want to look up the price of not just any orange,
 | |
| but specifically California-grown oranges.  The appropriate query would
 | |
| be as follows:
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| SELECT price FROM fruitsforsale WHERE fruit='Orange' AND state='CA'
 | |
| </pre></table></center>
 | |
| <a name='fig7'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/idx1lu3.gif" alt="figure 7"><br>
 | |
| Figure 7: Indexed Lookup Of California Oranges
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| One approach to this query is to use the fruit='Orange' term of the WHERE
 | |
| clause to find all rows dealing with oranges, then filter those rows
 | |
| by rejecting any that are from states other than California.  This
 | |
| process is shown by <a href="#fig7">figure 7</a> above.  This is a perfectly
 | |
| reasonable approach in most cases.  Yes, the database engine did have
 | |
| to do an extra binary search for the Florida orange row that was
 | |
| later rejected, so it was not as efficient as we might hope, though
 | |
| for many applications it is efficient enough.  
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| Suppose that in addition to the index on "fruit" there was also
 | |
| an index on "state".
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| CREATE INDEX Idx2 ON fruitsforsale(state);
 | |
| </pre></table></center>
 | |
| <a name='fig8'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/idx2.gif" alt="figure 8"><br>
 | |
| Figure 8: Index On The State Column
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| The "state" index works just like the "fruit" index in that it is a
 | |
| new table with an extra column in front of the rowid and sorted by
 | |
| that extra column as the primary key.  The only difference is that
 | |
| in Idx2, the first column is "state" instead of "fruit" as it is with
 | |
| Idx1.  In our example data set, there is more redundancy in the "state"
 | |
| column and so they are more duplicate entries.  The ties are still
 | |
| resolved using the rowid.
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| Using the new Idx2 index on "state", SQLite has another option for
 | |
| lookup up the price of California oranges:  it can look up every row
 | |
| that contains fruit from California and filter out those rows that
 | |
| are not oranges.
 | |
| </p>
 | |
| 
 | |
| <a name='fig9'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/idx2lu1.gif" alt="figure 9"><br>
 | |
| Figure 9: Indexed Lookup Of California Oranges
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| Using Idx2 instead of Idx1 causes SQLite to examine a different set of
 | |
| rows, but it gets the same answer in the end (which is very important -
 | |
| remember that indices should never change the answer, only help SQLite to
 | |
| get to the answer more quickly) and it does the same amount of work.
 | |
| So the Idx2 index did not help performance in this case.
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| The last two queries take the same amount of time, in our example.
 | |
| So which index, Idx1 or Idx2, will SQLite choose?  If the
 | |
| <a href="lang_analyze.html">ANALYZE</a> command has been run on the database, so that SQLite has
 | |
| had an opportunity to gather statistics about the available indices,
 | |
| then SQLite will know that the Idx1 index usually narrows the search
 | |
| down to a single item (our example of fruit='Orange' is the exception
 | |
| to this rule) whereas the Idx2 index will normally only narrow the 
 | |
| search down to two rows.  So, if all else is equal, SQLite will
 | |
| choose Idx1 with the hope of narrowing the search to as small
 | |
| a number of rows as possible.  This choice is only possible because
 | |
| of the statistics provided by <a href="lang_analyze.html">ANALYZE</a>.  If <a href="lang_analyze.html">ANALYZE</a> has not been
 | |
| run then the choice of which index to use is arbitrary.
 | |
| </p>
 | |
| 
 | |
| <h2 id="_multi_column_indices"><span>1.6. </span> Multi-Column Indices</h2>
 | |
| 
 | |
| <p>
 | |
| To get the maximum performance out of a query with multiple AND-connected
 | |
| terms in the WHERE clause, you really want a multi-column index with
 | |
| columns for each of the AND terms.  In this case we create a new index
 | |
| on the "fruit" and "state" columns of FruitsForSale:
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| CREATE INDEX Idx3 ON FruitsForSale(fruit, state);
 | |
| </pre></table></center>
 | |
| <a name='fig10'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/idx3.gif" alt="figure 1"><br>
 | |
| Figure 1: A Two-Column Index
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| A multi-column index follows the same pattern as a single-column index;
 | |
| the indexed columns are added in front of the rowid.  The only difference
 | |
| is that now multiple columns are added.  The left-most column is the
 | |
| primary key used for ordering the rows in the index.  The second column is
 | |
| used to break ties in the left-most column.  If there were a third column,
 | |
| it would be used to break ties for the first two columns.  And so forth for
 | |
| all columns in the index.  Because rowid is guaranteed
 | |
| to be unique, every row of the index will be unique even if all of the
 | |
| content columns for two rows are the same.  That case does not happen
 | |
| in our sample data, but there is one case (fruit='Orange') where there
 | |
| is a tie on the first column which must be broken by the second column.
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| Given the new multi-column Idx3 index, it is now possible for SQLite
 | |
| to find the price of California oranges using only 2 binary searches:
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| SELECT price FROM fruitsforsale WHERE fruit='Orange' AND state='CA'
 | |
| </pre></table></center>
 | |
| <a name='fig11'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/idx3lu1.gif" alt="figure 11"><br>
 | |
| Figure 11: Lookup Using A Two-Column Index
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| With the Idx3 index on both columns that are constrained by the WHERE clause,
 | |
| SQLite can do a single binary search against Idx3 to find the one rowid
 | |
| for California oranges, then do a single binary search to find the price
 | |
| for that item in the original table.  There are no dead-ends and no
 | |
| wasted binary searches.  This is a more efficient query.
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| Note that Idx3 contains all the same information as the original 
 | |
| <a href="#fig3">Idx1</a>.  And so if we have Idx3, we do not really need Idx1
 | |
| any more.  The "price of peaches" query can be satisfied using Idx3
 | |
| by simply ignoring the "state" column of Idx3:
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| SELECT price FROM fruitsforsale WHERE fruit='Peach'
 | |
| </pre></table></center>
 | |
| <a name='fig12'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/idx3lu2.gif" alt="figure 12"><br>
 | |
| Figure 12: Single-Column Lookup On A Multi-Column Index
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| Hence, a good rule of thumb is that your database schema should never
 | |
| contain two indices where one index is a prefix of the other.  Drop the
 | |
| index with fewer columns.  SQLite will still be able to do efficient
 | |
| lookups with the longer index.
 | |
| </p>
 | |
| 
 | |
| <a name="covidx"></a>
 | |
| 
 | |
| <h2 id="_covering_indices"><span>1.7. </span> Covering Indices</h2>
 | |
| 
 | |
| <p>
 | |
| The "price of California oranges" query was made more efficient through
 | |
| the use of a two-column index.  But SQLite can do even better with a
 | |
| three-column index that also includes the "price" column:
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| CREATE INDEX Idx4 ON FruitsForSale(fruit, state, price);
 | |
| </pre></table></center>
 | |
| <a name='fig13'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/idx4.gif" alt="figure 13"><br>
 | |
| Figure 13: A Covering Index
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| This new index contains all the columns of the original FruitsForSale table that
 | |
| are used by the query - both the search terms and the output.  We call
 | |
| this a "covering index".  Because all of the information needed is in
 | |
| the covering index, SQLite never needs to consult the original table
 | |
| in order to find the price.
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| SELECT price FROM fruitsforsale WHERE fruit='Orange' AND state='CA';
 | |
| </pre></table></center>
 | |
| <a name='fig14'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/idx4lu1.gif" alt="figure 14"><br>
 | |
| Figure 14: Query Using A Covering Index
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| Hence, by adding extra "output" columns onto the end of an index, one
 | |
| can avoid having to reference the original table and thereby
 | |
| cut the number of binary searches for a query in half.  This is a
 | |
| constant-factor improvement in performance (roughly a doubling of
 | |
| the speed).  But on the other hand, it is also just a refinement;
 | |
| A two-fold performance increase is not nearly as dramatic as the
 | |
| one-million-fold increase seen when the table was first indexed.
 | |
| And for most queries, the difference between 1 microsecond and
 | |
| 2 microseconds is unlikely to be noticed.
 | |
| </p>
 | |
| 
 | |
| <a name="or_in_where"></a>
 | |
| 
 | |
| <h2 id="_or_connected_terms_in_the_where_clause"><span>1.8. </span> OR-Connected Terms In The WHERE Clause</h2>
 | |
| 
 | |
| <p>
 | |
| Multi-column indices only work if the constraint terms in the WHERE
 | |
| clause of the query are connected by AND.
 | |
| So Idx3 and Idx4 are helpful when the search is for items that
 | |
| are both Oranges and grown in California, but neither index would
 | |
| be that useful if we wanted all items that were either oranges
 | |
| <i>or</i> are grown in California.
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| SELECT price FROM FruitsForSale WHERE fruit='Orange' OR state='CA';
 | |
| </pre></table></center>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| When confronted with OR-connected terms in a WHERE clause, SQLite 
 | |
| examines each OR term separately and tries to use an index to
 | |
| find the rowids associated with each term.
 | |
| It then takes the union of the resulting rowid sets to find
 | |
| the end result.  The following figure illustrates this process:
 | |
| </p>
 | |
| 
 | |
| <a name='fig15'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/orquery.gif" alt="figure 15"><br>
 | |
| Figure 15: Query With OR Constraints
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| The diagram above implies that SQLite computes all of the rowids first
 | |
| and then combines them with a union operation before starting to do
 | |
| rowid lookups on the original table.  In reality, the rowid lookups
 | |
| are interspersed with rowid computations.  SQLite uses one index at
 | |
| a time to find rowids while remembering which rowids it has seen
 | |
| before so as to avoid duplicates.  That is just an implementation
 | |
| detail, though.  The diagram, while not 100% accurate, provides a good
 | |
| overview of what is happening.
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| In order for the OR-by-UNION technique shown above to be useful, there
 | |
| must be an index available that helps resolve every OR-connected term
 | |
| in the WHERE clause.  If even a single OR-connected term is not indexed,
 | |
| then a full table scan would have to be done in order to find the rowids
 | |
| generated by the one term, and if SQLite has to do a full table scan, it
 | |
| might as well do it on the original table and get all of the results in
 | |
| a single pass without having to mess with union operations and follow-on
 | |
| binary searches.
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| One can see how the OR-by-UNION technique could also be leveraged to
 | |
| use multiple indices on queries where the WHERE clause has terms connected
 | |
| by AND, by using an intersect operator in place of union.  Many SQL
 | |
| database engines will do just that.  But the performance gain over using
 | |
| just a single index is slight and so SQLite does not implement that technique
 | |
| at this time.  However, a future version SQLite might be enhanced to support
 | |
| AND-by-INTERSECT.
 | |
| </p>
 | |
| 
 | |
| <a name="sorting"></a>
 | |
| 
 | |
| <h1 id="_sorting"><span>2. </span> Sorting</h1>
 | |
| 
 | |
| <p>
 | |
| SQLite (like all other SQL database engines) can also use indices to
 | |
| satisfy the ORDER BY clauses in a query, in addition to expediting
 | |
| lookup.  In other words, indices can be used to speed up sorting as
 | |
| well as searching.
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| When no appropriate indices are available, a query with an ORDER BY
 | |
| clause must be sorted as a separate step.  Consider this query:
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| SELECT * FROM fruitsforsale ORDER BY fruit;
 | |
| </pre></table></center>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| SQLite processes this by gathering all the output of query and then
 | |
| running that output through a sorter.
 | |
| </p>
 | |
| 
 | |
| <a name='fig16'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/obfruitnoidx.gif" alt="figure 16"><br>
 | |
| Figure 16: Sorting Without An Index
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| If the number of output rows is K, then the time needed to sort is
 | |
| proportional to KlogK.  If K is small, the sorting time is usually
 | |
| not a factor, but in a query such as the above where K==N, the time
 | |
| needed to sort can be much greater than the time needed to do a
 | |
| full table scan.  Furthermore, the entire output is accumulated in
 | |
| temporary storage (which might be either in main memory or on disk,
 | |
| depending on various compile-time and run-time settings)
 | |
| which can mean that a lot of temporary storage is required to complete
 | |
| the query.
 | |
| </p>
 | |
| 
 | |
| <h2 id="_sorting_by_rowid"><span>2.1. </span> Sorting By Rowid</h2>
 | |
| 
 | |
| <p>
 | |
| Because sorting can be expensive, SQLite works hard to convert ORDER BY
 | |
| clauses into no-ops.  If SQLite determines that output will
 | |
| naturally appear in the order specified, then no sorting is done.
 | |
| So, for example, if you request the output in rowid order, no sorting
 | |
| will be done:
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| SELECT * FROM fruitsforsale ORDER BY rowid;
 | |
| </pre></table></center>
 | |
| <a name='fig17'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/obrowid.gif" alt="figure 17"><br>
 | |
| Figure 17: Sorting By Rowid
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| You can also request a reverse-order sort like this:
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| SELECT * FROM fruitsforsale ORDER BY rowid DESC;
 | |
| </pre></table></center>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| SQLite will still omit the sorting step.  But in order for output to
 | |
| appear in the correct order, SQLite will do the table scan starting at
 | |
| the end and working toward the beginning, rather than starting at the
 | |
| beginning and working toward the end as shown in 
 | |
| <a href="#fig17">figure 17</a>.
 | |
| </p>
 | |
| 
 | |
| <h2 id="_sorting_by_index"><span>2.2. </span> Sorting By Index</h2>
 | |
| 
 | |
| <p>
 | |
| Of course, ordering the output of a query by rowid is seldom useful.
 | |
| Usually one wants to order the output by some other column.
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| If an index is available on the ORDER BY column, that index can be used
 | |
| for sorting.  Consider the request for all items sorted by "fruit":
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| SELECT * FROM fruitsforsale ORDER BY fruit;
 | |
| </pre></table></center>
 | |
| 
 | |
| 
 | |
| <a name='fig18'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/obfruitidx1.gif" alt="figure 18"><br>
 | |
| Figure 18: Sorting With An Index
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| The Idx1 index is scanned from top to bottom (or from bottom to top if
 | |
| "ORDER BY fruit DESC" is used) in order to find the rowids for each item
 | |
| in order by fruit.  Then for each rowid, a binary search is done to lookup
 | |
| and output that row.  In this way, the output appears in the requested order
 | |
| without the need to gather the entire output and sort it using a separate step.
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| But does this really save time?  The number of steps in the 
 | |
| <a href="#fig16">original indexless sort</a> is proportional to NlogN since
 | |
| that is how much time it takes to sort N rows.  But when we use Idx1 as
 | |
| shown here, we have to do N rowid lookups which take logN time each, so
 | |
| the total time of NlogN is the same!
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| SQLite uses a cost-based query planner.  When there are two or more ways
 | |
| of solving the same query, SQLite tries to estimate the total amount of
 | |
| time needed to run the query using each plan, and then uses the plan with
 | |
| the lowest estimated cost.  A cost is computed mostly from the estimated
 | |
| time, and so this case could go either way depending on the table size and
 | |
| what WHERE clause constraints were available, and so forth.  But generally
 | |
| speaking, the indexed sort would probably be chosen, if for no other
 | |
| reason, because it does not need to accumulate the entire result set in
 | |
| temporary storage before sorting and thus uses much less temporary storage.
 | |
| </p>
 | |
| 
 | |
| <h2 id="_sorting_by_covering_index"><span>2.3. </span> Sorting By Covering Index</h2>
 | |
| 
 | |
| <p>
 | |
| If a covering index can be used for a query, then the multiple rowid lookups
 | |
| can be avoided and the cost of the query drops dramatically.
 | |
| </p>
 | |
| 
 | |
| <a name='fig19'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/obfruitidx4.gif" alt="figure 19"><br>
 | |
| Figure 19: Sorting With A Covering Index
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| With a covering index, SQLite can simply walk the index from one end to the
 | |
| other and deliver the output in time proportional to N and without having
 | |
| allocate a large buffer to hold the result set.
 | |
| </p>
 | |
| 
 | |
| <h1 id="_searching_and_sorting_at_the_same_time"><span>3. </span> Searching And Sorting At The Same Time</h1>
 | |
| 
 | |
| <p>
 | |
| The previous discussion has treated searching and sorting as separate
 | |
| topics.  But in practice, it is often the case that one wants to search
 | |
| and sort at the same time.  Fortunately, it is possible to do this
 | |
| using a single index.
 | |
| </p>
 | |
| 
 | |
| <h2 id="_searching_and_sorting_with_a_multi_column_index"><span>3.1. </span> Searching And Sorting With A Multi-Column Index</h2>
 | |
| 
 | |
| <p>
 | |
| Suppose we want to find the prices of all kinds of oranges sorted in
 | |
| order of the state where they are grown.  The query is this:
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| SELECT price FROM fruitforsale WHERE fruit='Orange' ORDER BY state
 | |
| </pre></table></center>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| The query contains both a search restriction in the WHERE clause
 | |
| and a sort order in the ORDER BY clause.  Both the search and the sort
 | |
| can be accomplished at the same time using the two-column index Idx3.
 | |
| </p>
 | |
| 
 | |
| <a name='fig20'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/fruitobstate0.gif" alt="figure 20"><br>
 | |
| Figure 20: Search And Sort By Multi-Column Index
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| The query does a binary search on the index to find the subset of rows
 | |
| that have fruit='Orange'.  (Because the fruit column is the left-most column
 | |
| of the index and the rows of the index are in sorted order, all such 
 | |
| rows will be adjacent.)  Then it scans the matching index rows from top to
 | |
| bottom to get the rowids for the original table, and for each rowid does
 | |
| a binary search on the original table to find the price.
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| You will notice that there is no "sort" box anywhere in the above diagram.
 | |
| The ORDER BY clause of the query has become a no-op.  No sorting has to be
 | |
| done here because the output order is by the state column and the state
 | |
| column also happens to be the first column after the fruit column in the
 | |
| index.  So, if we scan entries of the index that have the same value for
 | |
| the fruit column from top to bottom, those index entries are guaranteed to
 | |
| be ordered by the state column.
 | |
| </p>
 | |
| 
 | |
| <a name="srchsortcovidx"></a>
 | |
| 
 | |
| <h2 id="_searching_and_sorting_with_a_covering_index"><span>3.2. </span> Searching And Sorting With A Covering Index</h2>
 | |
| 
 | |
| <p>
 | |
| A <a href="queryplanner.html#covidx">covering index</a> can also be used to search and sort at the same time.
 | |
| Consider the following:
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| SELECT * FROM fruitforsale WHERE fruit='Orange' ORDER BY state
 | |
| </pre></table></center>
 | |
| <a name='fig21'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/fruitobstate.gif" alt="figure 21"><br>
 | |
| Figure 21: Search And Sort By Covering Index
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| As before, SQLite does single binary search
 | |
| for the range of rows in the covering
 | |
| index that satisfy the WHERE clause, the scans that range from top to 
 | |
| bottom to get the desired results.  
 | |
| The rows that satisfy the WHERE clause are guaranteed to be adjacent
 | |
| since the WHERE clause is an equality constraint on the left-most
 | |
| column of the index.  And by scanning the matching index rows from
 | |
| top to bottom, the output is guaranteed to be ordered by state since the
 | |
| state column is the very next column to the right of the fruit column.
 | |
| And so the resulting query is very efficient.
 | |
| </p>
 | |
| 
 | |
| <p>
 | |
| SQLite can pull a similar trick for a descending ORDER BY:
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| SELECT * FROM fruitforsale WHERE fruit='Orange' ORDER BY state DESC
 | |
| </pre></table></center>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| The same basic algorithm is followed, except this time the matching rows
 | |
| of the index are scanned from bottom to top instead of from top to bottom,
 | |
| so that the states will appear in descending order.
 | |
| </p>
 | |
| 
 | |
| <a name="partialsort"></a>
 | |
| 
 | |
| <h2 id="_partial_sorting_using_an_index_a_k_a_block_sorting_"><span>3.3. </span> Partial Sorting Using An Index (a.k.a. Block Sorting)</h2>
 | |
| 
 | |
| <p>
 | |
| Sometimes only part of an ORDER BY clause can be satisfied using indexes.
 | |
| Consider, for example, the following query:
 | |
| </p>
 | |
| 
 | |
| <center><table><tr><td><pre>
 | |
| SELECT * FROM fruitforsale ORDER BY fruit, price
 | |
| </pre></table></center>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| If the covering index is used for the scan, the "fruit" column will appear
 | |
| naturally in the correct order, but when there are two or more rows with
 | |
| the same fruit, the price might be out of order.  When this occurs, SQLite
 | |
| does many small sorts, one sort for each distinct value of fruit, rather
 | |
| than one large sort.  Figure 22 below illustrates the concept.
 | |
| </p>
 | |
| 
 | |
| <a name='fig22'></a>
 | |
| <p><center>
 | |
| <img src="images/qp/partial-sort.gif" alt="figure 22"><br>
 | |
| Figure 22: Partial Sort By Index
 | |
| </center></p>
 | |
| 
 | |
| 
 | |
| <p>
 | |
| In the example, instead of a single sort of 7 elements, there
 | |
| are 5 sorts of one-element each and 1 sort of 2 elements for the
 | |
| case of fruit=='Orange'.
 | |
| 
 | |
| </p><p>
 | |
| The advantages of doing many smaller sorts instead of a single large sort
 | |
| are:
 | |
| </p><ol>
 | |
| <li>Multiple small sorts collectively use fewer CPU cycles than a single
 | |
|     large sort.
 | |
| </li><li>Each small sort is run independently, meaning that much less information
 | |
|     needs to be kept in temporary storage at any one time.
 | |
| </li><li>Those columns of the ORDER BY that are already in the correct order
 | |
|     due to indexes can be omitted from the sort key, further reducing
 | |
|     storage requirements and CPU time.
 | |
| </li><li>Output rows can be returned to the application as each small sort
 | |
|     completes, and well before the table scan is complete.
 | |
| </li><li>If a LIMIT clause is present, it might be possible to avoid scanning
 | |
|     the entire table.
 | |
| </li></ol>
 | |
| 
 | |
| <p>Because of these advantages, SQLite always tries to do a partial sort using an
 | |
| index even if a complete sort by index is not possible.</p>
 | |
| 
 | |
| <h1 id="_without_rowid_tables"><span>4. </span> WITHOUT ROWID tables</h1>
 | |
| 
 | |
| <p>
 | |
| The basic principals described above apply to both ordinary rowid tables
 | |
| and <a href="withoutrowid.html">WITHOUT ROWID</a> tables.
 | |
| The only difference is that the rowid column that serves as the key for
 | |
| tables and that appears as the right-most term in indexes is replaced by
 | |
| the PRIMARY KEY.
 | |
| </p>
 | |
| 
 |