1563 lines
		
	
	
		
			66 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
			
		
		
	
	
			1563 lines
		
	
	
		
			66 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
<!DOCTYPE html>
 | 
						|
<html><head>
 | 
						|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
 | 
						|
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
 | 
						|
<link href="sqlite.css" rel="stylesheet">
 | 
						|
<title>The SQLite Query Optimizer Overview</title>
 | 
						|
<!-- path= -->
 | 
						|
</head>
 | 
						|
<body>
 | 
						|
<div class=nosearch>
 | 
						|
<a href="index.html">
 | 
						|
<img class="logo" src="images/sqlite370_banner.gif" alt="SQLite" border="0">
 | 
						|
</a>
 | 
						|
<div><!-- IE hack to prevent disappearing logo --></div>
 | 
						|
<div class="tagline desktoponly">
 | 
						|
Small. Fast. Reliable.<br>Choose any three.
 | 
						|
</div>
 | 
						|
<div class="menu mainmenu">
 | 
						|
<ul>
 | 
						|
<li><a href="index.html">Home</a>
 | 
						|
<li class='mobileonly'><a href="javascript:void(0)" onclick='toggle_div("submenu")'>Menu</a>
 | 
						|
<li class='wideonly'><a href='about.html'>About</a>
 | 
						|
<li class='desktoponly'><a href="docs.html">Documentation</a>
 | 
						|
<li class='desktoponly'><a href="download.html">Download</a>
 | 
						|
<li class='wideonly'><a href='copyright.html'>License</a>
 | 
						|
<li class='desktoponly'><a href="support.html">Support</a>
 | 
						|
<li class='desktoponly'><a href="prosupport.html">Purchase</a>
 | 
						|
<li class='search' id='search_menubutton'>
 | 
						|
<a href="javascript:void(0)" onclick='toggle_search()'>Search</a>
 | 
						|
</ul>
 | 
						|
</div>
 | 
						|
<div class="menu submenu" id="submenu">
 | 
						|
<ul>
 | 
						|
<li><a href='about.html'>About</a>
 | 
						|
<li><a href='docs.html'>Documentation</a>
 | 
						|
<li><a href='download.html'>Download</a>
 | 
						|
<li><a href='support.html'>Support</a>
 | 
						|
<li><a href='prosupport.html'>Purchase</a>
 | 
						|
</ul>
 | 
						|
</div>
 | 
						|
<div class="searchmenu" id="searchmenu">
 | 
						|
<form method="GET" action="search">
 | 
						|
<select name="s" id="searchtype">
 | 
						|
<option value="d">Search Documentation</option>
 | 
						|
<option value="c">Search Changelog</option>
 | 
						|
</select>
 | 
						|
<input type="text" name="q" id="searchbox" value="">
 | 
						|
<input type="submit" value="Go">
 | 
						|
</form>
 | 
						|
</div>
 | 
						|
</div>
 | 
						|
<script>
 | 
						|
function toggle_div(nm) {
 | 
						|
var w = document.getElementById(nm);
 | 
						|
if( w.style.display=="block" ){
 | 
						|
w.style.display = "none";
 | 
						|
}else{
 | 
						|
w.style.display = "block";
 | 
						|
}
 | 
						|
}
 | 
						|
function toggle_search() {
 | 
						|
var w = document.getElementById("searchmenu");
 | 
						|
if( w.style.display=="block" ){
 | 
						|
w.style.display = "none";
 | 
						|
} else {
 | 
						|
w.style.display = "block";
 | 
						|
setTimeout(function(){
 | 
						|
document.getElementById("searchbox").focus()
 | 
						|
}, 30);
 | 
						|
}
 | 
						|
}
 | 
						|
function div_off(nm){document.getElementById(nm).style.display="none";}
 | 
						|
window.onbeforeunload = function(e){div_off("submenu");}
 | 
						|
/* Disable the Search feature if we are not operating from CGI, since */
 | 
						|
/* Search is accomplished using CGI and will not work without it. */
 | 
						|
if( !location.origin || !location.origin.match || !location.origin.match(/http/) ){
 | 
						|
document.getElementById("search_menubutton").style.display = "none";
 | 
						|
}
 | 
						|
/* Used by the Hide/Show button beside syntax diagrams, to toggle the */
 | 
						|
function hideorshow(btn,obj){
 | 
						|
var x = document.getElementById(obj);
 | 
						|
var b = document.getElementById(btn);
 | 
						|
if( x.style.display!='none' ){
 | 
						|
x.style.display = 'none';
 | 
						|
b.innerHTML='show';
 | 
						|
}else{
 | 
						|
x.style.display = '';
 | 
						|
b.innerHTML='hide';
 | 
						|
}
 | 
						|
return false;
 | 
						|
}
 | 
						|
</script>
 | 
						|
</div>
 | 
						|
<div class=fancy>
 | 
						|
<div class=nosearch>
 | 
						|
<div class="fancy_title">
 | 
						|
The SQLite Query Optimizer Overview
 | 
						|
</div>
 | 
						|
<div class="fancy_toc">
 | 
						|
<a onclick="toggle_toc()">
 | 
						|
<span class="fancy_toc_mark" id="toc_mk">►</span>
 | 
						|
Table Of Contents
 | 
						|
</a>
 | 
						|
<div id="toc_sub"><div class="fancy-toc1"><a href="#introduction">1. Introduction</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#where_clause_analysis">2. WHERE clause analysis</a></div>
 | 
						|
<div class="fancy-toc2"><a href="#index_term_usage_examples">2.1. Index term usage examples</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#the_between_optimization">3. The BETWEEN optimization</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#or_optimizations">4. OR optimizations</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#the_like_optimization">5. The LIKE optimization</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#the_skip_scan_optimization">6. The Skip-Scan Optimization</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#joins">7. Joins</a></div>
 | 
						|
<div class="fancy-toc2"><a href="#order_of_tables_in_a_join">7.1. Order of tables in a join</a></div>
 | 
						|
<div class="fancy-toc2"><a href="#manual_control_of_query_plans_using_sqlite_stat_tables">7.2. Manual Control Of Query Plans Using SQLITE_STAT Tables</a></div>
 | 
						|
<div class="fancy-toc2"><a href="#manual_control_of_query_plans_using_cross_join">7.3. Manual Control Of Query Plans Using CROSS JOIN</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#choosing_between_multiple_indices">8. Choosing between multiple indices</a></div>
 | 
						|
<div class="fancy-toc2"><a href="#disqualifying_where_clause_terms_using_unary_">8.1. Disqualifying WHERE Clause Terms Using Unary-"+"</a></div>
 | 
						|
<div class="fancy-toc2"><a href="#range_queries">8.2. Range Queries</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#covering_indices">9. Covering Indices</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#order_by_optimizations">10. ORDER BY optimizations</a></div>
 | 
						|
<div class="fancy-toc2"><a href="#partial_order_by_via_index">10.1. Partial ORDER BY Via Index</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#subquery_flattening">11. Subquery flattening</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#subquery_co_routines">12. Subquery Co-routines</a></div>
 | 
						|
<div class="fancy-toc2"><a href="#using_co_routines_to_defer_work_until_after_the_sorting">12.1. Using Co-routines To Defer Work Until After The Sorting</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#the_min_max_optimization">13. The MIN/MAX optimization</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#automatic_indexes">14. Automatic Indexes</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#the_push_down_optimization">15. The Push-Down Optimization</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#the_left_join_strength_reduction_optimization">16. The LEFT JOIN Strength Reduction Optimization</a></div>
 | 
						|
<div class="fancy-toc1"><a href="#the_omit_left_join_optimization">17. The Omit LEFT JOIN Optimization</a></div>
 | 
						|
</div>
 | 
						|
</div>
 | 
						|
<script>
 | 
						|
function toggle_toc(){
 | 
						|
var sub = document.getElementById("toc_sub")
 | 
						|
var mk = document.getElementById("toc_mk")
 | 
						|
if( sub.style.display!="block" ){
 | 
						|
sub.style.display = "block";
 | 
						|
mk.innerHTML = "▼";
 | 
						|
} else {
 | 
						|
sub.style.display = "none";
 | 
						|
mk.innerHTML = "►";
 | 
						|
}
 | 
						|
}
 | 
						|
</script>
 | 
						|
</div>
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
<h1 id="introduction"><span>1. </span>Introduction</h1>
 | 
						|
<p>
 | 
						|
  This document provides an overview of how the query planner and optimizer
 | 
						|
  for SQLite works.
 | 
						|
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Given a single SQL statement, there might be dozens, hundreds, or even
 | 
						|
  thousands of ways to implement that statement, depending on the complexity
 | 
						|
  of the statement itself and of the underlying database schema.  The 
 | 
						|
  task of the query planner is to select an algorithm from among the many
 | 
						|
  choices that provides the answer with a minimum of disk I/O and CPU
 | 
						|
  overhead.
 | 
						|
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Additional background information is available in the
 | 
						|
  <a href="queryplanner.html">indexing tutorial</a> document.
 | 
						|
 | 
						|
 | 
						|
</p><p>
 | 
						|
  With release 3.8.0 (2013-08-26),
 | 
						|
  the SQLite query planner was reimplemented as the
 | 
						|
  <a href="queryplanner-ng.html">Next Generation Query Planner</a> or "NGQP".  All of the features, techniques,
 | 
						|
  and algorithms described in this document are applicable to both the
 | 
						|
  pre-3.8.0 legacy query planner and to the NGQP.  For further information on
 | 
						|
  how the NGQP differs from the legacy query planner, see the 
 | 
						|
  <a href="queryplanner-ng.html">detailed description of the NGQP</a>.
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
<a name="where_clause"></a>
 | 
						|
 | 
						|
</p><h1 id="where_clause_analysis"><span>2. </span>WHERE clause analysis</h1>
 | 
						|
 | 
						|
<p>
 | 
						|
  The WHERE clause on a query is broken up into "terms" where each term
 | 
						|
  is separated from the others by an AND operator.
 | 
						|
  If the WHERE clause is composed of constraints separate by the OR
 | 
						|
  operator then the entire clause is considered to be a single "term"
 | 
						|
  to which the <a href="#or_opt">OR-clause optimization</a> is applied.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  All terms of the WHERE clause are analyzed to see if they can be
 | 
						|
  satisfied using indices.
 | 
						|
  To be usable by an index a term must be of one of the following
 | 
						|
  forms:
 | 
						|
 | 
						|
<blockquote><pre><b>
 | 
						|
  </b><i>column</i><b> = </b><i>expression</i><b>
 | 
						|
  </b><i>column</i><b> IS </b><i>expression</i><b>
 | 
						|
  </b><i>column</i><b> > </b><i>expression</i><b>
 | 
						|
  </b><i>column</i><b> >= </b><i>expression</i><b>
 | 
						|
  </b><i>column</i><b> < </b><i>expression</i><b>
 | 
						|
  </b><i>column</i><b> <= </b><i>expression</i><b>
 | 
						|
  </b><i>expression</i><b> = </b><i>column</i><b>
 | 
						|
  </b><i>expression</i><b> > </b><i>column</i><b>
 | 
						|
  </b><i>expression</i><b> >= </b><i>column</i><b>
 | 
						|
  </b><i>expression</i><b> < </b><i>column</i><b>
 | 
						|
  </b><i>expression</i><b> <= </b><i>column</i><b>
 | 
						|
  </b><i>column</i><b> IN (</b><i>expression-list</i><b>)
 | 
						|
  </b><i>column</i><b> IN (</b><i>subquery</i><b>)
 | 
						|
  </b><i>column</i><b> IS NULL
 | 
						|
</b></pre></blockquote>
 | 
						|
 | 
						|
</p><p>
 | 
						|
  If an index is created using a statement like this:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>CREATE INDEX idx_ex1 ON ex1(a,b,c,d,e,...,y,z);
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>
 | 
						|
  Then the index might be used if the initial columns of the index
 | 
						|
  (columns a, b, and so forth) appear in WHERE clause terms.
 | 
						|
  The initial columns of the index must be used with
 | 
						|
  the *=* or *IN* or *IS* operators.  
 | 
						|
  The right-most column that is used can employ inequalities.  
 | 
						|
  For the right-most
 | 
						|
  column of an index that is used, there can be up to two inequalities
 | 
						|
  that must sandwich the allowed values of the column between two extremes.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  It is not necessary for every column of an index to appear in a
 | 
						|
  WHERE clause term in order for that index to be used. 
 | 
						|
  But there cannot be gaps in the columns of the index that are used.
 | 
						|
  Thus for the example index above, if there is no WHERE clause term
 | 
						|
  that constraints column c, then terms that constrain columns a and b can
 | 
						|
  be used with the index but not terms that constraint columns d through z.
 | 
						|
  Similarly, index columns will not normally be used (for indexing purposes)
 | 
						|
  if they are to the right of a 
 | 
						|
  column that is constrained only by inequalities.
 | 
						|
  (See the <a href="optoverview.html#skipscan">skip-scan optimization</a> below for the exception.)
 | 
						|
 | 
						|
</p><p>
 | 
						|
  In the case of <a href="expridx.html">indexes on expressions</a>, whenever the word "column" is
 | 
						|
  used in the foregoing text, one can substitute "indexed expression"
 | 
						|
  (meaning a copy of the expression that appears in the <a href="lang_createindex.html">CREATE INDEX</a>
 | 
						|
  statement) and everything will work the same.
 | 
						|
 | 
						|
<a name="idxexamp"></a>
 | 
						|
 | 
						|
</p><h2 id="index_term_usage_examples"><span>2.1. </span>Index term usage examples</h2>
 | 
						|
<p>
 | 
						|
  For the index above and WHERE clause like this:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>... WHERE a=5 AND b IN (1,2,3) AND c IS NULL AND d='hello'
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  The first four columns a, b, c, and d of the index would be usable since
 | 
						|
  those four columns form a prefix of the index and are all bound by
 | 
						|
  equality constraints.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  For the index above and WHERE clause like this:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>... WHERE a=5 AND b IN (1,2,3) AND c>12 AND d='hello'
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  Only columns a, b, and c of the index would be usable.  The d column
 | 
						|
  would not be usable because it occurs to the right of c and c is
 | 
						|
  constrained only by inequalities.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  For the index above and WHERE clause like this:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>... WHERE a=5 AND b IN (1,2,3) AND d='hello'
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  Only columns a and b of the index would be usable.  The d column
 | 
						|
  would not be usable because column c is not constrained and there can
 | 
						|
  be no gaps in the set of columns that usable by the index.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  For the index above and WHERE clause like this:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>... WHERE b IN (1,2,3) AND c NOT NULL AND d='hello'
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  The index is not usable at all because the left-most column of the
 | 
						|
  index (column "a") is not constrained.  Assuming there are no other
 | 
						|
  indices, the query above would result in a full table scan.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  For the index above and WHERE clause like this:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>... WHERE a=5 OR b IN (1,2,3) OR c NOT NULL OR d='hello'
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  The index is not usable because the WHERE clause terms are connected
 | 
						|
  by OR instead of AND. This query would result in a full table scan.
 | 
						|
  However, if three additional indices where added that contained columns
 | 
						|
  b, c, and d as their left-most columns, then the
 | 
						|
  <a href="#or_opt">OR-clause optimization</a> might apply.
 | 
						|
 | 
						|
 | 
						|
<a name="between_opt"></a>
 | 
						|
 | 
						|
</p><h1 id="the_between_optimization"><span>3. </span>The BETWEEN optimization</h1>
 | 
						|
 | 
						|
<p>
 | 
						|
  If a term of the WHERE clause is of the following form:
 | 
						|
 | 
						|
<blockquote><pre><b>
 | 
						|
  </b><i>expr1</i><b> BETWEEN </b><i>expr2</i><b> AND </b><i>expr3</i><b>
 | 
						|
</b></pre></blockquote>
 | 
						|
</p><p>
 | 
						|
  Then two "virtual" terms are added as follows:
 | 
						|
 | 
						|
<blockquote><pre><b>
 | 
						|
  </b><i>expr1</i><b> >= </b><i>expr2</i><b> AND </b><i>expr1</i><b> <= </b><i>expr3</i><b>
 | 
						|
</b></pre></blockquote>
 | 
						|
</p><p>
 | 
						|
  Virtual terms are used for analysis only and do not cause any byte-code
 | 
						|
  to be generated.
 | 
						|
  If both virtual terms end up being used as constraints on an index,
 | 
						|
  then the original BETWEEN term is omitted and the corresponding test
 | 
						|
  is not performed on input rows.
 | 
						|
  Thus if the BETWEEN term ends up being used as an index constraint
 | 
						|
  no tests are ever performed on that term.
 | 
						|
  On the other hand, the
 | 
						|
  virtual terms themselves never causes tests to be performed on
 | 
						|
  input rows.
 | 
						|
  Thus if the BETWEEN term is not used as an index constraint and
 | 
						|
  instead must be used to test input rows, the <i>expr1</i> expression is
 | 
						|
  only evaluated once.
 | 
						|
 | 
						|
<a name="or_opt"></a>
 | 
						|
 | 
						|
</p><h1 id="or_optimizations"><span>4. </span>OR optimizations</h1>
 | 
						|
 | 
						|
<p>
 | 
						|
  WHERE clause constraints that are connected by OR instead of AND can
 | 
						|
  be handled in two different ways.
 | 
						|
  If a term consists of multiple subterms containing a common column
 | 
						|
  name and separated by OR, like this:
 | 
						|
 | 
						|
<blockquote><pre><b>
 | 
						|
  </b><i>column</i><b> = </b><i>expr1</i><b> OR </b><i>column</i><b> = </b><i>expr2</i><b> OR </b><i>column</i><b> = </b><i>expr3</i><b> OR ...
 | 
						|
</b></pre></blockquote>
 | 
						|
</p><p>
 | 
						|
  Then that term is rewritten as follows:
 | 
						|
 | 
						|
<blockquote><pre><b>
 | 
						|
  </b><i>column</i><b> IN (</b><i>expr1</i><b>,</b><i>expr2</i><b>,</b><i>expr3</i><b>,...)
 | 
						|
</b></pre></blockquote>
 | 
						|
</p><p>
 | 
						|
  The rewritten term then might go on to constrain an index using the
 | 
						|
  normal rules for *IN* operators.  Note that <i>column</i> must be
 | 
						|
  the same column in every OR-connected subterm,
 | 
						|
  although the column can occur on either the left or the right side of
 | 
						|
  the *=* operator.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  If and only if the previously described conversion of OR to an IN operator
 | 
						|
  does not work, the second OR-clause optimization is attempted.
 | 
						|
  Suppose the OR clause consists of multiple subterms as follows:
 | 
						|
 | 
						|
<blockquote><pre><b>
 | 
						|
  </b><i>expr1</i><b> OR </b><i>expr2</i><b> OR </b><i>expr3</i><b>
 | 
						|
</b></pre></blockquote>
 | 
						|
</p><p>
 | 
						|
  Individual subterms might be a single comparison expression like
 | 
						|
  *a=5* or *x>y* or they can be LIKE or BETWEEN expressions, or a subterm
 | 
						|
  can be a parenthesized list of AND-connected sub-subterms.
 | 
						|
  Each subterm is analyzed as if it were itself the entire WHERE clause
 | 
						|
  in order to see if the subterm is indexable by itself.
 | 
						|
  If <u>every</u> subterm of an OR clause is separately indexable
 | 
						|
  then the OR clause might be coded such that a separate index is used
 | 
						|
  to evaluate each term of the OR clause.  One way to think about how
 | 
						|
  SQLite uses separate indices for each OR clause term is to imagine
 | 
						|
  that the WHERE clause where rewritten as follows:
 | 
						|
 | 
						|
<blockquote><pre><b>
 | 
						|
  rowid IN (SELECT rowid FROM </b><i>table</i><b> WHERE </b><i>expr1</i><b>
 | 
						|
            UNION SELECT rowid FROM </b><i>table</i><b> WHERE </b><i>expr2</i><b>
 | 
						|
            UNION SELECT rowid FROM </b><i>table</i><b> WHERE </b><i>expr3</i><b>)
 | 
						|
</b></pre></blockquote>
 | 
						|
</p><p>
 | 
						|
  The rewritten expression above is conceptual; WHERE clauses containing
 | 
						|
  OR are not really rewritten this way.
 | 
						|
  The actual implementation of the OR clause uses a mechanism that is
 | 
						|
  more efficient and that works even for <a href="withoutrowid.html">WITHOUT ROWID</a> tables or 
 | 
						|
  tables in which the "rowid" is inaccessible.
 | 
						|
  But the essence of the implementation is captured by the statement
 | 
						|
  above:  Separate indices are used to find candidate result rows
 | 
						|
  from each OR clause term and the final result is the union of
 | 
						|
  those rows.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Note that in most cases, SQLite will only use a single index for each
 | 
						|
  table in the FROM clause of a query.  The second OR-clause optimization
 | 
						|
  described here is the exception to that rule.  With an OR-clause,
 | 
						|
  a different index might be used for each subterm in the OR-clause.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  For any given query, the fact that the OR-clause optimization described
 | 
						|
  here can be used does not guarantee that it will be used.
 | 
						|
  SQLite uses a cost-based query planner that estimates the CPU and
 | 
						|
  disk I/O costs of various competing query plans and chooses the plan
 | 
						|
  that it thinks will be the fastest.  If there are many OR terms in
 | 
						|
  the WHERE clause or if some of the indices on individual OR-clause 
 | 
						|
  subterms are not very selective, then SQLite might decide that it is
 | 
						|
  faster to use a different query algorithm, or even a full-table scan.
 | 
						|
  Application developers can use the
 | 
						|
  <a href="lang_explain.html">EXPLAIN QUERY PLAN</a> prefix on a statement to get a
 | 
						|
  high-level overview of the chosen query strategy.
 | 
						|
 | 
						|
<a name="like_opt"></a>
 | 
						|
 | 
						|
</p><h1 id="the_like_optimization"><span>5. </span>The LIKE optimization</h1>
 | 
						|
 | 
						|
<p>
 | 
						|
  A WHERE-clause term that uses the <a href="lang_expr.html#like">LIKE</a> or <a href="lang_expr.html#glob">GLOB</a> operator
 | 
						|
  can sometimes be used with an index to do a range search, 
 | 
						|
  almost as if the LIKE or GLOB were an alternative to a <a href="lang_expr.html#between">BETWEEN</a>
 | 
						|
  operator.
 | 
						|
  There are many conditions on this optimization:
 | 
						|
 | 
						|
</p><p>
 | 
						|
  </p><ol>
 | 
						|
  <li>The right-hand side of the LIKE or GLOB must be either a string literal
 | 
						|
      or a <a href="lang_expr.html#varparam">parameter</a> bound to a string literal
 | 
						|
      that does not begin with a wildcard character.</li>
 | 
						|
  <li>It must not be possible to make the LIKE or GLOB operator true by
 | 
						|
      having a numeric value (instead of a string or blob) on the
 | 
						|
      left-hand side. This means that either:
 | 
						|
      <ol type="A">
 | 
						|
      <li> the left-hand side of the LIKE or GLOB operator is the name
 | 
						|
           of an indexed column with <a href="datatype3.html#affinity">TEXT affinity</a>, or</li>
 | 
						|
      <li> the right-hand side pattern argument does not begin with a
 | 
						|
           minus sign ("-") or a digit.</li>
 | 
						|
      </ol>
 | 
						|
      This constraint arises from the fact that numbers do not sort in
 | 
						|
      lexicographical order.  For example: 9<10 but '9'>'10'.</li>
 | 
						|
  <li>The built-in functions used to implement LIKE and GLOB must not
 | 
						|
      have been overloaded using the <a href="c3ref/create_function.html">sqlite3_create_function()</a> API.</li>
 | 
						|
  <li>For the GLOB operator, the column must be indexed using the 
 | 
						|
      built-in BINARY collating sequence.</li>
 | 
						|
  <li>For the LIKE operator, if <a href="pragma.html#pragma_case_sensitive_like">case_sensitive_like</a> mode is enabled then
 | 
						|
      the column must indexed using BINARY collating sequence, or if
 | 
						|
      <a href="pragma.html#pragma_case_sensitive_like">case_sensitive_like</a> mode is disabled then the column must indexed
 | 
						|
      using built-in NOCASE collating sequence.</li>
 | 
						|
  <li>If the ESCAPE option is used, the ESCAPE character must be ASCII,
 | 
						|
      or a single-byte character in UTF-8.
 | 
						|
  </li></ol>
 | 
						|
 | 
						|
<p>
 | 
						|
  The LIKE operator has two modes that can be set by a
 | 
						|
  <a href="pragma.html#pragma_case_sensitive_like">pragma</a>.  The
 | 
						|
  default mode is for LIKE comparisons to be insensitive to differences
 | 
						|
  of case for latin1 characters.  Thus, by default, the following
 | 
						|
  expression is true:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>'a' LIKE 'A'
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  But if the case_sensitive_like pragma is enabled as follows:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>PRAGMA case_sensitive_like=ON;
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  Then the LIKE operator pays attention to case and the example above would
 | 
						|
  evaluate to false.  Note that case insensitivity only applies to
 | 
						|
  latin1 characters - basically the upper and lower case letters of English
 | 
						|
  in the lower 127 byte codes of ASCII.  International character sets
 | 
						|
  are case sensitive in SQLite unless an application-defined
 | 
						|
  <a href="datatype3.html#collation">collating sequence</a> and <a href="lang_corefunc.html#like">like() SQL function</a> are provided that
 | 
						|
  take non-ASCII characters into account.
 | 
						|
  But if an application-defined collating sequence and/or like() SQL
 | 
						|
  function are provided, the LIKE optimization described here will never
 | 
						|
  be taken.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  The LIKE operator is case insensitive by default because this is what
 | 
						|
  the SQL standard requires.  You can change the default behavior at
 | 
						|
  compile time by using the <a href="compile.html#case_sensitive_like">SQLITE_CASE_SENSITIVE_LIKE</a> command-line option
 | 
						|
  to the compiler.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  The LIKE optimization might occur if the column named on the left of the
 | 
						|
  operator is indexed using the built-in BINARY collating sequence and
 | 
						|
  case_sensitive_like is turned on.  Or the optimization might occur if
 | 
						|
  the column is indexed using the built-in NOCASE collating sequence and the 
 | 
						|
  case_sensitive_like mode is off.  These are the only two combinations
 | 
						|
  under which LIKE operators will be optimized.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  The GLOB operator is always case sensitive.  The column on the left side
 | 
						|
  of the GLOB operator must always use the built-in BINARY collating sequence
 | 
						|
  or no attempt will be made to optimize that operator with indices.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  The LIKE optimization will only be attempted if
 | 
						|
  the right-hand side of the GLOB or LIKE operator is either
 | 
						|
  literal string or a <a href="lang_expr.html#varparam">parameter</a> that has been <a href="c3ref/bind_blob.html">bound</a>
 | 
						|
  to a string literal.  The string literal must not
 | 
						|
  begin with a wildcard; if the right-hand side begins with a wildcard
 | 
						|
  character then this optimization is not attempted.  If the right-hand side 
 | 
						|
  is a <a href="lang_expr.html#varparam">parameter</a> that is bound to a string, then this optimization is
 | 
						|
  only attempted if the <a href="c3ref/stmt.html">prepared statement</a> containing the expression
 | 
						|
  was compiled with <a href="c3ref/prepare.html">sqlite3_prepare_v2()</a> or <a href="c3ref/prepare.html">sqlite3_prepare16_v2()</a>.
 | 
						|
  The LIKE optimization is not attempted if the
 | 
						|
  right-hand side is a <a href="lang_expr.html#varparam">parameter</a> and the statement was prepared using
 | 
						|
  <a href="c3ref/prepare.html">sqlite3_prepare()</a> or <a href="c3ref/prepare.html">sqlite3_prepare16()</a>.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Suppose the initial sequence of non-wildcard characters on the right-hand
 | 
						|
  side of the LIKE or GLOB operator is <i>x</i>.  We are using a single 
 | 
						|
  character to denote this non-wildcard prefix but the reader should
 | 
						|
  understand that the prefix can consist of more than 1 character.
 | 
						|
  Let <i>y</i> be the smallest string that is the same length as /x/ but which
 | 
						|
  compares greater than <i>x</i>.  For example, if <i>x</i> is *hello* then
 | 
						|
  <i>y</i> would be *hellp*.
 | 
						|
  The LIKE and GLOB optimizations consist of adding two virtual terms
 | 
						|
  like this:
 | 
						|
 | 
						|
<blockquote><pre><b>
 | 
						|
  </b><i>column</i><b> >= </b><i>x</i><b> AND </b><i>column</i><b> < </b><i>y</i><b>
 | 
						|
</b></pre></blockquote>
 | 
						|
</p><p>
 | 
						|
  Under most circumstances, the original LIKE or GLOB operator is still
 | 
						|
  tested against each input row even if the virtual terms are used to
 | 
						|
  constrain an index.  This is because we do not know what additional
 | 
						|
  constraints may be imposed by characters to the right
 | 
						|
  of the <i>x</i> prefix.  However, if there is only a single
 | 
						|
  global wildcard to the right of <i>x</i>, then the original LIKE or 
 | 
						|
  GLOB test is disabled.
 | 
						|
  In other words, if the pattern is like this:
 | 
						|
 | 
						|
<blockquote><pre><b>
 | 
						|
  </b><i>column</i><b> LIKE </b><i>x</i><b>%
 | 
						|
  </b><i>column</i><b> GLOB </b><i>x</i><b>*
 | 
						|
</b></pre></blockquote>
 | 
						|
</p><p>
 | 
						|
  then the original LIKE or GLOB tests are disabled when the virtual
 | 
						|
  terms constrain an index because in that case we know that all of the
 | 
						|
  rows selected by the index will pass the LIKE or GLOB test.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Note that when the right-hand side of a LIKE or GLOB operator is
 | 
						|
  a <a href="lang_expr.html#varparam">parameter</a> and the statement is prepared using <a href="c3ref/prepare.html">sqlite3_prepare_v2()</a>
 | 
						|
  or <a href="c3ref/prepare.html">sqlite3_prepare16_v2()</a> then the statement is automatically reparsed
 | 
						|
  and recompiled on the first <a href="c3ref/step.html">sqlite3_step()</a> call of each run if the binding
 | 
						|
  to the right-hand side parameter has changed since the previous run.
 | 
						|
  This reparse and recompile is essentially the same action that occurs
 | 
						|
  following a schema change.  The recompile is necessary so that the query
 | 
						|
  planner can examine the new value bound to the right-hand side of the
 | 
						|
  LIKE or GLOB operator and determine whether or not to employ the
 | 
						|
  optimization described above.
 | 
						|
 | 
						|
<a name="skipscan"></a>
 | 
						|
 | 
						|
</p><h1 id="the_skip_scan_optimization"><span>6. </span>The Skip-Scan Optimization</h1>
 | 
						|
 | 
						|
<p>
 | 
						|
  The general rule is that indexes are only useful if there are 
 | 
						|
  WHERE-clause constraints on the left-most columns of the index.
 | 
						|
  However, in some cases,
 | 
						|
  SQLite is able to use an index even if the first few columns of
 | 
						|
  the index are omitted from the WHERE clause but later columns 
 | 
						|
  are included.
 | 
						|
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Consider a table such as the following:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>CREATE TABLE people(
 | 
						|
  name TEXT PRIMARY KEY,
 | 
						|
  role TEXT NOT NULL,
 | 
						|
  height INT NOT NULL, -- in cm
 | 
						|
  CHECK( role IN ('student','teacher') )
 | 
						|
);
 | 
						|
CREATE INDEX people_idx1 ON people(role, height);
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>
 | 
						|
  The people table has one entry for each person in a large
 | 
						|
  organization.  Each person is either a "student" or a "teacher",
 | 
						|
  as determined by the "role" field.  And we record the height in
 | 
						|
  centimeters of each person.  The role and height are indexed.
 | 
						|
  Notice that the left-most column of the index is not very
 | 
						|
  selective - it only contains two possible values.
 | 
						|
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Now consider a query to find the names of everyone in the
 | 
						|
  organization that is 180cm tall or taller:
 | 
						|
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>SELECT name FROM people WHERE height>=180;
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>
 | 
						|
  Because the left-most column of the index does not appear in the
 | 
						|
  WHERE clause of the query, one is tempted to conclude that the
 | 
						|
  index is not usable here.  But SQLite is able to use the index.
 | 
						|
  Conceptually, SQLite uses the index as if the query were more
 | 
						|
  like the following:
 | 
						|
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>SELECT name FROM people
 | 
						|
 WHERE role IN (SELECT DISTINCT role FROM people)
 | 
						|
   AND height>=180;
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>
 | 
						|
  Or this:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>SELECT name FROM people WHERE role='teacher' AND height>=180
 | 
						|
UNION ALL
 | 
						|
SELECT name FROM people WHERE role='student' AND height>=180;
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>
 | 
						|
  The alternative query formulations shown above are conceptual only.
 | 
						|
  SQLite does not really transform the query. 
 | 
						|
  The actual query plan is like this:
 | 
						|
  SQLite locates the first possible value for "role", which it
 | 
						|
  can do by rewinding the "people_idx1" index to the beginning and reading
 | 
						|
  the first record.  SQLite stores this first "role" value in an
 | 
						|
  internal variable that we will here call "$role".  Then SQLite
 | 
						|
  runs a query like: "SELECT name FROM people WHERE role=$role AND height>=180".
 | 
						|
  This query has an equality constraint on the left-most column of the
 | 
						|
  index and so the index can be used to resolve that query.  Once
 | 
						|
  that query is finished, SQLite then uses the "people_idx1" index to
 | 
						|
  locate the next value of the "role" column, using code that is logically
 | 
						|
  similar to "SELECT role FROM people WHERE role>$role LIMIT 1".
 | 
						|
  This new "role" value overwrites the $role variable, and the process
 | 
						|
  repeats until all possible values for "role" have been examined.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  We call this kind of index usage a "skip-scan" because the database
 | 
						|
  engine is basically doing a full scan of the index but it optimizes the
 | 
						|
  scan (making it less than "full") by occasionally skipping ahead to the
 | 
						|
  next candidate value.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  SQLite might use a skip-scan on an index if it knows that the first
 | 
						|
  one or more columns contain many duplication values.
 | 
						|
  If there are too few duplicates
 | 
						|
  in the left-most columns of the index, then it would
 | 
						|
  be faster to simply step ahead to the next value, and thus do
 | 
						|
  a full table scan, than to do a binary search on an index to locate
 | 
						|
  the next left-column value.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  The only way that SQLite can know that the left-most columns of an index
 | 
						|
  have many duplicate is if the <a href="lang_analyze.html">ANALYZE</a> command has been run
 | 
						|
  on the database.
 | 
						|
  Without the results of ANALYZE, SQLite has to guess at the "shape" of
 | 
						|
  the data in the table, and the default guess is that there are an average
 | 
						|
  of 10 duplicates for every value in the left-most column of the index.
 | 
						|
  But skip-scan only becomes profitable (it only gets to be faster than
 | 
						|
  a full table scan) when the number of duplicates is about 18 or more.
 | 
						|
  Hence, a skip-scan is never used on a database that has not been analyzed.
 | 
						|
 | 
						|
<a name="joins"></a>
 | 
						|
 | 
						|
</p><h1 id="joins"><span>7. </span>Joins</h1>
 | 
						|
 | 
						|
<p>
 | 
						|
  The ON and USING clauses of an inner join are converted into additional
 | 
						|
  terms of the WHERE clause prior to WHERE clause analysis described
 | 
						|
  above in paragraph 1.0.  Thus with SQLite, there is no computational
 | 
						|
  advantage to use the newer SQL92 join syntax
 | 
						|
  over the older SQL89 comma-join syntax.  They both end up accomplishing
 | 
						|
  exactly the same thing on inner joins.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  For a LEFT OUTER JOIN the situation is more complex.  The following
 | 
						|
  two queries are not equivalent:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>SELECT * FROM tab1 LEFT JOIN tab2 ON tab1.x=tab2.y;
 | 
						|
SELECT * FROM tab1 LEFT JOIN tab2 WHERE tab1.x=tab2.y;
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  For an inner join, the two queries above would be identical.  But
 | 
						|
  special processing applies to the ON and USING clauses of an OUTER join:
 | 
						|
  specifically, the constraints in an ON or USING clause do not apply if
 | 
						|
  the right table of the join is on a null row, but the constraints do apply
 | 
						|
  in the WHERE clause.  The net effect is that putting the ON or USING
 | 
						|
  clause expressions for a LEFT JOIN in the WHERE clause effectively converts
 | 
						|
  the query to an
 | 
						|
  ordinary INNER JOIN - albeit an inner join that runs more slowly.
 | 
						|
 | 
						|
<a name="table_order"></a>
 | 
						|
 | 
						|
</p><h2 id="order_of_tables_in_a_join"><span>7.1. </span>Order of tables in a join</h2>
 | 
						|
 | 
						|
<p>
 | 
						|
  The current implementation of 
 | 
						|
  SQLite uses only loop joins.  That is to say, joins are implemented as
 | 
						|
  nested loops.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  The default order of the nested loops in a join is for the left-most
 | 
						|
  table in the FROM clause to form the outer loop and the right-most
 | 
						|
  table to form the inner loop.
 | 
						|
  However, SQLite will nest the loops in a different order if doing so
 | 
						|
  will help it to select better indices.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Inner joins can be freely reordered.  However a left outer join is
 | 
						|
  neither commutative nor associative and hence will not be reordered.
 | 
						|
  Inner joins to the left and right of the outer join might be reordered
 | 
						|
  if the optimizer thinks that is advantageous but the outer joins are
 | 
						|
  always evaluated in the order in which they occur.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  SQLite <a href="lang_select.html#crossjoin">treats the CROSS JOIN operator specially</a>.
 | 
						|
  The CROSS JOIN operator is commutative in theory.  But SQLite chooses to
 | 
						|
  never reorder tables in a CROSS JOIN.  This provides a mechanism
 | 
						|
  by which the programmer can force SQLite to choose a particular loop nesting
 | 
						|
  order.  
 | 
						|
 | 
						|
</p><p>
 | 
						|
  When selecting the order of tables in a join, SQLite uses an efficient
 | 
						|
  polynomial-time algorithm.  Because of this,
 | 
						|
  SQLite is able to plan queries with 50- or 60-way joins in a matter of
 | 
						|
  microseconds
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Join reordering is automatic and usually works well enough that
 | 
						|
  programmers do not have to think about it, especially if <a href="lang_analyze.html">ANALYZE</a>
 | 
						|
  has been used to gather statistics about the available indices.
 | 
						|
  But occasionally some hints from the programmer are needed.
 | 
						|
  Consider, for example, the following schema:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>CREATE TABLE node(
 | 
						|
   id INTEGER PRIMARY KEY,
 | 
						|
   name TEXT
 | 
						|
);
 | 
						|
CREATE INDEX node_idx ON node(name);
 | 
						|
CREATE TABLE edge(
 | 
						|
   orig INTEGER REFERENCES node,
 | 
						|
   dest INTEGER REFERENCES node,
 | 
						|
   PRIMARY KEY(orig, dest)
 | 
						|
);
 | 
						|
CREATE INDEX edge_idx ON edge(dest,orig);
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  The schema above defines a directed graph with the ability to store a
 | 
						|
  name at each node. Now consider a query against this schema:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>SELECT *
 | 
						|
  FROM edge AS e,
 | 
						|
       node AS n1,
 | 
						|
       node AS n2
 | 
						|
 WHERE n1.name = 'alice'
 | 
						|
   AND n2.name = 'bob'
 | 
						|
   AND e.orig = n1.id
 | 
						|
   AND e.dest = n2.id;
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  This query asks for is all information about edges that go from
 | 
						|
  nodes labeled "alice" to nodes labeled "bob".
 | 
						|
  The query optimizer in SQLite has basically two choices on how to
 | 
						|
  implement this query.  (There are actually six different choices, but
 | 
						|
  we will only consider two of them here.)
 | 
						|
  Pseudocode below demonstrating these two choices.
 | 
						|
 | 
						|
<a name="option1"></a>
 | 
						|
 | 
						|
</p><p>Option 1:
 | 
						|
</p><div class="codeblock"><pre>foreach n1 where n1.name='alice' do:
 | 
						|
  foreach n2 where n2.name='bob' do:
 | 
						|
    foreach e where e.orig=n1.id and e.dest=n2.id
 | 
						|
      return n1.*, n2.*, e.*
 | 
						|
    end
 | 
						|
  end
 | 
						|
end
 | 
						|
</pre></div>
 | 
						|
<a name="option2"></a>
 | 
						|
 | 
						|
<p>Option 2:
 | 
						|
</p><div class="codeblock"><pre>foreach n1 where n1.name='alice' do:
 | 
						|
  foreach e where e.orig=n1.id do:
 | 
						|
    foreach n2 where n2.id=e.dest and n2.name='bob' do:
 | 
						|
      return n1.*, n2.*, e.*
 | 
						|
    end
 | 
						|
  end
 | 
						|
end
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  The same indices are used to speed up every loop in both implementation
 | 
						|
  options.
 | 
						|
  The only difference in these two query plans is the order in which
 | 
						|
  the loops are nested.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  So which query plan is better? It turns out that the answer depends on
 | 
						|
  what kind of data is found in the node and edge tables.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Let the number of alice nodes be M and the number of bob nodes be N.
 | 
						|
  Consider two scenarios. In the first scenario, M and N are both 2 but
 | 
						|
  there are thousands of edges on each node. In this case, option 1 is
 | 
						|
  preferred. With option 1, the inner loop checks for the existence of
 | 
						|
  an edge between a pair of nodes and outputs the result if found. 
 | 
						|
  But because there are only 2 alice and bob nodes each, the inner loop
 | 
						|
  only has to run 4 times and the query is very quick. Option 2 would
 | 
						|
  take much longer here. The outer loop of option 2 only executes twice,
 | 
						|
  but because there are a large number of edges leaving each alice node,
 | 
						|
  the middle loop has to iterate many thousands of times. It will be
 | 
						|
  much slower. So in the first scenario, we prefer to use option 1.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Now consider the case where M and N are both 3500. Alice nodes are
 | 
						|
  abundant. But suppose each of these nodes is connected by only one
 | 
						|
  or two edges. In this case, option 2 is preferred. With option 2,
 | 
						|
  the outer loop still has to run 3500 times, but the middle loop only
 | 
						|
  runs once or twice for each outer loop and the inner loop will only
 | 
						|
  run once for each middle loop, if at all. So the total number of
 | 
						|
  iterations of the inner loop is around 7000. Option 1, on the other
 | 
						|
  hand, has to run both its outer loop and its middle loop 3500 times
 | 
						|
  each, resulting in 12 million iterations of the middle loop.
 | 
						|
  Thus in the second scenario, option 2 is nearly 2000 times faster
 | 
						|
  than option 1.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  So you can see that depending on how the data is structured in the table,
 | 
						|
  either query plan 1 or query plan 2 might be better.  Which plan does
 | 
						|
  SQLite choose by default?  As of version 3.6.18, without running <a href="lang_analyze.html">ANALYZE</a>,
 | 
						|
  SQLite will choose option 2.
 | 
						|
  But if the <a href="lang_analyze.html">ANALYZE</a> command is run in order to gather statistics,
 | 
						|
  a different choice might be made if the statistics indicate that the
 | 
						|
  alternative is likely to run faster.
 | 
						|
 | 
						|
<a name="manctrl"></a>
 | 
						|
 | 
						|
</p><h2 id="manual_control_of_query_plans_using_sqlite_stat_tables"><span>7.2. </span>Manual Control Of Query Plans Using SQLITE_STAT Tables</h2>
 | 
						|
 | 
						|
<p>
 | 
						|
  SQLite provides the ability for advanced programmers to exercise control
 | 
						|
  over the query plan chosen by the optimizer. One method for doing this
 | 
						|
  is to fudge the <a href="lang_analyze.html">ANALYZE</a> results in the <a href="fileformat2.html#stat1tab">sqlite_stat1</a>, 
 | 
						|
  <a href="fileformat2.html#stat3tab">sqlite_stat3</a>, and/or <a href="fileformat2.html#stat4tab">sqlite_stat4</a> tables.  That approach is not 
 | 
						|
  recommended except for the one scenario described in the next paragraph.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  For a program that uses an SQLite database as its 
 | 
						|
  <a href="appfileformat.html">application file-format</a>,
 | 
						|
  when a new database instance is first created the <a href="lang_analyze.html">ANALYZE</a>
 | 
						|
  command is ineffective because the database contain no data from which
 | 
						|
  to gather statistics.  In that case, one could construct a large prototype
 | 
						|
  database containing typical data during development and run the 
 | 
						|
  <a href="lang_analyze.html">ANALYZE</a> command on this prototype database to gather statistics,
 | 
						|
  then save the prototype statistics as part of the application.
 | 
						|
  After deployment, when the application goes to create a new database file,
 | 
						|
  it can run the <a href="lang_analyze.html">ANALYZE</a> command in order to create the statistics
 | 
						|
  tables, then copy the precomputed statistics obtained
 | 
						|
  from the prototype database into these new statistics tables.
 | 
						|
  In that way, statistics from large working data sets can be preloaded
 | 
						|
  into newly created application files.
 | 
						|
 | 
						|
<a name="crossjoin"></a>
 | 
						|
 | 
						|
</p><h2 id="manual_control_of_query_plans_using_cross_join"><span>7.3. </span>Manual Control Of Query Plans Using CROSS JOIN</h2>
 | 
						|
 | 
						|
<p>
 | 
						|
  Programmers can force SQLite to use a particular loop nesting order
 | 
						|
  for a join by using the CROSS JOIN operator instead of just JOIN, 
 | 
						|
  INNER JOIN, NATURAL JOIN, or a "," join.  Though CROSS JOINs are
 | 
						|
  commutative in theory, SQLite chooses to never reorder the tables in
 | 
						|
  a CROSS JOIN.  Hence, the left table of a CROSS JOIN will always be
 | 
						|
  in an outer loop relative to the right table.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  In the following query, the optimizer is free to reorder the 
 | 
						|
  tables of FROM clause anyway it sees fit:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>SELECT *
 | 
						|
  FROM node AS n1,
 | 
						|
       edge AS e,
 | 
						|
       node AS n2
 | 
						|
 WHERE n1.name = 'alice'
 | 
						|
   AND n2.name = 'bob'
 | 
						|
   AND e.orig = n1.id
 | 
						|
   AND e.dest = n2.id;
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  But in the following logically equivalent formulation of the same query,
 | 
						|
  the substitution of "CROSS JOIN" for the "," means that the order
 | 
						|
  of tables must be N1, E, N2.
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>SELECT *
 | 
						|
  FROM node AS n1 CROSS JOIN
 | 
						|
       edge AS e CROSS JOIN
 | 
						|
       node AS n2
 | 
						|
 WHERE n1.name = 'alice'
 | 
						|
   AND n2.name = 'bob'
 | 
						|
   AND e.orig = n1.id
 | 
						|
   AND e.dest = n2.id;
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  In the latter query, the query plan must be 
 | 
						|
  <a href="#option2">option 2</a>.  Note that
 | 
						|
  you must use the keyword "CROSS" in order to disable the table reordering
 | 
						|
  optimization; INNER JOIN, NATURAL JOIN, JOIN, and other similar
 | 
						|
  combinations work just like a comma join in that the optimizer is
 | 
						|
  free to reorder tables as it sees fit. (Table reordering is also
 | 
						|
  disabled on an outer join, but that is because outer joins are not
 | 
						|
  associative or commutative. Reordering tables in OUTER JOIN changes
 | 
						|
  the result.)
 | 
						|
 | 
						|
</p><p>
 | 
						|
  See "<a href="queryplanner-ng.html#fossilcasestudy">The Fossil NGQP Upgrade Case Study</a>" for another real-world example
 | 
						|
  of using CROSS JOIN to manually control the nesting order of a join.
 | 
						|
  The <a href="queryplanner-ng.html#howtofix">query planner checklist</a> found later in the same document provides
 | 
						|
  further guidance on manual control of the query planner.
 | 
						|
 | 
						|
<a name="multi_index"></a>
 | 
						|
 | 
						|
</p><h1 id="choosing_between_multiple_indices"><span>8. </span>Choosing between multiple indices</h1>
 | 
						|
 | 
						|
<p>
 | 
						|
  Each table in the FROM clause of a query can use at most one index
 | 
						|
  (except when the <a href="#or_opt">OR-clause optimization</a> comes into
 | 
						|
  play)
 | 
						|
  and SQLite strives to use at least one index on each table.  Sometimes,
 | 
						|
  two or more indices might be candidates for use on a single table.
 | 
						|
  For example:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>CREATE TABLE ex2(x,y,z);
 | 
						|
CREATE INDEX ex2i1 ON ex2(x);
 | 
						|
CREATE INDEX ex2i2 ON ex2(y);
 | 
						|
SELECT z FROM ex2 WHERE x=5 AND y=6;
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  For the SELECT statement above, the optimizer can use the ex2i1 index
 | 
						|
  to lookup rows of ex2 that contain x=5 and then test each row against
 | 
						|
  the y=6 term.  Or it can use the ex2i2 index to lookup rows
 | 
						|
  of ex2 that contain y=6 then test each of those rows against the
 | 
						|
  x=5 term.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  When faced with a choice of two or more indices, SQLite tries to estimate
 | 
						|
  the total amount of work needed to perform the query using each option.
 | 
						|
  It then selects the option that gives the least estimated work.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  To help the optimizer get a more accurate estimate of the work involved
 | 
						|
  in using various indices, the user may optionally run the <a href="lang_analyze.html">ANALYZE</a> command.
 | 
						|
  The <a href="lang_analyze.html">ANALYZE</a> command scans all indices of database where there might
 | 
						|
  be a choice between two or more indices and gathers statistics on the
 | 
						|
  selectiveness of those indices.  The statistics gathered by
 | 
						|
  this scan are stored in special database tables names shows names all
 | 
						|
  begin with "<b>sqlite_stat</b>".
 | 
						|
  The content of these tables is not updated as the database
 | 
						|
  changes so after making significant changes it might be prudent to
 | 
						|
  rerun <a href="lang_analyze.html">ANALYZE</a>.
 | 
						|
  The results of an ANALYZE command are only available to database connections
 | 
						|
  that are opened after the ANALYZE command completes.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  The various <b>sqlite_stat</b><i>N</i> tables contain information on how
 | 
						|
  selective the various indices are.  For example, the <a href="fileformat2.html#stat1tab">sqlite_stat1</a>
 | 
						|
  table might indicate that an equality constraint on column x reduces the
 | 
						|
  search space to 10 rows on average, whereas an equality constraint on
 | 
						|
  column y reduces the search space to 3 rows on average.  In that case,
 | 
						|
  SQLite would prefer to use index ex2i2 since that index is more selective.
 | 
						|
 | 
						|
<a name="uplus"></a>
 | 
						|
 | 
						|
</p><h2 id="disqualifying_where_clause_terms_using_unary_"><span>8.1. </span>Disqualifying WHERE Clause Terms Using Unary-"+"</h2>
 | 
						|
 | 
						|
<p>
 | 
						|
  Terms of the WHERE clause can be manually disqualified for use with
 | 
						|
  indices by prepending a unary *+* operator to the column name.  The
 | 
						|
  unary *+* is a no-op and will not generate any byte code in the prepared
 | 
						|
  statement.
 | 
						|
  But the unary *+* operator will prevent the term from constraining an index.
 | 
						|
  So, in the example above, if the query were rewritten as:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>SELECT z FROM ex2 WHERE +x=5 AND y=6;
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  The *+* operator on the *x* column will prevent that term from 
 | 
						|
  constraining an index.  This would force the use of the ex2i2 index.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Note that the unary *+* operator also removes 
 | 
						|
  <a href="datatype3.html#affinity">type affinity</a> from
 | 
						|
  an expression, and in some cases this can cause subtle changes in
 | 
						|
  the meaning of an expression.
 | 
						|
  In the example above,
 | 
						|
  if column *x* has <a href="datatype3.html#affinity">TEXT affinity</a>
 | 
						|
  then the comparison "x=5" will be done as text.  But the *+* operator
 | 
						|
  removes the affinity.  So the comparison "+x=5" will compare the text
 | 
						|
  in column *x* with the numeric value 5 and will always be false.
 | 
						|
 | 
						|
<a name="rangequery"></a>
 | 
						|
 | 
						|
</p><h2 id="range_queries"><span>8.2. </span>Range Queries</h2>
 | 
						|
 | 
						|
<p>
 | 
						|
  Consider a slightly different scenario:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>CREATE TABLE ex2(x,y,z);
 | 
						|
CREATE INDEX ex2i1 ON ex2(x);
 | 
						|
CREATE INDEX ex2i2 ON ex2(y);
 | 
						|
SELECT z FROM ex2 WHERE x BETWEEN 1 AND 100 AND y BETWEEN 1 AND 100;
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  Further suppose that column x contains values spread out
 | 
						|
  between 0 and 1,000,000 and column y contains values
 | 
						|
  that span between 0 and 1,000.  In that scenario,
 | 
						|
  the range constraint on column x should reduce the search space by
 | 
						|
  a factor of 10,000 whereas the range constraint on column y should
 | 
						|
  reduce the search space by a factor of only 10.  So the ex2i1 index
 | 
						|
  should be preferred.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  SQLite will make this determination, but only if it has been compiled
 | 
						|
  with <a href="compile.html#enable_stat3">SQLITE_ENABLE_STAT3</a> or <a href="compile.html#enable_stat4">SQLITE_ENABLE_STAT4</a>.
 | 
						|
  The <a href="compile.html#enable_stat3">SQLITE_ENABLE_STAT3</a> and <a href="compile.html#enable_stat4">SQLITE_ENABLE_STAT4</a> options causes
 | 
						|
  the <a href="lang_analyze.html">ANALYZE</a> command to collect a histogram of column content in the
 | 
						|
  <a href="fileformat2.html#stat3tab">sqlite_stat3</a> or <a href="fileformat2.html#stat4tab">sqlite_stat4</a> tables and to use this histogram to 
 | 
						|
  make a better guess at the best query to use for range constraints
 | 
						|
  such as the above.  The main difference between STAT3 and STAT4 is
 | 
						|
  that STAT3 records histogram data for only the left-most column of
 | 
						|
  an index whereas STAT4 records histogram data for all columns of an
 | 
						|
  index.  For single-column indexes, STAT3 and STAT4 work the same.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  The histogram data is only useful if the right-hand side of the constraint
 | 
						|
  is a simple compile-time constant or <a href="lang_expr.html#varparam">parameter</a> and not an expression.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Another limitation of the histogram data is that it only applies to the
 | 
						|
  left-most column on an index.  Consider this scenario:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>CREATE TABLE ex3(w,x,y,z);
 | 
						|
CREATE INDEX ex3i1 ON ex2(w, x);
 | 
						|
CREATE INDEX ex3i2 ON ex2(w, y);
 | 
						|
SELECT z FROM ex3 WHERE w=5 AND x BETWEEN 1 AND 100 AND y BETWEEN 1 AND 100;
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  Here the inequalities are on columns x and y which are not the
 | 
						|
  left-most index columns.  Hence, the histogram data which is collected no
 | 
						|
  left-most column of indices is useless in helping to choose between the
 | 
						|
  range constraints on columns x and y.
 | 
						|
 | 
						|
<a name="covidx"></a>
 | 
						|
 | 
						|
</p><h1 id="covering_indices"><span>9. </span>Covering Indices</h1>
 | 
						|
 | 
						|
<p>
 | 
						|
  When doing an indexed lookup of a row, the usual procedure is to
 | 
						|
  do a binary search on the index to find the index entry, then extract
 | 
						|
  the <a href="lang_createtable.html#rowid">rowid</a> from the index and use that <a href="lang_createtable.html#rowid">rowid</a> to do a binary search on
 | 
						|
  the original table.  Thus a typical indexed lookup involves two
 | 
						|
  binary searches.
 | 
						|
  If, however, all columns that were to be fetched from the table are
 | 
						|
  already available in the index itself, SQLite will use the values
 | 
						|
  contained in the index and will never look up the original table
 | 
						|
  row.  This saves one binary search for each row and can make many
 | 
						|
  queries run twice as fast.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  When an index contains all of the data needed for a query and when the
 | 
						|
  original table never needs to be consulted, we call that index a
 | 
						|
  "covering index".
 | 
						|
 | 
						|
<a name="order_by"></a>
 | 
						|
 | 
						|
</p><h1 id="order_by_optimizations"><span>10. </span>ORDER BY optimizations</h1>
 | 
						|
 | 
						|
<p>
 | 
						|
  SQLite attempts to use an index to satisfy the ORDER BY clause of a
 | 
						|
  query when possible.
 | 
						|
  When faced with the choice of using an index to satisfy WHERE clause
 | 
						|
  constraints or satisfying an ORDER BY clause, SQLite does the same
 | 
						|
  cost analysis described above
 | 
						|
  and chooses the index that it believes will result in the fastest answer.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  SQLite will also attempt to use indices to help satisfy GROUP BY clauses
 | 
						|
  and the DISTINCT keyword.  If the nested loops of the join can be arranged
 | 
						|
  such that rows that are equivalent for the GROUP BY or for the DISTINCT are
 | 
						|
  consecutive, then the GROUP BY or DISTINCT logic can determine if the 
 | 
						|
  current row is part of the same group or if the current row is distinct
 | 
						|
  simply by comparing the current row to the previous row.
 | 
						|
  This can be much faster than the alternative of comparing each row to
 | 
						|
  all prior rows.
 | 
						|
 | 
						|
<a name="partsort"></a>
 | 
						|
 | 
						|
</p><h2 id="partial_order_by_via_index"><span>10.1. </span>Partial ORDER BY Via Index</h2>
 | 
						|
 | 
						|
<p>
 | 
						|
  If a query contains an ORDER BY clause with multiple terms, it might
 | 
						|
  be that SQLite can use indices to cause rows to come out in the order
 | 
						|
  of some prefix of the terms in the ORDER BY but that later terms in
 | 
						|
  the ORDER BY are not satisfied.  In that case, SQLite does block sorting.
 | 
						|
  Suppose the ORDER BY clause has four terms and the natural order of the
 | 
						|
  query results in rows appearing in order of the first two terms.  As
 | 
						|
  each row is output by the query engine and enters the sorter, the 
 | 
						|
  outputs in the current row corresponding to the first two terms of 
 | 
						|
  the ORDER BY are compared against the previous row.  If they have
 | 
						|
  changed, the current sort is finished and output and a new sort is
 | 
						|
  started.  This results in a slightly faster sort.  But the bigger
 | 
						|
  advantages are that many fewer rows need to be held in memory,
 | 
						|
  reducing memory requirements, and outputs can begin to appear before
 | 
						|
  the core query has run to completion.
 | 
						|
 | 
						|
<a name="flattening"></a>
 | 
						|
 | 
						|
</p><h1 id="subquery_flattening"><span>11. </span>Subquery flattening</h1>
 | 
						|
 | 
						|
<p>
 | 
						|
  When a subquery occurs in the FROM clause of a SELECT, the simplest
 | 
						|
  behavior is to evaluate the subquery into a transient table, then run
 | 
						|
  the outer SELECT against the transient table.  But such a plan
 | 
						|
  can be suboptimal since the transient table will not have any indices
 | 
						|
  and the outer query (which is likely a join) will be forced to do a
 | 
						|
  full table scan on the transient table.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  To overcome this problem, SQLite attempts to flatten subqueries in
 | 
						|
  the FROM clause of a SELECT.
 | 
						|
  This involves inserting the FROM clause of the subquery into the
 | 
						|
  FROM clause of the outer query and rewriting expressions in
 | 
						|
  the outer query that refer to the result set of the subquery.
 | 
						|
  For example:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>  SELECT t1.a, t2.b FROM t2, (SELECT x+y AS a FROM t1 WHERE z<100) WHERE="1" a="1">5
 | 
						|
</100)>
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  Would be rewritten using query flattening as:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>  SELECT t1.x+t1.y AS a, t2.b FROM t2, t1 WHERE z<100 AND="1" a="1">5
 | 
						|
</100>
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  There is a long list of conditions that must all be met in order for
 | 
						|
  query flattening to occur.  Some of the constraints are marked as 
 | 
						|
  obsolete by italic text.  These extra constraints are retained in the
 | 
						|
  documentation to preserve the numbering of the other constraints.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Casual readers are not expected to understand all of these rules.
 | 
						|
  A key take-away from this section is that the rules for determining
 | 
						|
  when query flatting is safe and when it is unsafe are subtle and
 | 
						|
  complex.  There have been multiple bugs over the years caused by
 | 
						|
  over-aggressive query flattening.  On the other hand, performance
 | 
						|
  of complex queries and/or queries involving views tends to suffer
 | 
						|
  if query flattening is more conservative.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  </p><ol>
 | 
						|
  <li value="1">  <i>(Obsolete.  Query flattening is no longer
 | 
						|
                      attempted for aggregate subqueries.)</i>
 | 
						|
 | 
						|
  </li><li value="2">  <i>(Obsolete.  Query flattening is no longer
 | 
						|
                      attempted for aggregate subqueries.)</i>
 | 
						|
 | 
						|
  </li><li value="3">
 | 
						|
  If the subquery is the right operand of a LEFT JOIN then
 | 
						|
   <ol type="a"><li> the subquery may not be a join, and
 | 
						|
   </li><li> the FROM clause of the subquery may
 | 
						|
   not contain a virtual table, and
 | 
						|
   </li><li> the outer query may not be an aggregate.</li></ol></li>
 | 
						|
 | 
						|
  <li value="4">  The subquery is not DISTINCT.
 | 
						|
 | 
						|
  </li><li value="5"> <i>(Subsumed into constraint 4)</i>
 | 
						|
 | 
						|
  </li><li value="6"> <i>(Obsolete.  Query flattening is no longer
 | 
						|
                      attempted for aggregate subqueries.)</i>
 | 
						|
 | 
						|
  </li><li value="7">
 | 
						|
  The subquery has a FROM clause.
 | 
						|
 | 
						|
  </li><li value="8">
 | 
						|
  The subquery does not use LIMIT or the outer query is not a join.
 | 
						|
 | 
						|
  </li><li value="9">
 | 
						|
  The subquery does not use LIMIT or the outer query does not use
 | 
						|
  aggregates.
 | 
						|
 | 
						|
  </li><li value="10"> <i>(Restriction relaxed in 2005)</i>
 | 
						|
 | 
						|
  </li><li value="11">
 | 
						|
  The subquery and the outer query do not both have ORDER BY clauses.
 | 
						|
 | 
						|
  </li><li value="12"> <i>(Subsumed into constraint 3)</i>
 | 
						|
 | 
						|
  </li><li value="13">  The subquery and outer query do not both use LIMIT.
 | 
						|
 | 
						|
  </li><li value="14">  The subquery does not use OFFSET.
 | 
						|
 | 
						|
  </li><li value="15">
 | 
						|
  If the outer query is part of a compound select, then the
 | 
						|
  subquery may not have a LIMIT clause.
 | 
						|
 | 
						|
  </li><li value="16">
 | 
						|
  If the outer query is an aggregate, then the subquery may
 | 
						|
  not contain ORDER BY. 
 | 
						|
 | 
						|
  </li><li value="17">
 | 
						|
  If the sub-query is a compound SELECT, then
 | 
						|
  <ol type="'a'">
 | 
						|
  <li> all compound operators must be UNION ALL, and
 | 
						|
  </li><li> no terms with the subquery compound may be aggregate
 | 
						|
       or DISTINCT, and
 | 
						|
  </li><li> every term within the subquery must have a FROM clause, and
 | 
						|
  </li><li> the outer query may not be an aggregate, DISTINCT query, or join.
 | 
						|
  </li></ol>
 | 
						|
 | 
						|
  The parent and sub-query may contain WHERE clauses. Subject to
 | 
						|
  rules (11), (12) and (13), they may also contain ORDER BY,
 | 
						|
  LIMIT and OFFSET clauses.
 | 
						|
 | 
						|
  </li><li value="18">
 | 
						|
  If the sub-query is a compound select, then all terms of the
 | 
						|
  ORDER by clause of the parent must be simple references to 
 | 
						|
  columns of the sub-query.
 | 
						|
 | 
						|
  </li><li value="19">
 | 
						|
  If the subquery uses LIMIT then the outer query may not
 | 
						|
  have a WHERE clause.
 | 
						|
 | 
						|
  </li><li value="20">
 | 
						|
  If the sub-query is a compound select, then it must not use
 | 
						|
   an ORDER BY clause.
 | 
						|
 | 
						|
  </li><li value="21">
 | 
						|
  If the subquery uses LIMIT, then the outer query may not be
 | 
						|
  DISTINCT.
 | 
						|
 | 
						|
  </li><li value="22"> The subquery may not be a recursive CTE.
 | 
						|
 | 
						|
  </li><li value="23"> <i>(Subsumed into constraint 17d.)</i>
 | 
						|
 | 
						|
  </li><li value="24"> <i>(Obsolete. Query flattening is no longer
 | 
						|
                      attempted for aggregate subqueries.)</i>
 | 
						|
  </li></ol>
 | 
						|
 | 
						|
<p>
 | 
						|
  The casual reader is not expected to understand or remember any part of
 | 
						|
  the list above.  The point of this list is to demonstrate
 | 
						|
  that the decision of whether or not to flatten a query is complex.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Query flattening is an important optimization when views are used as
 | 
						|
  each use of a view is translated into a subquery.
 | 
						|
 | 
						|
<a name="coroutines"></a>
 | 
						|
 | 
						|
</p><h1 id="subquery_co_routines"><span>12. </span>Subquery Co-routines</h1>
 | 
						|
 | 
						|
<p>
 | 
						|
  Prior to SQLite 3.7.15 (2012-12-12),
 | 
						|
  a subquery in the FROM clause would be
 | 
						|
  either flattened into the outer query, or else the subquery would be run
 | 
						|
  to completion
 | 
						|
  before the outer query started, the result set from the subquery
 | 
						|
  would be stored in a transient table,
 | 
						|
  and then the transient table would be used in the outer query.  Newer
 | 
						|
  versions of SQLite have a third option, which is to implement the subquery
 | 
						|
  using a co-routine.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  A co-routine is like a subroutine in that it runs in the same thread
 | 
						|
  as the caller and eventually returns control back to the caller.  The
 | 
						|
  difference is that a co-routine also has the ability to return
 | 
						|
  before it has finished, and then resume where it left off the next
 | 
						|
  time it is called.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  When a subquery is implemented as a co-routine, byte-code is generated
 | 
						|
  to implement the subquery as if it were a standalone query, except
 | 
						|
  instead of returning rows of results back to the application, the
 | 
						|
  co-routine yields control back to the caller after each row is computed.
 | 
						|
  The caller can then use that one computed row as part of its computation,
 | 
						|
  then invoke the co-routine again when it is ready for the next row.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Co-routines are better than storing the complete result set of the subquery
 | 
						|
  in a transient table because co-routines use less memory.  With a co-routine,
 | 
						|
  only a single row of the result needs to be remembered, whereas all rows of
 | 
						|
  the result must be stored for a transient table.  Also, because the
 | 
						|
  co-routine does not need to run to completion before the outer query
 | 
						|
  begins its work, the first rows of output can appear much sooner, and if
 | 
						|
  the overall query is aborted, less work is done overall.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  On the other hand, if the result of the subquery must be scanned multiple
 | 
						|
  times (because, for example, it is just one table in a join) then it
 | 
						|
  is better to use a transient table to remember the entire result of the
 | 
						|
  subquery, in order to avoid computing the subquery more than once.
 | 
						|
 | 
						|
<a name="deferred_work"></a>
 | 
						|
 | 
						|
</p><h2 id="using_co_routines_to_defer_work_until_after_the_sorting"><span>12.1. </span>Using Co-routines To Defer Work Until After The Sorting</h2>
 | 
						|
 | 
						|
<p>
 | 
						|
  As of SQLite version 3.21.0 (2017-10-24), the query planner will
 | 
						|
  always prefer to use a co-routine to implement FROM-clause subqueries 
 | 
						|
  that contains an ORDER BY clause and that are not part of a join when
 | 
						|
  the result set of the outer query is "complex".  This feature allows
 | 
						|
  applications to shift expensive computations from before the
 | 
						|
  sorter until after the sorter, which can result in faster operation.
 | 
						|
  For example, consider this query:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>SELECT expensive_function(a) FROM tab ORDER BY date DESC LIMIT 5;
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  The goal of this query is to compute some value for the five most
 | 
						|
  recent entries in the table.  But in the query above, the
 | 
						|
  "expensive_function()" is invoked prior to the sort and thus is
 | 
						|
  invoked on every row of the table, even
 | 
						|
  rows that are ultimately omitted due to the LIMIT clause.
 | 
						|
  A co-routine can be used to work around this:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>SELECT expensive_function(a) FROM (
 | 
						|
  SELECT a FROM tab ORDER BY date DESC LIMIT 5
 | 
						|
);
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  In the revised query, the subquery implemented by a co-routine computes
 | 
						|
  the five most recent values for "a".  Those five values are passed from the
 | 
						|
  co-routine up into the outer query where the "expensive_function()" is
 | 
						|
  invoked on only the specific rows that the application cares about.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  The query planner in future versions of SQLite might grow smart enough
 | 
						|
  to make transformations such as the above automatically, in both directions.
 | 
						|
  That is to say, future versions of SQLite might transform queries of the
 | 
						|
  first form into the second, or queries written the second way into the
 | 
						|
  first.  As of SQLite version 3.22.0 (2018-01-22), the query planner
 | 
						|
  will flatten the subquery if the outer query does not make use of any
 | 
						|
  user-defined functions or subqueries in its result set.  For the examples
 | 
						|
  shown above, however, SQLite implements each of the queries as
 | 
						|
  written.
 | 
						|
 | 
						|
<a name="minmax"></a>
 | 
						|
 | 
						|
</p><h1 id="the_min_max_optimization"><span>13. </span>The MIN/MAX optimization</h1>
 | 
						|
 | 
						|
<p>
 | 
						|
  Queries that contain a single MIN() or MAX() aggregate function whose
 | 
						|
  argument is the left-most column of an index might be satisfied
 | 
						|
  by doing a single index lookup rather than by scanning the entire table.
 | 
						|
  Examples:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>SELECT MIN(x) FROM table;
 | 
						|
SELECT MAX(x)+1 FROM table;
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<a name="autoindex"></a>
 | 
						|
 | 
						|
<h1 id="automatic_indexes"><span>14. </span>Automatic Indexes</h1>
 | 
						|
 | 
						|
<p>
 | 
						|
  When no indices are available to aid the evaluation of a query, SQLite
 | 
						|
  might create an automatic index that lasts only for the duration
 | 
						|
  of a single SQL statement.
 | 
						|
  Since the cost of constructing the automatic index is
 | 
						|
  O(NlogN) (where N is the number of entries in the table) and the cost of
 | 
						|
  doing a full table scan is only O(N), an automatic index will
 | 
						|
  only be created if SQLite expects that the lookup will be run more than
 | 
						|
  logN times during the course of the SQL statement. Consider an example:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>CREATE TABLE t1(a,b);
 | 
						|
CREATE TABLE t2(c,d);
 | 
						|
-- Insert many rows into both t1 and t2
 | 
						|
SELECT * FROM t1, t2 WHERE a=c;
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  In the query above, if both t1 and t2 have approximately N rows, then
 | 
						|
  without any indices the query will require O(N*N) time.  On the other
 | 
						|
  hand, creating an index on table t2 requires O(NlogN) time and then use
 | 
						|
  that index to evaluate the query requires an additional O(NlogN) time.
 | 
						|
  In the absence of <a href="lang_analyze.html">ANALYZE</a> information, SQLite guesses that N is one
 | 
						|
  million and hence it believes that constructing the automatic index will
 | 
						|
  be the cheaper approach.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  An automatic index might also be used for a subquery:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>CREATE TABLE t1(a,b);
 | 
						|
CREATE TABLE t2(c,d);
 | 
						|
-- Insert many rows into both t1 and t2
 | 
						|
SELECT a, (SELECT d FROM t2 WHERE c=b) FROM t1;
 | 
						|
</pre></div>
 | 
						|
<p>
 | 
						|
  In this example, the t2 table is used in a subquery to translate values
 | 
						|
  of the t1.b column.  If each table contains N rows, SQLite expects that
 | 
						|
  the subquery will run N times, and hence it will believe it is faster
 | 
						|
  to construct an automatic, transient index on t2 first and then use
 | 
						|
  that index to satisfy the N instances of the subquery.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  The automatic indexing capability can be disabled at run-time using
 | 
						|
  the <a href="pragma.html#pragma_automatic_index">automatic_index pragma</a>.  Automatic indexing is turned on by
 | 
						|
  default, but this can be changed so that automatic indexing is off
 | 
						|
  by default using the <a href="compile.html#default_automatic_index">SQLITE_DEFAULT_AUTOMATIC_INDEX</a> compile-time option.
 | 
						|
  The ability to create automatic indices can be completely disabled by
 | 
						|
  compiling with the <a href="compile.html#omit_automatic_index">SQLITE_OMIT_AUTOMATIC_INDEX</a> compile-time option.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  In SQLite <a href="releaselog/3_8_0.html">version 3.8.0</a> (2013-08-26) and later, 
 | 
						|
  an <a href="rescode.html#warning_autoindex">SQLITE_WARNING_AUTOINDEX</a> message is sent
 | 
						|
  to the <a href="errlog.html">error log</a> every time a statement is prepared that uses an
 | 
						|
  automatic index.  Application developers can and should use these warnings
 | 
						|
  to identify the need for new persistent indices in the schema.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Do not confuse automatic indexes with the <a href="fileformat2.html#intschema">internal indexes</a> (having names
 | 
						|
  like "sqlite_autoindex_<i>table</i>_<i>N</i>") that are sometimes
 | 
						|
  created to implement a <a href="lang_createtable.html#primkeyconst">PRIMARY KEY constraint</a> or <a href="lang_createtable.html#uniqueconst">UNIQUE constraint</a>.
 | 
						|
  The automatic indexes described here exist only for the duration of a
 | 
						|
  single query, are never persisted to disk, and are only visible to a
 | 
						|
  single database connection.  Internal indexes are part of the implementation
 | 
						|
  of PRIMARY KEY and UNIQUE constraints, are long-lasting and persisted
 | 
						|
  to disk, and are visible to all database connections.  The term "autoindex"
 | 
						|
  appears in the names of <a href="fileformat2.html#intschema">internal indexes</a> for legacy reasons and does
 | 
						|
  not indicate that internal indexes and automatic indexes are related.
 | 
						|
 | 
						|
<a name="pushdown"></a>
 | 
						|
 | 
						|
</p><h1 id="the_push_down_optimization"><span>15. </span>The Push-Down Optimization</h1>
 | 
						|
 | 
						|
<p>
 | 
						|
  If a subquery cannot be <a href="optoverview.html#flattening">flattened</a> into the outer query, it might
 | 
						|
  still be possible to enhance performance by "pushing down" WHERE clause
 | 
						|
  terms from the outer query into the subquery.  Consider an example:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>CREATE TABLE t1(a INT, b INT);
 | 
						|
CREATE TABLE t2(x INT, y INT);
 | 
						|
CREATE VIEW v1(a,b) AS SELECT DISTINCT a, b FROM t1;
 | 
						|
 | 
						|
SELECT x, y, b
 | 
						|
  FROM t2 JOIN v1 ON (x=a)
 | 
						|
 WHERE b BETWEEN 10 AND 20;
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>
 | 
						|
  The view v1 cannot be <a href="optoverview.html#flattening">flattened</a> because it is DISTINCT.  It must
 | 
						|
  instead be run as a subquery with the results being stored in a
 | 
						|
  transient table, then the join is performed between t2 and the
 | 
						|
  transient table.  The push-down optimization pushes down the
 | 
						|
  "b BETWEEN 10 AND 20" term into the view.  This makes the transient
 | 
						|
  table smaller, and helps the subquery to run faster if there
 | 
						|
  is an index on t1.b.  The resulting evaluation is like this:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>SELECT x, y, b
 | 
						|
  FROM t2
 | 
						|
  JOIN (SELECT DISTINCT a, b FROM t1 WHERE b BETWEEN 10 AND 20)
 | 
						|
 WHERE b BETWEEN 10 AND 20;
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>
 | 
						|
  The push-down optimization cannot always be used.  For example,
 | 
						|
  if the subquery contains a LIMIT, then pushing down any part of
 | 
						|
  the WHERE clause from the outer query could change the result of
 | 
						|
  the inner query.  There are other restrictions, explained in a
 | 
						|
  comment in the source code on the pushDownWhereTerms() routine
 | 
						|
  that implements this optimization.
 | 
						|
 | 
						|
<a name="leftjoinreduction"></a>
 | 
						|
 | 
						|
</p><h1 id="the_left_join_strength_reduction_optimization"><span>16. </span>The LEFT JOIN Strength Reduction Optimization</h1>
 | 
						|
 | 
						|
<p>
 | 
						|
  Sometimes a LEFT JOIN can be converted into an ordinary JOIN,
 | 
						|
  if there are terms in the WHERE clause that guarantee that the
 | 
						|
  two joins will give identical results.  In particular, if any
 | 
						|
  column in the right-hand table of the LEFT JOIN must be non-NULL
 | 
						|
  in order for the WHERE clause to be true, then the LEFT JOIN is
 | 
						|
  demoted to an ordinary JOIN.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  The prover that determines whether any column of the right-hand
 | 
						|
  table of a LEFT JOIN must be non-NULL in the WHERE clause is
 | 
						|
  imperfect.  It sometimes returns a false negative.  In other words,
 | 
						|
  it sometimes fails to reduce the strength of a LEFT JOIN when doing
 | 
						|
  so was in fact possible.  For example, the prover does not know
 | 
						|
  the <a href="lang_datefunc.html">datetime() SQL function</a> will always return NULL if its first
 | 
						|
  argument is NULL, and so it will not recognize that the LEFT JOIN
 | 
						|
  in the following query could be strength-reduced:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>SELECT urls.url
 | 
						|
  FROM urls
 | 
						|
  LEFT JOIN
 | 
						|
    (SELECT *
 | 
						|
      FROM (SELECT url_id AS uid, max(retrieval_time) AS rtime
 | 
						|
              FROM lookups GROUP BY 1 ORDER BY 1)
 | 
						|
      WHERE uid IN (358341,358341,358341)
 | 
						|
    ) recent
 | 
						|
    ON u.source_seed_id = recent.xyz OR u.url_id = recent.xyz
 | 
						|
 WHERE
 | 
						|
     DATETIME(recent.rtime) > DATETIME('now', '-5 days');
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>
 | 
						|
  It is possible that future enhancements to the prover might enable it
 | 
						|
  to recognize that NULL inputs to certain built-in functions
 | 
						|
  always result in a NULL answer.  But not all built-in
 | 
						|
  functions have that property (for example <a href="lang_corefunc.html#coalesce">coalesce()</a>) and, of
 | 
						|
  course, the prover will never be able to reason about
 | 
						|
  <a href="appfunc.html">application-defined SQL functions</a>.
 | 
						|
 | 
						|
 | 
						|
<a name="omitnoopjoin"></a>
 | 
						|
 | 
						|
</p><h1 id="the_omit_left_join_optimization"><span>17. </span>The Omit LEFT JOIN Optimization</h1>
 | 
						|
 | 
						|
<p>
 | 
						|
  Sometimes a LEFT JOIN can be completely omitted from a query without
 | 
						|
  changing the result.  This can happen if all of the following are
 | 
						|
  true:
 | 
						|
 | 
						|
</p><p>
 | 
						|
  </p><ol>
 | 
						|
  <li> The query is not an aggregate
 | 
						|
  </li><li> Either the query is DISTINCT or else the ON or USING clause
 | 
						|
       on the LEFT JOIN constrains the join such that it matches
 | 
						|
       only a single row
 | 
						|
  </li><li> The right-hand table of the LEFT JOIN is not be used anywhere
 | 
						|
       in the query outside of its own USING or ON clause.
 | 
						|
  </li></ol>
 | 
						|
 | 
						|
<p>
 | 
						|
  LEFT JOIN elimination often comes up when LEFT JOINs are used
 | 
						|
  inside of views, and then the view is used in such as way that
 | 
						|
  none of the columns of the right-hand table of the LEFT JOIN are
 | 
						|
  referenced.
 | 
						|
 | 
						|
</p><p>
 | 
						|
  Here is a simple example of omitting a LEFT JOIN:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>CREATE TABLE t1(ipk INTEGER PRIMARY KEY, v1);
 | 
						|
CREATE TABLE t2(ipk INTEGER PRIMARY KEY, v2);
 | 
						|
CREATE TABLE t3(ipk INTEGER PRIMARY KEY, v3);
 | 
						|
 | 
						|
SELECT v1, v3 FROM t1 
 | 
						|
  LEFT JOIN t2 USING (t1.ipk=t2.ipk)
 | 
						|
  LEFT JOIN t3 USING (t1.ipk=t3.ipk)
 | 
						|
</pre></div>
 | 
						|
 | 
						|
<p>
 | 
						|
  The t2 table is completely unused in the query above, and so the
 | 
						|
  query planner is able to implement the query as if it were written:
 | 
						|
 | 
						|
</p><div class="codeblock"><pre>SELECT v1, v3 FROM t1 
 | 
						|
  LEFT JOIN t3 USING (t1.ipk=t3.ipk)
 | 
						|
</pre></div>
 | 
						|
 |