1563 lines
		
	
	
		
			66 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
			
		
		
	
	
			1563 lines
		
	
	
		
			66 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
| <!DOCTYPE html>
 | |
| <html><head>
 | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0">
 | |
| <meta http-equiv="content-type" content="text/html; charset=UTF-8">
 | |
| <link href="sqlite.css" rel="stylesheet">
 | |
| <title>The SQLite Query Optimizer Overview</title>
 | |
| <!-- path= -->
 | |
| </head>
 | |
| <body>
 | |
| <div class=nosearch>
 | |
| <a href="index.html">
 | |
| <img class="logo" src="images/sqlite370_banner.gif" alt="SQLite" border="0">
 | |
| </a>
 | |
| <div><!-- IE hack to prevent disappearing logo --></div>
 | |
| <div class="tagline desktoponly">
 | |
| Small. Fast. Reliable.<br>Choose any three.
 | |
| </div>
 | |
| <div class="menu mainmenu">
 | |
| <ul>
 | |
| <li><a href="index.html">Home</a>
 | |
| <li class='mobileonly'><a href="javascript:void(0)" onclick='toggle_div("submenu")'>Menu</a>
 | |
| <li class='wideonly'><a href='about.html'>About</a>
 | |
| <li class='desktoponly'><a href="docs.html">Documentation</a>
 | |
| <li class='desktoponly'><a href="download.html">Download</a>
 | |
| <li class='wideonly'><a href='copyright.html'>License</a>
 | |
| <li class='desktoponly'><a href="support.html">Support</a>
 | |
| <li class='desktoponly'><a href="prosupport.html">Purchase</a>
 | |
| <li class='search' id='search_menubutton'>
 | |
| <a href="javascript:void(0)" onclick='toggle_search()'>Search</a>
 | |
| </ul>
 | |
| </div>
 | |
| <div class="menu submenu" id="submenu">
 | |
| <ul>
 | |
| <li><a href='about.html'>About</a>
 | |
| <li><a href='docs.html'>Documentation</a>
 | |
| <li><a href='download.html'>Download</a>
 | |
| <li><a href='support.html'>Support</a>
 | |
| <li><a href='prosupport.html'>Purchase</a>
 | |
| </ul>
 | |
| </div>
 | |
| <div class="searchmenu" id="searchmenu">
 | |
| <form method="GET" action="search">
 | |
| <select name="s" id="searchtype">
 | |
| <option value="d">Search Documentation</option>
 | |
| <option value="c">Search Changelog</option>
 | |
| </select>
 | |
| <input type="text" name="q" id="searchbox" value="">
 | |
| <input type="submit" value="Go">
 | |
| </form>
 | |
| </div>
 | |
| </div>
 | |
| <script>
 | |
| function toggle_div(nm) {
 | |
| var w = document.getElementById(nm);
 | |
| if( w.style.display=="block" ){
 | |
| w.style.display = "none";
 | |
| }else{
 | |
| w.style.display = "block";
 | |
| }
 | |
| }
 | |
| function toggle_search() {
 | |
| var w = document.getElementById("searchmenu");
 | |
| if( w.style.display=="block" ){
 | |
| w.style.display = "none";
 | |
| } else {
 | |
| w.style.display = "block";
 | |
| setTimeout(function(){
 | |
| document.getElementById("searchbox").focus()
 | |
| }, 30);
 | |
| }
 | |
| }
 | |
| function div_off(nm){document.getElementById(nm).style.display="none";}
 | |
| window.onbeforeunload = function(e){div_off("submenu");}
 | |
| /* Disable the Search feature if we are not operating from CGI, since */
 | |
| /* Search is accomplished using CGI and will not work without it. */
 | |
| if( !location.origin || !location.origin.match || !location.origin.match(/http/) ){
 | |
| document.getElementById("search_menubutton").style.display = "none";
 | |
| }
 | |
| /* Used by the Hide/Show button beside syntax diagrams, to toggle the */
 | |
| function hideorshow(btn,obj){
 | |
| var x = document.getElementById(obj);
 | |
| var b = document.getElementById(btn);
 | |
| if( x.style.display!='none' ){
 | |
| x.style.display = 'none';
 | |
| b.innerHTML='show';
 | |
| }else{
 | |
| x.style.display = '';
 | |
| b.innerHTML='hide';
 | |
| }
 | |
| return false;
 | |
| }
 | |
| </script>
 | |
| </div>
 | |
| <div class=fancy>
 | |
| <div class=nosearch>
 | |
| <div class="fancy_title">
 | |
| The SQLite Query Optimizer Overview
 | |
| </div>
 | |
| <div class="fancy_toc">
 | |
| <a onclick="toggle_toc()">
 | |
| <span class="fancy_toc_mark" id="toc_mk">►</span>
 | |
| Table Of Contents
 | |
| </a>
 | |
| <div id="toc_sub"><div class="fancy-toc1"><a href="#introduction">1. Introduction</a></div>
 | |
| <div class="fancy-toc1"><a href="#where_clause_analysis">2. WHERE clause analysis</a></div>
 | |
| <div class="fancy-toc2"><a href="#index_term_usage_examples">2.1. Index term usage examples</a></div>
 | |
| <div class="fancy-toc1"><a href="#the_between_optimization">3. The BETWEEN optimization</a></div>
 | |
| <div class="fancy-toc1"><a href="#or_optimizations">4. OR optimizations</a></div>
 | |
| <div class="fancy-toc1"><a href="#the_like_optimization">5. The LIKE optimization</a></div>
 | |
| <div class="fancy-toc1"><a href="#the_skip_scan_optimization">6. The Skip-Scan Optimization</a></div>
 | |
| <div class="fancy-toc1"><a href="#joins">7. Joins</a></div>
 | |
| <div class="fancy-toc2"><a href="#order_of_tables_in_a_join">7.1. Order of tables in a join</a></div>
 | |
| <div class="fancy-toc2"><a href="#manual_control_of_query_plans_using_sqlite_stat_tables">7.2. Manual Control Of Query Plans Using SQLITE_STAT Tables</a></div>
 | |
| <div class="fancy-toc2"><a href="#manual_control_of_query_plans_using_cross_join">7.3. Manual Control Of Query Plans Using CROSS JOIN</a></div>
 | |
| <div class="fancy-toc1"><a href="#choosing_between_multiple_indices">8. Choosing between multiple indices</a></div>
 | |
| <div class="fancy-toc2"><a href="#disqualifying_where_clause_terms_using_unary_">8.1. Disqualifying WHERE Clause Terms Using Unary-"+"</a></div>
 | |
| <div class="fancy-toc2"><a href="#range_queries">8.2. Range Queries</a></div>
 | |
| <div class="fancy-toc1"><a href="#covering_indices">9. Covering Indices</a></div>
 | |
| <div class="fancy-toc1"><a href="#order_by_optimizations">10. ORDER BY optimizations</a></div>
 | |
| <div class="fancy-toc2"><a href="#partial_order_by_via_index">10.1. Partial ORDER BY Via Index</a></div>
 | |
| <div class="fancy-toc1"><a href="#subquery_flattening">11. Subquery flattening</a></div>
 | |
| <div class="fancy-toc1"><a href="#subquery_co_routines">12. Subquery Co-routines</a></div>
 | |
| <div class="fancy-toc2"><a href="#using_co_routines_to_defer_work_until_after_the_sorting">12.1. Using Co-routines To Defer Work Until After The Sorting</a></div>
 | |
| <div class="fancy-toc1"><a href="#the_min_max_optimization">13. The MIN/MAX optimization</a></div>
 | |
| <div class="fancy-toc1"><a href="#automatic_indexes">14. Automatic Indexes</a></div>
 | |
| <div class="fancy-toc1"><a href="#the_push_down_optimization">15. The Push-Down Optimization</a></div>
 | |
| <div class="fancy-toc1"><a href="#the_left_join_strength_reduction_optimization">16. The LEFT JOIN Strength Reduction Optimization</a></div>
 | |
| <div class="fancy-toc1"><a href="#the_omit_left_join_optimization">17. The Omit LEFT JOIN Optimization</a></div>
 | |
| </div>
 | |
| </div>
 | |
| <script>
 | |
| function toggle_toc(){
 | |
| var sub = document.getElementById("toc_sub")
 | |
| var mk = document.getElementById("toc_mk")
 | |
| if( sub.style.display!="block" ){
 | |
| sub.style.display = "block";
 | |
| mk.innerHTML = "▼";
 | |
| } else {
 | |
| sub.style.display = "none";
 | |
| mk.innerHTML = "►";
 | |
| }
 | |
| }
 | |
| </script>
 | |
| </div>
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| <h1 id="introduction"><span>1. </span>Introduction</h1>
 | |
| <p>
 | |
|   This document provides an overview of how the query planner and optimizer
 | |
|   for SQLite works.
 | |
| 
 | |
| 
 | |
| </p><p>
 | |
|   Given a single SQL statement, there might be dozens, hundreds, or even
 | |
|   thousands of ways to implement that statement, depending on the complexity
 | |
|   of the statement itself and of the underlying database schema.  The 
 | |
|   task of the query planner is to select an algorithm from among the many
 | |
|   choices that provides the answer with a minimum of disk I/O and CPU
 | |
|   overhead.
 | |
| 
 | |
| 
 | |
| </p><p>
 | |
|   Additional background information is available in the
 | |
|   <a href="queryplanner.html">indexing tutorial</a> document.
 | |
| 
 | |
| 
 | |
| </p><p>
 | |
|   With release 3.8.0 (2013-08-26),
 | |
|   the SQLite query planner was reimplemented as the
 | |
|   <a href="queryplanner-ng.html">Next Generation Query Planner</a> or "NGQP".  All of the features, techniques,
 | |
|   and algorithms described in this document are applicable to both the
 | |
|   pre-3.8.0 legacy query planner and to the NGQP.  For further information on
 | |
|   how the NGQP differs from the legacy query planner, see the 
 | |
|   <a href="queryplanner-ng.html">detailed description of the NGQP</a>.
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| <a name="where_clause"></a>
 | |
| 
 | |
| </p><h1 id="where_clause_analysis"><span>2. </span>WHERE clause analysis</h1>
 | |
| 
 | |
| <p>
 | |
|   The WHERE clause on a query is broken up into "terms" where each term
 | |
|   is separated from the others by an AND operator.
 | |
|   If the WHERE clause is composed of constraints separate by the OR
 | |
|   operator then the entire clause is considered to be a single "term"
 | |
|   to which the <a href="#or_opt">OR-clause optimization</a> is applied.
 | |
| 
 | |
| </p><p>
 | |
|   All terms of the WHERE clause are analyzed to see if they can be
 | |
|   satisfied using indices.
 | |
|   To be usable by an index a term must be of one of the following
 | |
|   forms:
 | |
| 
 | |
| <blockquote><pre><b>
 | |
|   </b><i>column</i><b> = </b><i>expression</i><b>
 | |
|   </b><i>column</i><b> IS </b><i>expression</i><b>
 | |
|   </b><i>column</i><b> > </b><i>expression</i><b>
 | |
|   </b><i>column</i><b> >= </b><i>expression</i><b>
 | |
|   </b><i>column</i><b> < </b><i>expression</i><b>
 | |
|   </b><i>column</i><b> <= </b><i>expression</i><b>
 | |
|   </b><i>expression</i><b> = </b><i>column</i><b>
 | |
|   </b><i>expression</i><b> > </b><i>column</i><b>
 | |
|   </b><i>expression</i><b> >= </b><i>column</i><b>
 | |
|   </b><i>expression</i><b> < </b><i>column</i><b>
 | |
|   </b><i>expression</i><b> <= </b><i>column</i><b>
 | |
|   </b><i>column</i><b> IN (</b><i>expression-list</i><b>)
 | |
|   </b><i>column</i><b> IN (</b><i>subquery</i><b>)
 | |
|   </b><i>column</i><b> IS NULL
 | |
| </b></pre></blockquote>
 | |
| 
 | |
| </p><p>
 | |
|   If an index is created using a statement like this:
 | |
| 
 | |
| </p><div class="codeblock"><pre>CREATE INDEX idx_ex1 ON ex1(a,b,c,d,e,...,y,z);
 | |
| </pre></div>
 | |
| 
 | |
| <p>
 | |
|   Then the index might be used if the initial columns of the index
 | |
|   (columns a, b, and so forth) appear in WHERE clause terms.
 | |
|   The initial columns of the index must be used with
 | |
|   the *=* or *IN* or *IS* operators.  
 | |
|   The right-most column that is used can employ inequalities.  
 | |
|   For the right-most
 | |
|   column of an index that is used, there can be up to two inequalities
 | |
|   that must sandwich the allowed values of the column between two extremes.
 | |
| 
 | |
| </p><p>
 | |
|   It is not necessary for every column of an index to appear in a
 | |
|   WHERE clause term in order for that index to be used. 
 | |
|   But there cannot be gaps in the columns of the index that are used.
 | |
|   Thus for the example index above, if there is no WHERE clause term
 | |
|   that constraints column c, then terms that constrain columns a and b can
 | |
|   be used with the index but not terms that constraint columns d through z.
 | |
|   Similarly, index columns will not normally be used (for indexing purposes)
 | |
|   if they are to the right of a 
 | |
|   column that is constrained only by inequalities.
 | |
|   (See the <a href="optoverview.html#skipscan">skip-scan optimization</a> below for the exception.)
 | |
| 
 | |
| </p><p>
 | |
|   In the case of <a href="expridx.html">indexes on expressions</a>, whenever the word "column" is
 | |
|   used in the foregoing text, one can substitute "indexed expression"
 | |
|   (meaning a copy of the expression that appears in the <a href="lang_createindex.html">CREATE INDEX</a>
 | |
|   statement) and everything will work the same.
 | |
| 
 | |
| <a name="idxexamp"></a>
 | |
| 
 | |
| </p><h2 id="index_term_usage_examples"><span>2.1. </span>Index term usage examples</h2>
 | |
| <p>
 | |
|   For the index above and WHERE clause like this:
 | |
| 
 | |
| </p><div class="codeblock"><pre>... WHERE a=5 AND b IN (1,2,3) AND c IS NULL AND d='hello'
 | |
| </pre></div>
 | |
| <p>
 | |
|   The first four columns a, b, c, and d of the index would be usable since
 | |
|   those four columns form a prefix of the index and are all bound by
 | |
|   equality constraints.
 | |
| 
 | |
| </p><p>
 | |
|   For the index above and WHERE clause like this:
 | |
| 
 | |
| </p><div class="codeblock"><pre>... WHERE a=5 AND b IN (1,2,3) AND c>12 AND d='hello'
 | |
| </pre></div>
 | |
| <p>
 | |
|   Only columns a, b, and c of the index would be usable.  The d column
 | |
|   would not be usable because it occurs to the right of c and c is
 | |
|   constrained only by inequalities.
 | |
| 
 | |
| </p><p>
 | |
|   For the index above and WHERE clause like this:
 | |
| 
 | |
| </p><div class="codeblock"><pre>... WHERE a=5 AND b IN (1,2,3) AND d='hello'
 | |
| </pre></div>
 | |
| <p>
 | |
|   Only columns a and b of the index would be usable.  The d column
 | |
|   would not be usable because column c is not constrained and there can
 | |
|   be no gaps in the set of columns that usable by the index.
 | |
| 
 | |
| </p><p>
 | |
|   For the index above and WHERE clause like this:
 | |
| 
 | |
| </p><div class="codeblock"><pre>... WHERE b IN (1,2,3) AND c NOT NULL AND d='hello'
 | |
| </pre></div>
 | |
| <p>
 | |
|   The index is not usable at all because the left-most column of the
 | |
|   index (column "a") is not constrained.  Assuming there are no other
 | |
|   indices, the query above would result in a full table scan.
 | |
| 
 | |
| </p><p>
 | |
|   For the index above and WHERE clause like this:
 | |
| 
 | |
| </p><div class="codeblock"><pre>... WHERE a=5 OR b IN (1,2,3) OR c NOT NULL OR d='hello'
 | |
| </pre></div>
 | |
| <p>
 | |
|   The index is not usable because the WHERE clause terms are connected
 | |
|   by OR instead of AND. This query would result in a full table scan.
 | |
|   However, if three additional indices where added that contained columns
 | |
|   b, c, and d as their left-most columns, then the
 | |
|   <a href="#or_opt">OR-clause optimization</a> might apply.
 | |
| 
 | |
| 
 | |
| <a name="between_opt"></a>
 | |
| 
 | |
| </p><h1 id="the_between_optimization"><span>3. </span>The BETWEEN optimization</h1>
 | |
| 
 | |
| <p>
 | |
|   If a term of the WHERE clause is of the following form:
 | |
| 
 | |
| <blockquote><pre><b>
 | |
|   </b><i>expr1</i><b> BETWEEN </b><i>expr2</i><b> AND </b><i>expr3</i><b>
 | |
| </b></pre></blockquote>
 | |
| </p><p>
 | |
|   Then two "virtual" terms are added as follows:
 | |
| 
 | |
| <blockquote><pre><b>
 | |
|   </b><i>expr1</i><b> >= </b><i>expr2</i><b> AND </b><i>expr1</i><b> <= </b><i>expr3</i><b>
 | |
| </b></pre></blockquote>
 | |
| </p><p>
 | |
|   Virtual terms are used for analysis only and do not cause any byte-code
 | |
|   to be generated.
 | |
|   If both virtual terms end up being used as constraints on an index,
 | |
|   then the original BETWEEN term is omitted and the corresponding test
 | |
|   is not performed on input rows.
 | |
|   Thus if the BETWEEN term ends up being used as an index constraint
 | |
|   no tests are ever performed on that term.
 | |
|   On the other hand, the
 | |
|   virtual terms themselves never causes tests to be performed on
 | |
|   input rows.
 | |
|   Thus if the BETWEEN term is not used as an index constraint and
 | |
|   instead must be used to test input rows, the <i>expr1</i> expression is
 | |
|   only evaluated once.
 | |
| 
 | |
| <a name="or_opt"></a>
 | |
| 
 | |
| </p><h1 id="or_optimizations"><span>4. </span>OR optimizations</h1>
 | |
| 
 | |
| <p>
 | |
|   WHERE clause constraints that are connected by OR instead of AND can
 | |
|   be handled in two different ways.
 | |
|   If a term consists of multiple subterms containing a common column
 | |
|   name and separated by OR, like this:
 | |
| 
 | |
| <blockquote><pre><b>
 | |
|   </b><i>column</i><b> = </b><i>expr1</i><b> OR </b><i>column</i><b> = </b><i>expr2</i><b> OR </b><i>column</i><b> = </b><i>expr3</i><b> OR ...
 | |
| </b></pre></blockquote>
 | |
| </p><p>
 | |
|   Then that term is rewritten as follows:
 | |
| 
 | |
| <blockquote><pre><b>
 | |
|   </b><i>column</i><b> IN (</b><i>expr1</i><b>,</b><i>expr2</i><b>,</b><i>expr3</i><b>,...)
 | |
| </b></pre></blockquote>
 | |
| </p><p>
 | |
|   The rewritten term then might go on to constrain an index using the
 | |
|   normal rules for *IN* operators.  Note that <i>column</i> must be
 | |
|   the same column in every OR-connected subterm,
 | |
|   although the column can occur on either the left or the right side of
 | |
|   the *=* operator.
 | |
| 
 | |
| </p><p>
 | |
|   If and only if the previously described conversion of OR to an IN operator
 | |
|   does not work, the second OR-clause optimization is attempted.
 | |
|   Suppose the OR clause consists of multiple subterms as follows:
 | |
| 
 | |
| <blockquote><pre><b>
 | |
|   </b><i>expr1</i><b> OR </b><i>expr2</i><b> OR </b><i>expr3</i><b>
 | |
| </b></pre></blockquote>
 | |
| </p><p>
 | |
|   Individual subterms might be a single comparison expression like
 | |
|   *a=5* or *x>y* or they can be LIKE or BETWEEN expressions, or a subterm
 | |
|   can be a parenthesized list of AND-connected sub-subterms.
 | |
|   Each subterm is analyzed as if it were itself the entire WHERE clause
 | |
|   in order to see if the subterm is indexable by itself.
 | |
|   If <u>every</u> subterm of an OR clause is separately indexable
 | |
|   then the OR clause might be coded such that a separate index is used
 | |
|   to evaluate each term of the OR clause.  One way to think about how
 | |
|   SQLite uses separate indices for each OR clause term is to imagine
 | |
|   that the WHERE clause where rewritten as follows:
 | |
| 
 | |
| <blockquote><pre><b>
 | |
|   rowid IN (SELECT rowid FROM </b><i>table</i><b> WHERE </b><i>expr1</i><b>
 | |
|             UNION SELECT rowid FROM </b><i>table</i><b> WHERE </b><i>expr2</i><b>
 | |
|             UNION SELECT rowid FROM </b><i>table</i><b> WHERE </b><i>expr3</i><b>)
 | |
| </b></pre></blockquote>
 | |
| </p><p>
 | |
|   The rewritten expression above is conceptual; WHERE clauses containing
 | |
|   OR are not really rewritten this way.
 | |
|   The actual implementation of the OR clause uses a mechanism that is
 | |
|   more efficient and that works even for <a href="withoutrowid.html">WITHOUT ROWID</a> tables or 
 | |
|   tables in which the "rowid" is inaccessible.
 | |
|   But the essence of the implementation is captured by the statement
 | |
|   above:  Separate indices are used to find candidate result rows
 | |
|   from each OR clause term and the final result is the union of
 | |
|   those rows.
 | |
| 
 | |
| </p><p>
 | |
|   Note that in most cases, SQLite will only use a single index for each
 | |
|   table in the FROM clause of a query.  The second OR-clause optimization
 | |
|   described here is the exception to that rule.  With an OR-clause,
 | |
|   a different index might be used for each subterm in the OR-clause.
 | |
| 
 | |
| </p><p>
 | |
|   For any given query, the fact that the OR-clause optimization described
 | |
|   here can be used does not guarantee that it will be used.
 | |
|   SQLite uses a cost-based query planner that estimates the CPU and
 | |
|   disk I/O costs of various competing query plans and chooses the plan
 | |
|   that it thinks will be the fastest.  If there are many OR terms in
 | |
|   the WHERE clause or if some of the indices on individual OR-clause 
 | |
|   subterms are not very selective, then SQLite might decide that it is
 | |
|   faster to use a different query algorithm, or even a full-table scan.
 | |
|   Application developers can use the
 | |
|   <a href="lang_explain.html">EXPLAIN QUERY PLAN</a> prefix on a statement to get a
 | |
|   high-level overview of the chosen query strategy.
 | |
| 
 | |
| <a name="like_opt"></a>
 | |
| 
 | |
| </p><h1 id="the_like_optimization"><span>5. </span>The LIKE optimization</h1>
 | |
| 
 | |
| <p>
 | |
|   A WHERE-clause term that uses the <a href="lang_expr.html#like">LIKE</a> or <a href="lang_expr.html#glob">GLOB</a> operator
 | |
|   can sometimes be used with an index to do a range search, 
 | |
|   almost as if the LIKE or GLOB were an alternative to a <a href="lang_expr.html#between">BETWEEN</a>
 | |
|   operator.
 | |
|   There are many conditions on this optimization:
 | |
| 
 | |
| </p><p>
 | |
|   </p><ol>
 | |
|   <li>The right-hand side of the LIKE or GLOB must be either a string literal
 | |
|       or a <a href="lang_expr.html#varparam">parameter</a> bound to a string literal
 | |
|       that does not begin with a wildcard character.</li>
 | |
|   <li>It must not be possible to make the LIKE or GLOB operator true by
 | |
|       having a numeric value (instead of a string or blob) on the
 | |
|       left-hand side. This means that either:
 | |
|       <ol type="A">
 | |
|       <li> the left-hand side of the LIKE or GLOB operator is the name
 | |
|            of an indexed column with <a href="datatype3.html#affinity">TEXT affinity</a>, or</li>
 | |
|       <li> the right-hand side pattern argument does not begin with a
 | |
|            minus sign ("-") or a digit.</li>
 | |
|       </ol>
 | |
|       This constraint arises from the fact that numbers do not sort in
 | |
|       lexicographical order.  For example: 9<10 but '9'>'10'.</li>
 | |
|   <li>The built-in functions used to implement LIKE and GLOB must not
 | |
|       have been overloaded using the <a href="c3ref/create_function.html">sqlite3_create_function()</a> API.</li>
 | |
|   <li>For the GLOB operator, the column must be indexed using the 
 | |
|       built-in BINARY collating sequence.</li>
 | |
|   <li>For the LIKE operator, if <a href="pragma.html#pragma_case_sensitive_like">case_sensitive_like</a> mode is enabled then
 | |
|       the column must indexed using BINARY collating sequence, or if
 | |
|       <a href="pragma.html#pragma_case_sensitive_like">case_sensitive_like</a> mode is disabled then the column must indexed
 | |
|       using built-in NOCASE collating sequence.</li>
 | |
|   <li>If the ESCAPE option is used, the ESCAPE character must be ASCII,
 | |
|       or a single-byte character in UTF-8.
 | |
|   </li></ol>
 | |
| 
 | |
| <p>
 | |
|   The LIKE operator has two modes that can be set by a
 | |
|   <a href="pragma.html#pragma_case_sensitive_like">pragma</a>.  The
 | |
|   default mode is for LIKE comparisons to be insensitive to differences
 | |
|   of case for latin1 characters.  Thus, by default, the following
 | |
|   expression is true:
 | |
| 
 | |
| </p><div class="codeblock"><pre>'a' LIKE 'A'
 | |
| </pre></div>
 | |
| <p>
 | |
|   But if the case_sensitive_like pragma is enabled as follows:
 | |
| 
 | |
| </p><div class="codeblock"><pre>PRAGMA case_sensitive_like=ON;
 | |
| </pre></div>
 | |
| <p>
 | |
|   Then the LIKE operator pays attention to case and the example above would
 | |
|   evaluate to false.  Note that case insensitivity only applies to
 | |
|   latin1 characters - basically the upper and lower case letters of English
 | |
|   in the lower 127 byte codes of ASCII.  International character sets
 | |
|   are case sensitive in SQLite unless an application-defined
 | |
|   <a href="datatype3.html#collation">collating sequence</a> and <a href="lang_corefunc.html#like">like() SQL function</a> are provided that
 | |
|   take non-ASCII characters into account.
 | |
|   But if an application-defined collating sequence and/or like() SQL
 | |
|   function are provided, the LIKE optimization described here will never
 | |
|   be taken.
 | |
| 
 | |
| </p><p>
 | |
|   The LIKE operator is case insensitive by default because this is what
 | |
|   the SQL standard requires.  You can change the default behavior at
 | |
|   compile time by using the <a href="compile.html#case_sensitive_like">SQLITE_CASE_SENSITIVE_LIKE</a> command-line option
 | |
|   to the compiler.
 | |
| 
 | |
| </p><p>
 | |
|   The LIKE optimization might occur if the column named on the left of the
 | |
|   operator is indexed using the built-in BINARY collating sequence and
 | |
|   case_sensitive_like is turned on.  Or the optimization might occur if
 | |
|   the column is indexed using the built-in NOCASE collating sequence and the 
 | |
|   case_sensitive_like mode is off.  These are the only two combinations
 | |
|   under which LIKE operators will be optimized.
 | |
| 
 | |
| </p><p>
 | |
|   The GLOB operator is always case sensitive.  The column on the left side
 | |
|   of the GLOB operator must always use the built-in BINARY collating sequence
 | |
|   or no attempt will be made to optimize that operator with indices.
 | |
| 
 | |
| </p><p>
 | |
|   The LIKE optimization will only be attempted if
 | |
|   the right-hand side of the GLOB or LIKE operator is either
 | |
|   literal string or a <a href="lang_expr.html#varparam">parameter</a> that has been <a href="c3ref/bind_blob.html">bound</a>
 | |
|   to a string literal.  The string literal must not
 | |
|   begin with a wildcard; if the right-hand side begins with a wildcard
 | |
|   character then this optimization is not attempted.  If the right-hand side 
 | |
|   is a <a href="lang_expr.html#varparam">parameter</a> that is bound to a string, then this optimization is
 | |
|   only attempted if the <a href="c3ref/stmt.html">prepared statement</a> containing the expression
 | |
|   was compiled with <a href="c3ref/prepare.html">sqlite3_prepare_v2()</a> or <a href="c3ref/prepare.html">sqlite3_prepare16_v2()</a>.
 | |
|   The LIKE optimization is not attempted if the
 | |
|   right-hand side is a <a href="lang_expr.html#varparam">parameter</a> and the statement was prepared using
 | |
|   <a href="c3ref/prepare.html">sqlite3_prepare()</a> or <a href="c3ref/prepare.html">sqlite3_prepare16()</a>.
 | |
| 
 | |
| </p><p>
 | |
|   Suppose the initial sequence of non-wildcard characters on the right-hand
 | |
|   side of the LIKE or GLOB operator is <i>x</i>.  We are using a single 
 | |
|   character to denote this non-wildcard prefix but the reader should
 | |
|   understand that the prefix can consist of more than 1 character.
 | |
|   Let <i>y</i> be the smallest string that is the same length as /x/ but which
 | |
|   compares greater than <i>x</i>.  For example, if <i>x</i> is *hello* then
 | |
|   <i>y</i> would be *hellp*.
 | |
|   The LIKE and GLOB optimizations consist of adding two virtual terms
 | |
|   like this:
 | |
| 
 | |
| <blockquote><pre><b>
 | |
|   </b><i>column</i><b> >= </b><i>x</i><b> AND </b><i>column</i><b> < </b><i>y</i><b>
 | |
| </b></pre></blockquote>
 | |
| </p><p>
 | |
|   Under most circumstances, the original LIKE or GLOB operator is still
 | |
|   tested against each input row even if the virtual terms are used to
 | |
|   constrain an index.  This is because we do not know what additional
 | |
|   constraints may be imposed by characters to the right
 | |
|   of the <i>x</i> prefix.  However, if there is only a single
 | |
|   global wildcard to the right of <i>x</i>, then the original LIKE or 
 | |
|   GLOB test is disabled.
 | |
|   In other words, if the pattern is like this:
 | |
| 
 | |
| <blockquote><pre><b>
 | |
|   </b><i>column</i><b> LIKE </b><i>x</i><b>%
 | |
|   </b><i>column</i><b> GLOB </b><i>x</i><b>*
 | |
| </b></pre></blockquote>
 | |
| </p><p>
 | |
|   then the original LIKE or GLOB tests are disabled when the virtual
 | |
|   terms constrain an index because in that case we know that all of the
 | |
|   rows selected by the index will pass the LIKE or GLOB test.
 | |
| 
 | |
| </p><p>
 | |
|   Note that when the right-hand side of a LIKE or GLOB operator is
 | |
|   a <a href="lang_expr.html#varparam">parameter</a> and the statement is prepared using <a href="c3ref/prepare.html">sqlite3_prepare_v2()</a>
 | |
|   or <a href="c3ref/prepare.html">sqlite3_prepare16_v2()</a> then the statement is automatically reparsed
 | |
|   and recompiled on the first <a href="c3ref/step.html">sqlite3_step()</a> call of each run if the binding
 | |
|   to the right-hand side parameter has changed since the previous run.
 | |
|   This reparse and recompile is essentially the same action that occurs
 | |
|   following a schema change.  The recompile is necessary so that the query
 | |
|   planner can examine the new value bound to the right-hand side of the
 | |
|   LIKE or GLOB operator and determine whether or not to employ the
 | |
|   optimization described above.
 | |
| 
 | |
| <a name="skipscan"></a>
 | |
| 
 | |
| </p><h1 id="the_skip_scan_optimization"><span>6. </span>The Skip-Scan Optimization</h1>
 | |
| 
 | |
| <p>
 | |
|   The general rule is that indexes are only useful if there are 
 | |
|   WHERE-clause constraints on the left-most columns of the index.
 | |
|   However, in some cases,
 | |
|   SQLite is able to use an index even if the first few columns of
 | |
|   the index are omitted from the WHERE clause but later columns 
 | |
|   are included.
 | |
| 
 | |
| 
 | |
| </p><p>
 | |
|   Consider a table such as the following:
 | |
| 
 | |
| </p><div class="codeblock"><pre>CREATE TABLE people(
 | |
|   name TEXT PRIMARY KEY,
 | |
|   role TEXT NOT NULL,
 | |
|   height INT NOT NULL, -- in cm
 | |
|   CHECK( role IN ('student','teacher') )
 | |
| );
 | |
| CREATE INDEX people_idx1 ON people(role, height);
 | |
| </pre></div>
 | |
| 
 | |
| <p>
 | |
|   The people table has one entry for each person in a large
 | |
|   organization.  Each person is either a "student" or a "teacher",
 | |
|   as determined by the "role" field.  And we record the height in
 | |
|   centimeters of each person.  The role and height are indexed.
 | |
|   Notice that the left-most column of the index is not very
 | |
|   selective - it only contains two possible values.
 | |
| 
 | |
| 
 | |
| </p><p>
 | |
|   Now consider a query to find the names of everyone in the
 | |
|   organization that is 180cm tall or taller:
 | |
| 
 | |
| 
 | |
| </p><div class="codeblock"><pre>SELECT name FROM people WHERE height>=180;
 | |
| </pre></div>
 | |
| 
 | |
| <p>
 | |
|   Because the left-most column of the index does not appear in the
 | |
|   WHERE clause of the query, one is tempted to conclude that the
 | |
|   index is not usable here.  But SQLite is able to use the index.
 | |
|   Conceptually, SQLite uses the index as if the query were more
 | |
|   like the following:
 | |
| 
 | |
| 
 | |
| </p><div class="codeblock"><pre>SELECT name FROM people
 | |
|  WHERE role IN (SELECT DISTINCT role FROM people)
 | |
|    AND height>=180;
 | |
| </pre></div>
 | |
| 
 | |
| <p>
 | |
|   Or this:
 | |
| 
 | |
| </p><div class="codeblock"><pre>SELECT name FROM people WHERE role='teacher' AND height>=180
 | |
| UNION ALL
 | |
| SELECT name FROM people WHERE role='student' AND height>=180;
 | |
| </pre></div>
 | |
| 
 | |
| <p>
 | |
|   The alternative query formulations shown above are conceptual only.
 | |
|   SQLite does not really transform the query. 
 | |
|   The actual query plan is like this:
 | |
|   SQLite locates the first possible value for "role", which it
 | |
|   can do by rewinding the "people_idx1" index to the beginning and reading
 | |
|   the first record.  SQLite stores this first "role" value in an
 | |
|   internal variable that we will here call "$role".  Then SQLite
 | |
|   runs a query like: "SELECT name FROM people WHERE role=$role AND height>=180".
 | |
|   This query has an equality constraint on the left-most column of the
 | |
|   index and so the index can be used to resolve that query.  Once
 | |
|   that query is finished, SQLite then uses the "people_idx1" index to
 | |
|   locate the next value of the "role" column, using code that is logically
 | |
|   similar to "SELECT role FROM people WHERE role>$role LIMIT 1".
 | |
|   This new "role" value overwrites the $role variable, and the process
 | |
|   repeats until all possible values for "role" have been examined.
 | |
| 
 | |
| </p><p>
 | |
|   We call this kind of index usage a "skip-scan" because the database
 | |
|   engine is basically doing a full scan of the index but it optimizes the
 | |
|   scan (making it less than "full") by occasionally skipping ahead to the
 | |
|   next candidate value.
 | |
| 
 | |
| </p><p>
 | |
|   SQLite might use a skip-scan on an index if it knows that the first
 | |
|   one or more columns contain many duplication values.
 | |
|   If there are too few duplicates
 | |
|   in the left-most columns of the index, then it would
 | |
|   be faster to simply step ahead to the next value, and thus do
 | |
|   a full table scan, than to do a binary search on an index to locate
 | |
|   the next left-column value.
 | |
| 
 | |
| </p><p>
 | |
|   The only way that SQLite can know that the left-most columns of an index
 | |
|   have many duplicate is if the <a href="lang_analyze.html">ANALYZE</a> command has been run
 | |
|   on the database.
 | |
|   Without the results of ANALYZE, SQLite has to guess at the "shape" of
 | |
|   the data in the table, and the default guess is that there are an average
 | |
|   of 10 duplicates for every value in the left-most column of the index.
 | |
|   But skip-scan only becomes profitable (it only gets to be faster than
 | |
|   a full table scan) when the number of duplicates is about 18 or more.
 | |
|   Hence, a skip-scan is never used on a database that has not been analyzed.
 | |
| 
 | |
| <a name="joins"></a>
 | |
| 
 | |
| </p><h1 id="joins"><span>7. </span>Joins</h1>
 | |
| 
 | |
| <p>
 | |
|   The ON and USING clauses of an inner join are converted into additional
 | |
|   terms of the WHERE clause prior to WHERE clause analysis described
 | |
|   above in paragraph 1.0.  Thus with SQLite, there is no computational
 | |
|   advantage to use the newer SQL92 join syntax
 | |
|   over the older SQL89 comma-join syntax.  They both end up accomplishing
 | |
|   exactly the same thing on inner joins.
 | |
| 
 | |
| </p><p>
 | |
|   For a LEFT OUTER JOIN the situation is more complex.  The following
 | |
|   two queries are not equivalent:
 | |
| 
 | |
| </p><div class="codeblock"><pre>SELECT * FROM tab1 LEFT JOIN tab2 ON tab1.x=tab2.y;
 | |
| SELECT * FROM tab1 LEFT JOIN tab2 WHERE tab1.x=tab2.y;
 | |
| </pre></div>
 | |
| <p>
 | |
|   For an inner join, the two queries above would be identical.  But
 | |
|   special processing applies to the ON and USING clauses of an OUTER join:
 | |
|   specifically, the constraints in an ON or USING clause do not apply if
 | |
|   the right table of the join is on a null row, but the constraints do apply
 | |
|   in the WHERE clause.  The net effect is that putting the ON or USING
 | |
|   clause expressions for a LEFT JOIN in the WHERE clause effectively converts
 | |
|   the query to an
 | |
|   ordinary INNER JOIN - albeit an inner join that runs more slowly.
 | |
| 
 | |
| <a name="table_order"></a>
 | |
| 
 | |
| </p><h2 id="order_of_tables_in_a_join"><span>7.1. </span>Order of tables in a join</h2>
 | |
| 
 | |
| <p>
 | |
|   The current implementation of 
 | |
|   SQLite uses only loop joins.  That is to say, joins are implemented as
 | |
|   nested loops.
 | |
| 
 | |
| </p><p>
 | |
|   The default order of the nested loops in a join is for the left-most
 | |
|   table in the FROM clause to form the outer loop and the right-most
 | |
|   table to form the inner loop.
 | |
|   However, SQLite will nest the loops in a different order if doing so
 | |
|   will help it to select better indices.
 | |
| 
 | |
| </p><p>
 | |
|   Inner joins can be freely reordered.  However a left outer join is
 | |
|   neither commutative nor associative and hence will not be reordered.
 | |
|   Inner joins to the left and right of the outer join might be reordered
 | |
|   if the optimizer thinks that is advantageous but the outer joins are
 | |
|   always evaluated in the order in which they occur.
 | |
| 
 | |
| </p><p>
 | |
|   SQLite <a href="lang_select.html#crossjoin">treats the CROSS JOIN operator specially</a>.
 | |
|   The CROSS JOIN operator is commutative in theory.  But SQLite chooses to
 | |
|   never reorder tables in a CROSS JOIN.  This provides a mechanism
 | |
|   by which the programmer can force SQLite to choose a particular loop nesting
 | |
|   order.  
 | |
| 
 | |
| </p><p>
 | |
|   When selecting the order of tables in a join, SQLite uses an efficient
 | |
|   polynomial-time algorithm.  Because of this,
 | |
|   SQLite is able to plan queries with 50- or 60-way joins in a matter of
 | |
|   microseconds
 | |
| 
 | |
| </p><p>
 | |
|   Join reordering is automatic and usually works well enough that
 | |
|   programmers do not have to think about it, especially if <a href="lang_analyze.html">ANALYZE</a>
 | |
|   has been used to gather statistics about the available indices.
 | |
|   But occasionally some hints from the programmer are needed.
 | |
|   Consider, for example, the following schema:
 | |
| 
 | |
| </p><div class="codeblock"><pre>CREATE TABLE node(
 | |
|    id INTEGER PRIMARY KEY,
 | |
|    name TEXT
 | |
| );
 | |
| CREATE INDEX node_idx ON node(name);
 | |
| CREATE TABLE edge(
 | |
|    orig INTEGER REFERENCES node,
 | |
|    dest INTEGER REFERENCES node,
 | |
|    PRIMARY KEY(orig, dest)
 | |
| );
 | |
| CREATE INDEX edge_idx ON edge(dest,orig);
 | |
| </pre></div>
 | |
| <p>
 | |
|   The schema above defines a directed graph with the ability to store a
 | |
|   name at each node. Now consider a query against this schema:
 | |
| 
 | |
| </p><div class="codeblock"><pre>SELECT *
 | |
|   FROM edge AS e,
 | |
|        node AS n1,
 | |
|        node AS n2
 | |
|  WHERE n1.name = 'alice'
 | |
|    AND n2.name = 'bob'
 | |
|    AND e.orig = n1.id
 | |
|    AND e.dest = n2.id;
 | |
| </pre></div>
 | |
| <p>
 | |
|   This query asks for is all information about edges that go from
 | |
|   nodes labeled "alice" to nodes labeled "bob".
 | |
|   The query optimizer in SQLite has basically two choices on how to
 | |
|   implement this query.  (There are actually six different choices, but
 | |
|   we will only consider two of them here.)
 | |
|   Pseudocode below demonstrating these two choices.
 | |
| 
 | |
| <a name="option1"></a>
 | |
| 
 | |
| </p><p>Option 1:
 | |
| </p><div class="codeblock"><pre>foreach n1 where n1.name='alice' do:
 | |
|   foreach n2 where n2.name='bob' do:
 | |
|     foreach e where e.orig=n1.id and e.dest=n2.id
 | |
|       return n1.*, n2.*, e.*
 | |
|     end
 | |
|   end
 | |
| end
 | |
| </pre></div>
 | |
| <a name="option2"></a>
 | |
| 
 | |
| <p>Option 2:
 | |
| </p><div class="codeblock"><pre>foreach n1 where n1.name='alice' do:
 | |
|   foreach e where e.orig=n1.id do:
 | |
|     foreach n2 where n2.id=e.dest and n2.name='bob' do:
 | |
|       return n1.*, n2.*, e.*
 | |
|     end
 | |
|   end
 | |
| end
 | |
| </pre></div>
 | |
| <p>
 | |
|   The same indices are used to speed up every loop in both implementation
 | |
|   options.
 | |
|   The only difference in these two query plans is the order in which
 | |
|   the loops are nested.
 | |
| 
 | |
| </p><p>
 | |
|   So which query plan is better? It turns out that the answer depends on
 | |
|   what kind of data is found in the node and edge tables.
 | |
| 
 | |
| </p><p>
 | |
|   Let the number of alice nodes be M and the number of bob nodes be N.
 | |
|   Consider two scenarios. In the first scenario, M and N are both 2 but
 | |
|   there are thousands of edges on each node. In this case, option 1 is
 | |
|   preferred. With option 1, the inner loop checks for the existence of
 | |
|   an edge between a pair of nodes and outputs the result if found. 
 | |
|   But because there are only 2 alice and bob nodes each, the inner loop
 | |
|   only has to run 4 times and the query is very quick. Option 2 would
 | |
|   take much longer here. The outer loop of option 2 only executes twice,
 | |
|   but because there are a large number of edges leaving each alice node,
 | |
|   the middle loop has to iterate many thousands of times. It will be
 | |
|   much slower. So in the first scenario, we prefer to use option 1.
 | |
| 
 | |
| </p><p>
 | |
|   Now consider the case where M and N are both 3500. Alice nodes are
 | |
|   abundant. But suppose each of these nodes is connected by only one
 | |
|   or two edges. In this case, option 2 is preferred. With option 2,
 | |
|   the outer loop still has to run 3500 times, but the middle loop only
 | |
|   runs once or twice for each outer loop and the inner loop will only
 | |
|   run once for each middle loop, if at all. So the total number of
 | |
|   iterations of the inner loop is around 7000. Option 1, on the other
 | |
|   hand, has to run both its outer loop and its middle loop 3500 times
 | |
|   each, resulting in 12 million iterations of the middle loop.
 | |
|   Thus in the second scenario, option 2 is nearly 2000 times faster
 | |
|   than option 1.
 | |
| 
 | |
| </p><p>
 | |
|   So you can see that depending on how the data is structured in the table,
 | |
|   either query plan 1 or query plan 2 might be better.  Which plan does
 | |
|   SQLite choose by default?  As of version 3.6.18, without running <a href="lang_analyze.html">ANALYZE</a>,
 | |
|   SQLite will choose option 2.
 | |
|   But if the <a href="lang_analyze.html">ANALYZE</a> command is run in order to gather statistics,
 | |
|   a different choice might be made if the statistics indicate that the
 | |
|   alternative is likely to run faster.
 | |
| 
 | |
| <a name="manctrl"></a>
 | |
| 
 | |
| </p><h2 id="manual_control_of_query_plans_using_sqlite_stat_tables"><span>7.2. </span>Manual Control Of Query Plans Using SQLITE_STAT Tables</h2>
 | |
| 
 | |
| <p>
 | |
|   SQLite provides the ability for advanced programmers to exercise control
 | |
|   over the query plan chosen by the optimizer. One method for doing this
 | |
|   is to fudge the <a href="lang_analyze.html">ANALYZE</a> results in the <a href="fileformat2.html#stat1tab">sqlite_stat1</a>, 
 | |
|   <a href="fileformat2.html#stat3tab">sqlite_stat3</a>, and/or <a href="fileformat2.html#stat4tab">sqlite_stat4</a> tables.  That approach is not 
 | |
|   recommended except for the one scenario described in the next paragraph.
 | |
| 
 | |
| </p><p>
 | |
|   For a program that uses an SQLite database as its 
 | |
|   <a href="appfileformat.html">application file-format</a>,
 | |
|   when a new database instance is first created the <a href="lang_analyze.html">ANALYZE</a>
 | |
|   command is ineffective because the database contain no data from which
 | |
|   to gather statistics.  In that case, one could construct a large prototype
 | |
|   database containing typical data during development and run the 
 | |
|   <a href="lang_analyze.html">ANALYZE</a> command on this prototype database to gather statistics,
 | |
|   then save the prototype statistics as part of the application.
 | |
|   After deployment, when the application goes to create a new database file,
 | |
|   it can run the <a href="lang_analyze.html">ANALYZE</a> command in order to create the statistics
 | |
|   tables, then copy the precomputed statistics obtained
 | |
|   from the prototype database into these new statistics tables.
 | |
|   In that way, statistics from large working data sets can be preloaded
 | |
|   into newly created application files.
 | |
| 
 | |
| <a name="crossjoin"></a>
 | |
| 
 | |
| </p><h2 id="manual_control_of_query_plans_using_cross_join"><span>7.3. </span>Manual Control Of Query Plans Using CROSS JOIN</h2>
 | |
| 
 | |
| <p>
 | |
|   Programmers can force SQLite to use a particular loop nesting order
 | |
|   for a join by using the CROSS JOIN operator instead of just JOIN, 
 | |
|   INNER JOIN, NATURAL JOIN, or a "," join.  Though CROSS JOINs are
 | |
|   commutative in theory, SQLite chooses to never reorder the tables in
 | |
|   a CROSS JOIN.  Hence, the left table of a CROSS JOIN will always be
 | |
|   in an outer loop relative to the right table.
 | |
| 
 | |
| </p><p>
 | |
|   In the following query, the optimizer is free to reorder the 
 | |
|   tables of FROM clause anyway it sees fit:
 | |
| 
 | |
| </p><div class="codeblock"><pre>SELECT *
 | |
|   FROM node AS n1,
 | |
|        edge AS e,
 | |
|        node AS n2
 | |
|  WHERE n1.name = 'alice'
 | |
|    AND n2.name = 'bob'
 | |
|    AND e.orig = n1.id
 | |
|    AND e.dest = n2.id;
 | |
| </pre></div>
 | |
| <p>
 | |
|   But in the following logically equivalent formulation of the same query,
 | |
|   the substitution of "CROSS JOIN" for the "," means that the order
 | |
|   of tables must be N1, E, N2.
 | |
| 
 | |
| </p><div class="codeblock"><pre>SELECT *
 | |
|   FROM node AS n1 CROSS JOIN
 | |
|        edge AS e CROSS JOIN
 | |
|        node AS n2
 | |
|  WHERE n1.name = 'alice'
 | |
|    AND n2.name = 'bob'
 | |
|    AND e.orig = n1.id
 | |
|    AND e.dest = n2.id;
 | |
| </pre></div>
 | |
| <p>
 | |
|   In the latter query, the query plan must be 
 | |
|   <a href="#option2">option 2</a>.  Note that
 | |
|   you must use the keyword "CROSS" in order to disable the table reordering
 | |
|   optimization; INNER JOIN, NATURAL JOIN, JOIN, and other similar
 | |
|   combinations work just like a comma join in that the optimizer is
 | |
|   free to reorder tables as it sees fit. (Table reordering is also
 | |
|   disabled on an outer join, but that is because outer joins are not
 | |
|   associative or commutative. Reordering tables in OUTER JOIN changes
 | |
|   the result.)
 | |
| 
 | |
| </p><p>
 | |
|   See "<a href="queryplanner-ng.html#fossilcasestudy">The Fossil NGQP Upgrade Case Study</a>" for another real-world example
 | |
|   of using CROSS JOIN to manually control the nesting order of a join.
 | |
|   The <a href="queryplanner-ng.html#howtofix">query planner checklist</a> found later in the same document provides
 | |
|   further guidance on manual control of the query planner.
 | |
| 
 | |
| <a name="multi_index"></a>
 | |
| 
 | |
| </p><h1 id="choosing_between_multiple_indices"><span>8. </span>Choosing between multiple indices</h1>
 | |
| 
 | |
| <p>
 | |
|   Each table in the FROM clause of a query can use at most one index
 | |
|   (except when the <a href="#or_opt">OR-clause optimization</a> comes into
 | |
|   play)
 | |
|   and SQLite strives to use at least one index on each table.  Sometimes,
 | |
|   two or more indices might be candidates for use on a single table.
 | |
|   For example:
 | |
| 
 | |
| </p><div class="codeblock"><pre>CREATE TABLE ex2(x,y,z);
 | |
| CREATE INDEX ex2i1 ON ex2(x);
 | |
| CREATE INDEX ex2i2 ON ex2(y);
 | |
| SELECT z FROM ex2 WHERE x=5 AND y=6;
 | |
| </pre></div>
 | |
| <p>
 | |
|   For the SELECT statement above, the optimizer can use the ex2i1 index
 | |
|   to lookup rows of ex2 that contain x=5 and then test each row against
 | |
|   the y=6 term.  Or it can use the ex2i2 index to lookup rows
 | |
|   of ex2 that contain y=6 then test each of those rows against the
 | |
|   x=5 term.
 | |
| 
 | |
| </p><p>
 | |
|   When faced with a choice of two or more indices, SQLite tries to estimate
 | |
|   the total amount of work needed to perform the query using each option.
 | |
|   It then selects the option that gives the least estimated work.
 | |
| 
 | |
| </p><p>
 | |
|   To help the optimizer get a more accurate estimate of the work involved
 | |
|   in using various indices, the user may optionally run the <a href="lang_analyze.html">ANALYZE</a> command.
 | |
|   The <a href="lang_analyze.html">ANALYZE</a> command scans all indices of database where there might
 | |
|   be a choice between two or more indices and gathers statistics on the
 | |
|   selectiveness of those indices.  The statistics gathered by
 | |
|   this scan are stored in special database tables names shows names all
 | |
|   begin with "<b>sqlite_stat</b>".
 | |
|   The content of these tables is not updated as the database
 | |
|   changes so after making significant changes it might be prudent to
 | |
|   rerun <a href="lang_analyze.html">ANALYZE</a>.
 | |
|   The results of an ANALYZE command are only available to database connections
 | |
|   that are opened after the ANALYZE command completes.
 | |
| 
 | |
| </p><p>
 | |
|   The various <b>sqlite_stat</b><i>N</i> tables contain information on how
 | |
|   selective the various indices are.  For example, the <a href="fileformat2.html#stat1tab">sqlite_stat1</a>
 | |
|   table might indicate that an equality constraint on column x reduces the
 | |
|   search space to 10 rows on average, whereas an equality constraint on
 | |
|   column y reduces the search space to 3 rows on average.  In that case,
 | |
|   SQLite would prefer to use index ex2i2 since that index is more selective.
 | |
| 
 | |
| <a name="uplus"></a>
 | |
| 
 | |
| </p><h2 id="disqualifying_where_clause_terms_using_unary_"><span>8.1. </span>Disqualifying WHERE Clause Terms Using Unary-"+"</h2>
 | |
| 
 | |
| <p>
 | |
|   Terms of the WHERE clause can be manually disqualified for use with
 | |
|   indices by prepending a unary *+* operator to the column name.  The
 | |
|   unary *+* is a no-op and will not generate any byte code in the prepared
 | |
|   statement.
 | |
|   But the unary *+* operator will prevent the term from constraining an index.
 | |
|   So, in the example above, if the query were rewritten as:
 | |
| 
 | |
| </p><div class="codeblock"><pre>SELECT z FROM ex2 WHERE +x=5 AND y=6;
 | |
| </pre></div>
 | |
| <p>
 | |
|   The *+* operator on the *x* column will prevent that term from 
 | |
|   constraining an index.  This would force the use of the ex2i2 index.
 | |
| 
 | |
| </p><p>
 | |
|   Note that the unary *+* operator also removes 
 | |
|   <a href="datatype3.html#affinity">type affinity</a> from
 | |
|   an expression, and in some cases this can cause subtle changes in
 | |
|   the meaning of an expression.
 | |
|   In the example above,
 | |
|   if column *x* has <a href="datatype3.html#affinity">TEXT affinity</a>
 | |
|   then the comparison "x=5" will be done as text.  But the *+* operator
 | |
|   removes the affinity.  So the comparison "+x=5" will compare the text
 | |
|   in column *x* with the numeric value 5 and will always be false.
 | |
| 
 | |
| <a name="rangequery"></a>
 | |
| 
 | |
| </p><h2 id="range_queries"><span>8.2. </span>Range Queries</h2>
 | |
| 
 | |
| <p>
 | |
|   Consider a slightly different scenario:
 | |
| 
 | |
| </p><div class="codeblock"><pre>CREATE TABLE ex2(x,y,z);
 | |
| CREATE INDEX ex2i1 ON ex2(x);
 | |
| CREATE INDEX ex2i2 ON ex2(y);
 | |
| SELECT z FROM ex2 WHERE x BETWEEN 1 AND 100 AND y BETWEEN 1 AND 100;
 | |
| </pre></div>
 | |
| <p>
 | |
|   Further suppose that column x contains values spread out
 | |
|   between 0 and 1,000,000 and column y contains values
 | |
|   that span between 0 and 1,000.  In that scenario,
 | |
|   the range constraint on column x should reduce the search space by
 | |
|   a factor of 10,000 whereas the range constraint on column y should
 | |
|   reduce the search space by a factor of only 10.  So the ex2i1 index
 | |
|   should be preferred.
 | |
| 
 | |
| </p><p>
 | |
|   SQLite will make this determination, but only if it has been compiled
 | |
|   with <a href="compile.html#enable_stat3">SQLITE_ENABLE_STAT3</a> or <a href="compile.html#enable_stat4">SQLITE_ENABLE_STAT4</a>.
 | |
|   The <a href="compile.html#enable_stat3">SQLITE_ENABLE_STAT3</a> and <a href="compile.html#enable_stat4">SQLITE_ENABLE_STAT4</a> options causes
 | |
|   the <a href="lang_analyze.html">ANALYZE</a> command to collect a histogram of column content in the
 | |
|   <a href="fileformat2.html#stat3tab">sqlite_stat3</a> or <a href="fileformat2.html#stat4tab">sqlite_stat4</a> tables and to use this histogram to 
 | |
|   make a better guess at the best query to use for range constraints
 | |
|   such as the above.  The main difference between STAT3 and STAT4 is
 | |
|   that STAT3 records histogram data for only the left-most column of
 | |
|   an index whereas STAT4 records histogram data for all columns of an
 | |
|   index.  For single-column indexes, STAT3 and STAT4 work the same.
 | |
| 
 | |
| </p><p>
 | |
|   The histogram data is only useful if the right-hand side of the constraint
 | |
|   is a simple compile-time constant or <a href="lang_expr.html#varparam">parameter</a> and not an expression.
 | |
| 
 | |
| </p><p>
 | |
|   Another limitation of the histogram data is that it only applies to the
 | |
|   left-most column on an index.  Consider this scenario:
 | |
| 
 | |
| </p><div class="codeblock"><pre>CREATE TABLE ex3(w,x,y,z);
 | |
| CREATE INDEX ex3i1 ON ex2(w, x);
 | |
| CREATE INDEX ex3i2 ON ex2(w, y);
 | |
| SELECT z FROM ex3 WHERE w=5 AND x BETWEEN 1 AND 100 AND y BETWEEN 1 AND 100;
 | |
| </pre></div>
 | |
| <p>
 | |
|   Here the inequalities are on columns x and y which are not the
 | |
|   left-most index columns.  Hence, the histogram data which is collected no
 | |
|   left-most column of indices is useless in helping to choose between the
 | |
|   range constraints on columns x and y.
 | |
| 
 | |
| <a name="covidx"></a>
 | |
| 
 | |
| </p><h1 id="covering_indices"><span>9. </span>Covering Indices</h1>
 | |
| 
 | |
| <p>
 | |
|   When doing an indexed lookup of a row, the usual procedure is to
 | |
|   do a binary search on the index to find the index entry, then extract
 | |
|   the <a href="lang_createtable.html#rowid">rowid</a> from the index and use that <a href="lang_createtable.html#rowid">rowid</a> to do a binary search on
 | |
|   the original table.  Thus a typical indexed lookup involves two
 | |
|   binary searches.
 | |
|   If, however, all columns that were to be fetched from the table are
 | |
|   already available in the index itself, SQLite will use the values
 | |
|   contained in the index and will never look up the original table
 | |
|   row.  This saves one binary search for each row and can make many
 | |
|   queries run twice as fast.
 | |
| 
 | |
| </p><p>
 | |
|   When an index contains all of the data needed for a query and when the
 | |
|   original table never needs to be consulted, we call that index a
 | |
|   "covering index".
 | |
| 
 | |
| <a name="order_by"></a>
 | |
| 
 | |
| </p><h1 id="order_by_optimizations"><span>10. </span>ORDER BY optimizations</h1>
 | |
| 
 | |
| <p>
 | |
|   SQLite attempts to use an index to satisfy the ORDER BY clause of a
 | |
|   query when possible.
 | |
|   When faced with the choice of using an index to satisfy WHERE clause
 | |
|   constraints or satisfying an ORDER BY clause, SQLite does the same
 | |
|   cost analysis described above
 | |
|   and chooses the index that it believes will result in the fastest answer.
 | |
| 
 | |
| </p><p>
 | |
|   SQLite will also attempt to use indices to help satisfy GROUP BY clauses
 | |
|   and the DISTINCT keyword.  If the nested loops of the join can be arranged
 | |
|   such that rows that are equivalent for the GROUP BY or for the DISTINCT are
 | |
|   consecutive, then the GROUP BY or DISTINCT logic can determine if the 
 | |
|   current row is part of the same group or if the current row is distinct
 | |
|   simply by comparing the current row to the previous row.
 | |
|   This can be much faster than the alternative of comparing each row to
 | |
|   all prior rows.
 | |
| 
 | |
| <a name="partsort"></a>
 | |
| 
 | |
| </p><h2 id="partial_order_by_via_index"><span>10.1. </span>Partial ORDER BY Via Index</h2>
 | |
| 
 | |
| <p>
 | |
|   If a query contains an ORDER BY clause with multiple terms, it might
 | |
|   be that SQLite can use indices to cause rows to come out in the order
 | |
|   of some prefix of the terms in the ORDER BY but that later terms in
 | |
|   the ORDER BY are not satisfied.  In that case, SQLite does block sorting.
 | |
|   Suppose the ORDER BY clause has four terms and the natural order of the
 | |
|   query results in rows appearing in order of the first two terms.  As
 | |
|   each row is output by the query engine and enters the sorter, the 
 | |
|   outputs in the current row corresponding to the first two terms of 
 | |
|   the ORDER BY are compared against the previous row.  If they have
 | |
|   changed, the current sort is finished and output and a new sort is
 | |
|   started.  This results in a slightly faster sort.  But the bigger
 | |
|   advantages are that many fewer rows need to be held in memory,
 | |
|   reducing memory requirements, and outputs can begin to appear before
 | |
|   the core query has run to completion.
 | |
| 
 | |
| <a name="flattening"></a>
 | |
| 
 | |
| </p><h1 id="subquery_flattening"><span>11. </span>Subquery flattening</h1>
 | |
| 
 | |
| <p>
 | |
|   When a subquery occurs in the FROM clause of a SELECT, the simplest
 | |
|   behavior is to evaluate the subquery into a transient table, then run
 | |
|   the outer SELECT against the transient table.  But such a plan
 | |
|   can be suboptimal since the transient table will not have any indices
 | |
|   and the outer query (which is likely a join) will be forced to do a
 | |
|   full table scan on the transient table.
 | |
| 
 | |
| </p><p>
 | |
|   To overcome this problem, SQLite attempts to flatten subqueries in
 | |
|   the FROM clause of a SELECT.
 | |
|   This involves inserting the FROM clause of the subquery into the
 | |
|   FROM clause of the outer query and rewriting expressions in
 | |
|   the outer query that refer to the result set of the subquery.
 | |
|   For example:
 | |
| 
 | |
| </p><div class="codeblock"><pre>  SELECT t1.a, t2.b FROM t2, (SELECT x+y AS a FROM t1 WHERE z<100) WHERE="1" a="1">5
 | |
| </100)>
 | |
| </pre></div>
 | |
| <p>
 | |
|   Would be rewritten using query flattening as:
 | |
| 
 | |
| </p><div class="codeblock"><pre>  SELECT t1.x+t1.y AS a, t2.b FROM t2, t1 WHERE z<100 AND="1" a="1">5
 | |
| </100>
 | |
| </pre></div>
 | |
| <p>
 | |
|   There is a long list of conditions that must all be met in order for
 | |
|   query flattening to occur.  Some of the constraints are marked as 
 | |
|   obsolete by italic text.  These extra constraints are retained in the
 | |
|   documentation to preserve the numbering of the other constraints.
 | |
| 
 | |
| </p><p>
 | |
|   Casual readers are not expected to understand all of these rules.
 | |
|   A key take-away from this section is that the rules for determining
 | |
|   when query flatting is safe and when it is unsafe are subtle and
 | |
|   complex.  There have been multiple bugs over the years caused by
 | |
|   over-aggressive query flattening.  On the other hand, performance
 | |
|   of complex queries and/or queries involving views tends to suffer
 | |
|   if query flattening is more conservative.
 | |
| 
 | |
| </p><p>
 | |
|   </p><ol>
 | |
|   <li value="1">  <i>(Obsolete.  Query flattening is no longer
 | |
|                       attempted for aggregate subqueries.)</i>
 | |
| 
 | |
|   </li><li value="2">  <i>(Obsolete.  Query flattening is no longer
 | |
|                       attempted for aggregate subqueries.)</i>
 | |
| 
 | |
|   </li><li value="3">
 | |
|   If the subquery is the right operand of a LEFT JOIN then
 | |
|    <ol type="a"><li> the subquery may not be a join, and
 | |
|    </li><li> the FROM clause of the subquery may
 | |
|    not contain a virtual table, and
 | |
|    </li><li> the outer query may not be an aggregate.</li></ol></li>
 | |
| 
 | |
|   <li value="4">  The subquery is not DISTINCT.
 | |
| 
 | |
|   </li><li value="5"> <i>(Subsumed into constraint 4)</i>
 | |
| 
 | |
|   </li><li value="6"> <i>(Obsolete.  Query flattening is no longer
 | |
|                       attempted for aggregate subqueries.)</i>
 | |
| 
 | |
|   </li><li value="7">
 | |
|   The subquery has a FROM clause.
 | |
| 
 | |
|   </li><li value="8">
 | |
|   The subquery does not use LIMIT or the outer query is not a join.
 | |
| 
 | |
|   </li><li value="9">
 | |
|   The subquery does not use LIMIT or the outer query does not use
 | |
|   aggregates.
 | |
| 
 | |
|   </li><li value="10"> <i>(Restriction relaxed in 2005)</i>
 | |
| 
 | |
|   </li><li value="11">
 | |
|   The subquery and the outer query do not both have ORDER BY clauses.
 | |
| 
 | |
|   </li><li value="12"> <i>(Subsumed into constraint 3)</i>
 | |
| 
 | |
|   </li><li value="13">  The subquery and outer query do not both use LIMIT.
 | |
| 
 | |
|   </li><li value="14">  The subquery does not use OFFSET.
 | |
| 
 | |
|   </li><li value="15">
 | |
|   If the outer query is part of a compound select, then the
 | |
|   subquery may not have a LIMIT clause.
 | |
| 
 | |
|   </li><li value="16">
 | |
|   If the outer query is an aggregate, then the subquery may
 | |
|   not contain ORDER BY. 
 | |
| 
 | |
|   </li><li value="17">
 | |
|   If the sub-query is a compound SELECT, then
 | |
|   <ol type="'a'">
 | |
|   <li> all compound operators must be UNION ALL, and
 | |
|   </li><li> no terms with the subquery compound may be aggregate
 | |
|        or DISTINCT, and
 | |
|   </li><li> every term within the subquery must have a FROM clause, and
 | |
|   </li><li> the outer query may not be an aggregate, DISTINCT query, or join.
 | |
|   </li></ol>
 | |
| 
 | |
|   The parent and sub-query may contain WHERE clauses. Subject to
 | |
|   rules (11), (12) and (13), they may also contain ORDER BY,
 | |
|   LIMIT and OFFSET clauses.
 | |
| 
 | |
|   </li><li value="18">
 | |
|   If the sub-query is a compound select, then all terms of the
 | |
|   ORDER by clause of the parent must be simple references to 
 | |
|   columns of the sub-query.
 | |
| 
 | |
|   </li><li value="19">
 | |
|   If the subquery uses LIMIT then the outer query may not
 | |
|   have a WHERE clause.
 | |
| 
 | |
|   </li><li value="20">
 | |
|   If the sub-query is a compound select, then it must not use
 | |
|    an ORDER BY clause.
 | |
| 
 | |
|   </li><li value="21">
 | |
|   If the subquery uses LIMIT, then the outer query may not be
 | |
|   DISTINCT.
 | |
| 
 | |
|   </li><li value="22"> The subquery may not be a recursive CTE.
 | |
| 
 | |
|   </li><li value="23"> <i>(Subsumed into constraint 17d.)</i>
 | |
| 
 | |
|   </li><li value="24"> <i>(Obsolete. Query flattening is no longer
 | |
|                       attempted for aggregate subqueries.)</i>
 | |
|   </li></ol>
 | |
| 
 | |
| <p>
 | |
|   The casual reader is not expected to understand or remember any part of
 | |
|   the list above.  The point of this list is to demonstrate
 | |
|   that the decision of whether or not to flatten a query is complex.
 | |
| 
 | |
| </p><p>
 | |
|   Query flattening is an important optimization when views are used as
 | |
|   each use of a view is translated into a subquery.
 | |
| 
 | |
| <a name="coroutines"></a>
 | |
| 
 | |
| </p><h1 id="subquery_co_routines"><span>12. </span>Subquery Co-routines</h1>
 | |
| 
 | |
| <p>
 | |
|   Prior to SQLite 3.7.15 (2012-12-12),
 | |
|   a subquery in the FROM clause would be
 | |
|   either flattened into the outer query, or else the subquery would be run
 | |
|   to completion
 | |
|   before the outer query started, the result set from the subquery
 | |
|   would be stored in a transient table,
 | |
|   and then the transient table would be used in the outer query.  Newer
 | |
|   versions of SQLite have a third option, which is to implement the subquery
 | |
|   using a co-routine.
 | |
| 
 | |
| </p><p>
 | |
|   A co-routine is like a subroutine in that it runs in the same thread
 | |
|   as the caller and eventually returns control back to the caller.  The
 | |
|   difference is that a co-routine also has the ability to return
 | |
|   before it has finished, and then resume where it left off the next
 | |
|   time it is called.
 | |
| 
 | |
| </p><p>
 | |
|   When a subquery is implemented as a co-routine, byte-code is generated
 | |
|   to implement the subquery as if it were a standalone query, except
 | |
|   instead of returning rows of results back to the application, the
 | |
|   co-routine yields control back to the caller after each row is computed.
 | |
|   The caller can then use that one computed row as part of its computation,
 | |
|   then invoke the co-routine again when it is ready for the next row.
 | |
| 
 | |
| </p><p>
 | |
|   Co-routines are better than storing the complete result set of the subquery
 | |
|   in a transient table because co-routines use less memory.  With a co-routine,
 | |
|   only a single row of the result needs to be remembered, whereas all rows of
 | |
|   the result must be stored for a transient table.  Also, because the
 | |
|   co-routine does not need to run to completion before the outer query
 | |
|   begins its work, the first rows of output can appear much sooner, and if
 | |
|   the overall query is aborted, less work is done overall.
 | |
| 
 | |
| </p><p>
 | |
|   On the other hand, if the result of the subquery must be scanned multiple
 | |
|   times (because, for example, it is just one table in a join) then it
 | |
|   is better to use a transient table to remember the entire result of the
 | |
|   subquery, in order to avoid computing the subquery more than once.
 | |
| 
 | |
| <a name="deferred_work"></a>
 | |
| 
 | |
| </p><h2 id="using_co_routines_to_defer_work_until_after_the_sorting"><span>12.1. </span>Using Co-routines To Defer Work Until After The Sorting</h2>
 | |
| 
 | |
| <p>
 | |
|   As of SQLite version 3.21.0 (2017-10-24), the query planner will
 | |
|   always prefer to use a co-routine to implement FROM-clause subqueries 
 | |
|   that contains an ORDER BY clause and that are not part of a join when
 | |
|   the result set of the outer query is "complex".  This feature allows
 | |
|   applications to shift expensive computations from before the
 | |
|   sorter until after the sorter, which can result in faster operation.
 | |
|   For example, consider this query:
 | |
| 
 | |
| </p><div class="codeblock"><pre>SELECT expensive_function(a) FROM tab ORDER BY date DESC LIMIT 5;
 | |
| </pre></div>
 | |
| <p>
 | |
|   The goal of this query is to compute some value for the five most
 | |
|   recent entries in the table.  But in the query above, the
 | |
|   "expensive_function()" is invoked prior to the sort and thus is
 | |
|   invoked on every row of the table, even
 | |
|   rows that are ultimately omitted due to the LIMIT clause.
 | |
|   A co-routine can be used to work around this:
 | |
| 
 | |
| </p><div class="codeblock"><pre>SELECT expensive_function(a) FROM (
 | |
|   SELECT a FROM tab ORDER BY date DESC LIMIT 5
 | |
| );
 | |
| </pre></div>
 | |
| <p>
 | |
|   In the revised query, the subquery implemented by a co-routine computes
 | |
|   the five most recent values for "a".  Those five values are passed from the
 | |
|   co-routine up into the outer query where the "expensive_function()" is
 | |
|   invoked on only the specific rows that the application cares about.
 | |
| 
 | |
| </p><p>
 | |
|   The query planner in future versions of SQLite might grow smart enough
 | |
|   to make transformations such as the above automatically, in both directions.
 | |
|   That is to say, future versions of SQLite might transform queries of the
 | |
|   first form into the second, or queries written the second way into the
 | |
|   first.  As of SQLite version 3.22.0 (2018-01-22), the query planner
 | |
|   will flatten the subquery if the outer query does not make use of any
 | |
|   user-defined functions or subqueries in its result set.  For the examples
 | |
|   shown above, however, SQLite implements each of the queries as
 | |
|   written.
 | |
| 
 | |
| <a name="minmax"></a>
 | |
| 
 | |
| </p><h1 id="the_min_max_optimization"><span>13. </span>The MIN/MAX optimization</h1>
 | |
| 
 | |
| <p>
 | |
|   Queries that contain a single MIN() or MAX() aggregate function whose
 | |
|   argument is the left-most column of an index might be satisfied
 | |
|   by doing a single index lookup rather than by scanning the entire table.
 | |
|   Examples:
 | |
| 
 | |
| </p><div class="codeblock"><pre>SELECT MIN(x) FROM table;
 | |
| SELECT MAX(x)+1 FROM table;
 | |
| </pre></div>
 | |
| 
 | |
| <a name="autoindex"></a>
 | |
| 
 | |
| <h1 id="automatic_indexes"><span>14. </span>Automatic Indexes</h1>
 | |
| 
 | |
| <p>
 | |
|   When no indices are available to aid the evaluation of a query, SQLite
 | |
|   might create an automatic index that lasts only for the duration
 | |
|   of a single SQL statement.
 | |
|   Since the cost of constructing the automatic index is
 | |
|   O(NlogN) (where N is the number of entries in the table) and the cost of
 | |
|   doing a full table scan is only O(N), an automatic index will
 | |
|   only be created if SQLite expects that the lookup will be run more than
 | |
|   logN times during the course of the SQL statement. Consider an example:
 | |
| 
 | |
| </p><div class="codeblock"><pre>CREATE TABLE t1(a,b);
 | |
| CREATE TABLE t2(c,d);
 | |
| -- Insert many rows into both t1 and t2
 | |
| SELECT * FROM t1, t2 WHERE a=c;
 | |
| </pre></div>
 | |
| <p>
 | |
|   In the query above, if both t1 and t2 have approximately N rows, then
 | |
|   without any indices the query will require O(N*N) time.  On the other
 | |
|   hand, creating an index on table t2 requires O(NlogN) time and then use
 | |
|   that index to evaluate the query requires an additional O(NlogN) time.
 | |
|   In the absence of <a href="lang_analyze.html">ANALYZE</a> information, SQLite guesses that N is one
 | |
|   million and hence it believes that constructing the automatic index will
 | |
|   be the cheaper approach.
 | |
| 
 | |
| </p><p>
 | |
|   An automatic index might also be used for a subquery:
 | |
| 
 | |
| </p><div class="codeblock"><pre>CREATE TABLE t1(a,b);
 | |
| CREATE TABLE t2(c,d);
 | |
| -- Insert many rows into both t1 and t2
 | |
| SELECT a, (SELECT d FROM t2 WHERE c=b) FROM t1;
 | |
| </pre></div>
 | |
| <p>
 | |
|   In this example, the t2 table is used in a subquery to translate values
 | |
|   of the t1.b column.  If each table contains N rows, SQLite expects that
 | |
|   the subquery will run N times, and hence it will believe it is faster
 | |
|   to construct an automatic, transient index on t2 first and then use
 | |
|   that index to satisfy the N instances of the subquery.
 | |
| 
 | |
| </p><p>
 | |
|   The automatic indexing capability can be disabled at run-time using
 | |
|   the <a href="pragma.html#pragma_automatic_index">automatic_index pragma</a>.  Automatic indexing is turned on by
 | |
|   default, but this can be changed so that automatic indexing is off
 | |
|   by default using the <a href="compile.html#default_automatic_index">SQLITE_DEFAULT_AUTOMATIC_INDEX</a> compile-time option.
 | |
|   The ability to create automatic indices can be completely disabled by
 | |
|   compiling with the <a href="compile.html#omit_automatic_index">SQLITE_OMIT_AUTOMATIC_INDEX</a> compile-time option.
 | |
| 
 | |
| </p><p>
 | |
|   In SQLite <a href="releaselog/3_8_0.html">version 3.8.0</a> (2013-08-26) and later, 
 | |
|   an <a href="rescode.html#warning_autoindex">SQLITE_WARNING_AUTOINDEX</a> message is sent
 | |
|   to the <a href="errlog.html">error log</a> every time a statement is prepared that uses an
 | |
|   automatic index.  Application developers can and should use these warnings
 | |
|   to identify the need for new persistent indices in the schema.
 | |
| 
 | |
| </p><p>
 | |
|   Do not confuse automatic indexes with the <a href="fileformat2.html#intschema">internal indexes</a> (having names
 | |
|   like "sqlite_autoindex_<i>table</i>_<i>N</i>") that are sometimes
 | |
|   created to implement a <a href="lang_createtable.html#primkeyconst">PRIMARY KEY constraint</a> or <a href="lang_createtable.html#uniqueconst">UNIQUE constraint</a>.
 | |
|   The automatic indexes described here exist only for the duration of a
 | |
|   single query, are never persisted to disk, and are only visible to a
 | |
|   single database connection.  Internal indexes are part of the implementation
 | |
|   of PRIMARY KEY and UNIQUE constraints, are long-lasting and persisted
 | |
|   to disk, and are visible to all database connections.  The term "autoindex"
 | |
|   appears in the names of <a href="fileformat2.html#intschema">internal indexes</a> for legacy reasons and does
 | |
|   not indicate that internal indexes and automatic indexes are related.
 | |
| 
 | |
| <a name="pushdown"></a>
 | |
| 
 | |
| </p><h1 id="the_push_down_optimization"><span>15. </span>The Push-Down Optimization</h1>
 | |
| 
 | |
| <p>
 | |
|   If a subquery cannot be <a href="optoverview.html#flattening">flattened</a> into the outer query, it might
 | |
|   still be possible to enhance performance by "pushing down" WHERE clause
 | |
|   terms from the outer query into the subquery.  Consider an example:
 | |
| 
 | |
| </p><div class="codeblock"><pre>CREATE TABLE t1(a INT, b INT);
 | |
| CREATE TABLE t2(x INT, y INT);
 | |
| CREATE VIEW v1(a,b) AS SELECT DISTINCT a, b FROM t1;
 | |
| 
 | |
| SELECT x, y, b
 | |
|   FROM t2 JOIN v1 ON (x=a)
 | |
|  WHERE b BETWEEN 10 AND 20;
 | |
| </pre></div>
 | |
| 
 | |
| <p>
 | |
|   The view v1 cannot be <a href="optoverview.html#flattening">flattened</a> because it is DISTINCT.  It must
 | |
|   instead be run as a subquery with the results being stored in a
 | |
|   transient table, then the join is performed between t2 and the
 | |
|   transient table.  The push-down optimization pushes down the
 | |
|   "b BETWEEN 10 AND 20" term into the view.  This makes the transient
 | |
|   table smaller, and helps the subquery to run faster if there
 | |
|   is an index on t1.b.  The resulting evaluation is like this:
 | |
| 
 | |
| </p><div class="codeblock"><pre>SELECT x, y, b
 | |
|   FROM t2
 | |
|   JOIN (SELECT DISTINCT a, b FROM t1 WHERE b BETWEEN 10 AND 20)
 | |
|  WHERE b BETWEEN 10 AND 20;
 | |
| </pre></div>
 | |
| 
 | |
| <p>
 | |
|   The push-down optimization cannot always be used.  For example,
 | |
|   if the subquery contains a LIMIT, then pushing down any part of
 | |
|   the WHERE clause from the outer query could change the result of
 | |
|   the inner query.  There are other restrictions, explained in a
 | |
|   comment in the source code on the pushDownWhereTerms() routine
 | |
|   that implements this optimization.
 | |
| 
 | |
| <a name="leftjoinreduction"></a>
 | |
| 
 | |
| </p><h1 id="the_left_join_strength_reduction_optimization"><span>16. </span>The LEFT JOIN Strength Reduction Optimization</h1>
 | |
| 
 | |
| <p>
 | |
|   Sometimes a LEFT JOIN can be converted into an ordinary JOIN,
 | |
|   if there are terms in the WHERE clause that guarantee that the
 | |
|   two joins will give identical results.  In particular, if any
 | |
|   column in the right-hand table of the LEFT JOIN must be non-NULL
 | |
|   in order for the WHERE clause to be true, then the LEFT JOIN is
 | |
|   demoted to an ordinary JOIN.
 | |
| 
 | |
| </p><p>
 | |
|   The prover that determines whether any column of the right-hand
 | |
|   table of a LEFT JOIN must be non-NULL in the WHERE clause is
 | |
|   imperfect.  It sometimes returns a false negative.  In other words,
 | |
|   it sometimes fails to reduce the strength of a LEFT JOIN when doing
 | |
|   so was in fact possible.  For example, the prover does not know
 | |
|   the <a href="lang_datefunc.html">datetime() SQL function</a> will always return NULL if its first
 | |
|   argument is NULL, and so it will not recognize that the LEFT JOIN
 | |
|   in the following query could be strength-reduced:
 | |
| 
 | |
| </p><div class="codeblock"><pre>SELECT urls.url
 | |
|   FROM urls
 | |
|   LEFT JOIN
 | |
|     (SELECT *
 | |
|       FROM (SELECT url_id AS uid, max(retrieval_time) AS rtime
 | |
|               FROM lookups GROUP BY 1 ORDER BY 1)
 | |
|       WHERE uid IN (358341,358341,358341)
 | |
|     ) recent
 | |
|     ON u.source_seed_id = recent.xyz OR u.url_id = recent.xyz
 | |
|  WHERE
 | |
|      DATETIME(recent.rtime) > DATETIME('now', '-5 days');
 | |
| </pre></div>
 | |
| 
 | |
| <p>
 | |
|   It is possible that future enhancements to the prover might enable it
 | |
|   to recognize that NULL inputs to certain built-in functions
 | |
|   always result in a NULL answer.  But not all built-in
 | |
|   functions have that property (for example <a href="lang_corefunc.html#coalesce">coalesce()</a>) and, of
 | |
|   course, the prover will never be able to reason about
 | |
|   <a href="appfunc.html">application-defined SQL functions</a>.
 | |
| 
 | |
| 
 | |
| <a name="omitnoopjoin"></a>
 | |
| 
 | |
| </p><h1 id="the_omit_left_join_optimization"><span>17. </span>The Omit LEFT JOIN Optimization</h1>
 | |
| 
 | |
| <p>
 | |
|   Sometimes a LEFT JOIN can be completely omitted from a query without
 | |
|   changing the result.  This can happen if all of the following are
 | |
|   true:
 | |
| 
 | |
| </p><p>
 | |
|   </p><ol>
 | |
|   <li> The query is not an aggregate
 | |
|   </li><li> Either the query is DISTINCT or else the ON or USING clause
 | |
|        on the LEFT JOIN constrains the join such that it matches
 | |
|        only a single row
 | |
|   </li><li> The right-hand table of the LEFT JOIN is not be used anywhere
 | |
|        in the query outside of its own USING or ON clause.
 | |
|   </li></ol>
 | |
| 
 | |
| <p>
 | |
|   LEFT JOIN elimination often comes up when LEFT JOINs are used
 | |
|   inside of views, and then the view is used in such as way that
 | |
|   none of the columns of the right-hand table of the LEFT JOIN are
 | |
|   referenced.
 | |
| 
 | |
| </p><p>
 | |
|   Here is a simple example of omitting a LEFT JOIN:
 | |
| 
 | |
| </p><div class="codeblock"><pre>CREATE TABLE t1(ipk INTEGER PRIMARY KEY, v1);
 | |
| CREATE TABLE t2(ipk INTEGER PRIMARY KEY, v2);
 | |
| CREATE TABLE t3(ipk INTEGER PRIMARY KEY, v3);
 | |
| 
 | |
| SELECT v1, v3 FROM t1 
 | |
|   LEFT JOIN t2 USING (t1.ipk=t2.ipk)
 | |
|   LEFT JOIN t3 USING (t1.ipk=t3.ipk)
 | |
| </pre></div>
 | |
| 
 | |
| <p>
 | |
|   The t2 table is completely unused in the query above, and so the
 | |
|   query planner is able to implement the query as if it were written:
 | |
| 
 | |
| </p><div class="codeblock"><pre>SELECT v1, v3 FROM t1 
 | |
|   LEFT JOIN t3 USING (t1.ipk=t3.ipk)
 | |
| </pre></div>
 | |
| 
 |