363 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
			
		
		
	
	
			363 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
| <!DOCTYPE html>
 | |
| <html><head>
 | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0">
 | |
| <meta http-equiv="content-type" content="text/html; charset=UTF-8">
 | |
| <link href="sqlite.css" rel="stylesheet">
 | |
| <title>How SQLite Works</title>
 | |
| <!-- path= -->
 | |
| </head>
 | |
| <body>
 | |
| <div class=nosearch>
 | |
| <a href="index.html">
 | |
| <img class="logo" src="images/sqlite370_banner.gif" alt="SQLite" border="0">
 | |
| </a>
 | |
| <div><!-- IE hack to prevent disappearing logo --></div>
 | |
| <div class="tagline desktoponly">
 | |
| Small. Fast. Reliable.<br>Choose any three.
 | |
| </div>
 | |
| <div class="menu mainmenu">
 | |
| <ul>
 | |
| <li><a href="index.html">Home</a>
 | |
| <li class='mobileonly'><a href="javascript:void(0)" onclick='toggle_div("submenu")'>Menu</a>
 | |
| <li class='wideonly'><a href='about.html'>About</a>
 | |
| <li class='desktoponly'><a href="docs.html">Documentation</a>
 | |
| <li class='desktoponly'><a href="download.html">Download</a>
 | |
| <li class='wideonly'><a href='copyright.html'>License</a>
 | |
| <li class='desktoponly'><a href="support.html">Support</a>
 | |
| <li class='desktoponly'><a href="prosupport.html">Purchase</a>
 | |
| <li class='search' id='search_menubutton'>
 | |
| <a href="javascript:void(0)" onclick='toggle_search()'>Search</a>
 | |
| </ul>
 | |
| </div>
 | |
| <div class="menu submenu" id="submenu">
 | |
| <ul>
 | |
| <li><a href='about.html'>About</a>
 | |
| <li><a href='docs.html'>Documentation</a>
 | |
| <li><a href='download.html'>Download</a>
 | |
| <li><a href='support.html'>Support</a>
 | |
| <li><a href='prosupport.html'>Purchase</a>
 | |
| </ul>
 | |
| </div>
 | |
| <div class="searchmenu" id="searchmenu">
 | |
| <form method="GET" action="search">
 | |
| <select name="s" id="searchtype">
 | |
| <option value="d">Search Documentation</option>
 | |
| <option value="c">Search Changelog</option>
 | |
| </select>
 | |
| <input type="text" name="q" id="searchbox" value="">
 | |
| <input type="submit" value="Go">
 | |
| </form>
 | |
| </div>
 | |
| </div>
 | |
| <script>
 | |
| function toggle_div(nm) {
 | |
| var w = document.getElementById(nm);
 | |
| if( w.style.display=="block" ){
 | |
| w.style.display = "none";
 | |
| }else{
 | |
| w.style.display = "block";
 | |
| }
 | |
| }
 | |
| function toggle_search() {
 | |
| var w = document.getElementById("searchmenu");
 | |
| if( w.style.display=="block" ){
 | |
| w.style.display = "none";
 | |
| } else {
 | |
| w.style.display = "block";
 | |
| setTimeout(function(){
 | |
| document.getElementById("searchbox").focus()
 | |
| }, 30);
 | |
| }
 | |
| }
 | |
| function div_off(nm){document.getElementById(nm).style.display="none";}
 | |
| window.onbeforeunload = function(e){div_off("submenu");}
 | |
| /* Disable the Search feature if we are not operating from CGI, since */
 | |
| /* Search is accomplished using CGI and will not work without it. */
 | |
| if( !location.origin || !location.origin.match || !location.origin.match(/http/) ){
 | |
| document.getElementById("search_menubutton").style.display = "none";
 | |
| }
 | |
| /* Used by the Hide/Show button beside syntax diagrams, to toggle the */
 | |
| function hideorshow(btn,obj){
 | |
| var x = document.getElementById(obj);
 | |
| var b = document.getElementById(btn);
 | |
| if( x.style.display!='none' ){
 | |
| x.style.display = 'none';
 | |
| b.innerHTML='show';
 | |
| }else{
 | |
| x.style.display = '';
 | |
| b.innerHTML='hide';
 | |
| }
 | |
| return false;
 | |
| }
 | |
| </script>
 | |
| </div>
 | |
| <div class=fancy>
 | |
| <div class=nosearch>
 | |
| <div class="fancy_title">
 | |
| How SQLite Works
 | |
| </div>
 | |
| <div class="fancy_toc">
 | |
| <a onclick="toggle_toc()">
 | |
| <span class="fancy_toc_mark" id="toc_mk">►</span>
 | |
| Table Of Contents
 | |
| </a>
 | |
| <div id="toc_sub"><div class="fancy-toc1"><a href="#background">1. Background</a></div>
 | |
| <div class="fancy-toc2"><a href="#sqlite_is_different_from_most_other_sql_databases">1.1. SQLite Is Different From Most Other SQL Databases</a></div>
 | |
| <div class="fancy-toc1"><a href="#sql_is_a_programming_language">2. SQL Is A Programming Language</a></div>
 | |
| <div class="fancy-toc2"><a href="#programming_language_processing_steps">2.1. Programming Language Processing Steps</a></div>
 | |
| <div class="fancy-toc2"><a href="#compiling_sqlite_programs">2.2. Compiling SQLite Programs</a></div>
 | |
| <div class="fancy-toc1"><a href="#further_reading">3. Further Reading</a></div>
 | |
| </div>
 | |
| </div>
 | |
| <script>
 | |
| function toggle_toc(){
 | |
| var sub = document.getElementById("toc_sub")
 | |
| var mk = document.getElementById("toc_mk")
 | |
| if( sub.style.display!="block" ){
 | |
| sub.style.display = "block";
 | |
| mk.innerHTML = "▼";
 | |
| } else {
 | |
| sub.style.display = "none";
 | |
| mk.innerHTML = "►";
 | |
| }
 | |
| }
 | |
| </script>
 | |
| </div>
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| <h1 id="background"><span>1. </span>Background</h1>
 | |
| 
 | |
| <p>
 | |
| SQLite is a <a href="https://en.wikipedia.org/wiki/Library_(computing)">software library</a>
 | |
| that translates high-level disk I/O requests generated by an application
 | |
| into low-level I/O operations that
 | |
| can be carried out by the operating system.  The application constructs 
 | |
| high-level I/O requests using the
 | |
| <a href="https://en.wikipedia.org/wiki/SQL">SQL language</a>.
 | |
| SQLite translates each high-level SQL statement into a sequence of
 | |
| many low-level I/O requests (open a file, read a few bytes from a file,
 | |
| write a few bytes into a file, etc.) that do the work requested by
 | |
| the SQL.
 | |
| 
 | |
| 
 | |
| </p><p>
 | |
| An application program could do all its disk I/O by direct calls to
 | |
| operating system I/O routines
 | |
| or by using a key/value storage engine like 
 | |
| <a href="https://en.wikipedia.org/wiki/Berkeley_DB">Berkeley DB</a> or
 | |
| <a href="https://en.wikipedia.org/wiki/RocksDB">RocksDB</a> (to name but two).
 | |
| But there are advantages to using a higher-level interface based
 | |
| on the SQL language.
 | |
| 
 | |
| </p><ol>
 | |
| <li><p>
 | |
| SQL is a very high-level language.
 | |
| A few lines of SQL can replace hundreds or thousands of lines of
 | |
| procedural code.  SQL thus reduces
 | |
| the amount of work needed to develop and maintain the application,
 | |
| and thereby helps to reduce the number of bugs in the application.
 | |
| 
 | |
| </p></li><li><p>
 | |
| SQL and SQLite are 
 | |
| <a href="https://en.wikipedia.org/wiki/Database_transaction">transactional</a>.
 | |
| The use of a transactional storage system makes it much easier to
 | |
| reason about the behavior of the application, and to write applications
 | |
| that are robust, even in the face of software bugs, hardware faults,
 | |
| or power losses.
 | |
| 
 | |
| </p></li><li><p>
 | |
| SQLite is often
 | |
| <a href="fasterthanfs.html">faster than direct low-level I/O</a>.
 | |
| This is counterintuitive.  One would expect that a
 | |
| high-level interface such as SQLite would impose a run-time penalty.
 | |
| And, theoretically, that is correct.  But in practice, SQL-based
 | |
| systems such as SQLite do so many behind-the-scenes optimizations
 | |
| that an application developer would never have time to create and
 | |
| maintain, that the SQL-based systems end up providing
 | |
| a net performance gain.
 | |
| </p></li></ol>
 | |
| 
 | |
| <h2 id="sqlite_is_different_from_most_other_sql_databases"><span>1.1. </span>SQLite Is Different From Most Other SQL Databases</h2>
 | |
| 
 | |
| <p>
 | |
| There are many SQL-based database management systems available, besides
 | |
| SQLite.  Common options include MySQL, PostgreSQL, and SQL-Server.
 | |
| All these systems use the SQL langauge to communicate with the 
 | |
| application, just like SQLite.  But these other systems different
 | |
| from SQLite in important respects.
 | |
| 
 | |
| </p><ol>
 | |
| <li><p>
 | |
| SQLite is a <a href="serverless.html">serverless</a> software library, whereas the other
 | |
| systems are client-server based.  With MySQL, PostgreSQL, SQL-Server,
 | |
| and others, the application sends a message containing some SQL
 | |
| over to a separate server thread or process. That separate
 | |
| thread or process performs the requested I/O, then send the results
 | |
| back to the application.  But there is no separate thread or
 | |
| process with SQLite.  SQLite runs in the same address space as
 | |
| the application, using the same program counter and heap storage.
 | |
| SQLite does no interprocess communication (IPC).  When an application
 | |
| sends an SQL statement into SQLite (by invoking a the appropriate
 | |
| SQLite library subroutine), SQLite interprets the SQL
 | |
| in the same thread as the caller.  When an SQLite API routine
 | |
| returns, it does not leave behind any background tasks that
 | |
| run separately from the application.
 | |
| 
 | |
| </p></li><li><p>
 | |
| An SQLite database is a single ordinary file on disk
 | |
| (with a <a href="fileformat2.html">well-defined file format</a>).  With other
 | |
| systems, a "database" is usually a large number of separate
 | |
| files hidden away in obscure directories of the filesystem, or
 | |
| even spread across multiple machines.  But with SQLite, a
 | |
| complete database is just an ordinary disk file.
 | |
| </p></li></ol>
 | |
| 
 | |
| <h1 id="sql_is_a_programming_language"><span>2. </span>SQL Is A Programming Language</h1>
 | |
| 
 | |
| <p>
 | |
| The best way to understand how SQL database engines work is to
 | |
| think of SQL as a programming language, not as a "query language".
 | |
| Each SQL statement is a separate program.  Applications construct
 | |
| SQL program source files and send them to the database engine.
 | |
| The database engine compiles the SQL source code into executable
 | |
| form, runs that executable, then sends the result back to the
 | |
| application.
 | |
| 
 | |
| </p><p>
 | |
| While SQL is a programming language, it is different from other
 | |
| programming languages like C, Javascript, Python, or Go in that
 | |
| SQL is a 
 | |
| <a href="https://en.wikipedia.org/wiki/Declarative_programming">declarative language</a>
 | |
| where the others are
 | |
| <a href="https://en.wikipedia.org/wiki/Imperative_programming">imperative languages</a>.
 | |
| This is an important difference that has implications for the
 | |
| design of the compiler used to translate program source text into an
 | |
| executable format. However, those details 
 | |
| should not detract from the fact that
 | |
| SQL is really just another programming language.
 | |
| 
 | |
| </p><h2 id="programming_language_processing_steps"><span>2.1. </span>Programming Language Processing Steps</h2>
 | |
| 
 | |
| <p>
 | |
| All programming languages are processed in two steps:
 | |
| 
 | |
| </p><ol>
 | |
| <li><p>
 | |
| Translate the program source text into an executable format.
 | |
| </p></li><li><p>
 | |
| Run the executable generated in the previous step in order to
 | |
| carry out the desired action.
 | |
| </p></li></ol>
 | |
| 
 | |
| <p>
 | |
| All programming languages uses those two basic steps.
 | |
| The main difference is in the executable format.
 | |
| 
 | |
| </p><p>
 | |
| "Compiled" languages like C++ and Rust translate
 | |
| the source text into machine code that can be directly executed
 | |
| by the underlying hardware.  There exist SQL database systems
 | |
| that do the same with SQL - they translate each SQL statement
 | |
| directly into machine code.  But that approach is uncommon and
 | |
| is not the approach taken by SQLite.
 | |
| 
 | |
| </p><p>
 | |
| Other languages like Java, Perl, Python, and TCL
 | |
| typically translate the program source text into bytecode.
 | |
| This bytecode is then run through an interpreter that
 | |
| reads the bytecode and carries out the desired operations.
 | |
| SQLite uses this bytecode approach.  If you preceed
 | |
| any SQL statement with the "<a href="lang_explain.html">EXPLAIN</a>" keyword in SQLite,
 | |
| it will show you the bytecode that is generated rather
 | |
| than run the bytecode.
 | |
| 
 | |
| </p><p>
 | |
| Another approach is to translate the program source text
 | |
| into a tree of objects in memory.  This tree is the "executable".
 | |
| An interpret runs the executable by walking the tree.
 | |
| This is the technique used by MySQL, PostgreSQL, and
 | |
| SQL-Server.
 | |
| 
 | |
| </p><p>
 | |
| Of course, not every language fits neatly into one of the
 | |
| above catagories.  This applies to both SQL database engines
 | |
| and more familiar imperative programming languages.  Javascript
 | |
| is famous for using a hybrid execution model, where the code
 | |
| is initially compiled into a tree of objects, but might be
 | |
| further translating (using just-in-time compilation) down into
 | |
| more efficient bytecode or machine code, as a means of boosting
 | |
| performance.
 | |
| 
 | |
| </p><p>
 | |
| The executable format really ends up being just an implementation
 | |
| detail.  The key point is that all languages have a compiler
 | |
| step which translates programs into an executable format and
 | |
| a run step that executes the compiled program.
 | |
| 
 | |
| 
 | |
| </p><h2 id="compiling_sqlite_programs"><span>2.2. </span>Compiling SQLite Programs</h2>
 | |
| 
 | |
| <p>
 | |
| When an SQL program is submitted to SQLite, the first step is
 | |
| to split the source text into "tokens".  A token might be:
 | |
| 
 | |
| </p><ul>
 | |
| <li>A language keyword like "SELECT" or "UPDATE".
 | |
| </li><li>An identifier for a table or column or variable.
 | |
| </li><li>Punctuation characters like "," or "==" or ";".
 | |
| </li><li>Literal values: numeric or string constants.
 | |
| </li><li>Whitespace or comments.
 | |
| </li></ul>
 | |
| 
 | |
| <p>
 | |
| Whitespace and comment tokens are discarded.  All other tokens
 | |
| are fed into an
 | |
| <a href="https://en.wikipedia.org/wiki/LALR_parser">LALR(1) Parser</a>
 | |
| that analysis the structure of the input program and generates an
 | |
| <a href="https://en.wikipedia.org/wiki/Abstract_syntax_tree">Abstract Syntax Tree (AST)</a>
 | |
| for the input program.
 | |
| 
 | |
| </p><p>
 | |
| The parser forwards the AST on to the code generator.
 | |
| The code generator is the heart of SQLite, and is where most of
 | |
| the magic happens.
 | |
| The code generator resolves symbolic names in the AST - matching
 | |
| the names of columns and tables in the input SQL into actual
 | |
| columns and tables of the database.  The code generator also does
 | |
| various transformations on the AST to "optimize" it.  Finally
 | |
| the code generator chooses appropriate algorithms to implement
 | |
| the operations requested by the AST and constructs bytecode to
 | |
| carry out those operations.
 | |
| 
 | |
| </p><p>
 | |
| The bytecode generated by the code generator is called a
 | |
| "<a href="c3ref/stmt.html">prepared statement</a>".  Translating SQL source text into a
 | |
| prepared statement is analogous to converting a C++ program
 | |
| into machine code by invoking gcc or clang.  Human-readable
 | |
| source text (SQL or C++) goes in, and a machine readable executable
 | |
| (bytecode or machine code) comes out.
 | |
| 
 | |
| </p><h1 id="further_reading"><span>3. </span>Further Reading</h1>
 | |
| 
 | |
| <ul>
 | |
| <li><p>
 | |
| The <a href="atomiccommit.html">Atomic Commit</a> document describes how SQLite implements
 | |
| transactions.
 | |
| 
 | |
| </p></li><li><p>
 | |
| The <a href="opcode.html">bytecode engine</a> document has more information
 | |
| on the bytecode format used by SQLite, and how to view and interpret
 | |
| an SQLite prepared statement.
 | |
| 
 | |
| </p></li><li><p>
 | |
| The <a href="optoverview.html">SQLite query planner</a> and
 | |
| <a href="queryplanner-ng.html">Next Generation Query Planner</a> documents have further details
 | |
| on the algorithms SQLite uses to implement SQL statements and
 | |
| how it goes above choosing an appropriate algorithm for each
 | |
| individual SQL statement.
 | |
| </p></li></ul>
 | |
| 
 |