1981 lines
		
	
	
		
			109 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
			
		
		
	
	
			1981 lines
		
	
	
		
			109 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
<!DOCTYPE html>
 | 
						|
<html><head>
 | 
						|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
 | 
						|
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
 | 
						|
<link href="sqlite.css" rel="stylesheet">
 | 
						|
<title>SQLite File IO Specification</title>
 | 
						|
<!-- path= -->
 | 
						|
</head>
 | 
						|
<body>
 | 
						|
<div class=nosearch>
 | 
						|
<a href="index.html">
 | 
						|
<img class="logo" src="images/sqlite370_banner.gif" alt="SQLite" border="0">
 | 
						|
</a>
 | 
						|
<div><!-- IE hack to prevent disappearing logo --></div>
 | 
						|
<div class="tagline desktoponly">
 | 
						|
Small. Fast. Reliable.<br>Choose any three.
 | 
						|
</div>
 | 
						|
<div class="menu mainmenu">
 | 
						|
<ul>
 | 
						|
<li><a href="index.html">Home</a>
 | 
						|
<li class='mobileonly'><a href="javascript:void(0)" onclick='toggle_div("submenu")'>Menu</a>
 | 
						|
<li class='wideonly'><a href='about.html'>About</a>
 | 
						|
<li class='desktoponly'><a href="docs.html">Documentation</a>
 | 
						|
<li class='desktoponly'><a href="download.html">Download</a>
 | 
						|
<li class='wideonly'><a href='copyright.html'>License</a>
 | 
						|
<li class='desktoponly'><a href="support.html">Support</a>
 | 
						|
<li class='desktoponly'><a href="prosupport.html">Purchase</a>
 | 
						|
<li class='search' id='search_menubutton'>
 | 
						|
<a href="javascript:void(0)" onclick='toggle_search()'>Search</a>
 | 
						|
</ul>
 | 
						|
</div>
 | 
						|
<div class="menu submenu" id="submenu">
 | 
						|
<ul>
 | 
						|
<li><a href='about.html'>About</a>
 | 
						|
<li><a href='docs.html'>Documentation</a>
 | 
						|
<li><a href='download.html'>Download</a>
 | 
						|
<li><a href='support.html'>Support</a>
 | 
						|
<li><a href='prosupport.html'>Purchase</a>
 | 
						|
</ul>
 | 
						|
</div>
 | 
						|
<div class="searchmenu" id="searchmenu">
 | 
						|
<form method="GET" action="search">
 | 
						|
<select name="s" id="searchtype">
 | 
						|
<option value="d">Search Documentation</option>
 | 
						|
<option value="c">Search Changelog</option>
 | 
						|
</select>
 | 
						|
<input type="text" name="q" id="searchbox" value="">
 | 
						|
<input type="submit" value="Go">
 | 
						|
</form>
 | 
						|
</div>
 | 
						|
</div>
 | 
						|
<script>
 | 
						|
/*
** Toggle the visibility of the element whose id is nm.
**
** The element is hidden when its inline display style is exactly "block";
** in every other case (including when no inline display has been set yet)
** its inline display is set to "block".
**
** nm: id of the element to show or hide.  If no element has that id the
**     function does nothing instead of raising a TypeError.
*/
function toggle_div(nm) {
  var w = document.getElementById(nm);
  if( !w ) return;  /* getElementById() yields null for an unknown id */
  w.style.display = (w.style.display=="block") ? "none" : "block";
}
 | 
						|
/*
** Toggle the search form (the element with id "searchmenu").
**
** If the form's inline display is "block" it is hidden.  Otherwise it is
** shown and, 30 milliseconds later, keyboard focus is moved to the element
** with id "searchbox" (the delay presumably lets the form become visible
** before it is focused -- TODO confirm).
**
** Missing elements are ignored rather than raising a TypeError.
*/
function toggle_search() {
  var w = document.getElementById("searchmenu");
  if( !w ) return;
  if( w.style.display=="block" ){
    w.style.display = "none";
    return;
  }
  w.style.display = "block";
  setTimeout(function(){
    var box = document.getElementById("searchbox");
    if( box ) box.focus();
  }, 30);
}
 | 
						|
/* Hide the element whose id is nm by setting its inline display to "none". */
function div_off(nm){
  var target = document.getElementById(nm);
  target.style.display = "none";
}
 | 
						|
/* When the page is about to unload, hide the "submenu" element. */
window.onbeforeunload = function(e){
  div_off("submenu");
};
 | 
						|
/* Disable the Search feature if we are not operating from CGI, since
** Search is accomplished using CGI and will not work without it.
**
** The page is considered CGI-served only when location.origin exists, has
** a match() method, and contains "http".  In every other case the element
** with id "search_menubutton" is hidden, if it is present on the page. */
if( !location.origin || !location.origin.match || !location.origin.match(/http/) ){
  (function(){
    var btn = document.getElementById("search_menubutton");
    if( btn ) btn.style.display = "none";
  })();
}
 | 
						|
/*
** Used by the Hide/Show button beside syntax diagrams, to toggle the
** visibility of an element and relabel the button accordingly.
**
** btn: id of the button element whose label is updated.
** obj: id of the element to hide or show.
**
** If the target's inline display is anything other than 'none' it is set
** to 'none' and the button is labelled 'show'; otherwise the inline display
** is cleared and the button is labelled 'hide'.  A missing target makes the
** call a no-op and a missing button is simply not relabelled, instead of
** raising a TypeError.
**
** Always returns false (presumably so an inline onclick handler can cancel
** the default action -- TODO confirm against the callers).
*/
function hideorshow(btn,obj){
  var x = document.getElementById(obj);
  var b = document.getElementById(btn);
  if( !x ) return false;
  var hiding = (x.style.display!='none');
  x.style.display = hiding ? 'none' : '';
  if( b ) b.innerHTML = hiding ? 'show' : 'hide';
  return false;
}
 | 
						|
</script>
 | 
						|
</div>
 | 
						|
 | 
						|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
 | 
						|
<html>
 | 
						|
<head>
 | 
						|
  <link type="text/css" rel="stylesheet" href="images/fileformat/rtdocs.css">
 | 
						|
  <script type="text/javascript" src=images/fileformat/rtdocs.js></script>
 | 
						|
</head>
 | 
						|
<div id=document_title>SQLite File IO Specification</div>
 | 
						|
<div id=toc_header>Table Of Contents</div>
 | 
						|
<div id=toc>
 | 
						|
  <b>Javascript is required for some features of this document, including 
 | 
						|
     table of contents, figure numbering and internal references (section
 | 
						|
     numbers and hyper-links).
 | 
						|
  </b>
 | 
						|
</div>
 | 
						|
<!-- End of standard rt docs header -->
 | 
						|
<h1 id=overview>Overview</h1>
 | 
						|
  <p>
 | 
						|
    SQLite stores an entire database within a single file, the format of
 | 
						|
    which is described in the <i>SQLite Database File Format</i>
 | 
						|
    document <cite>ff_sqlitert_requirements</cite>. Each database file is
 | 
						|
    stored within a file system, presumably provided by the host operating
 | 
						|
    system. Instead of interfacing with the operating system directly, 
 | 
						|
    the host application is required to supply an adaptor component that 
 | 
						|
    implements the <i>SQLite Virtual File System</i> interface 
 | 
						|
    (described in <cite>capi_sqlitert_requirements</cite>). The adaptor
 | 
						|
    component is responsible for translating the calls made by SQLite to
 | 
						|
    the <i>VFS</i> interface into calls to the file-system interface 
 | 
						|
    provided by the operating system. This arrangement is depicted in figure
 | 
						|
    <cite>figure_vfs_role</cite>.
 | 
						|
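    <center><img src="images/fileformat/vfs_role.gif" alt="Virtual File System (VFS) adaptor translating SQLite calls into operating-system file-system calls">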
 | 
						|
    <p><i>Figure <span class=fig id=figure_vfs_role></span> - Virtual File System (VFS) Adaptor</i>
 | 
						|
      </center>
 | 
						|
  <p>
 | 
						|
    Although it would be easy to design a system that uses the <i>VFS</i>
 | 
						|
    interface to read and update the content of a database file stored
 | 
						|
    within a file-system, there are several complicated issues that need
 | 
						|
    to be addressed by such a system:
 | 
						|
  <ol>
 | 
						|
    <li><p>SQLite is required to <b>implement atomic and durable
 | 
						|
        transactions</b> (the 'A' and 'D' from the ACID acronym), even if an
 | 
						|
        application, operating system or power failure occurs midway through or
 | 
						|
        shortly after updating a database file.
 | 
						|
        <p>To implement atomic transactions in the face of potential 
 | 
						|
        application, operating system or power failures, database writers write
 | 
						|
        a copy of those portions of the database file that they are going to
 | 
						|
        modify into a second file, the <i>journal file</i>, before writing
 | 
						|
        to the database file. If a failure does occur while modifying the 
 | 
						|
        database file, SQLite can reconstruct the original database 
 | 
						|
        (before the modifications were attempted) based on the contents of 
 | 
						|
        the <i>journal file</i>.
 | 
						|
    <li><p>SQLite is required to <b>implement isolated transactions</b> (the 'I'
 | 
						|
        from the ACID acronym). 
 | 
						|
        <p>This is done by using the file locking facilities provided by the
 | 
						|
        VFS adaptor to serialize writers (write transactions) and preventing
 | 
						|
        readers (read transactions) from accessing database files while writers
 | 
						|
        are midway through updating them.
 | 
						|
    <li><p>For performance reasons, it is advantageous to <b>minimize the 
 | 
						|
        quantity of data read and written</b> to and from the file-system.
 | 
						|
        <p>As one might expect, the amount of data read from the database 
 | 
						|
        file is minimized by caching portions of the database file in main 
 | 
						|
        memory. Additionally, multiple updates to the database file that
 | 
						|
        are part of the same <i>write transaction</i> may be cached in
 | 
						|
        main memory and written to the file together, allowing for
 | 
						|
        more efficient IO patterns and eliminating the redundant write 
 | 
						|
        operations that could take place if part of the database file is
 | 
						|
        modified more than once within a single <i>write transaction</i>.
 | 
						|
  </ol>
 | 
						|
  <p class=todo>
 | 
						|
    System requirement references for the above points.
 | 
						|
  <p>
 | 
						|
    This document describes in detail the way that SQLite uses the API 
 | 
						|
    provided by the VFS adaptor component to solve the problems and implement
 | 
						|
    the strategies enumerated above. It also specifies the assumptions made
 | 
						|
    about the properties of the system that the VFS adaptor provides
 | 
						|
    access to. For example, specific assumptions about the extent of
 | 
						|
    data corruption that may occur if a power failure occurs while a
 | 
						|
    database file is being updated are presented in section 
 | 
						|
    <cite>fs_characteristics</cite>.
 | 
						|
  <p>
 | 
						|
    This document does not specify the details of the interface that must
 | 
						|
    be implemented by the VFS adaptor component; that is left to
 | 
						|
    <cite>capi_sqlitert_requirements</cite>.
 | 
						|
  <h2>Relationship to Other Documents</h2>
 | 
						|
    <p>
 | 
						|
      Related to C-API requirements:
 | 
						|
    <ol>
 | 
						|
      <li>Opening a connection.
 | 
						|
      <li>Closing a connection.
 | 
						|
    </ol>
 | 
						|
    <p>
 | 
						|
      Related to SQL requirements:
 | 
						|
    <ol>
 | 
						|
      <li value=3>Opening a read-only transaction.
 | 
						|
      <li>Terminating a read-only transaction.
 | 
						|
      <li>Opening a read-write transaction.
 | 
						|
      <li>Committing a read-write transaction.
 | 
						|
      <li>Rolling back a read-write transaction.
 | 
						|
      <li>Opening a statement transaction.
 | 
						|
      <li>Committing a statement transaction.
 | 
						|
      <li>Rolling back a statement transaction.
 | 
						|
      <li>Committing a multi-file transaction.
 | 
						|
    </ol>
 | 
						|
    <p>
 | 
						|
      Related to file-format requirements:
 | 
						|
    <ol>
 | 
						|
      <li value=12>Pinning (reading) a database page.
 | 
						|
      <li>Unpinning a database page.
 | 
						|
      <li>Modifying the contents of a database page.
 | 
						|
      <li>Appending a new page to the database file.
 | 
						|
      <li>Truncating a page from the end of the database file.
 | 
						|
    </ol>
 | 
						|
  <h2>Document Structure</h2>
 | 
						|
    <p>
 | 
						|
      Section <cite>vfs_assumptions</cite> of this document describes the
 | 
						|
      various assumptions made about the system to which the VFS adaptor
 | 
						|
      component provides access. The basic capabilities and functions 
 | 
						|
      required from the VFS implementation are presented along with the
 | 
						|
      description of the VFS interface in 
 | 
						|
      <cite>capi_sqlitert_requirements</cite>. Section
 | 
						|
      <cite>vfs_assumptions</cite> complements this by describing in more
 | 
						|
      detail the assumptions made about VFS implementations on which the
 | 
						|
      algorithms presented in this document depend. Some of these assumptions
 | 
						|
      relate to performance issues, but most concern the expected state of
 | 
						|
      the file-system following a failure that occurs midway through 
 | 
						|
      modifying a database file.
 | 
						|
    <p>
 | 
						|
      Section <cite>database_connections</cite> introduces the concept of
 | 
						|
      a <i>database connection</i>, a combination of a file-handle and
 | 
						|
      in-memory cache used to access a database file. It also describes the
 | 
						|
      VFS operations required when a new <i>database connection</i> is
 | 
						|
      created (opened), and when one is destroyed (closed).
 | 
						|
    <p>
 | 
						|
      Section <cite>reading_data</cite> describes the steps required to
 | 
						|
      open a <i>read transaction</i> and read data from a database file.
 | 
						|
    <p>
 | 
						|
      Section <cite>writing_data</cite> describes the steps required to
 | 
						|
      open a <i>write transaction</i> and write data to a database file.
 | 
						|
    <p>
 | 
						|
      Section <cite>rollback</cite> describes the way in which aborted
 | 
						|
      <i>write transactions</i> may be rolled back (reverted), either as
 | 
						|
      a result of an explicit user directive or because an application,
 | 
						|
      operating system or power failure occurred while SQLite was midway
 | 
						|
      through updating a database file.
 | 
						|
    <p>
 | 
						|
      Section <cite>page_cache_algorithms</cite> describes some of the
 | 
						|
      algorithms used to determine exactly which portions of the database
 | 
						|
      file are cached by a <i>page cache</i>, and the effect that they
 | 
						|
      have on the quantity and nature of the required VFS operations.
 | 
						|
      It may at first seem odd to include the <i>page cache</i>, which is 
 | 
						|
      primarily an implementation detail, in this document. However, it is
 | 
						|
      necessary to acknowledge and describe the <i>page cache</i> in order to
 | 
						|
      provide a more complete explanation of the nature and quantity of IO
 | 
						|
      performed by SQLite. 
 | 
						|
  <h2>Glossary</h2>
 | 
						|
    <p class=todo>
 | 
						|
      After this document is ready, make the vocabulary consistent and
 | 
						|
      then add a glossary here.
 | 
						|
<h1 id=vfs_assumptions>VFS Adaptor Related Assumptions</h1>
 | 
						|
  <p>
 | 
						|
    This section documents those assumptions made about the system that
 | 
						|
    the VFS adaptor provides access to. The assumptions noted in section
 | 
						|
    <cite>fs_characteristics</cite> are particularly important. If these
 | 
						|
    assumptions are not true, then a power or operating system failure
 | 
						|
    may cause SQLite databases to become corrupted.
 | 
						|
  <h2 id=fs_performance>Performance Related Assumptions</h2>
 | 
						|
    <p>
 | 
						|
      SQLite uses the assumptions in this section to try to speed up 
 | 
						|
      reading from and writing to the database file.
 | 
						|
<p class=req id=A21010>
 | 
						|
      It is assumed that writing a series of sequential blocks of data to 
 | 
						|
      a file in order is faster than writing the same blocks in an arbitrary
 | 
						|
      order.
 | 
						|
</p>  <h2 id=fs_characteristics>System Failure Related Assumptions</h2>
 | 
						|
    <p>
 | 
						|
      In the event of an operating system or power failure, the various 
 | 
						|
      combinations of file-system software and storage hardware available
 | 
						|
      provide varying levels of guarantee as to the integrity of the data
 | 
						|
      written to the file system just before or during the failure. The exact
 | 
						|
      combination of IO operations that SQLite is required to perform in 
 | 
						|
      order to safely modify a database file depends on the exact
 | 
						|
      characteristics of the target platform.
 | 
						|
    <p>
 | 
						|
      This section describes the assumptions that SQLite makes about the
 | 
						|
      content of a file-system following a power or system failure. In
 | 
						|
      other words, it describes the extent of file and file-system corruption
 | 
						|
      that such an event may cause.
 | 
						|
    <p>
 | 
						|
      SQLite queries an implementation for file-system characteristics
 | 
						|
      using the xDeviceCharacteristics() and xSectorSize() methods of the
 | 
						|
      database file file-handle. These two methods are only ever called
 | 
						|
      on file-handles open on database files. They are not called for 
 | 
						|
      <i>journal files</i>, <i>master-journal files</i> or 
 | 
						|
      <i>temporary database files</i>.
 | 
						|
    <p>
 | 
						|
      The file-system <i>sector size</i> value determined by calling the
 | 
						|
      xSectorSize() method is a power of 2 value between 512 and 32768, 
 | 
						|
      inclusive <span class=todo>reference to exactly how this is
 | 
						|
      determined</span>. SQLite assumes that the underlying storage
 | 
						|
      device stores data in blocks of <i>sector-size</i> bytes each, 
 | 
						|
      called sectors. It is also assumed that each aligned block of
 | 
						|
      <i>sector-size</i> bytes of each file is stored in a single device
 | 
						|
      sector. If the file is not an exact multiple of <i>sector-size</i>
 | 
						|
      bytes in size, then the final device sector is partially empty.
 | 
						|
    <p>
 | 
						|
      Normally, SQLite assumes that if a power failure occurs while 
 | 
						|
      updating any portion of a sector then the contents of the entire 
 | 
						|
      device sector are suspect following recovery. After writing to
 | 
						|
      any part of a sector within a file, it is assumed that the modified
 | 
						|
      sector contents are held in a volatile buffer somewhere within
 | 
						|
      the system (main memory, disk cache etc.). SQLite does not assume
 | 
						|
      that the updated data has reached the persistent storage media, until
 | 
						|
      after it has successfully <i>synced</i> the corresponding file by
 | 
						|
      invoking the VFS xSync() method. <i>Syncing</i> a file causes all
 | 
						|
      modifications to the file up until that point to be committed to
 | 
						|
      persistent storage.
 | 
						|
    <p>
 | 
						|
      Based on the above, SQLite is designed around a model of the
 | 
						|
      file-system whereby any sector of a file written to is considered to be
 | 
						|
      in a transient state until after the file has been successfully 
 | 
						|
      <i>synced</i>. Should a power or system failure occur while a sector 
 | 
						|
      is in a transient state, it is impossible to predict its contents
 | 
						|
      following recovery. It may be written correctly, not written at all,
 | 
						|
      overwritten with random data, or any combination thereof.
 | 
						|
    <p>
 | 
						|
      For example, if the <i>sector-size</i> of a given file-system is
 | 
						|
      2048 bytes, and SQLite opens a file and writes a 1024 byte block
 | 
						|
      of data to offset 3072 of the file, then according to the model 
 | 
						|
      the second sector of the file is in the transient state. If a 
 | 
						|
      power failure or operating system crash occurs before or during
 | 
						|
      the next call to xSync() on the file handle, then following system
 | 
						|
      recovery SQLite assumes that all file data between byte offsets 2048 
 | 
						|
      and 4095, inclusive, is invalid. It also assumes that the first
 | 
						|
      sector of the file, containing the data from byte offset 0 to 2047 
 | 
						|
      inclusive, is valid, since it was not in a transient state when the 
 | 
						|
      crash occurred.
 | 
						|
    <p>
 | 
						|
      Assuming that any and all sectors in the transient state may be 
 | 
						|
      corrupted following a power or system failure is a very pessimistic
 | 
						|
      approach. Some modern systems provide more sophisticated guarantees
 | 
						|
      than this. SQLite allows the VFS implementation to specify at runtime
 | 
						|
      that the current platform supports zero or more of the following 
 | 
						|
      properties:
 | 
						|
    <ul>
 | 
						|
      <li><p>The <b>safe-append</b> property. If a system supports the
 | 
						|
          <i>safe-append</i> property, it means that when a file is extended
 | 
						|
          the new data is written to the persistent media before the size
 | 
						|
          of the file itself is updated. This guarantees that if a failure
 | 
						|
          occurs after a file has been extended, following recovery 
 | 
						|
          the write operations that extended the file will appear to have 
 | 
						|
          succeeded or not occurred at all. It is not possible for invalid
 | 
						|
          or garbage data to appear in the extended region of the file.
 | 
						|
      <li><p>The <b>atomic-write</b> property. A system that supports this
 | 
						|
          property also specifies the size or sizes of the blocks that it
 | 
						|
          is capable of writing. Valid sizes are powers of two greater than
 | 
						|
          or equal to 512. If a write operation modifies a block of <i>n</i> bytes,
 | 
						|
          where <i>n</i> is one of the block sizes for which <i>atomic-write</i>
 | 
						|
          is supported, then it is impossible for an aligned write of <i>n</i>
 | 
						|
          bytes to cause data corruption. If a failure occurs after such 
 | 
						|
          a write operation and before the applicable file handle is
 | 
						|
          <i>synced</i>, then following recovery it will appear as if the
 | 
						|
          write operation succeeded or did not take place at all. It is not
 | 
						|
          possible that only part of the data specified by the write operation
 | 
						|
          was written to persistent media, nor is it possible for any content
 | 
						|
          of the sectors spanned by the write operation to be replaced with
 | 
						|
          garbage data, as it is normally assumed to be.
 | 
						|
      <li><p>The <b>sequential-write</b> property. A system that supports the
 | 
						|
          <i>sequential-write</i> property guarantees that the various write
 | 
						|
          operations on files within the same file-system are written to the
 | 
						|
          persistent media in the same order that they are performed by the
 | 
						|
          application and that each operation is concluded before the next
 | 
						|
          is begun. If a system supports the <i>sequential-write</i> 
 | 
						|
          property, then the model used to determine the possible states of
 | 
						|
          the file-system following a failure is different. 
 | 
						|
          <p>If a system supports <i>sequential-write</i> it is assumed that 
 | 
						|
          <i>syncing</i> any file within the file system flushes all write
 | 
						|
          operations on all files (not just the <i>synced</i> file) to
 | 
						|
          the persistent media. If a failure does occur, it is not known
 | 
						|
          whether or not any of the write operations performed by SQLite 
 | 
						|
          since the last time a file was <i>synced</i> were written to the
          persistent media. SQLite is able to
 | 
						|
          assume that if the write operations of unknown status are arranged
 | 
						|
          in the order that they occurred:
 | 
						|
          <ol> 
 | 
						|
            <li> the first <i>n</i> operations will have been executed 
 | 
						|
                 successfully,
 | 
						|
            <li> the next operation puts all device sectors that it modifies
 | 
						|
                 into the transient state, so that following recovery each
 | 
						|
                 sector may be partially written, completely written, not
 | 
						|
                 written at all or populated with garbage data,
 | 
						|
            <li> the remaining operations will not have had any effect on
 | 
						|
                 the contents of the file-system.
 | 
						|
          </ol> 
 | 
						|
    </ul>
 | 
						|
    <h3 id=fs_assumption_details>Failure Related Assumption Details</h3>
 | 
						|
    <p>
 | 
						|
      This section describes how the assumptions presented in the parent
 | 
						|
      section apply to the individual API functions and operations provided 
 | 
						|
      by the VFS to SQLite for the purposes of modifying the contents of the
 | 
						|
      file-system.
 | 
						|
    <p>
 | 
						|
      SQLite manipulates the contents of the file-system using a combination
 | 
						|
      of the following four types of operation:
 | 
						|
    <ul>
 | 
						|
      <li> <b>Create file</b> operations. SQLite may create new files
 | 
						|
           within the file-system by invoking the xOpen() method of
 | 
						|
           the sqlite3_vfs object.
 | 
						|
      <li> <b>Delete file</b> operations. SQLite may remove files from the
 | 
						|
           file system by calling the xDelete() method of the
 | 
						|
           sqlite3_vfs object.
 | 
						|
      <li> <b>Truncate file</b> operations. SQLite may truncate existing 
 | 
						|
           files by invoking the xTruncate() method of the sqlite3_file 
 | 
						|
           object.
 | 
						|
      <li> <b>Write file</b> operations. SQLite may modify the contents
 | 
						|
           and increase the size of a file by invoking the xWrite()
 | 
						|
           method of the sqlite3_file object.
 | 
						|
    </ul>
 | 
						|
    <p>
 | 
						|
      Additionally, all VFS implementations are required to provide the
 | 
						|
      <i>sync file</i> operation, accessed via the xSync() method of the
 | 
						|
      sqlite3_file object, used to flush create, write and truncate operations
 | 
						|
      on a file to the persistent storage medium.
 | 
						|
    <p>
 | 
						|
      The formalized assumptions in this section refer to <i>system failure</i>
 | 
						|
      events.  In this context, this should be interpreted as any failure that
 | 
						|
      causes the system to stop operating, for example a power failure or
 | 
						|
      operating system crash.
 | 
						|
    <p>
 | 
						|
      SQLite does not assume that a <b>create file</b> operation has actually
 | 
						|
      modified the file-system records within persistent storage until
 | 
						|
      after the file has been successfully <i>synced</i>.
 | 
						|
<p class=req id=A21001>
 | 
						|
      If a system failure occurs during or after a "create file"
 | 
						|
      operation, but before the created file has been <i>synced</i>, then 
 | 
						|
      SQLite assumes that it is possible that the created file may not
 | 
						|
      exist following system recovery.
 | 
						|
</p>    <p>
 | 
						|
      Of course, it is also possible that it does exist following system
 | 
						|
      recovery.
 | 
						|
<p class=req id=A21002>
 | 
						|
      If a "create file" operation is executed by SQLite, and then the
 | 
						|
      created file <i>synced</i>, then SQLite assumes that the file-system
 | 
						|
      modifications corresponding to the "create file" operation have been
 | 
						|
      committed to persistent media. It is assumed that if a system
 | 
						|
      failure occurs any time after the file has been successfully 
 | 
						|
      <i>synced</i>, then the file is guaranteed to appear in the file-system
 | 
						|
      following system recovery.
 | 
						|
</p>    <p>
 | 
						|
      A <b>delete file</b> operation (invoked by a call to the VFS xDelete() 
 | 
						|
      method) is assumed to be an atomic and durable operation. 
 | 
						|
    </p>
 | 
						|
<p class=req id=A21003>
 | 
						|
      If a system failure occurs at any time after a "delete file" 
 | 
						|
      operation (call to the VFS xDelete() method) returns successfully, it is
 | 
						|
      assumed that the file-system will not contain the deleted file following
 | 
						|
      system recovery.
 | 
						|
</p><p class=req id=A21004>
 | 
						|
      If a system failure occurs during a "delete file" operation,
 | 
						|
      it is assumed that following system recovery the file-system will 
 | 
						|
      either contain the file being deleted in the state it was in before
 | 
						|
      the operation was attempted, or not contain the file at all. It is 
 | 
						|
      assumed that it is not possible for the file to have become corrupted
 | 
						|
      purely as a result of a failure occurring during a "delete file" 
 | 
						|
      operation.
 | 
						|
</p>    <p>
 | 
						|
      The effects of a <b>truncate file</b> operation are not assumed to
 | 
						|
      be made persistent until after the corresponding file has been
 | 
						|
      <i>synced</i>.
 | 
						|
<p class=req id=A21005>
 | 
						|
      If a system failure occurs during or after a "truncate file"
 | 
						|
      operation, but before the truncated file has been <i>synced</i>, then 
 | 
						|
      SQLite assumes that the size of the truncated file is either as large as
 | 
						|
      or larger than the size that it was to be truncated to.
 | 
						|
</p><p class=req id=A21006>
 | 
						|
      If a system failure occurs during or after a "truncate file"
 | 
						|
      operation, but before the truncated file has been <i>synced</i>, then 
 | 
						|
      it is assumed that the contents of the file up to the size that the
 | 
						|
      file was to be truncated to are not corrupted.
 | 
						|
</p>    <p>
 | 
						|
      The above two assumptions may be interpreted to mean that if a 
 | 
						|
      system failure occurs after file truncation but before the truncated
 | 
						|
      file is <i>synced</i>, the contents of the file following the point
 | 
						|
      at which it was to be truncated may not be trusted. They may contain 
 | 
						|
      the original file data, or may contain garbage.
 | 
						|
<p class=req id=A21007>
 | 
						|
      If a "truncate file" operation is executed by SQLite, and then the
 | 
						|
      truncated file <i>synced</i>, then SQLite assumes that the file-system
 | 
						|
      modifications corresponding to the "truncate file" operation have been
 | 
						|
      committed to persistent media. It is assumed that if a system
 | 
						|
      failure occurs any time after the file has been successfully 
 | 
						|
      <i>synced</i>, then the effects of the file truncation are guaranteed
 | 
						|
      to appear in the file system following recovery.
 | 
						|
</p>    <p>
 | 
						|
      A <b>write file</b> operation modifies the contents of an existing file
 | 
						|
      within the file-system. It may also increase the size of the file.
 | 
						|
      The effects of a <i>write file</i> operation are not assumed to
 | 
						|
      be made persistent until after the corresponding file has been
 | 
						|
      <i>synced</i>.
 | 
						|
<p class=req id=A21008>
 | 
						|
      If a system failure occurs during or after a "write file"
 | 
						|
      operation, but before the corresponding file has been <i>synced</i>, 
 | 
						|
      then it is assumed that the content of all sectors spanned by the
 | 
						|
      <i>write file</i> operation is untrustworthy following system
 | 
						|
      recovery. This includes regions of the sectors that were not
 | 
						|
      actually modified by the write file operation.
 | 
						|
</p><p class=req id=A21011>
 | 
						|
      If a system failure occurs on a system that supports the 
 | 
						|
      <i>atomic-write</i> property for blocks of size <i>N</i> bytes
 | 
						|
      following an aligned write of <i>N</i> 
 | 
						|
      bytes to a file but before the file has been successfully <i>synced</i>,
 | 
						|
      then it is assumed following recovery that all sectors spanned by the
 | 
						|
      write operation were correctly updated, or that none of the sectors were
 | 
						|
      modified at all.
 | 
						|
</p><p class=req id=A21012>
 | 
						|
      If a system failure occurs on a system that supports the 
 | 
						|
      <i>safe-append</i> property following a write operation that appends data
 | 
						|
      to the end of the file without modifying any of the existing file 
 | 
						|
      content but before the file has been successfully <i>synced</i>,
 | 
						|
      then it is assumed following recovery that either the data was
 | 
						|
      correctly appended to the file, or that the file size remains 
 | 
						|
      unchanged. It is assumed that it is impossible that the file be
 | 
						|
      extended but populated with incorrect data.
 | 
						|
</p><p class=req id=A21013>
 | 
						|
      Following a system recovery, if a device sector is deemed to be
 | 
						|
      untrustworthy as defined by A21008 and neither A21011 nor A21012 
 | 
						|
      apply to the range of bytes written, then no assumption can be
 | 
						|
      made about the content of the sector following recovery. It is
 | 
						|
      assumed that it is possible for such a sector to be written 
 | 
						|
      correctly, not written at all, populated with garbage data or any
 | 
						|
      combination thereof.
 | 
						|
</p><p class=req id=A21009>
 | 
						|
      If a system failure occurs during or after a "write file"
 | 
						|
      operation that causes the file to grow, but before the corresponding 
 | 
						|
      file has been <i>synced</i>, then it is assumed that the size of 
 | 
						|
      the file following recovery is as large as or larger than it was when
 | 
						|
      it was most recently <i>synced</i>.
 | 
						|
</p>    <p>
 | 
						|
      If a system supports the <i>sequential-write</i> property, then further
 | 
						|
      assumptions may be made with respect to the state of the file-system
 | 
						|
      following recovery from a <i>system failure</i>. Specifically, it is
 | 
						|
      assumed that create, truncate, delete and write file operations are
 | 
						|
      applied to the persistent representation in the same order as they 
 | 
						|
      are performed by SQLite. Furthermore, it is assumed that the 
 | 
						|
      file-system waits until one operation is safely written to the 
 | 
						|
      persistent media before the next is attempted, just as if the relevant
 | 
						|
      file were <i>synced</i> following each operation.
 | 
						|
<p class=req id=A21014>
 | 
						|
      If a system failure occurs on a system that supports the
 | 
						|
      <i>sequential-write</i> property, then it is assumed that all 
 | 
						|
      operations completed before the last time any file was <i>synced</i> 
 | 
						|
      have been successfully committed to persistent media.
 | 
						|
</p><p class=req id=A21015>
 | 
						|
      If a system failure occurs on a system that supports the
 | 
						|
      <i>sequential-write</i> property, then it is assumed that the set
 | 
						|
      of possible states that the file-system may be in following recovery
 | 
						|
      is the same as if each of the write operations performed since the most
 | 
						|
      recent time a file was <i>synced</i> was itself followed by a <i>sync
 | 
						|
      file</i> operation, and that the system failure may have occurred during
 | 
						|
      any of the write or <i>sync file</i> operations.
 | 
						|
</p><!--
 | 
						|
    <p>
 | 
						|
      The return value of the xSectorSize() method, the <i>sector-size</i>, is
 | 
						|
      expected by SQLite to be a power of 2 value greater than or equal to 512.
 | 
						|
    <p class=todo> 
 | 
						|
      What does it do if this is not the case? If the sector size is less
 | 
						|
      than 512 then 512 is used instead. How about a non power-of-two value?
 | 
						|
      UPDATE: How this situation is handled should be described in the API
 | 
						|
      requirements. Here we can just refer to the other document.
 | 
						|
    <p>
 | 
						|
      SQLite assumes that files are stored and written to within the
 | 
						|
      file-system as a collection of blocks (hereafter sectors) of data, each
 | 
						|
      <i>sector-size</i> bytes in size. This model is used to derive
 | 
						|
      the following assumptions related to the expected state of the
 | 
						|
      file-system following a power failure or operating system crash.
 | 
						|
    <ul>
 | 
						|
      <li>
 | 
						|
          After part or all of a file sector has been modified
 | 
						|
          using the xWrite() method of an open file-handle, the sector
 | 
						|
          is said to be in a transient state, where the operating system
 | 
						|
          makes no guarantees about the actual content of the sector on the
 | 
						|
          persistent media. The sector remains in the transient state until
 | 
						|
          the next successful call to xSync() on the same file-handle 
 | 
						|
          returns. If a power failure or operating system crash occurs, then
 | 
						|
          part or all of all sectors in the transient state when the crash
 | 
						|
          occurred may contain invalid data following system recovery.
 | 
						|
      <li>
 | 
						|
          Following a power failure or operating system crash, the content
 | 
						|
          of all sectors that were not in a transient state when the crash
 | 
						|
          occurred may be trusted.
 | 
						|
    </ul>
 | 
						|
    <p class=todo>
 | 
						|
      What do we assume about the other three file-system write 
 | 
						|
      operations - xTruncate(), xDelete() and "create file"?
 | 
						|
    <p>
 | 
						|
      The xDeviceCharacteristics() method returns a set of flags, 
 | 
						|
      indicating which of the following properties (if any) the 
 | 
						|
      file-system provides:
 | 
						|
    <ul>
 | 
						|
      <li>The <b><i>sequential IO</i></b> property. If a file-system has this 
 | 
						|
          property, then in the event of a crash at most a single sector
 | 
						|
          may contain invalid data. The file-system guarantees
 | 
						|
      <li>The <b><i>safe-append</i></b> property.
 | 
						|
      <li>The <b><i>atomic write</i></b> property.
 | 
						|
    </ul>
 | 
						|
    <p class=todo>
 | 
						|
      Write an explanation as to how the file-system properties influence
 | 
						|
      the model used to predict file damage after a catastrophe.
 | 
						|
 -->
 | 
						|
<h1 id=database_connections>Database Connections</h1>
 | 
						|
  <p>
 | 
						|
    Within this document, the term <i>database connection</i> has a slightly
 | 
						|
    different meaning from that which one might assume. The handles returned
 | 
						|
    by the <code>sqlite3_open()</code> and <code>sqlite3_open16()</code>
 | 
						|
    APIs (<span class=todo>reference</span>) are referred to as <i>database
 | 
						|
    handles</i>.  A <i>database connection</i> is a connection to a single
 | 
						|
    database file using a single file-handle, which is held open for the
 | 
						|
    lifetime of the connection. Using the SQL ATTACH syntax, multiple
 | 
						|
    <i>database connections</i> may be accessed via a single <i>database
 | 
						|
    handle</i>. Or, using SQLite's <i>shared-cache mode</i> feature, multiple
 | 
						|
    <i>database handles</i> may access a single <i>database connection</i>.
 | 
						|
  <p>
 | 
						|
    Usually, a new <i>database connection</i> is opened whenever the user opens
 | 
						|
    a new <i>database handle</i> on a real database file (not an in-memory
 | 
						|
    database) or when a database file is attached to an existing <i>database
 | 
						|
    connection</i> using the SQL ATTACH syntax. However if the <i>shared-cache
 | 
						|
    mode</i> feature is enabled, then the database file may be accessed through
 | 
						|
    an existing <i>database connection</i>. For more information on
 | 
						|
    <i>shared-cache mode</i>, refer to <span class=todo>Reference</span>.  The
 | 
						|
    various IO operations required to open a new connection are detailed in
 | 
						|
    section <cite>open_new_connection</cite> of this document.
 | 
						|
  <p>
 | 
						|
    Similarly, a <i>database connection</i> is usually closed when the user
 | 
						|
    closes a <i>database handle</i> that is open on a real database file or
 | 
						|
    has had one or more real database files attached to it using the ATTACH
 | 
						|
    mechanism, or when a real database file is detached from a <i>database
 | 
						|
    connection</i> using the DETACH syntax. Again, the exception is if
 | 
						|
    <i>shared-cache mode</i> is enabled. In this case, a <i>database
 | 
						|
    connection</i> is not closed until its number of users reaches zero.
 | 
						|
    The IO related steps required to close a <i>database connection</i> are
 | 
						|
    described in section <cite>closing_database_connection</cite>.
 | 
						|
  <p class=todo>
 | 
						|
    After sections 4 and 5 are finished, come back here and see if we can add a
 | 
						|
    list of state items associated with each database connection to make things
 | 
						|
    easier to understand. i.e. each database connection has a file handle, a set
 | 
						|
    of entries in the page cache, an expected page size etc.
 | 
						|
  <h2 id=open_new_connection>Opening a New Connection</h2>
 | 
						|
    <p>
 | 
						|
      This section describes the VFS operations that take place when a
 | 
						|
      new database connection is created. 
 | 
						|
    <p>
 | 
						|
      Opening a new database connection is a two-step process:
 | 
						|
    <ol>
 | 
						|
      <li> A file-handle is opened on the database file.
 | 
						|
      <li> If step 1 was successful, an attempt is made to read the 
 | 
						|
           <i>database file header</i> from the database file using the 
 | 
						|
           new file-handle.
 | 
						|
    </ol>
 | 
						|
    <p>
 | 
						|
      In step 2 of the procedure above, the database file is not locked
 | 
						|
      before it is read from. This is the only exception to the locking 
 | 
						|
      rules described in section <cite>reading_data</cite>.
 | 
						|
    <p>
 | 
						|
      The reason for attempting to read the <i>database file header</i>
 | 
						|
      is to determine the <i>page-size</i> used by the database file. 
 | 
						|
      Because it is not possible to be certain as to the <i>page-size</i> 
 | 
						|
      without holding at least a <i>shared lock</i> on the database file
 | 
						|
      (because some other <i>database connection</i> might have changed it
 | 
						|
      since the <i>database file header</i> was read), the value read from the
 | 
						|
      <i>database file header</i> is known as the <i>expected page size</i>. 
 | 
						|
<p class=req id=H35060>
 | 
						|
When a new <i>database connection</i> is required, SQLite shall attempt
 | 
						|
to open a file-handle on the database file. If the attempt fails, then
 | 
						|
no new <i>database connection</i> is created and an error returned.
 | 
						|
<p class=req id=H35070>
 | 
						|
When a new <i>database connection</i> is required, after opening the
 | 
						|
new file-handle, SQLite shall attempt to read the first 100 bytes
 | 
						|
of the database file. If the attempt fails for any other reason than
 | 
						|
that the opened file is less than 100 bytes in size, then
 | 
						|
the file-handle is closed, no new <i>database connection</i> is created
 | 
						|
and an error returned instead.
 | 
						|
<p class=req id=H35080>
 | 
						|
If the <i>database file header</i> is successfully read from a newly
 | 
						|
opened database file, the connection's <i>expected page-size</i> shall
 | 
						|
be set to the value stored in the <i>page-size field</i> of the
 | 
						|
database header.
 | 
						|
<p class=req id=H35090>
 | 
						|
If the <i>database file header</i> cannot be read from a newly opened
 | 
						|
database file (because the file is less than 100 bytes in size), the
 | 
						|
connection's <i>expected page-size</i> shall be set to the compile time
 | 
						|
value of the SQLITE_DEFAULT_PAGESIZE option.
 | 
						|
  <h2 id=closing_database_connection>Closing a Connection</h2>
 | 
						|
    <p>
 | 
						|
      This section describes the VFS operations that take place when an
 | 
						|
      existing database connection is closed (destroyed). 
 | 
						|
    <p>
 | 
						|
      Closing a database connection is a simple matter. The open VFS 
 | 
						|
      file-handle is closed and in-memory <i>page cache</i> related resources
 | 
						|
      are released. 
 | 
						|
<p class=req id=H35400>
 | 
						|
When a <i>database connection</i> is closed, SQLite shall close the
 | 
						|
associated file handle at the VFS level.
 | 
						|
<p class=req id=H35430>
 | 
						|
When a <i>database connection</i> is closed, all associated <i>page
 | 
						|
cache</i> entries shall be discarded.
 | 
						|
<h1 id=page_cache>The Page Cache</h1>
 | 
						|
  <p>
 | 
						|
    The contents of an SQLite database file are formatted as a set of 
 | 
						|
    fixed size pages. See <cite>ff_sqlitert_requirements</cite> for a
 | 
						|
    complete description of the format used. The <i>page size</i> used
 | 
						|
    for a particular database is stored as part of the database file
 | 
						|
    header at a well-known offset within the first 100 bytes of the 
 | 
						|
    file. Almost all read and write operations performed by SQLite on
 | 
						|
    database files are done on blocks of data <i>page-size</i> bytes
 | 
						|
    in size. 
 | 
						|
  <p>
 | 
						|
    All SQLite database connections running within a single process share
 | 
						|
    a single <i>page cache</i>. The <i>page cache</i> caches data read from
 | 
						|
    database files in main-memory on a per-page basis. When SQLite requires
 | 
						|
    data from a database file to satisfy a database query, it checks the <i>
 | 
						|
    page cache</i> for usable cached versions of the required database
 | 
						|
    pages before loading them from the database file. If no usable cache
 | 
						|
    entry can be found and the database page data is loaded from the database
 | 
						|
    file, it is cached in the <i>page cache</i> in case the same data is 
 | 
						|
    needed again later. Because reading from the database file is assumed to
 | 
						|
    be an order of magnitude slower than reading from main-memory, caching
 | 
						|
    database page content in the <i>page cache</i> to minimize the number
 | 
						|
    of read operations performed on the database file is a significant
 | 
						|
    performance enhancement.
 | 
						|
  <p>
 | 
						|
    The <i>page cache</i> is also used to buffer database write operations.
 | 
						|
    When SQLite is required to modify one or more of the <i>database pages</i>
 | 
						|
    that make up a database file, it first modifies the cached version of
 | 
						|
    the page in the <i>page cache</i>. At that point the page is considered
 | 
						|
    a "dirty" page. At some point later on, the new content of the "dirty"
 | 
						|
    page is copied from the <i>page cache</i> into the database file via
 | 
						|
    the VFS interface. Buffering writes in the <i>page cache</i> can reduce
 | 
						|
    the number of write operations required on the database file (in cases
 | 
						|
    where the same page is updated twice) and allows optimizations based
 | 
						|
    on the assumptions outlined in section <cite>fs_performance</cite>.
 | 
						|
  <p>
 | 
						|
    Database read and write operations, and the way in which they interact
 | 
						|
    with and use the <i>page cache</i>, are described in detail in sections
 | 
						|
    <cite>reading_data</cite> and <cite>writing_data</cite> of this document,
 | 
						|
    respectively.
 | 
						|
  <p>
 | 
						|
    At any one time, the <i>page cache</i> contains zero or more <i>page cache
 | 
						|
    entries</i>, each of which has the following data associated with it:
 | 
						|
  <ul>
 | 
						|
    <li><p>
 | 
						|
      A reference to <b>the associated <i>database connection</i></b>. Each
 | 
						|
      entry in the <i>page cache</i> is associated with a single <i>database
 | 
						|
      connection</i>; the <i>database connection</i> that created the entry. 
 | 
						|
      A <i>page cache entry</i> is only ever used by the <i>database 
 | 
						|
      connection</i> that created it. Page cache entries are not shared between
 | 
						|
      <i>database connections</i>.
 | 
						|
    <li><p>
 | 
						|
      The <b><i>page number</i></b> of the cached page. Pages are sequentially
 | 
						|
      numbered within a database file starting from page 1 (page 1 begins at
 | 
						|
      byte offset 0). Refer to <cite>ff_sqlitert_requirements</cite> for
 | 
						|
      details.
 | 
						|
    <li><p>
 | 
						|
      The <b>cached data</b>; a blob of data <i>page-size</i> bytes in size.
 | 
						|
  </ul>
 | 
						|
  <p>
 | 
						|
    The first two elements in the list above, the associated <i>database
 | 
						|
    connection</i> and the <i>page number</i>, uniquely identify the
 | 
						|
    <i>page cache entry</i>. At no time may the <i>page cache</i> contain two
 | 
						|
    entries for which both the <i>database connection</i> and <i>page 
 | 
						|
    number</i> are identical. Or, put another way, a single <i>database
 | 
						|
    connection</i> never caches more than one copy of a database page
 | 
						|
    within the <i>page cache</i>.
 | 
						|
  <p>
 | 
						|
    At any one time, each <i>page cache entry</i> may be said to be a <i>clean
 | 
						|
    page</i>, a <i>non-writable dirty page</i> or a <i>writable dirty page</i>,
 | 
						|
    according to the following definitions:
 | 
						|
  <ul>
 | 
						|
    <li> <p>A <b><i>clean page</i></b> is one for which the cached data 
 | 
						|
         currently matches the contents of the corresponding page of 
 | 
						|
         the database file. The page has not been modified since it was
 | 
						|
         loaded from the file.
 | 
						|
    <li> <p>A <b><i>dirty page</i></b> is a <i>page cache entry</i> for which
 | 
						|
         the cached data has been modified since it was loaded from the database
 | 
						|
         file, and so no longer matches the current contents of the
 | 
						|
         corresponding database file page. A <i>dirty page</i> is one that is
 | 
						|
         currently buffering a modification made to the database file as part
 | 
						|
         of a <i>write transaction</i>. 
 | 
						|
    <li> <p>Within this document, the term <b><i>non-writable dirty
 | 
						|
         page</i></b> is used specifically to refer to a <i>page cache
 | 
						|
         entry</i> with modified content for which it is not yet safe to update
 | 
						|
         the database file with.  It is not safe to update a database file with
 | 
						|
         a buffered write if a power or system failure that occurs during or
 | 
						|
         soon after the update may cause the database to become corrupt
 | 
						|
         following system recovery, according to the assumptions made in
 | 
						|
         section <cite>fs_assumption_details</cite>.
 | 
						|
    <li> <p>A <i>dirty page</i> for which it would be safe to update the
 | 
						|
         corresponding database file page with its modified contents
 | 
						|
         without risking database corruption is known as a 
 | 
						|
         <b><i>writable dirty page</i></b>.
 | 
						|
  </ul>
 | 
						|
  <p>
 | 
						|
    The exact logic used to determine if a <i>page cache entry</i> with
 | 
						|
    modified content is a <i>non-writable dirty page</i> or a <i>writable dirty page</i> is
 | 
						|
    presented in section <cite>page_cache_algorithms</cite>.
 | 
						|
  <p>
 | 
						|
    Because main-memory is a limited resource, the <i>page cache</i> cannot
 | 
						|
    be allowed to grow indefinitely. As a result, unless all database files
 | 
						|
    opened by database connections within the process are quite small,
 | 
						|
    sometimes data must be discarded from the <i>page cache</i>. In practice
 | 
						|
    this means <i>page cache entries</i> must be purged to make room
 | 
						|
    for new ones. If a <i>page cache entry</i> being removed from the <i>page
 | 
						|
    cache</i> to free main-memory is a <i>dirty page</i>, then its contents
 | 
						|
    must be saved into the database file before it can be discarded without
 | 
						|
    data loss. The following two sub-sections describe the algorithms used by
 | 
						|
    the <i>page cache</i> to determine exactly when existing <i>page cache
 | 
						|
    entries</i> are purged (discarded).
 | 
						|
  <h2>Page Cache Configuration</h2>
 | 
						|
    <p class=todo>
 | 
						|
      Describe the parameters set to configure the page cache limits.
 | 
						|
  <h2 id=page_cache_algorithms>Page Cache Algorithms</h2>
 | 
						|
    <p class=todo>
 | 
						|
      Requirements describing the way in which the configuration parameters
 | 
						|
      are used. About LRU etc.
 | 
						|
<h1 id=reading_data>Reading Data</h1>
 | 
						|
  <p>
 | 
						|
    In order to return data from the database to the user, for example as
 | 
						|
    the results of a SELECT query, SQLite must at some point read data
 | 
						|
    from the database file. Usually, data is read from the database file in 
 | 
						|
    aligned blocks of <i>page-size</i> bytes. The exception is when the
 | 
						|
    database file header fields are being inspected, before the
 | 
						|
    <i>page-size</i> used by the database can be known.
 | 
						|
  <p>
 | 
						|
    With two exceptions, a <i>database connection</i> must have an open 
 | 
						|
    transaction (either a <i>read-only transaction</i> or a 
 | 
						|
    <i>read/write transaction</i>) on the database before data may be 
 | 
						|
    read from the database file. 
 | 
						|
  <p>
 | 
						|
    The two exceptions are:
 | 
						|
  <ul>
 | 
						|
    <li> When an attempt is made to read the 100 byte <i>database file
 | 
						|
         header</i> immediately after opening the <i>database connection</i>
 | 
						|
         (see section <cite>open_new_connection</cite>). When this occurs
 | 
						|
         no lock is held on the database file.
 | 
						|
    <li> Data read while in the process of opening a read-only transaction
 | 
						|
         (see section <cite>open_read_only_trans</cite>). These read 
 | 
						|
         operations occur after a <i>shared lock</i> is held on the database
 | 
						|
         file.
 | 
						|
  </ul>
 | 
						|
  <p>
 | 
						|
    Once a transaction has been opened, reading data from a database 
 | 
						|
    connection is a simple operation. Using the xRead() method of the 
 | 
						|
    file-handle open on the database file, the required database file 
 | 
						|
    pages are read one at a time. SQLite never reads partial pages and
 | 
						|
    always uses a single call to xRead() for each required page. 
 | 
						|
   <p>
 | 
						|
    After reading the data for a database page, SQLite stores the raw
 | 
						|
    page of data in the <i>page cache</i>. Each time a page of data is 
 | 
						|
    required by the upper layers, the <i>page cache</i> is queried
 | 
						|
    to see if it contains a copy of the required page stored by
 | 
						|
    the current <i>database connection</i>. If such an entry can be
 | 
						|
    found, then the required data is read from the <i>page cache</i> instead
 | 
						|
    of the database file. Only a connection with an open transaction
 | 
						|
    (either a <i>read-only transaction</i> or a 
 | 
						|
    <i>read/write transaction</i>) on the database may read data from the
 | 
						|
    <i>page cache</i>. In this sense reading from the <i>page cache</i> is no
 | 
						|
    different to reading from the <i>database file</i>.
 | 
						|
   <p>
 | 
						|
    Refer to section <cite>page_cache_algorithms</cite> for a description 
 | 
						|
    of exactly how and for how long page data is stored in the 
 | 
						|
    <i>page cache</i>.
 | 
						|
<p class=req id=H35010>
 | 
						|
Except for the read operation required by H35070 and those reads made
 | 
						|
as part of opening a read-only transaction, SQLite shall ensure that
 | 
						|
a <i>database connection</i> has an open read-only or read/write
 | 
						|
transaction when any data is read from the <i>database file</i>.
 | 
						|
<p class=req id=H35020>
 | 
						|
Aside from those read operations described by H35070 and H21XXX, SQLite
 | 
						|
shall read data from the database file in aligned blocks of
 | 
						|
<i>page-size</i> bytes, where <i>page-size</i> is the database page size
 | 
						|
used by the database file.
 | 
						|
<p class=req id=H35420>
 | 
						|
SQLite shall ensure that a <i>database connection</i> has an open
 | 
						|
read-only or read/write transaction before using data stored in the <i>page
 | 
						|
cache</i> to satisfy user queries.
 | 
						|
  <h2 id=open_read_only_trans>Opening a Read-Only Transaction</h2>
 | 
						|
    <p>
 | 
						|
      Before data may be read from a <i>database file</i> or queried from
 | 
						|
      the <i>page cache</i>, a <i>read-only transaction</i> must be
 | 
						|
      successfully opened by the associated database connection (this is true
 | 
						|
      even if the connection will eventually write to the database, as a
 | 
						|
      <i>read/write transaction</i> may only be opened by upgrading from a
 | 
						|
      <i>read-only transaction</i>). This section describes the procedure
 | 
						|
      for opening a <i>read-only transaction</i>.
 | 
						|
    <p>
 | 
						|
      The key element of a <i>read-only transaction</i> is that the 
 | 
						|
      file-handle open on the database file obtains and holds a
 | 
						|
      <i>shared-lock</i> on the database file. Because a connection requires
 | 
						|
      an <i>exclusive-lock</i> before it may actually modify the contents
 | 
						|
      of the database file, and by definition while one connection is holding
 | 
						|
      a <i>shared-lock</i> no other connection may hold an 
 | 
						|
      <i>exclusive-lock</i>, holding a <i>shared-lock</i> guarantees that
 | 
						|
      no other process may modify the database file while the <i>read-only
 | 
						|
      transaction</i> remains open. This ensures that <i>read-only
 | 
						|
      transactions</i> are sufficiently isolated from the transactions of
 | 
						|
      other database users (see section <cite>overview</cite>).
 | 
						|
    <p>Obtaining the <i>shared lock</i> itself on the database file is quite
 | 
						|
       simple: SQLite just calls the xLock() method of the database file 
 | 
						|
       handle. Some of the other processes that take place as part of 
 | 
						|
       opening the <i>read-only transaction</i> are quite complex. The 
 | 
						|
       steps that SQLite is required to take to open a <i>read-only
 | 
						|
       transaction</i>, in the order in which they must occur, are as follows:
 | 
						|
    <ol>
 | 
						|
      <li>A <i>shared-lock</i> is obtained on the database file.
 | 
						|
      <li>The connection checks if a <i>hot journal file</i> exists in the
 | 
						|
          file-system. If one does, then it is rolled back before continuing.
 | 
						|
      <li>The connection checks if the data in the <i>page cache</i> may 
 | 
						|
          still be trusted. If not, all page cache data is discarded.
 | 
						|
      <li>If the file-size is not zero bytes and the page cache does not
 | 
						|
          contain valid data for the first page of the database, then the
 | 
						|
          data for the first page must be read from the database.
 | 
						|
    </ol>
 | 
						|
    <p>
 | 
						|
      Of course, an error may occur while attempting any of the 4 steps
 | 
						|
      enumerated above. If this happens, then the <i>shared-lock</i> is 
 | 
						|
      released (if it was obtained) and an error returned to the user. 
 | 
						|
      Step 2 of the procedure above is described in more detail in section
 | 
						|
      <cite>hot_journal_detection</cite>. Section <cite>cache_validation</cite>
 | 
						|
      describes the process identified by step 3 above. Further detail
 | 
						|
      on step 4 may be found in section <cite>read_page_one</cite>.
 | 
						|
<p class=req id=H35100>
 | 
						|
When required to open a <i>read-only transaction</i> using a
 | 
						|
<i>database connection</i>, SQLite shall first attempt to obtain
 | 
						|
a <i>shared-lock</i> on the file-handle open on the database file.
 | 
						|
<p class=req id=H35110>
 | 
						|
If, while opening a <i>read-only transaction</i>, SQLite fails to obtain
 | 
						|
the <i>shared-lock</i> on the database file, then the process is
 | 
						|
abandoned, no transaction is opened and an error returned to the user.
 | 
						|
    <p>
 | 
						|
      The most common reason an attempt to obtain a <i>shared-lock</i> may
 | 
						|
      fail is that some other connection is holding an <i>exclusive</i> or
 | 
						|
      <i>pending lock</i>. However it may also fail because some other
 | 
						|
      error (e.g. an IO or comms related error) occurs within the call to the
 | 
						|
      xLock() method.
 | 
						|
<p class=req id=H35030>
 | 
						|
While opening a <i>read-only transaction</i>, after successfully
 | 
						|
obtaining a <i>shared lock</i> on the database file, SQLite shall
 | 
						|
attempt to detect and roll back a <i>hot journal file</i> associated
 | 
						|
with the same database file.
 | 
						|
<p class=req id=H35120>
 | 
						|
If, while opening a <i>read-only transaction</i>, SQLite encounters
 | 
						|
an error while attempting to detect or roll back a <i>hot journal
 | 
						|
file</i>, then the <i>shared-lock</i> on the database file is released,
 | 
						|
no transaction is opened and an error returned to the user.
 | 
						|
    <p>
 | 
						|
      Section <cite>hot_journal_detection</cite> contains a description of
 | 
						|
      and requirements governing the detection of a hot-journal file referred
 | 
						|
      to in the above requirements.
 | 
						|
<p class=req id=H35040>
 | 
						|
Assuming no errors have occurred, then after attempting to detect and
 | 
						|
roll back a <i>hot journal file</i>, if the <i>page cache</i> contains
 | 
						|
any entries associated with the current <i>database connection</i>,
 | 
						|
then SQLite shall validate the contents of the <i>page cache</i> by
 | 
						|
testing the <i>file change counter</i>.  This procedure is known as
 | 
						|
<i>cache validation</i>.
 | 
						|
    <p>
 | 
						|
      The <i>cache validation</i> process is described in detail in section
 | 
						|
      <cite>cache_validation</cite>.
 | 
						|
<p class=req id=H35050>
 | 
						|
If the cache validate procedure prescribed by H35040 is required and
 | 
						|
does not prove that the <i>page cache</i> entries associated with the
 | 
						|
current <i>database connection</i> are valid, then SQLite shall discard
 | 
						|
all entries associated with the current <i>database connection</i> from
 | 
						|
the <i>page cache</i>.
 | 
						|
    <p>
 | 
						|
      The numbered list above notes that the data for the first page of the
 | 
						|
      database file, if it exists and is not already loaded into the <i>page
 | 
						|
      cache</i>, must be read from the database file before the <i>read-only
 | 
						|
      transaction</i> may be considered opened. This is handled by 
 | 
						|
      requirement H35240.
 | 
						|
  <h3 id=hot_journal_detection>Hot Journal Detection</h3>
 | 
						|
    <p>
 | 
						|
      This section describes the procedure that SQLite uses to detect a
 | 
						|
      <i>hot journal file</i>. If a <i>hot journal file</i> is detected,
 | 
						|
      this indicates that at some point the process of writing a 
 | 
						|
      transaction to the database was interrupted and a recovery operation
 | 
						|
      (<i>hot journal rollback</i>) needs to take place. This section does
 | 
						|
      not describe the process of <i>hot journal rollback</i> (see section
 | 
						|
      <cite>hot_journal_rollback</cite>) or the processes by which a
 | 
						|
      <i>hot journal file</i> may be created (see section
 | 
						|
      <cite>writing_data</cite>).
 | 
						|
    <p>
 | 
						|
      The procedure used to detect a <i>hot-journal file</i> is quite
 | 
						|
      complex. The following steps take place:
 | 
						|
      <ol class=spacedlist>
 | 
						|
        <li>Using the VFS xAccess() method, SQLite queries the file-system 
 | 
						|
            to see if the journal file associated with the database exists. 
 | 
						|
            If it does not, then there is no hot-journal file.
 | 
						|
        <li>By invoking the xCheckReservedLock() method of the file-handle
 | 
						|
            opened on the database file, SQLite checks if some other connection
 | 
						|
            holds a <i>reserved lock</i> or greater. If some other connection
 | 
						|
            does hold a <i>reserved lock</i>, this indicates that the other
 | 
						|
            connection is midway through a <i>read/write transaction</i> (see
 | 
						|
            section <cite>writing_data</cite>). In this case the 
 | 
						|
            <i>journal file</i> is not a <i>hot-journal</i> and must not be 
 | 
						|
            rolled back.
 | 
						|
        <li>Using the xFileSize() method of the file-handle opened
 | 
						|
            on the database file, SQLite checks if the database file is 
 | 
						|
            0 bytes in size. If it is, the journal file is not considered
 | 
						|
            to be a <i>hot journal</i> file. Instead of rolling back the
 | 
						|
            journal file, in this case it is deleted from the file-system
 | 
						|
            by calling the VFS xDelete() method. <span class=todo>Technically,
 | 
						|
            there is a race condition here. This step should be moved to
 | 
						|
            after the exclusive lock is held.</span>
 | 
						|
        <li>An attempt is made to upgrade to an <i>exclusive lock</i> on the
 | 
						|
            database file. If the attempt fails, then all locks, including 
 | 
						|
            the recently obtained <i>shared lock</i>, are dropped. The attempt
 | 
						|
            to open a <i>read-only transaction</i> has failed. This occurs
 | 
						|
            when some other connection is also attempting to open a 
 | 
						|
            <i>read-only transaction</i> and the attempt to gain the
 | 
						|
            <i>exclusive lock</i> fails because the other connection is also
 | 
						|
            holding a <i>shared lock</i>. It is left to the other connection 
 | 
						|
            to roll back the <i>hot journal</i>.
 | 
						|
            <div style="margin-top:0.5em"></div>
 | 
						|
            It is important that the file-handle lock is upgraded 
 | 
						|
            directly from <i>shared</i> to <i>exclusive</i> in this case,
 | 
						|
            instead of first upgrading to <i>reserved</i> or <i>pending</i>
 | 
						|
            locks as is required when obtaining an <i>exclusive lock</i> to
 | 
						|
            write to the database file (section <cite>writing_data</cite>).
 | 
						|
            If SQLite were to first upgrade to a <i>reserved</i> or
 | 
						|
            <i>pending</i> lock in this scenario, then a second process also
 | 
						|
            trying to open a <i>read-transaction</i> on the database file might
 | 
						|
            detect the <i>reserved</i> lock in step 2 of this process, 
 | 
						|
            conclude that there was no <i>hot journal</i>, and commence
 | 
						|
            reading data from the <i>database file</i>.
 | 
						|
        <li>The xAccess() method is invoked again to detect if the journal 
 | 
						|
            file is still in the file system. If it is, then it is a 
 | 
						|
            hot-journal file and SQLite tries to roll it back (see section
 | 
						|
            <cite>rollback</cite>).
 | 
						|
      </ol>
 | 
						|
    <p class=todo> Master journal file pointers?
 | 
						|
    <p>
 | 
						|
      The following requirements describe step 1 of the above procedure in
 | 
						|
      more detail.
 | 
						|
<p class=req id=H35140>
 | 
						|
When required to attempt to detect a <i>hot-journal file</i>, SQLite
 | 
						|
shall first use the xAccess() method of the VFS layer to check if a
 | 
						|
journal file exists in the file-system.
 | 
						|
<p class=req id=H35510>
 | 
						|
If the call to xAccess() required by H35140 fails (due to an IO error or
 | 
						|
similar), then SQLite shall abandon the attempt to open a <i>read-only
 | 
						|
transaction</i>, relinquish the <i>shared lock</i> held on the database
 | 
						|
file and return an error to the user.
 | 
						|
<p class=req id=H35150>
 | 
						|
When required to attempt to detect a <i>hot-journal file</i>, if the
 | 
						|
call to xAccess() required by H35140 indicates that a journal file does
 | 
						|
not exist, then SQLite shall conclude that there is no <i>hot-journal
 | 
						|
file</i> in the file system and therefore that no <i>hot journal
 | 
						|
rollback</i> is required.
 | 
						|
    <p>
 | 
						|
      The following requirements describe step 2 of the above procedure in
 | 
						|
      more detail.
 | 
						|
<p class=req id=H35160>
 | 
						|
When required to attempt to detect a <i>hot-journal file</i>, if the
 | 
						|
call to xAccess() required by H35140 indicates that a journal file
 | 
						|
is present, then the xCheckReservedLock() method of the database file
 | 
						|
file-handle is invoked to determine whether or not some other
 | 
						|
process is holding a <i>reserved</i> or greater lock on the database
 | 
						|
file.
 | 
						|
<p class=req id=H35520>
 | 
						|
If the call to xCheckReservedLock() required by H35160 fails (due to an
 | 
						|
IO or other internal VFS error), then SQLite shall abandon the attempt
 | 
						|
to open a <i>read-only transaction</i>, relinquish the <i>shared lock</i>
 | 
						|
held on the database file and return an error to the user.
 | 
						|
<p class=req id=H35170>
 | 
						|
If the call to xCheckReservedLock() required by H35160 indicates that
 | 
						|
some other <i>database connection</i> is holding a <i>reserved</i>
 | 
						|
or greater lock on the database file, then SQLite shall conclude that
 | 
						|
there is no <i>hot journal file</i>. In this case the attempt to detect
 | 
						|
a <i>hot journal file</i> is concluded.
 | 
						|
    <p>
 | 
						|
      The following requirements describe step 3 of the above procedure in
 | 
						|
      more detail.
 | 
						|
<p class=req id=H35440>
 | 
						|
If while attempting to detect a <i>hot-journal file</i> the call to
 | 
						|
xCheckReservedLock() indicates that no process holds a <i>reserved</i>
 | 
						|
or greater lock on the <i>database file</i>, then SQLite shall open
 | 
						|
a file handle on the potentially hot journal file using the VFS xOpen()
 | 
						|
method.
 | 
						|
<p class=req id=H35530>
 | 
						|
If the call to xOpen() required by H35440 fails (due to an IO or other
 | 
						|
internal VFS error), then SQLite shall abandon the attempt to open a
 | 
						|
<i>read-only transaction</i>, relinquish the <i>shared lock</i> held on
 | 
						|
the database file and return an error to the user.
 | 
						|
<p class=req id=H35450>
 | 
						|
After successfully opening a file-handle on a potentially hot journal
 | 
						|
file, SQLite shall query the file for its size in bytes using the
 | 
						|
xFileSize() method of the open file handle.
 | 
						|
<p class=req id=H35540>
 | 
						|
If the call to xFileSize() required by H35450 fails (due to an IO or
 | 
						|
other internal VFS error), then SQLite shall abandon the attempt to open
 | 
						|
a <i>read-only transaction</i>, relinquish the <i>shared lock</i> held on
 | 
						|
the database file, close the file handle opened on the journal file and
 | 
						|
return an error to the user.
 | 
						|
<p class=req id=H35460>
 | 
						|
If the size of a potentially hot journal file is revealed to be zero
 | 
						|
bytes by a query required by H35450, then SQLite shall close the
 | 
						|
file handle opened on the journal file and delete the journal file using
 | 
						|
a call to the VFS xDelete() method. In this case SQLite shall conclude
 | 
						|
that there is no <i>hot journal file</i>.
 | 
						|
<p class=req id=H35550>
 | 
						|
If the call to xDelete() required by H35460 fails (due to an IO or
 | 
						|
other internal VFS error), then SQLite shall abandon the attempt to open
 | 
						|
a <i>read-only transaction</i>, relinquish the <i>shared lock</i> held on
 | 
						|
the database file and return an error to the user.
 | 
						|
    <p>
 | 
						|
      The following requirements describe step 4 of the above procedure in
 | 
						|
      more detail.
 | 
						|
<p class=req id=H35470>
 | 
						|
If the size of a potentially hot journal file is revealed to be greater
 | 
						|
than zero bytes by a query required by H35450, then SQLite shall attempt
 | 
						|
to upgrade the <i>shared lock</i> held by the <i>database connection</i>
 | 
						|
on the <i>database file</i> directly to an <i>exclusive lock</i>.
 | 
						|
<p class=req id=H35480>
 | 
						|
If an attempt to upgrade to an <i>exclusive lock</i> prescribed by
 | 
						|
H35470 fails for any reason, then SQLite shall release all locks held by
 | 
						|
the <i>database connection</i> and close the file handle opened on the
 | 
						|
<i>journal file</i>. The attempt to open a <i>read-only transaction</i>
 | 
						|
shall be deemed to have failed and an error returned to the user.
 | 
						|
    <p>
 | 
						|
      Finally, the following requirements describe step 5 of the above
 | 
						|
      procedure in more detail.
 | 
						|
<p class=req id=H35490>
 | 
						|
If, as part of the <i>hot journal file</i> detection process, the
 | 
						|
attempt to upgrade to an <i>exclusive lock</i> mandated by H35470 is
 | 
						|
successful, then SQLite shall query the file-system using the xAccess()
 | 
						|
method of the VFS implementation to test whether or not the journal
 | 
						|
file is still present in the file-system.
 | 
						|
<p class=req id=H35560>
 | 
						|
If the call to xAccess() required by H35490 fails (due to an IO or
 | 
						|
other internal VFS error), then SQLite shall abandon the attempt to open
 | 
						|
a <i>read-only transaction</i>, relinquish the lock held on the
 | 
						|
database file, close the file handle opened on the journal file and
 | 
						|
return an error to the user.
 | 
						|
<p class=req id=H35570>
 | 
						|
If the call to xAccess() required by H35490 reveals that the journal
 | 
						|
file is no longer present in the file system, then SQLite shall abandon
 | 
						|
the attempt to open a <i>read-only transaction</i>, relinquish the
 | 
						|
lock held on the database file, close the file handle opened on the
 | 
						|
journal file and return an SQLITE_BUSY error to the user.
 | 
						|
<p class=req id=H35500>
 | 
						|
If the xAccess() query required by H35490 reveals that the journal
 | 
						|
file is still present in the file system, then SQLite shall conclude
 | 
						|
that the journal file is a <i>hot journal file</i> that needs to
 | 
						|
be rolled back. SQLite shall immediately begin <i>hot journal
 | 
						|
rollback</i>.
 | 
						|
  <h3 id=cache_validation>Cache Validation</h3>
 | 
						|
    <p>
 | 
						|
      When a <i>database connection</i> opens a <i>read transaction</i>, the
 | 
						|
      <i>page cache</i> may already contain data associated with the
 | 
						|
      <i>database connection</i>. However, if another process has modified 
 | 
						|
      the database file since the cached pages were loaded it is possible that
 | 
						|
      the cached data is invalid.
 | 
						|
    <p>
 | 
						|
      SQLite determines whether or not the <i>page cache</i> entries belonging
 | 
						|
      to the <i>database connection</i> are valid using the <i>file
 | 
						|
      change counter</i>, a field in the <i>database file header</i>. The
 | 
						|
      <i>file change counter</i> is a 4-byte big-endian integer field stored
 | 
						|
      starting at byte offset 24 of the <i>database file header</i>. Before the
 | 
						|
      conclusion of a <i>read/write transaction</i> that modifies the contents
 | 
						|
      of the database file in any way (see section <cite>writing_data</cite>),
 | 
						|
      the value stored in the <i>file change counter</i> is incremented.  When
 | 
						|
      a <i>database connection</i> unlocks the database file, it stores the
 | 
						|
      current value of the <i>file change counter</i>. Later, while opening a
 | 
						|
      new <i>read-only transaction</i>, SQLite checks the value of the <i>file
 | 
						|
      change counter</i> stored in the database file. If the value has not
 | 
						|
      changed since the database file was unlocked, then the <i>page cache</i>
 | 
						|
      entries can be trusted. If the value has changed, then the <i>page
 | 
						|
      cache</i> entries cannot be trusted and all entries associated with
 | 
						|
      the current <i>database connection</i> are discarded.
 | 
						|
<p class=req id=H35180>
 | 
						|
When a file-handle open on a database file is unlocked, if the
 | 
						|
<i>page cache</i> contains one or more entries belonging to the
 | 
						|
associated <i>database connection</i>, SQLite shall store the value
 | 
						|
of the <i>file change counter</i> internally.
 | 
						|
<p class=req id=H35190>
 | 
						|
When required to perform <i>cache validation</i> as part of opening
 | 
						|
a <i>read transaction</i>, SQLite shall read a 16 byte block
 | 
						|
starting at byte offset 24 of the <i>database file</i> using the xRead()
 | 
						|
method of the <i>database connection's</i> file handle.
 | 
						|
    <p class=todo>
 | 
						|
      Why a 16 byte block? Why not 4? (something to do with encrypted
 | 
						|
      databases).
 | 
						|
<p class=req id=H35200>
 | 
						|
While performing <i>cache validation</i>, after loading the 16 byte
 | 
						|
block as required by H35190, SQLite shall compare the 32-bit big-endian
 | 
						|
integer stored in the first 4 bytes of the block to the most
 | 
						|
recently stored value of the <i>file change counter</i> (see H35180).
 | 
						|
If the values are not the same, then SQLite shall conclude that
 | 
						|
the contents of the cache are invalid.
 | 
						|
    <p>
 | 
						|
      Requirement H35050 (section <cite>open_read_only_trans</cite>) 
 | 
						|
      specifies the action SQLite is required to take upon determining that 
 | 
						|
      the cache contents are invalid.
 | 
						|
  <h3 id=read_page_one>Page 1 and the Expected Page Size</h3>
 | 
						|
    <p>
 | 
						|
      As the last step in opening a <i>read transaction</i> on a database
 | 
						|
      file that is more than 0 bytes in size, SQLite is required to load 
 | 
						|
      data for page 1 of the database into the <i>page cache</i>, if it is 
 | 
						|
      not already there. This is slightly more complicated than it seems, 
 | 
						|
      as the database <i>page-size</i> is not known at this point.
 | 
						|
    <p>
 | 
						|
      Even though the database <i>page-size</i> cannot be known for sure,
 | 
						|
      SQLite is usually able to guess correctly by assuming it to be equal to
 | 
						|
      the connection's <i>expected page size</i>. The <i>expected page size</i>
 | 
						|
      is the value of the <i>page-size</i> field read from the 
 | 
						|
      <i>database file header</i> while opening the database connection 
 | 
						|
      (see section <cite>open_new_connection</cite>), or the <i>page-size</i>
 | 
						|
      of the database file when the most recent <i>read transaction</i> was concluded.
 | 
						|
<p class=req id=H35210>
 | 
						|
During the conclusion of a <i>read transaction</i>, before unlocking
 | 
						|
the database file, SQLite shall set the connection's
 | 
						|
<i>expected page size</i> to the current database <i>page-size</i>.
 | 
						|
<p class=req id=H35220>
 | 
						|
As part of opening a new <i>read transaction</i>, immediately after
 | 
						|
performing <i>cache validation</i>, if there is no data for database
 | 
						|
page 1 in the <i>page cache</i>, SQLite shall read <i>N</i> bytes from
 | 
						|
the start of the database file using the xRead() method of the
 | 
						|
connection's file handle, where <i>N</i> is the connection's current
 | 
						|
<i>expected page size</i> value.
 | 
						|
<p class=req id=H35230>
 | 
						|
If page 1 data is read as required by H35220, and the value of the
 | 
						|
<i>page-size</i> field that appears in the database file header that
 | 
						|
consumes the first 100 bytes of the read block is not the same as the
 | 
						|
connection's current <i>expected page size</i>, then the
 | 
						|
<i>expected page size</i> is set to this value, the database file is
 | 
						|
unlocked and the entire procedure to open a <i>read transaction</i>
 | 
						|
is repeated.
 | 
						|
<p class=req id=H35240>
 | 
						|
If page 1 data is read as required by H35220, and the value of the
 | 
						|
<i>page-size</i> field that appears in the database file header that
 | 
						|
consumes the first 100 bytes of the read block is the same as the
 | 
						|
connection's current <i>expected page size</i>, then the block of data
 | 
						|
read is stored in the <i>page cache</i> as page 1.
 | 
						|
  <h2>Reading Database Data</h2>
 | 
						|
  <p class=todo>
 | 
						|
    Add something about checking the page-cache first etc.
 | 
						|
  <h2>Ending a Read-only Transaction</h2>
 | 
						|
    <p>
 | 
						|
      To end a <i>read-only transaction</i>, SQLite simply relinquishes the
 | 
						|
      <i>shared lock</i> on the file-handle open on the database file. No
 | 
						|
      other action is required.
 | 
						|
<p class=req id=H35130>
 | 
						|
When required to end a <i>read-only transaction</i>, SQLite shall
 | 
						|
relinquish the <i>shared lock</i> held on the database file by
 | 
						|
calling the xUnlock() method of the file-handle.
 | 
						|
    <p>
 | 
						|
      See also requirements H35180 and H35210 above.
 | 
						|
<h1 id=writing_data>Writing Data</h1>
 | 
						|
  <p>
 | 
						|
    Using DDL or DML SQL statements, SQLite users may modify the contents and
 | 
						|
    size of a database file. Exactly how changes to the logical database are
 | 
						|
    translated to modifications to the database file is described in 
 | 
						|
    <cite>ff_sqlitert_requirements</cite>. From the point of view of the
 | 
						|
    sub-systems described in this document, each DDL or DML statement executed
 | 
						|
    results in the contents of zero or more database file pages being 
 | 
						|
    overwritten with new data. A DDL or DML statement may also append or 
 | 
						|
    truncate one or more pages to or from the end of the database file. One 
 | 
						|
    or more DDL and/or DML statements are grouped together to make up a 
 | 
						|
    single <i>write transaction</i>. A <i>write transaction</i> is required 
 | 
						|
    to have the special properties described in section <cite>overview</cite>; 
 | 
						|
    a <i>write transaction</i> must be isolated, durable and atomic.
 | 
						|
  <p>
 | 
						|
    SQLite accomplishes these goals using the following techniques:
 | 
						|
  <ul>
 | 
						|
    <li><p>
 | 
						|
        To ensure that <i>write transactions</i> are <b>isolated</b>, before
 | 
						|
        beginning to modify the contents of the database file to reflect the
 | 
						|
        results of a <i>write transaction</i>, SQLite obtains an <i>exclusive
 | 
						|
        lock</i> on the <i>database file</i>. The lock is not relinquished
 | 
						|
        until the <i>write transaction</i> is concluded. Because reading from
 | 
						|
        the <i>database file</i> requires a <i>shared lock</i> (see section
 | 
						|
        <cite>reading_data</cite>) and holding an <i>exclusive
 | 
						|
        lock</i> guarantees that no other <i>database connection</i> is holding
 | 
						|
        or can obtain a <i>shared lock</i>, this ensures that no other
 | 
						|
        connection may read data from the <i>database file</i> at a point when
 | 
						|
        a <i>write transaction</i> has been partially applied. 
 | 
						|
    <li><p>Ensuring that <i>write transactions</i> are <b>atomic</b> is the most
 | 
						|
        complex task required of the system. In this case, <i>atomic</i> means
 | 
						|
        that even if a system failure occurs, an attempt to commit a <i>write
 | 
						|
        transaction</i> to the database file either results in all changes
 | 
						|
        that are a part of the transaction being successfully applied to the
 | 
						|
        database file, or none of the changes are successfully applied. There
 | 
						|
        is no chance that only a subset of the changes is applied. Hence from
 | 
						|
        the point of view of an external observer, the <i>write transaction</i>
 | 
						|
        appears to be an atomic event. 
 | 
						|
        <p>
 | 
						|
        Of course, it is usually not possible to atomically apply all the
 | 
						|
        changes required by a <i>write transaction</i> to a database file
 | 
						|
        within the file-system. For example, if a <i>write transaction</i>
 | 
						|
        requires ten pages of a database file to be modified, and a power
 | 
						|
        outage causes a system failure after SQLite has modified only five
 | 
						|
        pages, then the database file will almost certainly be in an
 | 
						|
        inconsistent state following system recovery.
 | 
						|
        <p>
 | 
						|
        SQLite solves this problem by using a <i>journal file</i>. In almost
 | 
						|
        all cases, before the <i>database file</i> is modified in any way, 
 | 
						|
        SQLite stores sufficient information in the <i>journal file</i> to
 | 
						|
        allow the original database file to be reconstructed if a system
 | 
						|
        failure occurs while the database file is being updated to reflect
 | 
						|
        the modifications made by the <i>write transaction</i>. Each time
 | 
						|
        SQLite opens a database file, it checks if such a system failure has
 | 
						|
        occurred and, if so, 
 | 
						|
        reconstructs the database file based on the contents
 | 
						|
        of the journal file. The procedure used to detect whether or not this
 | 
						|
        process, coined <i>hot journal rollback</i>, is required is described
 | 
						|
        in section <cite>hot_journal_detection</cite>. <i>Hot journal rollback
 | 
						|
        </i> itself is described in section <cite>hot_journal_rollback</cite>.
 | 
						|
        <p>
 | 
						|
        The same technique ensures that an SQLite database file cannot be
 | 
						|
        corrupted by a system failure that occurs at an inopportune moment.
 | 
						|
        If a system failure does occur before SQLite has had a chance to
 | 
						|
        execute sufficient <i>sync file</i> operations to ensure that the
 | 
						|
        changes that make up a <i>write transaction</i> have made it safely
 | 
						|
        to persistent storage, then the <i>journal file</i> will be used
 | 
						|
        to restore the database to a known good state following system
 | 
						|
        recovery.
 | 
						|
    <li><p>
 | 
						|
        So that <i>write transactions</i> are <b>durable</b> in the face of
 | 
						|
        a system failure, SQLite executes a <i>sync file</i> operation on the
 | 
						|
        database file before concluding the <i>write transaction</i>.
 | 
						|
  </ul>
 | 
						|
  <p>
 | 
						|
    The <i>page cache</i> is used to buffer modifications to the database
 | 
						|
    file image before they are written to the <i>database file</i>. When
 | 
						|
    the contents of a page are required to be modified as the result of
 | 
						|
    an operation within a <i>write transaction</i>, the modified copy is
 | 
						|
    stored in the <i>page cache</i>. Similarly, if new pages are appended
 | 
						|
    to the end of a database file, they are added to the <i>page cache</i>
 | 
						|
    instead of being immediately written to the database file within the
 | 
						|
    file-system. 
 | 
						|
  <p>
 | 
						|
    Ideally, all changes for an entire write transaction are buffered in
 | 
						|
    the page cache until the end of the transaction. When the user commits
 | 
						|
    the transaction, all changes are applied to the database file in the
 | 
						|
    most efficient way possible, taking into account the assumptions 
 | 
						|
    enumerated in section <cite>fs_performance</cite>. Unfortunately, since
 | 
						|
    main-memory is a limited resource, this is not always possible for 
 | 
						|
    large transactions. In this case changes are buffered in the <i>page
 | 
						|
    cache</i> until some internal condition or limit is reached,
 | 
						|
    then written out to the database file in order to free resources
 | 
						|
    as they are required. Section <cite>page_cache_algorithms</cite>
 | 
						|
    describes the circumstances under which changes are flushed through
 | 
						|
    to the database file mid-transaction to free <i>page cache</i> resources.
 | 
						|
  <p>
 | 
						|
    Even if an application or system failure does not occur while a
 | 
						|
    <i>write transaction</i> is in progress, a rollback operation to restore
 | 
						|
    the database file and <i>page cache</i> to the state that it was in before
 | 
						|
    the transaction started may be required. This may occur if the user
 | 
						|
    explicitly requests transaction rollback (by issuing a "ROLLBACK" command),
 | 
						|
    or automatically, as a result of encountering an SQL constraint (see
 | 
						|
    <cite>sql_sqlitert_requirements</cite>). For this reason, the original page
 | 
						|
    content is stored in the <i>journal file</i> before the page is even
 | 
						|
    modified within the <i>page cache</i>.
 | 
						|
  <p class=todo>
 | 
						|
    Introduce the following sub-sections.
 | 
						|
  <h2 id=journal_file_format>Journal File Format</h2>
 | 
						|
    <p>
 | 
						|
      This section describes the format used by an SQLite <i>journal file</i>.
 | 
						|
    <p>
 | 
						|
      A journal file consists of one or more <i>journal headers</i>, zero
 | 
						|
      or more <i>journal records</i> and optionally a <i>master journal
 | 
						|
      pointer</i>. Each journal file always begins with a
 | 
						|
      <i>journal header</i>, followed by zero or more <i>journal records</i>.
 | 
						|
      Following this may be a second <i>journal header</i> followed by a
 | 
						|
      second set of zero or more <i>journal records</i> and so on. There
 | 
						|
      is no limit to the number of <i>journal headers</i> a journal file
 | 
						|
      may contain. Following the <i>journal headers</i> and their accompanying
 | 
						|
      sets of <i>journal records</i> may be the optional <i>master journal
 | 
						|
      pointer</i>. Or, the file may simply end following the final <i>journal
 | 
						|
      record</i>.
 | 
						|
    <p>
 | 
						|
      This section only describes the format of the journal file and the
 | 
						|
      various objects that make it up. But because a journal file may be
 | 
						|
      read by an SQLite process following recovery from a system failure
 | 
						|
      (<i>hot journal rollback</i>, see section
 | 
						|
      <cite>hot_journal_rollback</cite>) it is also important to describe
 | 
						|
      the way the file is created and populated within the file-system
 | 
						|
      using a combination of <i>write file</i>, <i>sync file</i> and
 | 
						|
      <i>truncate file</i> operations. These are described in section
 | 
						|
      <cite>write_transactions</cite>.
 | 
						|
    <h3 id=journal_header_format>Journal Header Format</h3>
 | 
						|
    <p>
 | 
						|
      A <i>journal header</i> is <i>sector-size</i> bytes in size, where <i>
 | 
						|
      sector-size</i> is the value returned by the xSectorSize method of
 | 
						|
      the file handle opened on the database file. Only the first 28 bytes
 | 
						|
      of the <i>journal header</i> are used, the remainder may contain garbage
 | 
						|
      data. The first 28 bytes of each <i>journal header</i> consist of an
 | 
						|
      eight byte block set to a well-known value, followed by five big-endian 
 | 
						|
      32-bit unsigned integer fields.
 | 
						|
    <center><img src="images/fileformat/journal_header.gif">
 | 
						|
    <p><i>Figure <span class=fig id=figure_journal_header></span> - Journal Header Format</i>
 | 
						|
      </center>
 | 
						|
    <p>
 | 
						|
      Figure <cite>figure_journal_header</cite> graphically depicts the layout
 | 
						|
      of a <i>journal header</i>. The individual fields are described in
 | 
						|
      the following table. The offsets in the 'byte offset' column of the
 | 
						|
      table are relative to the start of the <i>journal header</i>.
 | 
						|
    <table class=striped>
 | 
						|
      <tr><th>Byte offset<th>Size in bytes<th width=100%>Description
 | 
						|
      <tr><td>0<td>8<td>The <b>journal magic</b> field always contains a
 | 
						|
                        well-known 8-byte string value used to identify SQLite
 | 
						|
                        journal files. The well-known sequence of byte values
 | 
						|
                        is:
 | 
						|
                        <pre>0xd9 0xd5 0x05 0xf9 0x20 0xa1 0x63 0xd7</pre>
 | 
						|
      <tr><td>8<td>4<td>This field, the <b>record count</b>, is set to the
 | 
						|
                        number of <i>journal records</i> that follow this
 | 
						|
                        <i>journal header</i> in the <i>journal file</i>.
 | 
						|
      <tr><td>12<td>4<td>The <b>checksum initializer</b> field is set to a 
 | 
						|
                         pseudo-random value. It is used as part of the
 | 
						|
                         algorithm to calculate the checksum for all <i>journal
 | 
						|
                         records</i> that follow this <i>journal header</i>.
 | 
						|
      <tr><td>16<td>4<td>This field, the <b>database page count</b>, is set
 | 
						|
                         to the number of pages that the <i>database file</i>
 | 
						|
                         contained before any modifications associated with
 | 
						|
                         the <i>write transaction</i> are applied.
 | 
						|
      <tr><td>20<td>4<td>This field, the <b>sector size</b>, is set to the
 | 
						|
                         <i>sector size</i> of the device on which the 
 | 
						|
                         <i>journal file</i> was created, in bytes. This value
 | 
						|
                         is required when reading the journal file to determine
 | 
						|
                         the size of each <i>journal header</i>.
 | 
						|
      <tr><td>24<td>4<td>The <b>page size</b> field contains the database page
 | 
						|
                         size used by the corresponding <i>database file</i>
 | 
						|
                         when the <i>journal file</i> was created, in bytes.
 | 
						|
    </table>
 | 
						|
    <p>
 | 
						|
      All <i>journal headers</i> are positioned in the file so that they 
 | 
						|
      start at a <i>sector size</i> aligned offset. To achieve this, unused
 | 
						|
      space may be left between the start of the second and subsequent
 | 
						|
      <i>journal headers</i> and the end of the <i>journal records</i>
 | 
						|
      associated with the previous header.
 | 
						|
  <h3 id=journal_record_format>Journal Record Format</h3>
 | 
						|
    <p>
 | 
						|
      Each <i>journal record</i> contains the original data for a database page
 | 
						|
      modified by the <i>write transaction</i>. If a rollback is required, then
 | 
						|
      this data may be used to restore the contents of the database page to the
 | 
						|
      state it was in before the <i>write transaction</i> was started.
 | 
						|
    <center><img src="images/fileformat/journal_record.gif" alt="Journal record format diagram">
 | 
						|
    <p><i>Figure <span class=fig id=figure_journal_record></span> - Journal Record Format</i>
 | 
						|
      </center>
 | 
						|
    <p>
 | 
						|
      A <i>journal record</i>, depicted graphically by figure
 | 
						|
      <cite>figure_journal_record</cite>, contains three fields, as described
 | 
						|
      in the following table. Byte offsets are relative to the start of the
 | 
						|
      <i>journal record</i>.
 | 
						|
    <table class=striped>
 | 
						|
      <tr><th>Byte offset<th>Size in bytes<th width=100%>Description
 | 
						|
      <tr><td>0<td>4<td>The page number of the database page associated with
 | 
						|
                        this <i>journal record</i>, stored as a 4 byte
 | 
						|
                        big-endian unsigned integer.
 | 
						|
      <tr><td>4<td><i>page-size</i><td>
 | 
						|
                        This field contains the original data for the page,
 | 
						|
                        exactly as it appeared in the database file before the
 | 
						|
                        <i>write transaction</i> began.
 | 
						|
      <tr><td style="white-space: nowrap">4 + <i>page-size</i><td>4<td>
 | 
						|
                        This field contains a checksum value, calculated based
 | 
						|
                        on the contents of the journaled database page data
 | 
						|
                        (the previous field) and the values stored in the
 | 
						|
                        <i>checksum initializer</i> field of the preceding
 | 
						|
                        <i>journal header</i>.
 | 
						|
    </table>
 | 
						|
    <p>
 | 
						|
      The set of <i>journal records</i> that follow a <i>journal header</i>
 | 
						|
      in a <i>journal file</i> are packed tightly together. There are no
 | 
						|
      alignment requirements for <i>journal records</i> as there are for
 | 
						|
      <i>journal headers</i>.
 | 
						|
  <h3>Master Journal Pointer</h3>
 | 
						|
    <p>
 | 
						|
      To support <i>atomic</i> transactions that modify more than one 
 | 
						|
      database file, SQLite sometimes includes a <i>master journal pointer</i>
 | 
						|
      record in a <i>journal file</i>. Multiple file transactions are 
 | 
						|
      described in section <cite>multifile_transactions</cite>. A <i>
 | 
						|
      master journal pointer</i> contains the name of a <i>master journal-file
 | 
						|
      </i> along with a check-sum and some well known values that allow
 | 
						|
      the <i>master journal pointer</i> to be recognized as such when
 | 
						|
      the <i>journal file</i> is read during a rollback operation (section
 | 
						|
      <cite>rollback</cite>).
 | 
						|
    <p>
 | 
						|
      As is the case for a <i>journal header</i>, the start of a <i>master
 | 
						|
      journal pointer</i> is always positioned at a <i>sector size</i> 
 | 
						|
      aligned offset. If the <i>journal record</i> or <i>journal header</i>
 | 
						|
      that appears immediately before the <i>master journal pointer</i> does
 | 
						|
      not end at an aligned offset, then unused space is left between the
 | 
						|
      end of the <i>journal record</i> or <i>journal header</i> and the start
 | 
						|
      of the <i>master journal pointer</i>.
 | 
						|
    <center><img src="images/fileformat/master_journal_ptr.gif" alt="Master journal pointer format diagram">
 | 
						|
    <p><i>Figure <span class=fig id=figure_master_journal_ptr></span> - Master Journal Pointer Format</i>
 | 
						|
      </center>
 | 
						|
    <p>
 | 
						|
      A <i>master journal pointer</i>, depicted graphically by figure
 | 
						|
      <cite>figure_master_journal_ptr</cite>, contains five fields, as 
 | 
						|
      described in the following table. Byte offsets are relative to the 
 | 
						|
      start of the <i>master journal pointer</i>.
 | 
						|
    <table class=striped>
 | 
						|
      <tr><th>Byte offset<th>Size in bytes<th width=100%>Description
 | 
						|
      <tr><td>0<td>4<td>This field, the <b>locking page number</b>, is always
 | 
						|
               set to the page number of the database <i>locking page</i>
 | 
						|
               stored as a 4-byte big-endian integer. The <i>locking page</i>
 | 
						|
               is the page that begins at byte offset 2<sup>30</sup> of the
 | 
						|
               database file. Even if the database file is large enough to
 | 
						|
               contain the <i>locking page</i>, the <i>locking page</i> is
 | 
						|
               never used to store any data and so the first four bytes of a
 | 
						|
               valid <i>journal record</i> will never contain this value. For
 | 
						|
               further description of the <i>locking page</i>, refer to
 | 
						|
               <cite>ff_sqlitert_requirements</cite>.
 | 
						|
      <tr><td>4<td><i>name-length</i><td>
 | 
						|
               The <b>master journal name</b> field contains the name of the
 | 
						|
               master journal file, encoded as a utf-8 string. There is no
 | 
						|
               nul-terminator appended to the string.
 | 
						|
      <tr><td>4 + <i>name-length</i><td>4<td>
 | 
						|
               The <b>name-length</b> field contains the length of the 
 | 
						|
               previous field in bytes, formatted as a 4-byte big-endian 
 | 
						|
               unsigned integer.
 | 
						|
      <tr><td>8 + <i>name-length</i><td>4<td>
 | 
						|
               The <b>checksum</b> field contains a checksum value stored as
 | 
						|
               a 4-byte big-endian signed integer. The checksum value is
 | 
						|
               calculated as the sum of the bytes that make up the <i>
 | 
						|
               master journal name</i> field, interpreting each byte as
 | 
						|
               an 8-bit signed integer.
 | 
						|
      <tr><td style="white-space: nowrap">12 + <i>name-length</i><td>8<td>
 | 
						|
               Finally, the <b>journal magic</b> field always contains a
 | 
						|
               well-known 8-byte string value; the same value stored in the
 | 
						|
               first 8 bytes of a <i>journal header</i>. The well-known
 | 
						|
               sequence of bytes is:
 | 
						|
                 <pre>0xd9 0xd5 0x05 0xf9 0x20 0xa1 0x63 0xd7</pre>
 | 
						|
    </table>
 | 
						|
  <h2 id=write_transactions>Write Transactions</h2>
 | 
						|
    <p>
 | 
						|
      This section describes the progression of an SQLite <i>write
 | 
						|
      transaction</i>. From the point of view of the systems described in
 | 
						|
      this document, most <i>write transactions</i> consist of three steps:
 | 
						|
    <ol>
 | 
						|
      <li><p>The <i>write transaction</i> is opened. This process is described
 | 
						|
          in section <cite>opening_a_write_transaction</cite>.
 | 
						|
      <li><p>The end-user executes DML or DDL SQL statements that require the
 | 
						|
          structure of the database file to be modified.
 | 
						|
          These modifications may be any combination of operations to 
 | 
						|
          <ul><li>modify the content of an existing database page, 
 | 
						|
              <li>append a new database page to the database file image, or
 | 
						|
              <li>truncate (discard) a database page from the end of the
 | 
						|
                  database file. 
 | 
						|
          </ul>
 | 
						|
          These operations are described in detail in section
 | 
						|
          <cite>modifying_appending_truncating</cite>. How user DDL or DML
 | 
						|
          SQL statements are mapped to combinations of these three operations
 | 
						|
          is described in <cite>ff_sqlitert_requirements</cite>.
 | 
						|
      <li><p>The <i>write transaction</i> is concluded and the changes made
 | 
						|
          permanently committed to the database. The process required to 
 | 
						|
          commit a transaction is described in section
 | 
						|
          <cite>committing_a_transaction</cite>.
 | 
						|
    </ol>
 | 
						|
    <p>
 | 
						|
      As an alternative to step 3 above, the transaction may be rolled back.
 | 
						|
      Transaction rollback is described in section <cite>rollback</cite>.
 | 
						|
      Finally, it is also important to remember that a <i>write transaction</i>
 | 
						|
      may be interrupted by a <i>system failure</i> at any point. In this
 | 
						|
      case, the contents of the file-system (the <i>database file</i> and
 | 
						|
      <i>journal file</i>) must be left in such a state so as to enable
 | 
						|
      the <i>database file</i> to be restored to the state it was in before
 | 
						|
      the interrupted <i>write transaction</i> was started. This is known
 | 
						|
      as <i>hot journal rollback</i>, and is described in section
 | 
						|
      <cite>hot_journal_rollback</cite>. Section
 | 
						|
      <cite>fs_assumption_details</cite> describes the assumptions made 
 | 
						|
      regarding the effects of a <i>system failure</i> on the file-system
 | 
						|
      contents following recovery.
 | 
						|
  <h3 id=opening_a_write_transaction>Beginning a Write Transaction</h3>
 | 
						|
    <p>
 | 
						|
      Before any database pages may be modified within the <i>page cache</i>,
 | 
						|
      the <i>database connection</i> must open a <i>write transaction</i>. 
 | 
						|
      Opening a <i>write transaction</i> requires that the <i>database
 | 
						|
      connection</i> obtains a <i>reserved lock</i> (or greater) on the 
 | 
						|
      <i>database file</i>. Because obtaining a <i>reserved lock</i> on
 | 
						|
      a <i>database file</i> guarantees that no other <i>database
 | 
						|
      connection</i> may hold or obtain a <i>reserved lock</i> or greater,
 | 
						|
      it follows that no other <i>database connection</i> may have an
 | 
						|
      open <i>write transaction</i>.
 | 
						|
    <p>
 | 
						|
      A <i>reserved lock</i> on the <i>database file</i> may be thought of
 | 
						|
      as an exclusive lock on the <i>journal file</i>. No 
 | 
						|
      <i>database connection</i> may read from or write to a <i>journal
 | 
						|
      file</i> without a <i>reserved</i> or greater lock on the corresponding
 | 
						|
      <i>database file</i>.
 | 
						|
    <p>
 | 
						|
      Before opening a <i>write transaction</i>, a <i>database connection</i>
 | 
						|
      must have an open <i>read transaction</i>, opened via the procedure
 | 
						|
      described in section <cite>open_read_only_trans</cite>. This ensures
 | 
						|
      that there is no <i>hot-journal file</i> that needs to be rolled back
 | 
						|
      and that any data stored in the <i>page cache</i> can be trusted.
 | 
						|
    <p>
 | 
						|
      Once a <i>read transaction</i> has been opened, upgrading to a 
 | 
						|
      <i>write transaction</i> is a two step process, as follows:
 | 
						|
    <ol>
 | 
						|
      <li>A <i>reserved lock</i> is obtained on the <i>database file</i>.
 | 
						|
      <li>The <i>journal file</i> is opened and created if necessary (using 
 | 
						|
          the VFS xOpen method), and a <i>journal file header</i> written 
 | 
						|
          to the start of it using a single call to the file handle's xWrite 
 | 
						|
          method.
 | 
						|
    </ol>
 | 
						|
    <p>
 | 
						|
      Requirements describing step 1 of the above procedure in detail:
 | 
						|
<p class=req id=H35350>
 | 
						|
When required to open a <i>write transaction</i> on the database,
 | 
						|
SQLite shall first open a <i>read transaction</i>, if the <i>database
 | 
						|
connection</i> in question has not already opened one.
 | 
						|
<p class=req id=H35360>
 | 
						|
When required to open a <i>write transaction</i> on the database, after
 | 
						|
ensuring a <i>read transaction</i> has already been opened, SQLite
 | 
						|
shall obtain a <i>reserved lock</i> on the database file by calling
 | 
						|
the xLock method of the file-handle open on the database file.
 | 
						|
<p class=req id=H35580>
 | 
						|
If an attempt to acquire a <i>reserved lock</i> prescribed by
 | 
						|
requirement H35360 fails, then SQLite shall deem the attempt to
 | 
						|
open a <i>write transaction</i> to have failed and return an error
 | 
						|
to the user.
 | 
						|
    <p>
 | 
						|
      Requirements describing step 2 of the above procedure in detail:
 | 
						|
<p class=req id=H35370>
 | 
						|
When required to open a <i>write transaction</i> on the database, after
 | 
						|
obtaining a <i>reserved lock</i> on the database file, SQLite shall
 | 
						|
open a read/write file-handle on the corresponding <i>journal file</i>.
 | 
						|
<p class=req id=H35380>
 | 
						|
When required to open a <i>write transaction</i> on the database, after
 | 
						|
opening a file-handle on the <i>journal file</i>, SQLite shall append
 | 
						|
a <i>journal header</i> to the (currently empty) <i>journal file</i>.
 | 
						|
    <h4 id=writing_journal_header>Writing a Journal Header</h4>
 | 
						|
    <p>
 | 
						|
      Requirements describing how a <i>journal header</i> is appended to
 | 
						|
      a journal file:
 | 
						|
<p class=req id=H35680>
 | 
						|
When required to append a <i>journal header</i> to the <i>journal
 | 
						|
file</i>, SQLite shall do so by writing a block of <i>sector-size</i>
 | 
						|
bytes using a single call to the xWrite method of the file-handle
 | 
						|
open on the <i>journal file</i>. The block of data written shall begin
 | 
						|
at the smallest sector-size aligned offset at or following the current
 | 
						|
end of the <i>journal file</i>.
 | 
						|
<p class=req id=H35690>
 | 
						|
The first 8 bytes of the <i>journal header</i> required to be written
 | 
						|
by H35680 shall contain the following values, in order from byte offset 0
 | 
						|
to 7: 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63 and 0xd7.
 | 
						|
<p class=req id=H35700>
 | 
						|
Bytes 8-11 of the <i>journal header</i> required to be written by
 | 
						|
H35680 shall contain 0x00.
 | 
						|
<p class=req id=H35710>
 | 
						|
Bytes 16-19 of the <i>journal header</i> required to be written by
 | 
						|
H35680 shall contain the number of pages that the database file
 | 
						|
contained when the current <i>write-transaction</i> was started,
 | 
						|
formatted as a 4-byte big-endian unsigned integer.
 | 
						|
<p class=req id=H35720>
 | 
						|
Bytes 12-15 of the <i>journal header</i> required to be written by
 | 
						|
H35680 shall contain pseudo-randomly generated values.
 | 
						|
<p class=req id=H35730>
 | 
						|
Bytes 20-23 of the <i>journal header</i> required to be written by
 | 
						|
H35680 shall contain the <i>sector size</i> used by the VFS layer,
 | 
						|
formatted as a 4-byte big-endian unsigned integer.
 | 
						|
<p class=req id=H35740>
 | 
						|
Bytes 24-27 of the <i>journal header</i> required to be written by
 | 
						|
H35680 shall contain the <i>page size</i> used by the database at
 | 
						|
the start of the <i>write transaction</i>, formatted as a 4-byte
 | 
						|
big-endian unsigned integer.
 | 
						|
  <h3 id=modifying_appending_truncating>
 | 
						|
    Modifying, Adding or Truncating a Database Page
 | 
						|
  </h3>
 | 
						|
    <p>
 | 
						|
      When the end-user executes a DML or DDL SQL statement to modify the
 | 
						|
      database schema or content, SQLite is required to update the database
 | 
						|
      file image to reflect the new database state. This involves modifying
 | 
						|
      the content of, appending or truncating one or more database file 
 | 
						|
      pages. Instead of modifying the database file directly using the VFS
 | 
						|
      interface, changes are first buffered within the <i>page cache</i>.
 | 
						|
    <p>
 | 
						|
      Before modifying a database page within the <i>page cache</i> that
 | 
						|
      may need to be restored by a rollback operation, the page must be
 | 
						|
      <i>journalled</i>. <i>Journalling a page</i> is the process of copying
 | 
						|
      that page's original data into the journal file so that it can be
 | 
						|
      recovered if the <i>write transaction</i> is rolled back. The process
 | 
						|
      of journalling a page is described in section 
 | 
						|
      <cite>journalling_a_page</cite>.
 | 
						|
<p class=req id=H35590>
 | 
						|
When required to modify the contents of an existing database page that
 | 
						|
existed and was not a <i>free-list leaf page</i> when the <i>write
 | 
						|
transaction</i> was opened, SQLite shall journal the page if it has not
 | 
						|
already been journalled within the current <i>write transaction</i>.
 | 
						|
<p class=req id=H35600>
 | 
						|
When required to modify the contents of an existing database page,
 | 
						|
SQLite shall update the cached version of the database page content
 | 
						|
stored as part of the <i>page cache entry</i> associated with the page.
 | 
						|
    <p>
 | 
						|
      When a new database page is appended to a database file, there is
 | 
						|
      no requirement to add a record to the <i>journal file</i>. If a 
 | 
						|
      rollback is required the database file will simply be truncated back 
 | 
						|
      to its original size based on the value stored at byte offset 16
 | 
						|
      of the <i>journal file</i>.
 | 
						|
<p class=req id=H35610>
 | 
						|
When required to append a new database page to the database file,
 | 
						|
SQLite shall create a new <i>page cache entry</i> corresponding to
 | 
						|
the new page and insert it into the <i>page cache</i>. The <i>dirty
 | 
						|
flag</i> of the new <i>page cache entry</i> shall be set.
 | 
						|
    <p>
 | 
						|
      If required to truncate a database page from the end of the database
 | 
						|
      file, the associated <i>page cache entry</i> is discarded. The adjusted
 | 
						|
      size of the database file is stored internally. The database file
 | 
						|
      is not actually truncated until the current <i>write transaction</i>
 | 
						|
      is committed (see section <cite>committing_a_transaction</cite>).
 | 
						|
<p class=req id=H35620>
 | 
						|
When required to truncate (remove) a database page that existed and was
 | 
						|
not a <i>free-list leaf page</i> when the <i>write transaction</i> was
 | 
						|
opened from the end of a database file, SQLite shall journal the page if
 | 
						|
it has not already been journalled within the current <i>write
 | 
						|
transaction</i>.
 | 
						|
<p class=req id=H35630>
 | 
						|
When required to truncate a database page from the end of the database
 | 
						|
file, SQLite shall discard the associated <i>page cache entry</i>
 | 
						|
from the page cache.
 | 
						|
  <h4 id=journalling_a_page>Journalling a Database Page</h4>
 | 
						|
    <p>
 | 
						|
      A page is journalled by adding a <i>journal record</i> to the <i>
 | 
						|
      journal file</i>. The format of a <i>journal record</i> is described
 | 
						|
      in section <cite>journal_record_format</cite>.
 | 
						|
<p class=req id=H35270>
 | 
						|
When required to <i>journal a database page</i>, SQLite shall first
 | 
						|
append the <i>page number</i> of the page being journalled to the
 | 
						|
<i>journal file</i>, formatted as a 4-byte big-endian unsigned integer,
 | 
						|
using a single call to the xWrite method of the file-handle opened
 | 
						|
on the journal file.
 | 
						|
<p class=req id=H35280>
 | 
						|
When required to <i>journal a database page</i>, if the attempt to
 | 
						|
append the <i>page number</i> to the journal file is successful,
 | 
						|
then the current page data (<i>page-size</i> bytes) shall be appended
 | 
						|
to the journal file, using a single call to the xWrite method of the
 | 
						|
file-handle opened on the journal file.
 | 
						|
<p class=req id=H35290>
 | 
						|
When required to <i>journal a database page</i>, if the attempt to
 | 
						|
append the current page data to the journal file is successful,
 | 
						|
then SQLite shall append a 4-byte big-endian integer checksum value
 | 
						|
to the journal file, using a single call to the xWrite method
 | 
						|
of the file-handle opened on the journal file.
 | 
						|
    <p>
 | 
						|
      The checksum value written to the <i>journal file</i> immediately after
 | 
						|
      the page data (requirement H35290), is a function of both the page
 | 
						|
      data and the <i>checksum initializer</i> field stored in the 
 | 
						|
      <i>journal header</i> (see section <cite>journal_header_format</cite>).
 | 
						|
      Specifically, it is the sum of the <i>checksum initializer</i> and
 | 
						|
      the value of every 200th byte of page data interpreted as an 8-bit
 | 
						|
      unsigned integer, starting with the (<i>page-size</i> % 200)'th 
 | 
						|
      byte of page data. For example, if the <i>page-size</i> is 1024 bytes,
 | 
						|
      then a checksum is calculated by adding the values of the bytes at
 | 
						|
      offsets 23, 223, 423, 623, 823 and 1023 (the last byte of the page)
 | 
						|
      together with the value of the <i>checksum initializer</i>.
 | 
						|
<p class=req id=H35300>
 | 
						|
The checksum value written to the <i>journal file</i> by the write
 | 
						|
required by H35290 shall be equal to the sum of the <i>checksum
 | 
						|
initializer</i> field stored in the <i>journal header</i> (H35720) and
 | 
						|
every 200th byte of the page data, beginning with the
 | 
						|
(<i>page-size</i> % 200)th byte.
 | 
						|
    <p>
 | 
						|
      The '%' character is used in requirement H35300 to represent the
 | 
						|
      modulo operator, just as it is in programming languages such as C, Java
 | 
						|
      and Javascript.
 | 
						|
  <h3 id=syncing_journal_file>Syncing the Journal File</h3>
 | 
						|
    <p>
 | 
						|
      Even after the original data of a database page has been written into
 | 
						|
      the journal file using calls to the journal file file-handle xWrite 
 | 
						|
      method (section <cite>journalling_a_page</cite>), it is still not
 | 
						|
      safe to write to the page within the database file. This is because
 | 
						|
      in the event of a system failure the data written to the journal file
 | 
						|
      may still be corrupted (see section <cite>fs_characteristics</cite>).
 | 
						|
      Before the page can be updated within the database itself, the 
 | 
						|
      following procedure takes place:
 | 
						|
    <ol>
 | 
						|
      <li> The xSync method of the file-handle opened on the journal file 
 | 
						|
           is called. This operation ensures that all <i>journal records</i>
 | 
						|
           in the journal file have been written to persistent storage, and
 | 
						|
           that they will not become corrupted as a result of a subsequent
 | 
						|
           system failure.
 | 
						|
      <li> The <i>journal record count</i> field (see section 
 | 
						|
           <cite>journal_header_format</cite>) of the most recently written
 | 
						|
           journal header in the journal file is updated to contain the
 | 
						|
           number of <i>journal records</i> added to the journal file since
 | 
						|
           the header was written.
 | 
						|
      <li> The xSync method is called again, to ensure that the update to
 | 
						|
           the <i>journal record count</i> has been committed to persistent
 | 
						|
           storage.
 | 
						|
    </ol> 
 | 
						|
    <p>
 | 
						|
      If all three of the steps enumerated above are executed successfully,
 | 
						|
      then it is safe to modify the content of the <i>journalled</i> 
 | 
						|
      database pages within the database file itself. The combination of
 | 
						|
      the three steps above is referred to as <i>syncing the journal file</i>.
 | 
						|
<p class=req id=H35750>
 | 
						|
When required to <i>sync the journal file</i>, SQLite shall invoke the
 | 
						|
xSync method of the file handle open on the <i>journal file</i>.
 | 
						|
<p class=req id=H35760>
 | 
						|
When required to <i>sync the journal file</i>, after invoking the
 | 
						|
xSync method as required by H35750, SQLite shall update the <i>record
 | 
						|
count</i> of the <i>journal header</i> most recently written to the
 | 
						|
<i>journal file</i>. The 4-byte field shall be updated to contain
 | 
						|
the number of <i>journal records</i> that have been written to the
 | 
						|
<i>journal file</i> since the <i>journal header</i> was written,
 | 
						|
formatted as a 4-byte big-endian unsigned integer.
 | 
						|
<p class=req id=H35770>
 | 
						|
When required to <i>sync the journal file</i>, after updating the
 | 
						|
<i>record count</i> field of a <i>journal header</i> as required by
 | 
						|
H35760, SQLite shall invoke the xSync method of the file handle open
 | 
						|
on the <i>journal file</i>.
 | 
						|
  <h3 id=upgrading_to_exclusive_lock>Upgrading to an Exclusive Lock</h3>
 | 
						|
    <p>
 | 
						|
      Before the content of a page modified within the <i>page cache</i> may
 | 
						|
      be written to the database file, an <i>exclusive lock</i> must be held
 | 
						|
      on the database file. The purpose of this lock is to prevent another
 | 
						|
      connection from reading from the database file while the first 
 | 
						|
      connection is midway through writing to it. Whether the reason for
 | 
						|
      writing to the database file is because a transaction is being committed,
 | 
						|
      or to free up space within the <i>page cache</i>, upgrading to an 
 | 
						|
      <i>exclusive lock</i> always occurs immediately after 
 | 
						|
      <i>syncing the journal file</i>.
 | 
						|
<p class=req id=H35780>
 | 
						|
When required to upgrade to an <i>exclusive lock</i> as part of a write
 | 
						|
transaction, SQLite shall first attempt to obtain a <i>pending lock</i>
 | 
						|
on the database file if one is not already held by invoking the xLock
 | 
						|
method of the file handle opened on the <i>database file</i>.
 | 
						|
<p class=req id=H35790>
 | 
						|
When required to upgrade to an <i>exclusive lock</i> as part of a write
 | 
						|
transaction, after successfully obtaining a <i>pending lock</i> SQLite
 | 
						|
shall attempt to obtain an <i>exclusive lock</i> by invoking the
 | 
						|
xLock method of the file handle opened on the <i>database file</i>.
 | 
						|
    <p class=todo>
 | 
						|
      What happens if the exclusive lock cannot be obtained? It is not
 | 
						|
      possible for the attempt to upgrade from a reserved to a pending 
 | 
						|
      lock to fail.
 | 
						|
  <h3 id=committing_a_transaction>Committing a Transaction</h3>
 | 
						|
    <p>
 | 
						|
      Committing a <i>write transaction</i> is the final step in updating the
 | 
						|
      database file. Committing a transaction is a seven step process,
 | 
						|
      summarized as follows:
 | 
						|
    <ol>
 | 
						|
      <li><p>
 | 
						|
        The database file header <i>change counter</i> field is incremented.
 | 
						|
        The <i>change counter</i>, described in
 | 
						|
        <cite>ff_sqlitert_requirements</cite>, is used by the <i>cache
 | 
						|
        validation</i> procedure described in section
 | 
						|
        <cite>cache_validation</cite>.
 | 
						|
      <li><p> 
 | 
						|
        The <i>journal file</i> is synced. The steps required to <i>sync the
 | 
						|
        journal file</i> are described in section
 | 
						|
        <cite>syncing_journal_file</cite>.
 | 
						|
      <li><p>
 | 
						|
        Upgrade to an <i>exclusive lock</i> on the database file, if an
 | 
						|
        <i>exclusive lock</i> is not already held. Upgrading to an 
 | 
						|
        <i>exclusive lock</i> is described in section
 | 
						|
        <cite>upgrading_to_exclusive_lock</cite>.
 | 
						|
      <li><p> 
 | 
						|
        Copy the contents of all <i>dirty pages</i> stored in the <i>page
 | 
						|
        cache</i> into the database file. The set of dirty pages are written
 | 
						|
        to the database file in page number order to improve
 | 
						|
        performance (see the assumptions in section <cite>fs_performance</cite>
 | 
						|
        for details).
 | 
						|
      <li><p>
 | 
						|
        The database file is synced to ensure that all updates are stored
 | 
						|
        safely on the persistent media.
 | 
						|
      <li><p>
 | 
						|
        The file-handle open on the <i>journal file</i> is closed and the
 | 
						|
        journal file itself deleted. At this point the <i>write transaction</i>
 | 
						|
        has been irrevocably committed.
 | 
						|
      <li><p>
 | 
						|
        The database file is unlocked.
 | 
						|
    </ol>
 | 
						|
    <p class=todo>
 | 
						|
      Expand on and explain the above a bit.
 | 
						|
    <p>
 | 
						|
      The following requirements describe the steps enumerated above in more
 | 
						|
      detail.
 | 
						|
<p class=req id=H35800>
 | 
						|
When required to <i>commit a write-transaction</i>, SQLite shall
 | 
						|
modify page 1 to increment the value stored in the <i>change counter</i>
 | 
						|
field of the <i>database file header</i>.
 | 
						|
    <p>
 | 
						|
      The <i>change counter</i> is a 4-byte big-endian integer field stored
 | 
						|
      at byte offset 24 of the <i>database file</i>. The modification to page 1
 | 
						|
      required by H35800 is made using the process described in section
 | 
						|
      <cite>modifying_appending_truncating</cite>. If page 1 has not already
 | 
						|
      been journalled as a part of the current write-transaction, then
 | 
						|
      incrementing the <i>change counter</i> may require that page 1 be
 | 
						|
      journalled. In all cases the <i>page cache entry</i> corresponding to
 | 
						|
      page 1 becomes a <i>dirty page</i> as part of incrementing the <i>change
 | 
						|
      counter</i> value.
 | 
						|
<p class=req id=H35810>
 | 
						|
When required to <i>commit a write-transaction</i>, after incrementing
 | 
						|
the <i>change counter</i> field, SQLite shall <i>sync the journal
 | 
						|
file</i>.
 | 
						|
<p class=req id=H35820>
 | 
						|
When required to <i>commit a write-transaction</i>, after <i>syncing
 | 
						|
the journal file</i> as required by H35810, if an <i>exclusive lock</i>
 | 
						|
on the database file is not already held, SQLite shall attempt to
 | 
						|
<i>upgrade to an exclusive lock</i>.
 | 
						|
<p class=req id=H35830>
 | 
						|
When required to <i>commit a write-transaction</i>, after <i>syncing
 | 
						|
the journal file</i> as required by H35810 and ensuring that an
 | 
						|
<i>exclusive lock</i> is held on the database file as required by
 | 
						|
H35820, SQLite shall copy the contents of all <i>dirty pages</i>
 | 
						|
stored in the <i>page cache</i> into the <i>database file</i> using
 | 
						|
calls to the xWrite method of the <i>database connection</i> file
 | 
						|
handle. Each call to xWrite shall write the contents of a single
 | 
						|
<i>dirty page</i> (<i>page-size</i> bytes of data) to the database
 | 
						|
file. Dirty pages shall be written in order of <i>page number</i>,
 | 
						|
from lowest to highest.
 | 
						|
<p class=req id=H35840>
 | 
						|
When required to <i>commit a write-transaction</i>, after copying the
 | 
						|
contents of any <i>dirty pages</i> to the database file as required
 | 
						|
by H35830, SQLite shall sync the database file by invoking the xSync
 | 
						|
method of the <i>database connection</i> file handle.
 | 
						|
<p class=req id=H35850>
 | 
						|
When required to <i>commit a write-transaction</i>, after syncing
 | 
						|
the database file as required by H35840, SQLite shall close the
 | 
						|
file-handle opened on the <i>journal file</i> and delete the
 | 
						|
<i>journal file</i> from the file system via a call to the VFS
 | 
						|
xDelete method.
 | 
						|
<p class=req id=H35860>
 | 
						|
When required to <i>commit a write-transaction</i>, after deleting
 | 
						|
the <i>journal file</i> as required by H35850, SQLite shall relinquish
 | 
						|
all locks held on the <i>database file</i> by invoking the xUnlock
 | 
						|
method of the <i>database connection</i> file handle.
 | 
						|
    <p class=todo>
 | 
						|
      Is the shared lock held after committing a <i>write transaction</i>?
 | 
						|
  <h3>Purging a Dirty Page</h3>
 | 
						|
    <p>
 | 
						|
      Usually, no data is actually written to the database file until the
 | 
						|
      user commits the active <i>write transaction</i>. The exception is
 | 
						|
      if a single <i>write transaction</i> contains too many modifications
 | 
						|
      to be stored in the <i>page cache</i>. In this case, some of the 
 | 
						|
      database file modifications stored in the <i>page cache</i> must be
 | 
						|
      applied to the database file before the transaction is committed so
 | 
						|
      that the associated <i>page cache entries</i> can be purged from the
 | 
						|
      page cache to free memory. Exactly when this condition is reached and
 | 
						|
      dirty pages must be purged is described in section
 | 
						|
      <cite>page_cache_algorithms</cite>.
 | 
						|
    <p>
 | 
						|
      Before the contents of the <i>page cache entry</i> can be written into
 | 
						|
      the database file, the <i>page cache entry</i> must meet the criteria
 | 
						|
      for a <i>writable dirty page</i>, as defined in section
 | 
						|
      <cite>page_cache_algorithms</cite>. If the dirty page selected by the
 | 
						|
      algorithms in section <cite>page_cache_algorithms</cite> for purging is not a <i>writable dirty page</i>,
 | 
						|
      SQLite is required to <i>sync the journal file</i>. Immediately after
 | 
						|
      the journal file is synced, all dirty pages associated with the
 | 
						|
      <i>database connection</i> are classified as <i>writable dirty pages</i>.
 | 
						|
<p class=req id=H35640>
 | 
						|
When required to purge a <i>non-writable dirty page</i> from the
 | 
						|
<i>page cache</i>, SQLite shall <i>sync the journal file</i> before
 | 
						|
proceeding with the write operation required by H35670.
 | 
						|
<p class=req id=H35660>
 | 
						|
After <i>syncing the journal file</i> as required by H35640, SQLite
 | 
						|
shall append a new <i>journal header</i> to the <i>journal file</i>
 | 
						|
before proceeding with the write operation required by H35670.
 | 
						|
    <p>
 | 
						|
      Appending a new <i>journal header</i> to the journal file is described
 | 
						|
      in section <cite>writing_journal_header</cite>.
 | 
						|
    <p>
 | 
						|
      Once the dirty page being purged is writable, it is simply written
 | 
						|
      into the database file.
 | 
						|
<p class=req id=H35670>
 | 
						|
When required to purge a <i>page cache entry</i> that is a
 | 
						|
<i>dirty page</i>, SQLite shall write the page data into the database
 | 
						|
file, using a single call to the xWrite method of the <i>database
 | 
						|
connection</i> file handle.
 | 
						|
  <h2 id="multifile_transactions">Multi-File Transactions</h2>
 | 
						|
  <h2 id="statement_transactions">Statement Transactions</h2>
 | 
						|
<h1 id=rollback>Rollback</h1>
 | 
						|
  <h2 id=hot_journal_rollback>Hot Journal Rollback</h2>
 | 
						|
  <h2>Transaction Rollback</h2>
 | 
						|
  <h2>Statement Rollback</h2>
 | 
						|
<h1>References</h1>
 | 
						|
  <table id="refs" style="width:auto; margin: 1em 5ex">
 | 
						|
    <tr><td style="width:5ex" id="capi_sqlitert_requirements">[1]<td>
 | 
						|
      C API Requirements Document.
 | 
						|
    <tr><td style="width:5ex" id="sql_sqlitert_requirements">[2]<td>
 | 
						|
      SQL Requirements Document.
 | 
						|
    <tr><td style="width:5ex" id="ff_sqlitert_requirements">[3]<td>
 | 
						|
      File Format Requirements Document.
 | 
						|
  </table>
 | 
						|
 | 
						|
 |