libcifpp/sqlite-backend/cifpp2sqlite.cpp

/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2025 NKI/AVL, Netherlands Cancer Institute
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sqlite3.h>

/*
** 2016-05-28
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file contains the implementation of an SQLite virtual table for
** reading CSV files.
**
** Usage:
**
**    .load ./csv
**    CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
**    SELECT * FROM csv;
**
** The columns are named "c0", "c1", "c2", ... by default.  Or the
** application can define its own CREATE TABLE statement using the
** schema= parameter, like this:
**
**    CREATE VIRTUAL TABLE temp.csv2 USING csv(
**       filename = '../http.log',
**       schema = 'CREATE TABLE x(date,ipaddr,url,referrer,userAgent)'
**    );
**
** Instead of specifying a file, the text of the CSV can be loaded using
** the data= parameter.
**
** If the columns=N parameter is supplied, then the CSV file is assumed to have
** N columns.  If both the columns= and schema= parameters are omitted, then
** the number and names of the columns is determined by the first line of
** the CSV input.
**
** Some extra debugging features (used for testing virtual tables) are available
** if this module is compiled with -DSQLITE_TEST.
*/
#include <sqlite3ext.h>
SQLITE_EXTENSION_INIT1
#include <assert.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifndef SQLITE_OMIT_VIRTUALTABLE

/*
** A macro to hint to the compiler that a function should not be
** inlined.
*/
# if defined(__GNUC__)
#  define CSV_NOINLINE __attribute__((noinline))
# elif defined(_MSC_VER) && _MSC_VER >= 1310
#  define CSV_NOINLINE __declspec(noinline)
# else
#  define CSV_NOINLINE
# endif

/* Max size of the error message in a CsvReader */
# define CSV_MXERR 200

/* Size of the CsvReader input buffer */
# define CSV_INBUFSZ 1024

/* A context object used when reading CSV text, either from a file or
** from an in-memory string.  One field at a time is accumulated in z[]
** by csv_read_one_field(). */
typedef struct CsvReader CsvReader;
struct CsvReader
{
	FILE *in;             /* Read the CSV text from this input stream; 0 when reading from memory */
	char *z;              /* Accumulated text for a field */
	int n;                /* Number of bytes in z */
	int nAlloc;           /* Space allocated for z[] */
	int nLine;            /* Current line number */
	int bNotFirst;        /* True if prior text has been seen */
	int cTerm;            /* Character that terminated the most recent field */
	size_t iIn;           /* Index in zIn[] of the next unread character */
	size_t nIn;           /* Number of characters in the input buffer */
	char *zIn;            /* The input buffer: a CSV_INBUFSZ allocation for files, or the in-memory CSV text itself */
	char zErr[CSV_MXERR]; /* Error message; empty string when there is no error */
};

/* Initialize a CsvReader object.
**
** Every field is put into a well-defined "empty" state: no input stream,
** no field buffer, no input buffer, read position at offset zero and an
** empty error message.  Nothing is freed here; use csv_reader_reset() on
** a reader that may own resources.
*/
static void csv_reader_init(CsvReader *p)
{
	p->in = 0;
	p->z = 0;
	p->n = 0;
	p->nAlloc = 0;
	p->nLine = 0;
	p->bNotFirst = 0;
	p->cTerm = 0;
	/* The read offset must be cleared together with nIn, otherwise a
	** reader that is reset and then re-opened on in-memory data would
	** start reading at a stale position. */
	p->iIn = 0;
	p->nIn = 0;
	p->zIn = 0;
	p->zErr[0] = 0;
}

/* Release everything held by a CsvReader and put it back into the
** initialized state.
**
** The input buffer zIn is freed only when the reader has an open file:
** in that case it was allocated by csv_reader_open().  When reading from
** memory zIn merely aliases the caller's text and is left alone.
*/
static void csv_reader_reset(CsvReader *p)
{
	FILE *pFile = p->in;
	if (pFile != 0)
	{
		fclose(pFile);
		sqlite3_free(p->zIn);
	}
	sqlite3_free(p->z);
	csv_reader_init(p);
}

/* Format an error message into the reader's zErr[] buffer.
**
** zFormat and the trailing arguments are handed to sqlite3_vsnprintf(),
** which is given CSV_MXERR as the size of the destination buffer.
*/
static void csv_errmsg(CsvReader *p, const char *zFormat, ...)
{
	va_list args;
	char *zDest = p->zErr;

	va_start(args, zFormat);
	sqlite3_vsnprintf(CSV_MXERR, zDest, zFormat, args);
	va_end(args);
}

/* Attach an input source to a CsvReader.
**
** If zFilename is not NULL the file is opened for binary reading and a
** CSV_INBUFSZ-byte input buffer is allocated.  Otherwise the reader is
** pointed directly at the text in zData (which is not copied).
**
** Return the number of errors: 0 on success, 1 on failure.  On failure
** an error message is left in p->zErr.
*/
static int csv_reader_open(
	CsvReader *p,          /* The reader to open */
	const char *zFilename, /* Read from this filename */
	const char *zData      /*  ... or use this data */
)
{
	if (zFilename == 0)
	{
		/* In-memory input: the "buffer" is the data itself */
		assert(p->in == 0);
		p->zIn = (char *)zData;
		p->nIn = strlen(zData);
		return 0;
	}

	p->zIn = sqlite3_malloc(CSV_INBUFSZ);
	if (p->zIn == 0)
	{
		csv_errmsg(p, "out of memory");
		return 1;
	}

	p->in = fopen(zFilename, "rb");
	if (p->in != 0)
		return 0;

	/* p->in is NULL here, so csv_reader_reset() will not free zIn;
	** release it explicitly first. */
	sqlite3_free(p->zIn);
	csv_reader_reset(p);
	csv_errmsg(p, "cannot open '%s' for reading", zFilename);
	return 1;
}

/* The input buffer has been exhausted.  Refill it from the input file,
** then return the next character.
**
** Returns the first byte of the freshly read data as a non-negative
** value (converted through unsigned char, exactly like csv_getc()), or
** EOF if fread() delivered no more bytes.  Returning the raw `char`
** would make bytes >= 0x80 negative on platforms where char is signed,
** and the byte 0xFF would be indistinguishable from EOF.
*/
static CSV_NOINLINE int csv_getc_refill(CsvReader *p)
{
	size_t got;

	assert(p->iIn >= p->nIn); /* Only called on an empty input buffer */
	assert(p->in != 0);       /* Only called if reading from a file */

	got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
	if (got == 0)
		return EOF;
	p->nIn = got;
	p->iIn = 1; /* Byte 0 is consumed by this call */
	return (unsigned char)p->zIn[0];
}

/* Fetch the next input character as an unsigned byte value.
**
** While unread bytes remain in the input buffer they are served from
** there.  Once the buffer is drained, a file-backed reader refills it;
** a memory-backed reader (p->in==0) has reached the end and reports EOF.
*/
static int csv_getc(CsvReader *p)
{
	if (p->iIn < p->nIn)
	{
		const unsigned char *aBuf = (const unsigned char *)p->zIn;
		return aBuf[p->iIn++];
	}
	return p->in != 0 ? csv_getc_refill(p) : EOF;
}

/* Grow the field buffer p->z and then append character c to it.
**
** The new capacity is twice the old capacity plus 100 bytes.  Return 0
** on success.  If the reallocation fails, the existing buffer is left
** untouched, "out of memory" is recorded in p->zErr and 1 is returned.
*/
static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c)
{
	int nBigger = p->nAlloc * 2 + 100;
	char *zBigger = sqlite3_realloc64(p->z, nBigger);

	if (zBigger == 0)
	{
		csv_errmsg(p, "out of memory");
		return 1;
	}
	p->z = zBigger;
	p->nAlloc = nBigger;
	p->z[p->n++] = c;
	return 0;
}

/* Append one character to the field buffer CsvReader.z[].
**
** One byte of the allocation is always kept in reserve (the caller
** later stores a terminator at z[n]), so the buffer is enlarged as soon
** as n reaches nAlloc-1.  Return 0 on success, non-zero on OOM.
*/
static int csv_append(CsvReader *p, char c)
{
	int bHasRoom = p->n < p->nAlloc - 1;
	if (bHasRoom)
	{
		p->z[p->n++] = c;
		return 0;
	}
	return csv_resize_and_append(p, c);
}

/* Read a single field of CSV text.  The field separator is "," and a
** field may be enclosed in double-quotes, inside which a doubled quote
** stands for one literal quote.
**
**   +  Input characters are obtained through csv_getc(p).
**   +  Store results in p->z of length p->n.  Space to hold p->z is
**      obtained through csv_append().
**   +  Keep track of the line number in p->nLine.
**   +  Store the character that terminates the field in p->cTerm.  Store
**      EOF on end-of-file.
**   +  A UTF-8 byte-order mark at the very start of the input (while
**      p->bNotFirst is still 0) is skipped.
**
** Return 0 at EOF or on OOM.  On EOF, the p->cTerm character will have
** been set to EOF.  Otherwise return p->z, zero-terminated at p->z[p->n].
**
** Syntax errors inside a quoted field (an unescaped quote, or end of
** input before the closing quote) leave a message in p->zErr; the text
** accumulated so far is still returned.
*/
static char *csv_read_one_field(CsvReader *p)
{
	int c;
	p->n = 0;
	c = csv_getc(p);
	if (c == EOF)
	{
		p->cTerm = EOF;
		return 0;
	}
	if (c == '"')
	{
		/* Quoted field.  pc and ppc hold the previous and the
		** previous-but-one character appended to p->z. */
		int pc, ppc;
		int startLine = p->nLine;
		pc = ppc = 0;
		while (1)
		{
			c = csv_getc(p);
			/* Cheap filter: EOF, '\n', '\r' and '"' all compare <= '"'.
			** Anything else only needs inspection when it directly
			** follows a quote character. */
			if (c <= '"' || pc == '"')
			{
				if (c == '\n')
					p->nLine++;
				if (c == '"')
				{
					if (pc == '"')
					{
						/* Doubled quote: the first one is already in
						** p->z, so drop the second and forget that the
						** previous character was a quote. */
						pc = 0;
						continue;
					}
				}
				/* A quote followed by ',', '\n', "\r\n" or EOF ends the field */
				if ((c == ',' && pc == '"') || (c == '\n' && pc == '"') || (c == '\n' && pc == '\r' && ppc == '"') || (c == EOF && pc == '"'))
				{
					/* Back up to the closing quote, discarding it and
					** any '\r' that was appended after it. */
					do
					{
						p->n--;
					} while (p->z[p->n] != '"');
					p->cTerm = (char)c;
					break;
				}
				if (pc == '"' && c != '\r')
				{
					/* NOTE(review): p->cTerm is not updated on this path */
					csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
					break;
				}
				if (c == EOF)
				{
					csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
						startLine, '"');
					p->cTerm = (char)c;
					break;
				}
			}
			if (csv_append(p, (char)c))
				return 0;
			ppc = pc;
			pc = c;
		}
	}
	else
	{
		/* If this is the first field being parsed and it begins with the
		** UTF-8 BOM  (0xEF BB BF) then skip the BOM */
		if ((c & 0xff) == 0xef && p->bNotFirst == 0)
		{
			/* The bytes are appended provisionally; if the sequence turns
			** out not to be a complete BOM they remain part of the field.
			** NOTE(review): the result of csv_append() is ignored here. */
			csv_append(p, (char)c);
			c = csv_getc(p);
			if ((c & 0xff) == 0xbb)
			{
				csv_append(p, (char)c);
				c = csv_getc(p);
				if ((c & 0xff) == 0xbf)
				{
					/* Complete BOM: discard it and parse the field that
					** follows.  bNotFirst prevents a second BOM check. */
					p->bNotFirst = 1;
					p->n = 0;
					return csv_read_one_field(p);
				}
			}
		}
		/* Unquoted field: collect characters up to ',', '\n' or EOF.
		** The "c > ','" test is a quick accept for most characters. */
		while (c > ',' || (c != EOF && c != ',' && c != '\n'))
		{
			if (csv_append(p, (char)c))
				return 0;
			c = csv_getc(p);
		}
		if (c == '\n')
		{
			p->nLine++;
			/* Treat "\r\n" as a line ending: drop the trailing '\r' */
			if (p->n > 0 && p->z[p->n - 1] == '\r')
				p->n--;
		}
		p->cTerm = (char)c;
	}
	/* csv_append() always keeps one spare byte for the terminator */
	assert(p->z == 0 || p->n < p->nAlloc);
	if (p->z)
		p->z[p->n] = 0;
	p->bNotFirst = 1;
	return p->z;
}

/* Prototypes for the virtual table methods implemented further down in
** this file, so that they can be referenced before their definitions. */
static int csvtabCreate(sqlite3 *db, void *pAux, int argc,
	const char *const *argv, sqlite3_vtab **ppVtab, char **pzErr);
static int csvtabConnect(sqlite3 *db, void *pAux, int argc,
	const char *const *argv, sqlite3_vtab **ppVtab, char **pzErr);
static int csvtabBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo);
static int csvtabDisconnect(sqlite3_vtab *pVtab);
static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor);
static int csvtabClose(sqlite3_vtab_cursor *cur);
static int csvtabFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum,
	const char *idxStr, int argc, sqlite3_value **argv);
static int csvtabNext(sqlite3_vtab_cursor *cur);
static int csvtabEof(sqlite3_vtab_cursor *cur);
static int csvtabColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i);
static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite3_int64 *pRowid);

/* An instance of the CSV virtual table.
**
** Exactly one of zFilename and zData is non-NULL once csvtabConnect()
** has succeeded; both strings are freed by csvtabDisconnect(). */
typedef struct CsvTable
{
	sqlite3_vtab base;     /* Base class.  Must be first */
	char *zFilename;       /* Name of the CSV file */
	char *zData;           /* Raw CSV data in lieu of zFilename */
	long iStart;           /* Offset to start of data in zFilename (or in zData) */
	int nCol;              /* Number of columns in the CSV file */
	unsigned int tstFlags; /* Bit values used for testing */
} CsvTable;

/* Allowed values for tstFlags */
# define CSVTEST_FIDX 0x0001 /* Pretend that constrained search cost less*/

/* A cursor for the CSV virtual table.
**
** azVal and aLen are arrays of CsvTable.nCol entries that live in the
** same allocation as the cursor itself (see csvtabOpen()). */
typedef struct CsvCursor
{
	sqlite3_vtab_cursor base; /* Base class.  Must be first */
	CsvReader rdr;            /* The CsvReader object */
	char **azVal;             /* Value of the current row; NULL entry for a missing column */
	int *aLen;                /* Allocated size in bytes of each azVal[] entry */
	sqlite3_int64 iRowid;     /* The current rowid.  Negative for EOF */
} CsvCursor;

/* Copy the error text held by a reader into the virtual table's zErrMsg
** slot.  Any message already stored on the table is freed first. */
static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr)
{
	sqlite3_vtab *pBase = &pTab->base;

	sqlite3_free(pBase->zErrMsg);
	pBase->zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
}

/*
** Destructor for a CsvTable object: frees the strings owned by the
** table and then the table itself.  Always returns SQLITE_OK.
*/
static int csvtabDisconnect(sqlite3_vtab *pVtab)
{
	CsvTable *pTab = (CsvTable *)pVtab;

	sqlite3_free(pTab->zData);
	sqlite3_free(pTab->zFilename);
	sqlite3_free(pTab);
	return SQLITE_OK;
}

/* Step over leading whitespace (as classified by isspace()).  The result
** points at the first non-whitespace character of z, or at the zero
** terminator when z consists solely of whitespace. */
static const char *csv_skip_whitespace(const char *z)
{
	const char *zCur;
	for (zCur = z; isspace((unsigned char)*zCur); zCur++)
	{
		/* nothing to do: the loop header advances the pointer */
	}
	return zCur;
}

/* Remove trailing whitespace from the end of string z[].
**
** The string is modified in place: a zero terminator is written right
** after the last non-whitespace character.  A string made up entirely
** of whitespace becomes the empty string.
*/
static void csv_trim_whitespace(char *z)
{
	size_t n = strlen(z);
	/* z[n] is the terminator, so the last real character is z[n-1] */
	while (n > 0 && isspace((unsigned char)z[n - 1]))
		n--;
	z[n] = 0;
}

/* Strip the enclosing quotes from z[] in place.
**
** Nothing happens unless z begins with ' or " and ends with that same
** character (and is at least two characters long).  Otherwise the outer
** quotes are removed and each doubled quote character inside is
** collapsed into a single one.
*/
static void csv_dequote(char *z)
{
	char q = z[0];
	size_t nLen, iSrc;
	int iDst = 0;

	if (q != '\'' && q != '"')
		return;
	nLen = strlen(z);
	if (nLen < 2 || z[nLen - 1] != q)
		return;

	iSrc = 1;
	while (iSrc < nLen - 1)
	{
		/* For a doubled quote, skip the first and copy the second */
		if (z[iSrc] == q && z[iSrc + 1] == q)
			iSrc++;
		z[iDst++] = z[iSrc++];
	}
	z[iDst] = 0;
}

/* Test whether z has the shape "TAG = VALUE", where whitespace may
** appear before TAG and on either side of the "=".  zTag/nTag give the
** tag text and its length in bytes.
**
** Returns a pointer to the first character of VALUE (inside z) when the
** shape matches, or NULL when it does not.
*/
static const char *csv_parameter(const char *zTag, int nTag, const char *z)
{
	const char *zCur = csv_skip_whitespace(z);

	if (strncmp(zTag, zCur, nTag) == 0)
	{
		zCur = csv_skip_whitespace(zCur + nTag);
		if (zCur[0] == '=')
			return csv_skip_whitespace(zCur + 1);
	}
	return 0;
}

/* Decode a "PARAM = 'string'" style argument.
**
** If zArg does not name parameter zParam, return 0 and touch nothing.
** Otherwise return 1 and:
**
**   +  on success, store a newly allocated copy of the value in *pzVal,
**      with trailing whitespace trimmed and enclosing quotes removed,
**      and leave p->zErr empty;
**   +  on failure (parameter already set, or out of memory), leave an
**      error message in p->zErr.
*/
static int csv_string_parameter(
	CsvReader *p,       /* Leave the error message here, if there is one */
	const char *zParam, /* Parameter we are checking for */
	const char *zArg,   /* Raw text of the virtual table argument */
	char **pzVal        /* Write the dequoted string value here */
)
{
	int nParam = (int)strlen(zParam);
	const char *zRaw = csv_parameter(zParam, nParam, zArg);
	char *zCopy;

	if (zRaw == 0)
		return 0;

	p->zErr[0] = 0;
	if (*pzVal != 0)
	{
		csv_errmsg(p, "more than one '%s' parameter", zParam);
		return 1;
	}

	zCopy = sqlite3_mprintf("%s", zRaw);
	*pzVal = zCopy;
	if (zCopy == 0)
	{
		csv_errmsg(p, "out of memory");
		return 1;
	}
	csv_trim_whitespace(zCopy);
	csv_dequote(zCopy);
	return 1;
}

/* Interpret z as a boolean word.
**
** "yes", "on", "true" (compared with sqlite3_stricmp()) and "1" yield 1;
** "no", "off", "false" and "0" yield 0; anything else yields -1.
*/
static int csv_boolean(const char *z)
{
	static const char *const azTrue[] = { "yes", "on", "true" };
	static const char *const azFalse[] = { "no", "off", "false" };
	int i;

	for (i = 0; i < 3; i++)
	{
		if (sqlite3_stricmp(azTrue[i], z) == 0)
			return 1;
	}
	if (z[0] == '1' && z[1] == 0)
		return 1;

	for (i = 0; i < 3; i++)
	{
		if (sqlite3_stricmp(azFalse[i], z) == 0)
			return 0;
	}
	if (z[0] == '0' && z[1] == 0)
		return 0;

	return -1;
}

/* Recognize an argument of the form "TAG = BOOLEAN" or a bare "TAG".
**
** A bare TAG means true.  When the argument matches, the boolean is
** written to *pValue and non-zero is returned.  Zero is returned (and
** *pValue is untouched) when the input does not start with TAG, when
** TAG is followed by something other than "=", or when the text after
** "=" is not recognized by csv_boolean().
*/
static int csv_boolean_parameter(
	const char *zTag, /* Tag we are looking for */
	int nTag,         /* Size of the tag in bytes */
	const char *z,    /* Input parameter */
	int *pValue       /* Write boolean value here */
)
{
	const char *zCur = csv_skip_whitespace(z);
	int bVal;

	if (strncmp(zTag, zCur, nTag) != 0)
		return 0;

	zCur = csv_skip_whitespace(zCur + nTag);
	switch (zCur[0])
	{
		case 0:
			/* Bare tag with no "= BOOLEAN" part */
			*pValue = 1;
			return 1;
		case '=':
			break;
		default:
			return 0;
	}

	bVal = csv_boolean(csv_skip_whitespace(zCur + 1));
	if (bVal < 0)
		return 0;
	*pValue = bVal;
	return 1;
}

/*
** Constructor for a CsvTable: parse the module arguments, work out the
** column count and schema, and declare the virtual table.
**
** Parameters:
**    filename=FILENAME          Name of file containing CSV content
**    data=TEXT                  Direct CSV content.
**    schema=SCHEMA              Alternative CSV schema.
**    header=YES|NO              First row of CSV defines the names of
**                               columns if "yes".  Default "no".
**    columns=N                  Assume the CSV file contains N columns.
**
** Only available if compiled with SQLITE_TEST:
**
**    testflags=N                Bitmask of test flags.  Optional
**
** Exactly one of filename= and data= must be given.
**
** If schema= is omitted, then the columns are named "c0", "c1", "c2",
** and so forth.  If columns=N is omitted, then the file is opened and
** the number of columns in the first row is counted to determine the
** column count.  If header=YES, then the first row is skipped.
**
** Returns SQLITE_OK on success.  On failure returns SQLITE_NOMEM or
** SQLITE_ERROR (or the code from sqlite3_declare_vtab()) and, when a
** message is available, stores it in *pzErr.
*/
static int csvtabConnect(
	sqlite3 *db,
	void *pAux,
	int argc, const char *const *argv,
	sqlite3_vtab **ppVtab,
	char **pzErr)
{
	CsvTable *pNew = 0; /* The CsvTable object to construct */
	int bHeader = -1;   /* header= flags.  -1 means not seen yet */
	int rc = SQLITE_OK; /* Result code from this routine */
	int i, j;           /* Loop counters */
# ifdef SQLITE_TEST
	int tstFlags = 0; /* Value for testflags=N parameter */
# endif
	int b;          /* Value of a boolean parameter */
	int nCol = -99; /* Value of the columns= parameter */
	CsvReader sRdr; /* A CSV file reader used to store an error
	                ** message and/or to count the number of columns */
	static const char *azParam[] = {
		"filename",
		"data",
		"schema",
	};
	char *azPValue[3]; /* Parameter values */
	/* Named accessors for the azPValue[] slots; order matches azParam[] */
# define CSV_FILENAME (azPValue[0])
# define CSV_DATA (azPValue[1])
# define CSV_SCHEMA (azPValue[2])

	assert(sizeof(azPValue) == sizeof(azParam));
	memset(&sRdr, 0, sizeof(sRdr));
	memset(azPValue, 0, sizeof(azPValue));
	/* Module arguments start at argv[3]; the first three entries are
	** not treated as parameters. */
	for (i = 3; i < argc; i++)
	{
		const char *z = argv[i];
		const char *zValue;
		/* Try each string-valued parameter in turn */
		for (j = 0; j < sizeof(azParam) / sizeof(azParam[0]); j++)
		{
			if (csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]))
				break;
		}
		if (j < sizeof(azParam) / sizeof(azParam[0]))
		{
			/* A string parameter matched; it may still have failed */
			if (sRdr.zErr[0])
				goto csvtab_connect_error;
		}
		else if (csv_boolean_parameter("header", 6, z, &b))
		{
			if (bHeader >= 0)
			{
				csv_errmsg(&sRdr, "more than one 'header' parameter");
				goto csvtab_connect_error;
			}
			bHeader = b;
		}
		else
# ifdef SQLITE_TEST
			if ((zValue = csv_parameter("testflags", 9, z)) != 0)
		{
			tstFlags = (unsigned int)atoi(zValue);
		}
		else
# endif
			if ((zValue = csv_parameter("columns", 7, z)) != 0)
		{
			if (nCol > 0)
			{
				csv_errmsg(&sRdr, "more than one 'columns' parameter");
				goto csvtab_connect_error;
			}
			nCol = atoi(zValue);
			if (nCol <= 0)
			{
				csv_errmsg(&sRdr, "column= value must be positive");
				goto csvtab_connect_error;
			}
		}
		else
		{
			csv_errmsg(&sRdr, "bad parameter: '%s'", z);
			goto csvtab_connect_error;
		}
	}
	/* True when both are missing or both are present */
	if ((CSV_FILENAME == 0) == (CSV_DATA == 0))
	{
		csv_errmsg(&sRdr, "must specify either filename= or data= but not both");
		goto csvtab_connect_error;
	}

	/* The input only needs to be opened here when the first row must be
	** examined: to count columns or to consume a header row. */
	if ((nCol <= 0 || bHeader == 1) && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA))
	{
		goto csvtab_connect_error;
	}
	pNew = sqlite3_malloc(sizeof(*pNew));
	*ppVtab = (sqlite3_vtab *)pNew;
	if (pNew == 0)
		goto csvtab_connect_oom;
	memset(pNew, 0, sizeof(*pNew));
	if (CSV_SCHEMA == 0)
	{
		/* No schema= given: synthesize a CREATE TABLE statement */
		sqlite3_str *pStr = sqlite3_str_new(0);
		char *zSep = "";
		int iCol = 0;
		sqlite3_str_appendf(pStr, "CREATE TABLE x(");
		if (nCol < 0 && bHeader < 1)
		{
			/* No columns= and no header: count fields in the first row */
			nCol = 0;
			do
			{
				csv_read_one_field(&sRdr);
				nCol++;
			} while (sRdr.cTerm == ',');
		}
		if (nCol > 0 && bHeader < 1)
		{
			/* Default column names c0, c1, c2, ... */
			for (iCol = 0; iCol < nCol; iCol++)
			{
				sqlite3_str_appendf(pStr, "%sc%d TEXT", zSep, iCol);
				zSep = ",";
			}
		}
		else
		{
			/* Take the column names from the header row */
			do
			{
				char *z = csv_read_one_field(&sRdr);
				if ((nCol > 0 && iCol < nCol) || (nCol < 0 && bHeader))
				{
					sqlite3_str_appendf(pStr, "%s\"%w\" TEXT", zSep, z);
					zSep = ",";
					iCol++;
				}
			} while (sRdr.cTerm == ',');
			if (nCol < 0)
			{
				nCol = iCol;
			}
			else
			{
				/* Header had fewer names than columns=N: pad the rest.
				** NOTE(review): the pre-increment names the first padded
				** column c<iCol+1>, not c<iCol> - confirm intended. */
				while (iCol < nCol)
				{
					sqlite3_str_appendf(pStr, "%sc%d TEXT", zSep, ++iCol);
					zSep = ",";
				}
			}
		}
		pNew->nCol = nCol;
		sqlite3_str_appendf(pStr, ")");
		CSV_SCHEMA = sqlite3_str_finish(pStr);
		if (CSV_SCHEMA == 0)
			goto csvtab_connect_oom;
	}
	else if (nCol < 0)
	{
		/* schema= given without columns=: count fields in the first row */
		do
		{
			csv_read_one_field(&sRdr);
			pNew->nCol++;
		} while (sRdr.cTerm == ',');
	}
	else
	{
		/* NOTE(review): with schema=, columns= and header=yes together no
		** row is read here, so the iStart computed below stays at the
		** start of the input - confirm the header row is meant to be kept. */
		pNew->nCol = nCol;
	}
	/* Ownership of the filename/data strings moves to the table object;
	** clearing the slots keeps the cleanup loops from freeing them. */
	pNew->zFilename = CSV_FILENAME;
	CSV_FILENAME = 0;
	pNew->zData = CSV_DATA;
	CSV_DATA = 0;
# ifdef SQLITE_TEST
	pNew->tstFlags = tstFlags;
# endif
	/* Record where the data rows begin.
	** NOTE(review): iStart is a long but the offsets are cast to int. */
	if (bHeader != 1)
	{
		pNew->iStart = 0;
	}
	else if (pNew->zData)
	{
		pNew->iStart = (int)sRdr.iIn;
	}
	else
	{
		/* File position minus what is still unread in the input buffer */
		pNew->iStart = (int)(ftell(sRdr.in) - sRdr.nIn + sRdr.iIn);
	}
	csv_reader_reset(&sRdr);
	rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
	if (rc)
	{
		csv_errmsg(&sRdr, "bad schema: '%s' - %s", CSV_SCHEMA, sqlite3_errmsg(db));
		goto csvtab_connect_error;
	}
	for (i = 0; i < sizeof(azPValue) / sizeof(azPValue[0]); i++)
	{
		sqlite3_free(azPValue[i]);
	}
	/* Rationale for DIRECTONLY:
	** An attacker who controls a database schema could use this vtab
	** to exfiltrate sensitive data from other files in the filesystem.
	** And, recommended practice is to put all CSV virtual tables in the
	** TEMP namespace, so they should still be usable from within TEMP
	** views, so there shouldn't be a serious loss of functionality by
	** prohibiting the use of this vtab from persistent triggers and views.
	*/
	sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY);
	return SQLITE_OK;

	/* Out-of-memory exit: falls through into the common error path */
csvtab_connect_oom:
	rc = SQLITE_NOMEM;
	csv_errmsg(&sRdr, "out of memory");

	/* Common error exit: release everything and report sRdr.zErr.
	** NOTE(review): *ppVtab may still hold the freed pNew pointer here. */
csvtab_connect_error:
	if (pNew)
		csvtabDisconnect(&pNew->base);
	for (i = 0; i < sizeof(azPValue) / sizeof(azPValue[0]); i++)
	{
		sqlite3_free(azPValue[i]);
	}
	if (sRdr.zErr[0])
	{
		sqlite3_free(*pzErr);
		*pzErr = sqlite3_mprintf("%s", sRdr.zErr);
	}
	csv_reader_reset(&sRdr);
	if (rc == SQLITE_OK)
		rc = SQLITE_ERROR;
	return rc;
}

/*
** Discard the row currently held by a CsvCursor: every column value is
** freed and its slot and recorded length are cleared.
*/
static void csvtabCursorRowReset(CsvCursor *pCur)
{
	CsvTable *pTable = (CsvTable *)pCur->base.pVtab;
	const int nCol = pTable->nCol;
	int iCol = 0;

	while (iCol < nCol)
	{
		sqlite3_free(pCur->azVal[iCol]);
		pCur->azVal[iCol] = 0;
		pCur->aLen[iCol] = 0;
		iCol++;
	}
}

/*
** xCreate simply forwards to xConnect.  The two must nevertheless be
** distinct functions so that the virtual table is not an eponymous
** virtual table.
*/
static int csvtabCreate(
	sqlite3 *db,
	void *pAux,
	int argc, const char *const *argv,
	sqlite3_vtab **ppVtab,
	char **pzErr)
{
	int rc = csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
	return rc;
}

/*
** Destructor for a CsvCursor: releases the current row, closes the
** reader and frees the cursor allocation.  Always returns SQLITE_OK.
*/
static int csvtabClose(sqlite3_vtab_cursor *cur)
{
	CsvCursor *pCsr = (CsvCursor *)cur;

	csvtabCursorRowReset(pCsr);
	csv_reader_reset(&pCsr->rdr);
	sqlite3_free(pCsr);
	return SQLITE_OK;
}

/*
** Constructor for a new CsvTable cursor object.
**
** The cursor, its azVal[] array and its aLen[] array (each with one
** entry per table column) are carved out of a single zeroed allocation.
** The reader is then opened on the table's file or in-memory data.
**
** Returns SQLITE_OK and sets *ppCursor on success.  Returns SQLITE_NOMEM
** if the allocation fails.  If the input cannot be opened, the reader's
** message is copied to the table, the cursor is freed and SQLITE_ERROR
** is returned; *ppCursor is left untouched in that case.  Freeing here
** is required because xClose is only invoked for cursors whose xOpen
** succeeded, so nobody else would release the allocation.
*/
static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor)
{
	CsvTable *pTab = (CsvTable *)p;
	CsvCursor *pCur;
	size_t nByte;
	nByte = sizeof(*pCur) + (sizeof(char *) + sizeof(int)) * pTab->nCol;
	pCur = sqlite3_malloc64(nByte);
	if (pCur == 0)
		return SQLITE_NOMEM;
	memset(pCur, 0, nByte);
	/* azVal[] sits right behind the cursor struct, aLen[] behind azVal[] */
	pCur->azVal = (char **)&pCur[1];
	pCur->aLen = (int *)&pCur->azVal[pTab->nCol];
	if (csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData))
	{
		csv_xfer_error(pTab, &pCur->rdr);
		/* A failed csv_reader_open() leaves the reader owning nothing,
		** so the cursor block itself is all that needs to be freed. */
		sqlite3_free(pCur);
		return SQLITE_ERROR;
	}
	*ppCursor = &pCur->base;
	return SQLITE_OK;
}

/*
** Advance a CsvCursor to its next row of input.
**
** Fields are read until one ends with something other than ','.  The
** first nCol fields are copied into the cursor's azVal[] slots (growing
** a slot when it is too small); surplus fields are read and dropped.
** Columns for which the row supplied no field are set to NULL.
**
** If no field at all could be read the rowid is set to -1, which marks
** end of input.  This routine always returns SQLITE_OK; an allocation
** failure stops the row early and leaves a message on the table.
*/
static int csvtabNext(sqlite3_vtab_cursor *cur)
{
	CsvCursor *pCsr = (CsvCursor *)cur;
	CsvTable *pTable = (CsvTable *)cur->pVtab;
	CsvReader *pRdr = &pCsr->rdr;
	int nSeen = 0; /* Number of column values stored for this row */
	char *zField;

	for (;;)
	{
		zField = csv_read_one_field(pRdr);
		if (zField == 0)
			break;
		if (nSeen < pTable->nCol)
		{
			int nNeed = pRdr->n + 1; /* Field text plus terminator */
			if (pCsr->aLen[nSeen] < nNeed)
			{
				char *zBuf = sqlite3_realloc64(pCsr->azVal[nSeen], nNeed);
				if (zBuf == 0)
				{
					csv_errmsg(pRdr, "out of memory");
					csv_xfer_error(pTable, pRdr);
					break;
				}
				pCsr->azVal[nSeen] = zBuf;
				pCsr->aLen[nSeen] = nNeed;
			}
			memcpy(pCsr->azVal[nSeen], zField, nNeed);
			nSeen++;
		}
		if (pRdr->cTerm != ',')
			break;
	}

	if (zField == 0 && nSeen == 0)
	{
		pCsr->iRowid = -1;
		return SQLITE_OK;
	}

	pCsr->iRowid++;
	/* Clear the columns this row did not provide */
	for (; nSeen < pTable->nCol; nSeen++)
	{
		sqlite3_free(pCsr->azVal[nSeen]);
		pCsr->azVal[nSeen] = 0;
		pCsr->aLen[nSeen] = 0;
	}
	return SQLITE_OK;
}

/*
** Hand the value of column i of the current row to SQLite.
**
** The text is passed with SQLITE_TRANSIENT.  No result is set when i is
** out of range or when the column has no value in the current row.
** Always returns SQLITE_OK.
*/
static int csvtabColumn(
	sqlite3_vtab_cursor *cur, /* The cursor */
	sqlite3_context *ctx,     /* First argument to sqlite3_result_...() */
	int i                     /* Which column to return */
)
{
	CsvCursor *pCsr = (CsvCursor *)cur;
	CsvTable *pTable = (CsvTable *)cur->pVtab;
	const char *zVal;

	if (i < 0 || i >= pTable->nCol)
		return SQLITE_OK;

	zVal = pCsr->azVal[i];
	if (zVal != 0)
		sqlite3_result_text(ctx, zVal, -1, SQLITE_TRANSIENT);
	return SQLITE_OK;
}

/*
** Report the rowid of the row the cursor is positioned on by writing
** the cursor's iRowid to *pRowid.  Always returns SQLITE_OK.
*/
static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid)
{
	*pRowid = ((CsvCursor *)cur)->iRowid;
	return SQLITE_OK;
}

/*
** Return 1 once the cursor has moved past the last row of output,
** which is signalled by a negative rowid, and 0 otherwise.
*/
static int csvtabEof(sqlite3_vtab_cursor *cur)
{
	const CsvCursor *pCsr = (const CsvCursor *)cur;
	if (pCsr->iRowid < 0)
		return 1;
	return 0;
}

/*
** Begin a scan.  Only a full table scan is supported, so xFilter just
** rewinds the input to the first data row (CsvTable.iStart) and then
** loads that row through csvtabNext().
**
** Returns SQLITE_NOMEM if the field buffer cannot be allocated,
** otherwise the result of csvtabNext().
*/
static int csvtabFilter(
	sqlite3_vtab_cursor *pVtabCursor,
	int idxNum, const char *idxStr,
	int argc, sqlite3_value **argv)
{
	CsvCursor *pCsr = (CsvCursor *)pVtabCursor;
	CsvTable *pTable = (CsvTable *)pVtabCursor->pVtab;
	CsvReader *pRdr = &pCsr->rdr;

	pCsr->iRowid = 0;

	/* Make sure the field buffer exists before the first read.  An
	** empty first field would otherwise yield a NULL buffer, which
	** csvtabNext() cannot tell apart from an OOM error. */
	if (csv_append(pRdr, 0))
		return SQLITE_NOMEM;

	if (pRdr->in != 0)
	{
		/* File input: seek and invalidate whatever is buffered */
		fseek(pRdr->in, pTable->iStart, SEEK_SET);
		pRdr->iIn = 0;
		pRdr->nIn = 0;
	}
	else
	{
		/* In-memory input: just move the read index */
		assert(pRdr->zIn == pTable->zData);
		assert(pTable->iStart >= 0);
		assert((size_t)pTable->iStart <= pRdr->nIn);
		pRdr->iIn = pTable->iStart;
	}
	return csvtabNext(pVtabCursor);
}

/*
** Only a forward full table scan is supported, so xBestIndex does little
** more than report a fixed cost of one million.  When the CSVTEST_FIDX
** test flag is set (SQLITE_TEST builds only), equality-style constraints
** lower the estimated cost.  That is fiction, but it is useful for
** exercising certain kinds of virtual table behavior.
*/
static int csvtabBestIndex(
	sqlite3_vtab *tab,
	sqlite3_index_info *pIdxInfo)
{
	pIdxInfo->estimatedCost = 1000000;
# ifdef SQLITE_TEST
	if ((((CsvTable *)tab)->tstFlags & CSVTEST_FIDX) != 0)
	{
		/* The usual (and sensible) case is to always do a full table scan.
		** The code in this branch only runs when testflags=1.  It
		** generates an artificial and unrealistic plan which is useful
		** for testing virtual table logic but is not helpful to real
		** applications.
		**
		** Any usable ==, LIKE, or GLOB constraint is claimed by the
		** virtual table (even though it does not act on it) and the cost
		** is reduced from 1 million to just 10.  The constraints are *not*
		** marked as omittable, however, so the query planner should still
		** generate a plan that gives a correct answer, even if the plan
		** is not optimal.
		**
		** NOTE(review): aConstraintUsage[] is indexed by the running
		** count of accepted constraints rather than by the constraint's
		** own position - confirm this is intended.
		*/
		int iCons;
		int nUsed = 0;
		for (iCons = 0; iCons < pIdxInfo->nConstraint; iCons++)
		{
			if (pIdxInfo->aConstraint[iCons].usable == 0)
				continue;
			switch (pIdxInfo->aConstraint[iCons].op)
			{
				case SQLITE_INDEX_CONSTRAINT_EQ:
				case SQLITE_INDEX_CONSTRAINT_LIKE:
				case SQLITE_INDEX_CONSTRAINT_GLOB:
					pIdxInfo->estimatedCost = 10;
					pIdxInfo->aConstraintUsage[nUsed].argvIndex = nUsed + 1;
					nUsed++;
					break;
				default:
					break;
			}
		}
	}
# endif
	return SQLITE_OK;
}

/*
** Method table for the read-only "csv" virtual table module.  Entries
** are positional; a 0 means the method is not provided.  xDisconnect
** and xDestroy share one implementation.
*/
static sqlite3_module CsvModule = {
	0,                /* iVersion */
	csvtabCreate,     /* xCreate */
	csvtabConnect,    /* xConnect */
	csvtabBestIndex,  /* xBestIndex */
	csvtabDisconnect, /* xDisconnect */
	csvtabDisconnect, /* xDestroy */
	csvtabOpen,       /* xOpen - open a cursor */
	csvtabClose,      /* xClose - close a cursor */
	csvtabFilter,     /* xFilter - configure scan constraints */
	csvtabNext,       /* xNext - advance a cursor */
	csvtabEof,        /* xEof - check for end of scan */
	csvtabColumn,     /* xColumn - read data */
	csvtabRowid,      /* xRowid - read data */
	0,                /* xUpdate */
	0,                /* xBegin */
	0,                /* xSync */
	0,                /* xCommit */
	0,                /* xRollback */
	0,                /* xFindMethod */
	0,                /* xRename */
	0,                /* xSavepoint */
	0,                /* xRelease */
	0,                /* xRollbackTo */
	0,                /* xShadowName */
	0                 /* xIntegrity */
};

# ifdef SQLITE_TEST
/*
** Virtual table testing needs a variant of the CSV table that exposes
** an xUpdate method.  The CSV input is not really writable, so this
** xUpdate ignores its arguments and always reports SQLITE_READONLY.
*/
static int csvtabUpdate(sqlite3_vtab *p, int n, sqlite3_value **v, sqlite3_int64 *x)
{
	(void)p;
	(void)n;
	(void)v;
	(void)x;
	return SQLITE_READONLY;
}
/*
** Method table for the test-only "csv_wr" module.  It is the same as
** CsvModule except that xUpdate is populated with csvtabUpdate.
*/
static sqlite3_module CsvModuleFauxWrite = {
	0,                /* iVersion */
	csvtabCreate,     /* xCreate */
	csvtabConnect,    /* xConnect */
	csvtabBestIndex,  /* xBestIndex */
	csvtabDisconnect, /* xDisconnect */
	csvtabDisconnect, /* xDestroy */
	csvtabOpen,       /* xOpen - open a cursor */
	csvtabClose,      /* xClose - close a cursor */
	csvtabFilter,     /* xFilter - configure scan constraints */
	csvtabNext,       /* xNext - advance a cursor */
	csvtabEof,        /* xEof - check for end of scan */
	csvtabColumn,     /* xColumn - read data */
	csvtabRowid,      /* xRowid - read data */
	csvtabUpdate,     /* xUpdate */
	0,                /* xBegin */
	0,                /* xSync */
	0,                /* xCommit */
	0,                /* xRollback */
	0,                /* xFindMethod */
	0,                /* xRename */
	0,                /* xSavepoint */
	0,                /* xRelease */
	0,                /* xRollbackTo */
	0,                /* xShadowName */
	0                 /* xIntegrity */
};
# endif /* SQLITE_TEST */

#endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */

/*
** Extension entry point, called when the extension is loaded.
**
** Registers the "csv" virtual table module with the calling database
** connection and, in SQLITE_TEST builds, the "csv_wr" variant as well.
** Returns the result of the module registration.  When virtual tables
** are compiled out (SQLITE_OMIT_VIRTUALTABLE) nothing is registered and
** SQLITE_OK is returned.
*/
#ifdef _WIN32
__declspec(dllexport)
#endif
int
sqlite3_csv_init(
	sqlite3 *db,
	char **pzErrMsg,
	const sqlite3_api_routines *pApi)
{
#ifdef SQLITE_OMIT_VIRTUALTABLE
	return SQLITE_OK;
#else
	int rc;
	SQLITE_EXTENSION_INIT2(pApi);
	rc = sqlite3_create_module(db, "csv", &CsvModule, 0);
# ifdef SQLITE_TEST
	if (rc != SQLITE_OK)
		return rc;
	rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
# endif
	return rc;
#endif
}