SQLite

Check-in [75d573080d]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:The built-in substr() function applied to a BLOB counts bytes, not characters. (CVS 3997)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 75d573080d03ee48fe88710f70c6875ff9cae19c
User & Date: drh 2007-05-15 01:13:47.000
Context
2007-05-15
02:34
Fix a bug in sqlite3_mprintf() which could have caused a buffer overrun if malloc() failed. (CVS 3998) (check-in: 5af49a57d4 user: drh tags: trunk)
01:13
The built-in substr() function applied to a BLOB counts bytes, not characters. (CVS 3997) (check-in: 75d573080d user: drh tags: trunk)
00:09
Make sure the parser aborts quickly following a syntax error. (CVS 3996) (check-in: d07cdd3c09 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/func.c.
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
** This file contains the C functions that implement various SQL
** functions of SQLite.  
**
** There is only one exported symbol in this file - the function
** sqliteRegisterBuildinFunctions() found at the bottom of the file.
** All other code has file scope.
**
** $Id: func.c,v 1.155 2007/05/12 06:11:12 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>
/* #include <math.h> */
#include <stdlib.h>
#include <assert.h>
#include "vdbeInt.h"







|







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
** This file contains the C functions that implement various SQL
** functions of SQLite.  
**
** There is only one exported symbol in this file - the function
** sqliteRegisterBuildinFunctions() found at the bottom of the file.
** All other code has file scope.
**
** $Id: func.c,v 1.156 2007/05/15 01:13:47 drh Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>
/* #include <math.h> */
#include <stdlib.h>
#include <assert.h>
#include "vdbeInt.h"
142
143
144
145
146
147
148
149







150
151
152
153
154
155
156
157
158
159

160
161
162






163
164


165
166
167
168
169
170
171
172
173
174
175
176
177
178
179

180
181
182
183
184
185
186
187
188
189




190
191
192
193
194
195
196
      sqlite3_result_double(context, rVal);
      break;
    }
  }
}

/*
** Implementation of the substr() function
**
** substr(x,p1,p2): argv[0] is x, argv[1] is p1 and argv[2] is p2.
** x is always fetched as text and positions are counted in UTF-8
** characters (bytes of the form 10xxxxxx are not counted).
**
** p1 is 1-indexed; a p1 of 0 is handled the same as 1.  A negative p1
** counts back from the end of x.  If that lands before the start of x,
** the overshoot is subtracted from p2 and the start becomes 0.
**
** If the text pointer is NULL the function returns without setting
** any result.
*/
static void substrFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  const unsigned char *z;    /* The input text x */
  const unsigned char *z2;   /* Cursor used while counting characters */
  int i;                     /* Byte index into z[] */
  int len;                   /* Number of characters in z[] */

  i64 p1, p2;                /* Start and length; chars first, bytes later */

  assert( argc==3 );






  z = sqlite3_value_text(argv[0]);
  if( z==0 ) return;


  p1 = sqlite3_value_int(argv[1]);
  p2 = sqlite3_value_int(argv[2]);
  /* Count characters: every byte that is not 10xxxxxx starts one. */
  for(len=0, z2=z; *z2; z2++){ if( (0xc0&*z2)!=0x80 ) len++; }
  if( p1<0 ){
    /* Negative start: measure from the end of the string. */
    p1 += len;
    if( p1<0 ){
      /* Start is before the beginning: shorten p2 by the overshoot. */
      p2 += p1;
      p1 = 0;
    }
  }else if( p1>0 ){
    /* Convert the 1-based start into a 0-based one. */
    p1--;
  }
  /* Do not let the requested range run past the end of the string. */
  if( p1+p2>len ){
    p2 = len-p1;
  }

  /* Turn p1 from a character index into a byte offset: each continuation
  ** byte met before reaching p1 pushes p1 one byte further. */
  for(i=0; i<p1 && z[i]; i++){
    if( (z[i]&0xc0)==0x80 ) p1++;
  }
  /* If z[p1] is a continuation byte, advance to the next lead byte. */
  while( z[i] && (z[i]&0xc0)==0x80 ){ i++; p1++; }
  /* Turn p2 from a character count into a byte count the same way. */
  for(; i<p1+p2 && z[i]; i++){
    if( (z[i]&0xc0)==0x80 ) p2++;
  }
  /* Include the remaining continuation bytes of the last character. */
  while( z[i] && (z[i]&0xc0)==0x80 ){ i++; p2++; }
  /* A negative length is passed on as zero. */
  if( p2<0 ) p2 = 0;
  sqlite3_result_text(context, (char*)&z[p1], p2, SQLITE_TRANSIENT);




}

/*
** Implementation of the round() function
*/
static void roundFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
  int n = 0;







|
>
>
>
>
>
>
>










>



>
>
>
>
>
>
|
|
>
>


<












>
|
|
|
|
|
|
|
|
|
|
>
>
>
>







142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182

183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
      sqlite3_result_double(context, rVal);
      break;
    }
  }
}

/*
** Implementation of the substr() function.
**
** substr(x,p1,p2)  returns p2 characters of x[] beginning with p1.
** p1 is 1-indexed.  So substr(x,1,1) returns the first character
** of x.  If x is text, then we actually count UTF-8 characters.
** If x is a blob, then we count bytes.
**
** If p1 is negative, then we begin abs(p1) from the end of x[].
**
** Further details visible in the code below:
**
**   *  argv[0] is x, argv[1] is p1 and argv[2] is p2.
**   *  A p1 of 0 is handled the same as a p1 of 1.
**   *  If a negative p1 reaches back past the start of x[], the
**      overshoot is subtracted from p2 and the start becomes 0.
**   *  p2 is reduced so that p1+p2 does not exceed the length of x[],
**      and a negative p2 is passed to the result routine as 0.
**   *  If the blob or text pointer is NULL the function returns
**      without setting any result.
**   *  A blob input produces a blob result; any other input produces
**      a text result.
*/
static void substrFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  const unsigned char *z;    /* The input x, as blob bytes or text */
  const unsigned char *z2;   /* Cursor used while counting characters */
  int i;                     /* Byte index into z[] (text case only) */
  int len;                   /* Length of x: bytes for blob, chars for text */
  int p0type;                /* Datatype of argv[0] */
  i64 p1, p2;                /* Start and length; see conversion below */

  assert( argc==3 );
  p0type = sqlite3_value_type(argv[0]);
  if( p0type==SQLITE_BLOB ){
    /* Blob: the length is simply the byte count. */
    len = sqlite3_value_bytes(argv[0]);
    z = sqlite3_value_blob(argv[0]);
    if( z==0 ) return;
  }else{
    /* Anything else is read as text and measured in characters:
    ** every byte that is not of the form 10xxxxxx starts one. */
    z = sqlite3_value_text(argv[0]);
    if( z==0 ) return;
    for(len=0, z2=z; *z2; z2++){ if( (0xc0&*z2)!=0x80 ) len++; }
  }
  p1 = sqlite3_value_int(argv[1]);
  p2 = sqlite3_value_int(argv[2]);

  if( p1<0 ){
    /* Negative start: measure from the end of x[]. */
    p1 += len;
    if( p1<0 ){
      /* Start is before the beginning: shorten p2 by the overshoot. */
      p2 += p1;
      p1 = 0;
    }
  }else if( p1>0 ){
    /* Convert the 1-based start into a 0-based one. */
    p1--;
  }
  /* Do not let the requested range run past the end of x[]. */
  if( p1+p2>len ){
    p2 = len-p1;
  }
  if( p0type!=SQLITE_BLOB ){
    /* Text: p1 and p2 are still in characters here.  Turn p1 into a byte
    ** offset: each continuation byte met before reaching p1 pushes p1 one
    ** byte further. */
    for(i=0; i<p1 && z[i]; i++){
      if( (z[i]&0xc0)==0x80 ) p1++;
    }
    /* If z[p1] is a continuation byte, advance to the next lead byte. */
    while( z[i] && (z[i]&0xc0)==0x80 ){ i++; p1++; }
    /* Turn p2 from a character count into a byte count the same way. */
    for(; i<p1+p2 && z[i]; i++){
      if( (z[i]&0xc0)==0x80 ) p2++;
    }
    /* Include the remaining continuation bytes of the last character. */
    while( z[i] && (z[i]&0xc0)==0x80 ){ i++; p2++; }
    if( p2<0 ) p2 = 0;
    sqlite3_result_text(context, (char*)&z[p1], p2, SQLITE_TRANSIENT);
  }else{
    /* Blob: p1 and p2 are already byte offsets, so no conversion. */
    if( p2<0 ) p2 = 0;
    sqlite3_result_blob(context, (char*)&z[p1], p2, SQLITE_TRANSIENT);
  }
}

/*
** Implementation of the round() function
*/
static void roundFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
  int n = 0;
Added test/substr.test.














































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# 2007 May 14
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this file is testing the built-in SUBSTR() functions.
#
# $Id: substr.test,v 1.1 2007/05/15 01:13:47 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Create a table to work with.
#
execsql { 
  CREATE TABLE t1(t text, b blob)
}
# Run one SUBSTR() test case on a text value, in two forms.
#
#   id      Suffix for the test names: substr-ID.1 and substr-ID.2
#   string  The text value handed to substr() as its first argument
#   i1 i2   Second and third arguments of substr()
#   result  The value substr() is expected to return
#
# Test .1 stores the string in column t of table t1 and applies substr()
# to that column.  Test .2 applies substr() to the same text written as
# an SQL string literal.
#
proc substr-test {id string i1 i2 result} {
  # The SQL here is brace-quoted, so Tcl itself does not substitute the
  # string variable.  Presumably the SQLite Tcl interface binds it as a
  # parameter - confirm against the interface documentation.
  db eval {
    DELETE FROM t1;
    INSERT INTO t1(t) VALUES($string)
  }
  # subst fills in i1 and i2 before the script is handed to do_test.
  # do_test comes from tester.tcl, which is not shown here; presumably it
  # compares the script result with the one-element list built from result.
  do_test substr-$id.1 [subst {
    execsql {
      SELECT substr(t, $i1, $i2) FROM t1
    }
  }] [list $result]
  # Build an SQL string literal: double every single quote in the string
  # and wrap the whole thing in single quotes.
  set qstr '[string map {' ''} $string]'
  do_test substr-$id.2 [subst {
    execsql {
      SELECT substr($qstr, $i1, $i2)
    }
  }] [list $result]
}
# Run one SUBSTR() test case on a BLOB value, in two forms.
#
#   id         Suffix for the test names: substr-ID.1 and substr-ID.2
#   hex        The blob content written as hexadecimal digits
#   i1 i2      Second and third arguments of substr()
#   hexresult  Expected value of hex(substr(...))
#
# Test .1 stores the blob in column b of table t1 and applies substr()
# to that column.  Test .2 applies substr() to the same blob written as
# an x'...' literal.  Both wrap the call in hex() so the result can be
# compared as text.
#
proc subblob-test {id hex i1 i2 hexresult} {
  # The SQL is double-quoted here so that Tcl substitutes the hex digits
  # directly into the x'...' blob literal.
  db eval "
    DELETE FROM t1;
    INSERT INTO t1(b) VALUES(x'$hex')
  "
  # subst fills in i1, i2 and hex before the script is handed to do_test.
  # do_test comes from tester.tcl, which is not shown here; presumably it
  # compares the script result with the one-element list from hexresult.
  do_test substr-$id.1 [subst {
    execsql {
      SELECT hex(substr(b, $i1, $i2)) FROM t1
    }
  }] [list $hexresult]
  do_test substr-$id.2 [subst {
    execsql {
      SELECT hex(substr(x'$hex', $i1, $i2))
    }
  }] [list $hexresult]
}

# Basic SUBSTR functionality
#
substr-test 1.1 abcdefg 1 1 a
substr-test 1.2 abcdefg 2 1 b
substr-test 1.3 abcdefg 1 2 ab
substr-test 1.4 abcdefg 1 100 abcdefg
substr-test 1.5 abcdefg 0 1 a
substr-test 1.6 abcdefg -1 1 g
substr-test 1.7 abcdefg -1 10 g
substr-test 1.8 abcdefg -5 3 cde
substr-test 1.9 abcdefg -7 3 abc
substr-test 1.10 abcdefg -100 98 abcde

# Make sure everything works with long unicode characters
#
substr-test 2.1 \u1234\u2345\u3456 1 1 \u1234
substr-test 2.2 \u1234\u2345\u3456 2 1 \u2345
substr-test 2.3 \u1234\u2345\u3456 1 2 \u1234\u2345
substr-test 2.4 \u1234\u2345\u3456 -1 1 \u3456
substr-test 2.5 a\u1234b\u2345c\u3456c -5 3 b\u2345c

# Basic functionality for BLOBs
#
subblob-test 3.1 61626364656667 1 1 61
subblob-test 3.2 61626364656667 2 1 62
subblob-test 3.3 61626364656667 1 2 6162
subblob-test 3.4 61626364656667 1 100 61626364656667
subblob-test 3.5 61626364656667 0 1 61
subblob-test 3.6 61626364656667 -1 1 67
subblob-test 3.7 61626364656667 -1 10 67
subblob-test 3.8 61626364656667 -5 3 636465
subblob-test 3.9 61626364656667 -7 3 616263
subblob-test 3.10 61626364656667 -100 98 6162636465

# If these blobs were strings, then they would contain multi-byte
# characters.  But since they are blobs, the substr indices refer
# to bytes.
#
subblob-test 4.1 61E188B462E28D8563E3919663 1 1 61
subblob-test 4.2 61E188B462E28D8563E3919663 2 1 E1
subblob-test 4.3 61E188B462E28D8563E3919663 1 2 61E1
subblob-test 4.4 61E188B462E28D8563E3919663 -2 1 96
subblob-test 4.5 61E188B462E28D8563E3919663 -5 4 63E39196
subblob-test 4.6 61E188B462E28D8563E3919663 -100 98 61E188B462E28D8563E391 

finish_test
Changes to www/lang.tcl.
1
2
3
4
5
6
7
8
9
10
11
#
# Run this Tcl script to generate the lang-*.html files.
#
set rcsid {$Id: lang.tcl,v 1.128 2007/04/06 11:26:00 drh Exp $}
source common.tcl

if {[llength $argv]>0} {
  set outputdir [lindex $argv 0]
} else {
  set outputdir ""
}



|







1
2
3
4
5
6
7
8
9
10
11
#
# Run this Tcl script to generate the lang-*.html files.
#
set rcsid {$Id: lang.tcl,v 1.129 2007/05/15 01:13:47 drh Exp $}
source common.tcl

if {[llength $argv]>0} {
  set outputdir [lindex $argv 0]
} else {
  set outputdir ""
}
1451
1452
1453
1454
1455
1456
1457
1458
1459

1460
1461
1462
1463
1464
1465
1466

<tr>
<td valign="top" align="right">substr(<i>X</i>,<i>Y</i>,<i>Z</i>)</td>
<td valign="top">Return a substring of input string <i>X</i> that begins
with the <i>Y</i>-th character and which is <i>Z</i> characters long.
The left-most character of <i>X</i> is number 1.  If <i>Y</i> is negative
then the first character of the substring is found by counting from the
right rather than the left.  If SQLite is configured to support UTF-8,
then character indices refer to actual UTF-8 characters, not bytes.</td>

</tr>

<tr>
<td valign="top" align="right">
<a name="trimFunc">
trim(<i>X</i>)<br>trim(<i>X</i>,<i>Y</i>)</td>
<td valign="top">Return a string formed by removing any and all







|
|
>







1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467

<tr>
<td valign="top" align="right">substr(<i>X</i>,<i>Y</i>,<i>Z</i>)</td>
<td valign="top">Return a substring of input string <i>X</i> that begins
with the <i>Y</i>-th character and which is <i>Z</i> characters long.
The left-most character of <i>X</i> is number 1.  If <i>Y</i> is negative
then the first character of the substring is found by counting from the
right rather than the left.  If <i>X</i> is a string
then character indices refer to actual UTF-8 characters.  If
<i>X</i> is a BLOB then the indices refer to bytes.</td>
</tr>

<tr>
<td valign="top" align="right">
<a name="trimFunc">
trim(<i>X</i>)<br>trim(<i>X</i>,<i>Y</i>)</td>
<td valign="top">Return a string formed by removing any and all