SQLite

Check-in [786fe54556]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Make sqlite_encode_binary() and sqlite_decode_binary() an official part of the library. (CVS 1295)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 786fe545560ec6c42bb0e344345031f425bf177a
User & Date: drh 2004-03-14 22:12:00.000
Context
2004-03-14
22:12
Prototypes for sqlite_encode_binary() and sqlite_decode_binary() added to sqlite.h. (CVS 1296) (check-in: 359f0e787f user: drh tags: trunk)
22:12
Make sqlite_encode_binary() and sqlite_decode_binary() an official part of the library. (CVS 1295) (check-in: 786fe54556 user: drh tags: trunk)
11:57
Updates to the architecture document. (CVS 1294) (check-in: c661cc81b6 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to main.mk.
68
69
70
71
72
73
74

75
76
77
78
79
80
81
  $(TOP)/src/btree.c \
  $(TOP)/src/btree.h \
  $(TOP)/src/btree_rb.c \
  $(TOP)/src/build.c \
  $(TOP)/src/copy.c \
  $(TOP)/src/date.c \
  $(TOP)/src/delete.c \

  $(TOP)/src/expr.c \
  $(TOP)/src/func.c \
  $(TOP)/src/hash.c \
  $(TOP)/src/hash.h \
  $(TOP)/src/insert.c \
  $(TOP)/src/main.c \
  $(TOP)/src/os.c \







>







68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
  $(TOP)/src/btree.c \
  $(TOP)/src/btree.h \
  $(TOP)/src/btree_rb.c \
  $(TOP)/src/build.c \
  $(TOP)/src/copy.c \
  $(TOP)/src/date.c \
  $(TOP)/src/delete.c \
  $(TOP)/src/encode.c \
  $(TOP)/src/expr.c \
  $(TOP)/src/func.c \
  $(TOP)/src/hash.c \
  $(TOP)/src/hash.h \
  $(TOP)/src/insert.c \
  $(TOP)/src/main.c \
  $(TOP)/src/os.c \
264
265
266
267
268
269
270



271
272
273
274
275
276
277

date.o:	$(TOP)/src/date.c $(HDR)
	$(TCCX) -c $(TOP)/src/date.c

delete.o:	$(TOP)/src/delete.c $(HDR)
	$(TCCX) -c $(TOP)/src/delete.c




expr.o:	$(TOP)/src/expr.c $(HDR)
	$(TCCX) -c $(TOP)/src/expr.c

func.o:	$(TOP)/src/func.c $(HDR)
	$(TCCX) -c $(TOP)/src/func.c

hash.o:	$(TOP)/src/hash.c $(HDR)







>
>
>







265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281

date.o:	$(TOP)/src/date.c $(HDR)
	$(TCCX) -c $(TOP)/src/date.c

delete.o:	$(TOP)/src/delete.c $(HDR)
	$(TCCX) -c $(TOP)/src/delete.c

encode.o:	$(TOP)/src/encode.c
	$(TCCX) -c $(TOP)/src/encode.c

expr.o:	$(TOP)/src/expr.c $(HDR)
	$(TCCX) -c $(TOP)/src/expr.c

func.o:	$(TOP)/src/func.c $(HDR)
	$(TCCX) -c $(TOP)/src/func.c

hash.o:	$(TOP)/src/hash.c $(HDR)
Changes to publish.sh.
118
119
120
121
122
123
124


125
126
127
128
129
130
131
sqlite_trace
sqlite_compile
sqlite_step
sqlite_finalize
sqlite_reset
sqlite_bind
sqlite_last_statement_changes


END_OF_FILE
i386-mingw32msvc-dllwrap \
     --def sqlite.def -v --export-all \
     --driver-name i386-mingw32msvc-gcc \
     --dlltool-name i386-mingw32msvc-dlltool \
     --as i386-mingw32msvc-as \
     --target i386-mingw32 \







>
>







118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
sqlite_trace
sqlite_compile
sqlite_step
sqlite_finalize
sqlite_reset
sqlite_bind
sqlite_last_statement_changes
sqlite_encode_binary
sqlite_decode_binary
END_OF_FILE
i386-mingw32msvc-dllwrap \
     --def sqlite.def -v --export-all \
     --driver-name i386-mingw32msvc-gcc \
     --dlltool-name i386-mingw32msvc-dlltool \
     --as i386-mingw32msvc-as \
     --target i386-mingw32 \
Changes to src/encode.c.
11
12
13
14
15
16
17
18
19
20

21
22
23
24
25
26
27
*************************************************************************
** This file contains helper routines used to translate binary data into
** a null-terminated string (suitable for use in SQLite) and back again.
** These are convenience routines for use by people who want to store binary
** data in an SQLite database.  The code in this file is not used by any other
** part of the SQLite library.
**
** $Id: encode.c,v 1.10 2004/01/14 21:59:23 drh Exp $
*/
#include <string.h>


/*
** How This Encoder Works
**
** The output is allowed to contain any character except 0x27 (') and
** 0x00.  This is accomplished by using an escape character to encode
** 0x27 and 0x00 as a two-byte sequence.  The escape character is always







|


>







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
*************************************************************************
** This file contains helper routines used to translate binary data into
** a null-terminated string (suitable for use in SQLite) and back again.
** These are convenience routines for use by people who want to store binary
** data in an SQLite database.  The code in this file is not used by any other
** part of the SQLite library.
**
** $Id: encode.c,v 1.11 2004/03/14 22:12:00 drh Exp $
*/
#include <string.h>
#include <assert.h>

/*
** How This Encoder Works
**
** The output is allowed to contain any character except 0x27 (') and
** 0x00.  This is accomplished by using an escape character to encode
** 0x27 and 0x00 as a two-byte sequence.  The escape character is always
110
111
112
113
114
115
116




117
118
119
120
121

122
123

124
125
126
127
128
129
130
131
132
133
134
135
136
137
138



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156

157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219

220
221
222




223
224
225
226
227
228
229
** "out" must be able to hold at least 2 +(257*n)/254 bytes.
** In other words, the output will be expanded by as much as 3
** bytes for every 254 bytes of input plus 2 bytes of fixed overhead.
** (This is approximately 2 + 1.0118*n or about a 1.2% size increase.)
**
** The return value is the number of characters in the encoded
** string, excluding the "\000" terminator.




*/
int sqlite_encode_binary(const unsigned char *in, int n, unsigned char *out){
  int i, j, e, m;
  int cnt[256];
  if( n<=0 ){

    out[0] = 'x';
    out[1] = 0;

    return 1;
  }
  memset(cnt, 0, sizeof(cnt));
  for(i=n-1; i>=0; i--){ cnt[in[i]]++; }
  m = n;
  for(i=1; i<256; i++){
    int sum;
    if( i=='\'' ) continue;
    sum = cnt[i] + cnt[(i+1)&0xff] + cnt[(i+'\'')&0xff];
    if( sum<m ){
      m = sum;
      e = i;
      if( m==0 ) break;
    }
  }



  out[0] = e;
  j = 1;
  for(i=0; i<n; i++){
    int c = (in[i] - e)&0xff;
    if( c==0 ){
      out[j++] = 1;
      out[j++] = 1;
    }else if( c==1 ){
      out[j++] = 1;
      out[j++] = 2;
    }else if( c=='\'' ){
      out[j++] = 1;
      out[j++] = 3;
    }else{
      out[j++] = c;
    }
  }
  out[j] = 0;

  return j;
}

/*
** Decode the string "in" into binary data and write it into "out".
** This routine reverses the encoding created by sqlite_encode_binary().
** The output will always be a few bytes less than the input.  The number
** of bytes of output is returned.  If the input is not a well-formed
** encoding, -1 is returned.
**
** The "in" and "out" parameters may point to the same buffer in order
** to decode a string in place.
*/
int sqlite_decode_binary(const unsigned char *in, unsigned char *out){
  int i, c, e;
  e = *(in++);
  i = 0;
  while( (c = *(in++))!=0 ){
    if( c==1 ){
      c = *(in++);
      if( c==1 ){
        c = 0;
      }else if( c==2 ){
        c = 1;
      }else if( c==3 ){
        c = '\'';
      }else{
        return -1;
      }
    }
    out[i++] = (c + e)&0xff;
  }
  return i;
}

#ifdef ENCODER_TEST
#include <stdio.h>
/*
** The subroutines above are not tested by the usual test suite.  To test
** these routines, compile just this one file with a -DENCODER_TEST=1 option
** and run the result.
*/
int main(int argc, char **argv){
  int i, j, n, m, nOut, nByte;
  unsigned char in[30000];
  unsigned char out[33000];

  nByte = 0;
  for(i=0; i<sizeof(in); i++){
    printf("Test %d: ", i+1);
    n = rand() % (i+1);
    if( i%100==0 ){
      int k;
      for(j=k=0; j<n; j++){
        /* if( k==0 || k=='\'' ) k++; */
        in[j] = k;
        k = (k+1)&0xff;
      }
    }else{
      for(j=0; j<n; j++) in[j] = rand() & 0xff;
    }
    nByte += n;
    nOut = sqlite_encode_binary(in, n, out);

    if( nOut!=strlen(out) ){
      printf(" ERROR return value is %d instead of %d\n", nOut, strlen(out));
      exit(1);




    }
    m = (256*n + 1262)/253;
    printf("size %d->%d (max %d)", n, strlen(out)+1, m);
    if( strlen(out)+1>m ){
      printf(" ERROR output too big\n");
      exit(1);
    }







>
>
>
>





>
|
|
>















>
>
>




|

|
<
<
<








>




















|
|
<
<



















|



|













|

>



>
>
>
>







111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186


187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
** "out" must be able to hold at least 2 +(257*n)/254 bytes.
** In other words, the output will be expanded by as much as 3
** bytes for every 254 bytes of input plus 2 bytes of fixed overhead.
** (This is approximately 2 + 1.0118*n or about a 1.2% size increase.)
**
** The return value is the number of characters in the encoded
** string, excluding the "\000" terminator.
**
** If out==NULL then no output is generated but the routine still returns
** the number of characters that would have been generated if out had
** not been NULL.
*/
int sqlite_encode_binary(const unsigned char *in, int n, unsigned char *out){
  int i, j, e, m;
  int cnt[256];
  if( n<=0 ){
    if( out ){
      out[0] = 'x';
      out[1] = 0;
    }
    return 1;
  }
  memset(cnt, 0, sizeof(cnt));
  for(i=n-1; i>=0; i--){ cnt[in[i]]++; }
  m = n;
  for(i=1; i<256; i++){
    int sum;
    if( i=='\'' ) continue;
    sum = cnt[i] + cnt[(i+1)&0xff] + cnt[(i+'\'')&0xff];
    if( sum<m ){
      m = sum;
      e = i;
      if( m==0 ) break;
    }
  }
  if( out==0 ){
    return n+m+1;
  }
  out[0] = e;
  j = 1;
  for(i=0; i<n; i++){
    int c = (in[i] - e)&0xff;
    if( c==0 || c==1 ){
      out[j++] = 1;
      out[j++] = c+1;



    }else if( c=='\'' ){
      out[j++] = 1;
      out[j++] = 3;
    }else{
      out[j++] = c;
    }
  }
  out[j] = 0;
  assert( j==n+m+1 );
  return j;
}

/*
** Decode the string "in" into binary data and write it into "out".
** This routine reverses the encoding created by sqlite_encode_binary().
** The output will always be a few bytes less than the input.  The number
** of bytes of output is returned.  If the input is not a well-formed
** encoding, -1 is returned.
**
** The "in" and "out" parameters may point to the same buffer in order
** to decode a string in place.
*/
int sqlite_decode_binary(const unsigned char *in, unsigned char *out){
  int i, c, e;
  e = *(in++);
  i = 0;
  while( (c = *(in++))!=0 ){
    if( c==1 ){
      c = *(in++);
      if( c==1 || c==2 ){
        c--;


      }else if( c==3 ){
        c = '\'';
      }else{
        return -1;
      }
    }
    out[i++] = (c + e)&0xff;
  }
  return i;
}

#ifdef ENCODER_TEST
#include <stdio.h>
/*
** The subroutines above are not tested by the usual test suite.  To test
** these routines, compile just this one file with a -DENCODER_TEST=1 option
** and run the result.
*/
int main(int argc, char **argv){
  int i, j, n, m, nOut, nByteIn, nByteOut;
  unsigned char in[30000];
  unsigned char out[33000];

  nByteIn = nByteOut = 0;
  for(i=0; i<sizeof(in); i++){
    printf("Test %d: ", i+1);
    n = rand() % (i+1);
    if( i%100==0 ){
      int k;
      for(j=k=0; j<n; j++){
        /* if( k==0 || k=='\'' ) k++; */
        in[j] = k;
        k = (k+1)&0xff;
      }
    }else{
      for(j=0; j<n; j++) in[j] = rand() & 0xff;
    }
    nByteIn += n;
    nOut = sqlite_encode_binary(in, n, out);
    nByteOut += nOut;
    if( nOut!=strlen(out) ){
      printf(" ERROR return value is %d instead of %d\n", nOut, strlen(out));
      exit(1);
    }
    if( nOut!=sqlite_encode_binary(in, n, 0) ){
      printf(" ERROR actual output size disagrees with predicted size\n");
      exit(1);
    }
    m = (256*n + 1262)/253;
    printf("size %d->%d (max %d)", n, strlen(out)+1, m);
    if( strlen(out)+1>m ){
      printf(" ERROR output too big\n");
      exit(1);
    }
240
241
242
243
244
245
246
247



248
249
    }
    if( memcmp(in, out, n)!=0 ){
      printf(" ERROR decode mismatch\n");
      exit(1);
    }
    printf(" OK\n");
  }
  fprintf(stderr, "Finished.  Total encoding: %d bytes\n", nByte);



}
#endif /* ENCODER_TEST */







|
>
>
>


251
252
253
254
255
256
257
258
259
260
261
262
263
    }
    if( memcmp(in, out, n)!=0 ){
      printf(" ERROR decode mismatch\n");
      exit(1);
    }
    printf(" OK\n");
  }
  fprintf(stderr,"Finished.  Total encoding: %d->%d bytes\n",
          nByteIn, nByteOut);
  fprintf(stderr,"Avg size increase: %.3f%%\n",
    (nByteOut-nByteIn)*100.0/(double)nByteIn);
}
#endif /* ENCODER_TEST */