/ Check-in [b80cafa6]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add the fossildelta.c extension in ext/misc with implementations of the Fossil delta functions.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: b80cafa6f8a5c6ff1dc9efd2f670777ab131ace2df1eb431cedc8cfa901baf18
User & Date: drh 2019-02-19 18:39:16
Context
2019-02-19
20:19
Add the delta_parse(DELTA) table-valued function to the fossildelta extension. check-in: d91fcc26 user: drh tags: trunk
18:39
Add the fossildelta.c extension in ext/misc with implementations of the Fossil delta functions. check-in: b80cafa6 user: drh tags: trunk
17:45
Fix a potential memory leak in RBU if the rbu_fossil_delta() SQL function is misused. Misuse never happens in a working RBU system, so this is not a particularly important fix. check-in: 12517d1b user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Show Whitespace Changes Patch

Added ext/misc/fossildelta.c.

            1  +/*
            2  +** 2019-02-19
            3  +**
            4  +** The author disclaims copyright to this source code.  In place of
            5  +** a legal notice, here is a blessing:
            6  +**
            7  +**    May you do good and not evil.
            8  +**    May you find forgiveness for yourself and forgive others.
            9  +**    May you share freely, never taking more than you give.
           10  +**
           11  +******************************************************************************
           12  +**
           13  +** This SQLite extension implements the delta functions used by Fossil.
           14  +*/
           15  +#include <string.h>
           16  +#include <assert.h>
           17  +#include <stdlib.h>
           18  +#include "sqlite3ext.h"
           19  +SQLITE_EXTENSION_INIT1
           20  +
           21  +/*
           22  +** The "u32" type must be an unsigned 32-bit integer.  Adjust this
           23  +*/
           24  +typedef unsigned int u32;
           25  +
           26  +/*
           27  +** Must be a 16-bit value
           28  +*/
           29  +typedef short int s16;
           30  +typedef unsigned short int u16;
           31  +
           32  +
           33  +/*
           34  +** The width of a hash window in bytes.  The algorithm only works if this
           35  +** is a power of 2.
           36  +*/
           37  +#define NHASH 16
           38  +
           39  +/*
           40  +** The current state of the rolling hash.
           41  +**
           42  +** z[] holds the values that have been hashed.  z[] is a circular buffer.
           43  +** z[i] is the first entry and z[(i+NHASH-1)%NHASH] is the last entry of
           44  +** the window.
           45  +**
           46  +** Hash.a is the sum of all elements of hash.z[].  Hash.b is a weighted
           47  +** sum.  Hash.b is z[i]*NHASH + z[i+1]*(NHASH-1) + ... + z[i+NHASH-1]*1.
           48  +** (Each index for z[] should be module NHASH, of course.  The %NHASH operator
           49  +** is omitted in the prior expression for brevity.)
           50  +*/
           51  +typedef struct hash hash;
           52  +struct hash {
           53  +  u16 a, b;         /* Hash values */
           54  +  u16 i;            /* Start of the hash window */
           55  +  char z[NHASH];    /* The values that have been hashed */
           56  +};
           57  +
           58  +/*
           59  +** Initialize the rolling hash using the first NHASH characters of z[]
           60  +*/
           61  +static void hash_init(hash *pHash, const char *z){
           62  +  u16 a, b, i;
           63  +  a = b = z[0];
           64  +  for(i=1; i<NHASH; i++){
           65  +    a += z[i];
           66  +    b += a;
           67  +  }
           68  +  memcpy(pHash->z, z, NHASH);
           69  +  pHash->a = a & 0xffff;
           70  +  pHash->b = b & 0xffff;
           71  +  pHash->i = 0;
           72  +}
           73  +
           74  +/*
           75  +** Advance the rolling hash by a single character "c"
           76  +*/
           77  +static void hash_next(hash *pHash, int c){
           78  +  u16 old = pHash->z[pHash->i];
           79  +  pHash->z[pHash->i] = c;
           80  +  pHash->i = (pHash->i+1)&(NHASH-1);
           81  +  pHash->a = pHash->a - old + c;
           82  +  pHash->b = pHash->b - NHASH*old + pHash->a;
           83  +}
           84  +
           85  +/*
           86  +** Return a 32-bit hash value
           87  +*/
           88  +static u32 hash_32bit(hash *pHash){
           89  +  return (pHash->a & 0xffff) | (((u32)(pHash->b & 0xffff))<<16);
           90  +}
           91  +
           92  +/*
           93  +** Compute a hash on NHASH bytes.
           94  +**
           95  +** This routine is intended to be equivalent to:
           96  +**    hash h;
           97  +**    hash_init(&h, zInput);
           98  +**    return hash_32bit(&h);
           99  +*/
          100  +static u32 hash_once(const char *z){
          101  +  u16 a, b, i;
          102  +  a = b = z[0];
          103  +  for(i=1; i<NHASH; i++){
          104  +    a += z[i];
          105  +    b += a;
          106  +  }
          107  +  return a | (((u32)b)<<16);
          108  +}
          109  +
          110  +/*
          111  +** Write an base-64 integer into the given buffer.
          112  +*/
          113  +static void putInt(unsigned int v, char **pz){
          114  +  static const char zDigits[] =
          115  +    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~";
          116  +  /*  123456789 123456789 123456789 123456789 123456789 123456789 123 */
          117  +  int i, j;
          118  +  char zBuf[20];
          119  +  if( v==0 ){
          120  +    *(*pz)++ = '0';
          121  +    return;
          122  +  }
          123  +  for(i=0; v>0; i++, v>>=6){
          124  +    zBuf[i] = zDigits[v&0x3f];
          125  +  }
          126  +  for(j=i-1; j>=0; j--){
          127  +    *(*pz)++ = zBuf[j];
          128  +  }
          129  +}
          130  +
          131  +/*
          132  +** Read bytes from *pz and convert them into a positive integer.  When
          133  +** finished, leave *pz pointing to the first character past the end of
          134  +** the integer.  The *pLen parameter holds the length of the string
          135  +** in *pz and is decremented once for each character in the integer.
          136  +*/
          137  +static unsigned int getInt(const char **pz, int *pLen){
          138  +  static const signed char zValue[] = {
          139  +    -1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1,
          140  +    -1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1,
          141  +    -1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1,
          142  +     0,  1,  2,  3,  4,  5,  6,  7,    8,  9, -1, -1, -1, -1, -1, -1,
          143  +    -1, 10, 11, 12, 13, 14, 15, 16,   17, 18, 19, 20, 21, 22, 23, 24,
          144  +    25, 26, 27, 28, 29, 30, 31, 32,   33, 34, 35, -1, -1, -1, -1, 36,
          145  +    -1, 37, 38, 39, 40, 41, 42, 43,   44, 45, 46, 47, 48, 49, 50, 51,
          146  +    52, 53, 54, 55, 56, 57, 58, 59,   60, 61, 62, -1, -1, -1, 63, -1,
          147  +  };
          148  +  unsigned int v = 0;
          149  +  int c;
          150  +  unsigned char *z = (unsigned char*)*pz;
          151  +  unsigned char *zStart = z;
          152  +  while( (c = zValue[0x7f&*(z++)])>=0 ){
          153  +     v = (v<<6) + c;
          154  +  }
          155  +  z--;
          156  +  *pLen -= z - zStart;
          157  +  *pz = (char*)z;
          158  +  return v;
          159  +}
          160  +
          161  +/*
          162  +** Return the number digits in the base-64 representation of a positive integer
          163  +*/
          164  +static int digit_count(int v){
          165  +  unsigned int i, x;
          166  +  for(i=1, x=64; v>=x; i++, x <<= 6){}
          167  +  return i;
          168  +}
          169  +
          170  +#ifdef __GNUC__
          171  +# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__)
          172  +#else
          173  +# define GCC_VERSION 0
          174  +#endif
          175  +
          176  +/*
          177  +** Compute a 32-bit big-endian checksum on the N-byte buffer.  If the
          178  +** buffer is not a multiple of 4 bytes length, compute the sum that would
          179  +** have occurred if the buffer was padded with zeros to the next multiple
          180  +** of four bytes.
          181  +*/
          182  +static unsigned int checksum(const char *zIn, size_t N){
          183  +  static const int byteOrderTest = 1;
          184  +  const unsigned char *z = (const unsigned char *)zIn;
          185  +  const unsigned char *zEnd = (const unsigned char*)&zIn[N&~3];
          186  +  unsigned sum = 0;
          187  +  assert( (z - (const unsigned char*)0)%4==0 );  /* Four-byte alignment */
          188  +  if( 0==*(char*)&byteOrderTest ){
          189  +    /* This is a big-endian machine */
          190  +    while( z<zEnd ){
          191  +      sum += *(unsigned*)z;
          192  +      z += 4;
          193  +    }
          194  +  }else{
          195  +    /* A little-endian machine */
          196  +#if GCC_VERSION>=4003000
          197  +    while( z<zEnd ){
          198  +      sum += __builtin_bswap32(*(unsigned*)z);
          199  +      z += 4;
          200  +    }
          201  +#elif defined(_MSC_VER) && _MSC_VER>=1300
          202  +    while( z<zEnd ){
          203  +      sum += _byteswap_ulong(*(unsigned*)z);
          204  +      z += 4;
          205  +    }
          206  +#else
          207  +    unsigned sum0 = 0;
          208  +    unsigned sum1 = 0;
          209  +    unsigned sum2 = 0;
          210  +    while(N >= 16){
          211  +      sum0 += ((unsigned)z[0] + z[4] + z[8] + z[12]);
          212  +      sum1 += ((unsigned)z[1] + z[5] + z[9] + z[13]);
          213  +      sum2 += ((unsigned)z[2] + z[6] + z[10]+ z[14]);
          214  +      sum  += ((unsigned)z[3] + z[7] + z[11]+ z[15]);
          215  +      z += 16;
          216  +      N -= 16;
          217  +    }
          218  +    while(N >= 4){
          219  +      sum0 += z[0];
          220  +      sum1 += z[1];
          221  +      sum2 += z[2];
          222  +      sum  += z[3];
          223  +      z += 4;
          224  +      N -= 4;
          225  +    }
          226  +    sum += (sum2 << 8) + (sum1 << 16) + (sum0 << 24);
          227  +#endif
          228  +  }
          229  +  switch(N&3){
          230  +    case 3:   sum += (z[2] << 8);
          231  +    case 2:   sum += (z[1] << 16);
          232  +    case 1:   sum += (z[0] << 24);
          233  +    default:  ;
          234  +  }
          235  +  return sum;
          236  +}
          237  +
          238  +/*
          239  +** Create a new delta.
          240  +**
          241  +** The delta is written into a preallocated buffer, zDelta, which
          242  +** should be at least 60 bytes longer than the target file, zOut.
          243  +** The delta string will be NUL-terminated, but it might also contain
          244  +** embedded NUL characters if either the zSrc or zOut files are
          245  +** binary.  This function returns the length of the delta string
          246  +** in bytes, excluding the final NUL terminator character.
          247  +**
          248  +** Output Format:
          249  +**
          250  +** The delta begins with a base64 number followed by a newline.  This
          251  +** number is the number of bytes in the TARGET file.  Thus, given a
          252  +** delta file z, a program can compute the size of the output file
          253  +** simply by reading the first line and decoding the base-64 number
          254  +** found there.  The delta_output_size() routine does exactly this.
          255  +**
          256  +** After the initial size number, the delta consists of a series of
          257  +** literal text segments and commands to copy from the SOURCE file.
          258  +** A copy command looks like this:
          259  +**
          260  +**     NNN@MMM,
          261  +**
          262  +** where NNN is the number of bytes to be copied and MMM is the offset
          263  +** into the source file of the first byte (both base-64).   If NNN is 0
          264  +** it means copy the rest of the input file.  Literal text is like this:
          265  +**
          266  +**     NNN:TTTTT
          267  +**
          268  +** where NNN is the number of bytes of text (base-64) and TTTTT is the text.
          269  +**
          270  +** The last term is of the form
          271  +**
          272  +**     NNN;
          273  +**
          274  +** In this case, NNN is a 32-bit bigendian checksum of the output file
          275  +** that can be used to verify that the delta applied correctly.  All
          276  +** numbers are in base-64.
          277  +**
          278  +** Pure text files generate a pure text delta.  Binary files generate a
          279  +** delta that may contain some binary data.
          280  +**
          281  +** Algorithm:
          282  +**
          283  +** The encoder first builds a hash table to help it find matching
          284  +** patterns in the source file.  16-byte chunks of the source file
          285  +** sampled at evenly spaced intervals are used to populate the hash
          286  +** table.
          287  +**
          288  +** Next we begin scanning the target file using a sliding 16-byte
          289  +** window.  The hash of the 16-byte window in the target is used to
          290  +** search for a matching section in the source file.  When a match
          291  +** is found, a copy command is added to the delta.  An effort is
          292  +** made to extend the matching section to regions that come before
          293  +** and after the 16-byte hash window.  A copy command is only issued
           294  +** if the result would use less space than just quoting the text
          295  +** literally. Literal text is added to the delta for sections that
          296  +** do not match or which can not be encoded efficiently using copy
          297  +** commands.
          298  +*/
          299  +static int delta_create(
          300  +  const char *zSrc,      /* The source or pattern file */
          301  +  unsigned int lenSrc,   /* Length of the source file */
          302  +  const char *zOut,      /* The target file */
          303  +  unsigned int lenOut,   /* Length of the target file */
          304  +  char *zDelta           /* Write the delta into this buffer */
          305  +){
          306  +  int i, base;
          307  +  char *zOrigDelta = zDelta;
          308  +  hash h;
          309  +  int nHash;                 /* Number of hash table entries */
          310  +  int *landmark;             /* Primary hash table */
          311  +  int *collide;              /* Collision chain */
          312  +  int lastRead = -1;         /* Last byte of zSrc read by a COPY command */
          313  +
          314  +  /* Add the target file size to the beginning of the delta
          315  +  */
          316  +  putInt(lenOut, &zDelta);
          317  +  *(zDelta++) = '\n';
          318  +
          319  +  /* If the source file is very small, it means that we have no
          320  +  ** chance of ever doing a copy command.  Just output a single
          321  +  ** literal segment for the entire target and exit.
          322  +  */
          323  +  if( lenSrc<=NHASH ){
          324  +    putInt(lenOut, &zDelta);
          325  +    *(zDelta++) = ':';
          326  +    memcpy(zDelta, zOut, lenOut);
          327  +    zDelta += lenOut;
          328  +    putInt(checksum(zOut, lenOut), &zDelta);
          329  +    *(zDelta++) = ';';
          330  +    return zDelta - zOrigDelta;
          331  +  }
          332  +
          333  +  /* Compute the hash table used to locate matching sections in the
          334  +  ** source file.
          335  +  */
          336  +  nHash = lenSrc/NHASH;
          337  +  collide = sqlite3_malloc64( (sqlite3_int64)nHash*2*sizeof(int) );
          338  +  memset(collide, -1, nHash*2*sizeof(int));
          339  +  landmark = &collide[nHash];
          340  +  for(i=0; i<lenSrc-NHASH; i+=NHASH){
          341  +    int hv = hash_once(&zSrc[i]) % nHash;
          342  +    collide[i/NHASH] = landmark[hv];
          343  +    landmark[hv] = i/NHASH;
          344  +  }
          345  +
          346  +  /* Begin scanning the target file and generating copy commands and
          347  +  ** literal sections of the delta.
          348  +  */
          349  +  base = 0;    /* We have already generated everything before zOut[base] */
          350  +  while( base+NHASH<lenOut ){
          351  +    int iSrc, iBlock;
          352  +    unsigned int bestCnt, bestOfst=0, bestLitsz=0;
          353  +    hash_init(&h, &zOut[base]);
          354  +    i = 0;     /* Trying to match a landmark against zOut[base+i] */
          355  +    bestCnt = 0;
          356  +    while( 1 ){
          357  +      int hv;
          358  +      int limit = 250;
          359  +
          360  +      hv = hash_32bit(&h) % nHash;
          361  +      iBlock = landmark[hv];
          362  +      while( iBlock>=0 && (limit--)>0 ){
          363  +        /*
          364  +        ** The hash window has identified a potential match against
          365  +        ** landmark block iBlock.  But we need to investigate further.
          366  +        **
          367  +        ** Look for a region in zOut that matches zSrc. Anchor the search
          368  +        ** at zSrc[iSrc] and zOut[base+i].  Do not include anything prior to
          369  +        ** zOut[base] or after zOut[outLen] nor anything after zSrc[srcLen].
          370  +        **
          371  +        ** Set cnt equal to the length of the match and set ofst so that
          372  +        ** zSrc[ofst] is the first element of the match.  litsz is the number
          373  +        ** of characters between zOut[base] and the beginning of the match.
          374  +        ** sz will be the overhead (in bytes) needed to encode the copy
          375  +        ** command.  Only generate copy command if the overhead of the
          376  +        ** copy command is less than the amount of literal text to be copied.
          377  +        */
          378  +        int cnt, ofst, litsz;
          379  +        int j, k, x, y;
          380  +        int sz;
          381  +        int limitX;
          382  +
          383  +        /* Beginning at iSrc, match forwards as far as we can.  j counts
          384  +        ** the number of characters that match */
          385  +        iSrc = iBlock*NHASH;
          386  +        y = base+i;
          387  +        limitX = ( lenSrc-iSrc <= lenOut-y ) ? lenSrc : iSrc + lenOut - y;
          388  +        for(x=iSrc; x<limitX; x++, y++){
          389  +          if( zSrc[x]!=zOut[y] ) break;
          390  +        }
          391  +        j = x - iSrc - 1;
          392  +
          393  +        /* Beginning at iSrc-1, match backwards as far as we can.  k counts
          394  +        ** the number of characters that match */
          395  +        for(k=1; k<iSrc && k<=i; k++){
          396  +          if( zSrc[iSrc-k]!=zOut[base+i-k] ) break;
          397  +        }
          398  +        k--;
          399  +
          400  +        /* Compute the offset and size of the matching region */
          401  +        ofst = iSrc-k;
          402  +        cnt = j+k+1;
          403  +        litsz = i-k;  /* Number of bytes of literal text before the copy */
          404  +        /* sz will hold the number of bytes needed to encode the "insert"
          405  +        ** command and the copy command, not counting the "insert" text */
          406  +        sz = digit_count(i-k)+digit_count(cnt)+digit_count(ofst)+3;
          407  +        if( cnt>=sz && cnt>bestCnt ){
          408  +          /* Remember this match only if it is the best so far and it
          409  +          ** does not increase the file size */
          410  +          bestCnt = cnt;
          411  +          bestOfst = iSrc-k;
          412  +          bestLitsz = litsz;
          413  +        }
          414  +
          415  +        /* Check the next matching block */
          416  +        iBlock = collide[iBlock];
          417  +      }
          418  +
          419  +      /* We have a copy command that does not cause the delta to be larger
          420  +      ** than a literal insert.  So add the copy command to the delta.
          421  +      */
          422  +      if( bestCnt>0 ){
          423  +        if( bestLitsz>0 ){
          424  +          /* Add an insert command before the copy */
          425  +          putInt(bestLitsz,&zDelta);
          426  +          *(zDelta++) = ':';
          427  +          memcpy(zDelta, &zOut[base], bestLitsz);
          428  +          zDelta += bestLitsz;
          429  +          base += bestLitsz;
          430  +        }
          431  +        base += bestCnt;
          432  +        putInt(bestCnt, &zDelta);
          433  +        *(zDelta++) = '@';
          434  +        putInt(bestOfst, &zDelta);
          435  +        *(zDelta++) = ',';
          436  +        if( bestOfst + bestCnt -1 > lastRead ){
          437  +          lastRead = bestOfst + bestCnt - 1;
          438  +        }
          439  +        bestCnt = 0;
          440  +        break;
          441  +      }
          442  +
          443  +      /* If we reach this point, it means no match is found so far */
          444  +      if( base+i+NHASH>=lenOut ){
          445  +        /* We have reached the end of the file and have not found any
          446  +        ** matches.  Do an "insert" for everything that does not match */
          447  +        putInt(lenOut-base, &zDelta);
          448  +        *(zDelta++) = ':';
          449  +        memcpy(zDelta, &zOut[base], lenOut-base);
          450  +        zDelta += lenOut-base;
          451  +        base = lenOut;
          452  +        break;
          453  +      }
          454  +
          455  +      /* Advance the hash by one character.  Keep looking for a match */
          456  +      hash_next(&h, zOut[base+i+NHASH]);
          457  +      i++;
          458  +    }
          459  +  }
          460  +  /* Output a final "insert" record to get all the text at the end of
          461  +  ** the file that does not match anything in the source file.
          462  +  */
          463  +  if( base<lenOut ){
          464  +    putInt(lenOut-base, &zDelta);
          465  +    *(zDelta++) = ':';
          466  +    memcpy(zDelta, &zOut[base], lenOut-base);
          467  +    zDelta += lenOut-base;
          468  +  }
          469  +  /* Output the final checksum record. */
          470  +  putInt(checksum(zOut, lenOut), &zDelta);
          471  +  *(zDelta++) = ';';
          472  +  sqlite3_free(collide);
          473  +  return zDelta - zOrigDelta;
          474  +}
          475  +
          476  +/*
          477  +** Return the size (in bytes) of the output from applying
          478  +** a delta.
          479  +**
          480  +** This routine is provided so that an procedure that is able
          481  +** to call delta_apply() can learn how much space is required
          482  +** for the output and hence allocate nor more space that is really
          483  +** needed.
          484  +*/
          485  +static int delta_output_size(const char *zDelta, int lenDelta){
          486  +  int size;
          487  +  size = getInt(&zDelta, &lenDelta);
          488  +  if( *zDelta!='\n' ){
          489  +    /* ERROR: size integer not terminated by "\n" */
          490  +    return -1;
          491  +  }
          492  +  return size;
          493  +}
          494  +
          495  +
          496  +/*
          497  +** Apply a delta.
          498  +**
          499  +** The output buffer should be big enough to hold the whole output
          500  +** file and a NUL terminator at the end.  The delta_output_size()
          501  +** routine will determine this size for you.
          502  +**
          503  +** The delta string should be null-terminated.  But the delta string
          504  +** may contain embedded NUL characters (if the input and output are
          505  +** binary files) so we also have to pass in the length of the delta in
          506  +** the lenDelta parameter.
          507  +**
          508  +** This function returns the size of the output file in bytes (excluding
          509  +** the final NUL terminator character).  Except, if the delta string is
          510  +** malformed or intended for use with a source file other than zSrc,
          511  +** then this routine returns -1.
          512  +**
          513  +** Refer to the delta_create() documentation above for a description
          514  +** of the delta file format.
          515  +*/
          516  +static int delta_apply(
          517  +  const char *zSrc,      /* The source or pattern file */
          518  +  int lenSrc,            /* Length of the source file */
          519  +  const char *zDelta,    /* Delta to apply to the pattern */
          520  +  int lenDelta,          /* Length of the delta */
          521  +  char *zOut             /* Write the output into this preallocated buffer */
          522  +){
          523  +  unsigned int limit;
          524  +  unsigned int total = 0;
          525  +#ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST
          526  +  char *zOrigOut = zOut;
          527  +#endif
          528  +
          529  +  limit = getInt(&zDelta, &lenDelta);
          530  +  if( *zDelta!='\n' ){
          531  +    /* ERROR: size integer not terminated by "\n" */
          532  +    return -1;
          533  +  }
          534  +  zDelta++; lenDelta--;
          535  +  while( *zDelta && lenDelta>0 ){
          536  +    unsigned int cnt, ofst;
          537  +    cnt = getInt(&zDelta, &lenDelta);
          538  +    switch( zDelta[0] ){
          539  +      case '@': {
          540  +        zDelta++; lenDelta--;
          541  +        ofst = getInt(&zDelta, &lenDelta);
          542  +        if( lenDelta>0 && zDelta[0]!=',' ){
          543  +          /* ERROR: copy command not terminated by ',' */
          544  +          return -1;
          545  +        }
          546  +        zDelta++; lenDelta--;
          547  +        total += cnt;
          548  +        if( total>limit ){
          549  +          /* ERROR: copy exceeds output file size */
          550  +          return -1;
          551  +        }
          552  +        if( ofst+cnt > lenSrc ){
          553  +          /* ERROR: copy extends past end of input */
          554  +          return -1;
          555  +        }
          556  +        memcpy(zOut, &zSrc[ofst], cnt);
          557  +        zOut += cnt;
          558  +        break;
          559  +      }
          560  +      case ':': {
          561  +        zDelta++; lenDelta--;
          562  +        total += cnt;
          563  +        if( total>limit ){
          564  +          /* ERROR:  insert command gives an output larger than predicted */
          565  +          return -1;
          566  +        }
          567  +        if( cnt>lenDelta ){
          568  +          /* ERROR: insert count exceeds size of delta */
          569  +          return -1;
          570  +        }
          571  +        memcpy(zOut, zDelta, cnt);
          572  +        zOut += cnt;
          573  +        zDelta += cnt;
          574  +        lenDelta -= cnt;
          575  +        break;
          576  +      }
          577  +      case ';': {
          578  +        zDelta++; lenDelta--;
          579  +        zOut[0] = 0;
          580  +#ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST
          581  +        if( cnt!=checksum(zOrigOut, total) ){
          582  +          /* ERROR:  bad checksum */
          583  +          return -1;
          584  +        }
          585  +#endif
          586  +        if( total!=limit ){
          587  +          /* ERROR: generated size does not match predicted size */
          588  +          return -1;
          589  +        }
          590  +        return total;
          591  +      }
          592  +      default: {
          593  +        /* ERROR: unknown delta operator */
          594  +        return -1;
          595  +      }
          596  +    }
          597  +  }
          598  +  /* ERROR: unterminated delta */
          599  +  return -1;
          600  +}
          601  +
          602  +/*
          603  +** Analyze a delta.  Figure out the total number of bytes copied from
          604  +** source to target, and the total number of bytes inserted by the delta,
          605  +** and return both numbers.
          606  +*/
          607  +static int delta_analyze(
          608  +  const char *zDelta,    /* Delta to apply to the pattern */
          609  +  int lenDelta,          /* Length of the delta */
          610  +  int *pnCopy,           /* OUT: Number of bytes copied */
          611  +  int *pnInsert          /* OUT: Number of bytes inserted */
          612  +){
          613  +  unsigned int nInsert = 0;
          614  +  unsigned int nCopy = 0;
          615  +
          616  +  (void)getInt(&zDelta, &lenDelta);
          617  +  if( *zDelta!='\n' ){
          618  +    /* ERROR: size integer not terminated by "\n" */
          619  +    return -1;
          620  +  }
          621  +  zDelta++; lenDelta--;
          622  +  while( *zDelta && lenDelta>0 ){
          623  +    unsigned int cnt;
          624  +    cnt = getInt(&zDelta, &lenDelta);
          625  +    switch( zDelta[0] ){
          626  +      case '@': {
          627  +        zDelta++; lenDelta--;
          628  +        (void)getInt(&zDelta, &lenDelta);
          629  +        if( lenDelta>0 && zDelta[0]!=',' ){
          630  +          /* ERROR: copy command not terminated by ',' */
          631  +          return -1;
          632  +        }
          633  +        zDelta++; lenDelta--;
          634  +        nCopy += cnt;
          635  +        break;
          636  +      }
          637  +      case ':': {
          638  +        zDelta++; lenDelta--;
          639  +        nInsert += cnt;
          640  +        if( cnt>lenDelta ){
          641  +          /* ERROR: insert count exceeds size of delta */
          642  +          return -1;
          643  +        }
          644  +        zDelta += cnt;
          645  +        lenDelta -= cnt;
          646  +        break;
          647  +      }
          648  +      case ';': {
          649  +        *pnCopy = nCopy;
          650  +        *pnInsert = nInsert;
          651  +        return 0;
          652  +      }
          653  +      default: {
          654  +        /* ERROR: unknown delta operator */
          655  +        return -1;
          656  +      }
          657  +    }
          658  +  }
          659  +  /* ERROR: unterminated delta */
          660  +  return -1;
          661  +}
          662  +
          663  +/*
          664  +** SQL functions:  fossildelta_create(X,Y)
          665  +**
          666  +** Return a delta for carrying X into Y.
          667  +*/
          668  +static void deltaCreateFunc(
          669  +  sqlite3_context *context,
          670  +  int argc,
          671  +  sqlite3_value **argv
          672  +){
          673  +  const char *aOrig; int nOrig;  /* old blob */
          674  +  const char *aNew;  int nNew;   /* new blob */
          675  +  char *aOut;        int nOut;   /* output delta */
          676  +
          677  +  assert( argc==2 );
          678  +  if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
          679  +  if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return;
          680  +  nOrig = sqlite3_value_bytes(argv[0]);
          681  +  aOrig = (const char*)sqlite3_value_blob(argv[0]);
          682  +  nNew = sqlite3_value_bytes(argv[1]);
          683  +  aNew = (const char*)sqlite3_value_blob(argv[1]);
          684  +  aOut = sqlite3_malloc64(nNew+70);
          685  +  if( aOut==0 ){
          686  +    sqlite3_result_error_nomem(context);
          687  +  }else{
          688  +    nOut = delta_create(aOrig, nOrig, aNew, nNew, aOut);
          689  +    if( nOut<0 ){
          690  +      sqlite3_free(aOut);
          691  +      sqlite3_result_error(context, "cannot create fossil delta", -1);
          692  +    }else{
          693  +      sqlite3_result_blob(context, aOut, nOut, sqlite3_free);
          694  +    }
          695  +  }
          696  +}
          697  +
          698  +/*
          699  +** SQL functions:  fossildelta_apply(X,D)
          700  +**
          701  +** Return the result of applying delta D to input X.
          702  +*/
          703  +static void deltaApplyFunc(
          704  +  sqlite3_context *context,
          705  +  int argc,
          706  +  sqlite3_value **argv
          707  +){
          708  +  const char *aOrig;   int nOrig;        /* The X input */
          709  +  const char *aDelta;  int nDelta;       /* The input delta (D) */
          710  +  char *aOut;          int nOut, nOut2;  /* The output */
          711  +
          712  +  assert( argc==2 );
          713  +  if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
          714  +  if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return;
          715  +  nOrig = sqlite3_value_bytes(argv[0]);
          716  +  aOrig = (const char*)sqlite3_value_blob(argv[0]);
          717  +  nDelta = sqlite3_value_bytes(argv[1]);
          718  +  aDelta = (const char*)sqlite3_value_blob(argv[1]);
          719  +
          720  +  /* Figure out the size of the output */
          721  +  nOut = delta_output_size(aDelta, nDelta);
          722  +  if( nOut<0 ){
          723  +    sqlite3_result_error(context, "corrupt fossil delta", -1);
          724  +    return;
          725  +  }
          726  +  aOut = sqlite3_malloc64((sqlite3_int64)nOut+1);
          727  +  if( aOut==0 ){
          728  +    sqlite3_result_error_nomem(context);
          729  +  }else{
          730  +    nOut2 = delta_apply(aOrig, nOrig, aDelta, nDelta, aOut);
          731  +    if( nOut2!=nOut ){
          732  +      sqlite3_free(aOut);
          733  +      sqlite3_result_error(context, "corrupt fossil delta", -1);
          734  +    }else{
          735  +      sqlite3_result_blob(context, aOut, nOut, sqlite3_free);
          736  +    }
          737  +  }
          738  +}
          739  +
          740  +
          741  +/*
          742  +** SQL functions:  fossildelta_output_size(D)
          743  +**
          744  +** Return the size of the output that results from applying delta D.
          745  +*/
          746  +static void deltaOutputSizeFunc(
          747  +  sqlite3_context *context,
          748  +  int argc,
          749  +  sqlite3_value **argv
          750  +){
          751  +  const char *aDelta;  int nDelta;       /* The input delta (D) */
          752  +  int nOut;                              /* Size of output */
          753  +  assert( argc==1 );
          754  +  if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
          755  +  nDelta = sqlite3_value_bytes(argv[0]);
          756  +  aDelta = (const char*)sqlite3_value_blob(argv[0]);
          757  +
          758  +  /* Figure out the size of the output */
          759  +  nOut = delta_output_size(aDelta, nDelta);
          760  +  if( nOut<0 ){
          761  +    sqlite3_result_error(context, "corrupt fossil delta", -1);
          762  +    return;
          763  +  }else{
          764  +    sqlite3_result_int(context, nOut);
          765  +  }
          766  +}
          767  +
          768  +
          769  +#ifdef _WIN32
          770  +__declspec(dllexport)
          771  +#endif
          772  +int sqlite3_fossildelta_init(
          773  +  sqlite3 *db, 
          774  +  char **pzErrMsg, 
          775  +  const sqlite3_api_routines *pApi
          776  +){
          777  +  int rc = SQLITE_OK;
          778  +  SQLITE_EXTENSION_INIT2(pApi);
          779  +  (void)pzErrMsg;  /* Unused parameter */
          780  +  rc = sqlite3_create_function(db, "delta_create", 2, SQLITE_UTF8, 0,
          781  +                               deltaCreateFunc, 0, 0);
          782  +  if( rc==SQLITE_OK ){
          783  +    rc = sqlite3_create_function(db, "delta_apply", 2, SQLITE_UTF8, 0,
          784  +                                 deltaApplyFunc, 0, 0);
          785  +  }
          786  +  if( rc==SQLITE_OK ){
          787  +    rc = sqlite3_create_function(db, "delta_output_size", 1, SQLITE_UTF8, 0,
          788  +                                 deltaOutputSizeFunc, 0, 0);
          789  +  }
          790  +  return rc;
          791  +}