Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fixes for overflow pages and very large values.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 74e7bb267f4ffb7a0ae2859321b018a1cd8cb870
User & Date: dan 2013-10-10 19:38:27.934
Context
2013-10-11
18:07
Fixes for keys that spill over onto overflow pages. check-in: bca95ca536 user: dan tags: trunk
2013-10-10
19:38
Fixes for overflow pages and very large values. check-in: 74e7bb267f user: dan tags: trunk
2013-10-09
20:14
Add code to deal with large values (those that will not fit on a leaf) to the btree code. Large keys are still not working. check-in: 2bd81969a6 user: dan tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/bt_main.c.
662
663
664
665
666
667
668

669
670

671
672
673
674
675
676
677
678
679
680
681

682
683
684

685
686
687

688
689
690
691
692
693
694
695
696
697
698
699


700
701
702
703
704
705
706
707
708








































709
710
711
712
713
714
715
static int btOverflowArrayRead(
  bt_db *db,
  u8 *pOvfl,
  u8 *aOut,
  int nOut
){
  const int pgsz = sqlite4BtPagerPagesize(db->pPager);

  int rc = SQLITE4_OK;
  int nDirect;                    /* Number of direct overflow pages */

  int iOut;                       /* Bytes of data copied so far */
  int iPg;

  nDirect = (int)(pOvfl[0] & 0x0F);
  nDepth = (int)(pOvfl[0]>>4);

  iOut = 0;

  /* Read from the direct overflow pages. And from the overflow tree, if
  ** it has a depth of zero.  */
  for(iPg=0; rc==SQLITE4_OK && iPg<(nDirect+(nDepth==0)); iPg++){

    BtPage *pPg = 0;
    rc = sqlite4BtPageGet(db->pPager, btGetU32(&pOvfl[1+iPg*4]), &pPg);
    if( rc==SQLITE4_OK ){

      u8 *a = sqlite4BtPageData(pPg);
      memcpy(&aOut[iOut], a, MIN(nOut-iOut, pgsz));
      sqlite4BtPageRelease(pPg);

    }
  }

  /* Read from the overflow tree, if it was not read by the block above. */
  if( nDepth>0 ){
    struct Heir {
      BtPage *pPg;
      int iCell;
    } apHier[8];
    int i;

    memset(apHier, 0, sizeof(apHier));


    pgno = btGetU32(&pOvfl[1+nDirect*4]);
    for(i=0; i<nDepth && rc==SQLITE4_OK; i++){
      u8 *a;
      rc = sqlite4BtPageGet(db->pPager, pgno, &apHier[i].pPg);
      if( rc==SQLITE4_OK ){
        a = sqlite4BtPageData(apHier[i].pPg);
        pgno = btGetU32(a);
      }
    }









































    for(i=0; i<nDepth && rc==SQLITE4_OK; i++){
      sqlite4BtPageRelease(apHier[i].pPg);
    }
  }

  return rc;







>


>










|
>

|

>

|

>










|

>
>









>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
static int btOverflowArrayRead(
  bt_db *db,
  u8 *pOvfl,
  u8 *aOut,
  int nOut
){
  const int pgsz = sqlite4BtPagerPagesize(db->pPager);
  const int nPgPtr = pgsz / 4;
  int rc = SQLITE4_OK;
  int nDirect;                    /* Number of direct overflow pages */
  int nDepth;                     /* Depth of overflow tree */
  int iOut;                       /* Bytes of data copied so far */
  int iPg;

  nDirect = (int)(pOvfl[0] & 0x0F);
  nDepth = (int)(pOvfl[0]>>4);

  iOut = 0;

  /* Read from the direct overflow pages. And from the overflow tree, if
  ** it has a depth of zero.  */
  for(iPg=0; rc==SQLITE4_OK && iPg<(nDirect+(nDepth==0)) && iOut<nOut; iPg++){
    u32 pgno = btGetU32(&pOvfl[1+iPg*4]);
    BtPage *pPg = 0;
    rc = sqlite4BtPageGet(db->pPager, pgno, &pPg);
    if( rc==SQLITE4_OK ){
      int nCopy = MIN(nOut-iOut, pgsz);
      u8 *a = sqlite4BtPageData(pPg);
      memcpy(&aOut[iOut], a, nCopy);
      sqlite4BtPageRelease(pPg);
      iOut += nCopy;
    }
  }

  /* Read from the overflow tree, if it was not read by the block above. */
  if( nDepth>0 ){
    struct Heir {
      BtPage *pPg;
      int iCell;
    } apHier[8];
    int i;
    u32 pgno;
    memset(apHier, 0, sizeof(apHier));

    /* Initialize the apHier[] array. */
    pgno = btGetU32(&pOvfl[1+nDirect*4]);
    for(i=0; i<nDepth && rc==SQLITE4_OK; i++){
      u8 *a;
      rc = sqlite4BtPageGet(db->pPager, pgno, &apHier[i].pPg);
      if( rc==SQLITE4_OK ){
        a = sqlite4BtPageData(apHier[i].pPg);
        pgno = btGetU32(a);
      }
    }

    /* Loop runs once for each leaf page we read from. */
    while( iOut<nOut ){
      u8 *a;                      /* Data associated with some page */
      BtPage *pLeaf;              /* Leaf page */
      int nCopy;                  /* Bytes of data to read from leaf page */

      int iLvl;

      nCopy =  MIN(nOut-iOut, pgsz);
      assert( nCopy>0 );

      /* Read data from the current leaf page */
      rc = sqlite4BtPageGet(db->pPager, pgno, &pLeaf);
      if( rc!=SQLITE4_OK ) break;
      a = sqlite4BtPageData(pLeaf);
      memcpy(&aOut[iOut], a, nCopy);
      sqlite4BtPageRelease(pLeaf);
      iOut += nCopy;

      /* If all required data has been read, break out of the loop */
      if( iOut>=nOut ) break;

      for(iLvl=nDepth-1; iLvl>=0; iLvl--){
        if( apHier[iLvl].iCell<(nPgPtr-1) ) break;
      }
      if( iLvl<0 ) break; /* SQLITE4_CORRUPT? */
      apHier[iLvl].iCell++;

      for(iLvl; iLvl<nDepth && rc==SQLITE4_OK; iLvl++){
        a = sqlite4BtPageData(apHier[iLvl].pPg);
        pgno = btGetU32(&a[apHier[iLvl].iCell * 4]);
        if( iLvl<(nDepth-1) ){
          apHier[iLvl+1].iCell = 0;
          sqlite4BtPageRelease(apHier[iLvl+1].pPg);
          apHier[iLvl+1].pPg = 0;
          rc = sqlite4BtPageGet(db->pPager, pgno, &apHier[iLvl+1].pPg);
        }
      }
    }

    for(i=0; i<nDepth && rc==SQLITE4_OK; i++){
      sqlite4BtPageRelease(apHier[i].pPg);
    }
  }

  return rc;
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015

  /* Calculate the number of required overflow pages. And the depth of
  ** the overflow tree.  */
  nOvfl = (nBuf1+nBuf2+pgsz-1) / pgsz;
  nOvfl -= BT_MAX_DIRECT_OVERFLOW;
  while( nOvfl>1 ){
    nDepth++;
    nOvfl = (nOvfl / nPgPtr);
  }

  for(i=0; rc==SQLITE4_OK && i<nDepth; i++){
    u32 pgno;
    rc = sqlite4BtPageAllocate(db->pPager, &apHier[i].pPg);
    pgno = sqlite4BtPagePgno(apHier[i].pPg);
    if( i==0 ){







|







1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062

  /* Calculate the number of required overflow pages. And the depth of
  ** the overflow tree.  */
  nOvfl = (nBuf1+nBuf2+pgsz-1) / pgsz;
  nOvfl -= BT_MAX_DIRECT_OVERFLOW;
  while( nOvfl>1 ){
    nDepth++;
    nOvfl = (nOvfl+nPgPtr-1) / nPgPtr;
  }

  for(i=0; rc==SQLITE4_OK && i<nDepth; i++){
    u32 pgno;
    rc = sqlite4BtPageAllocate(db->pPager, &apHier[i].pPg);
    pgno = sqlite4BtPagePgno(apHier[i].pPg);
    if( i==0 ){
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067

1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
          }

          if( rc!=SQLITE4_OK ){
            pgno = 0;
          }

          apHier[i].pPg = pNew;
          apHier[i].iCell = 0;
        }else{
          btPutU32(&a[apHier[i].iCell*4], pgno);

          pgno = 0;
        }
      }
    }
  }

  for(i=0; i<nDepth; i++){
    int rc2 = sqlite4BtPageRelease(apHier[i].pPg);
    if( rc==SQLITE4_OK ) rc = rc2;
  }

  if( rc==SQLITE4_OK ){
    if( nDirect>BT_MAX_DIRECT_OVERFLOW ){
      assert( nDirect==BT_MAX_DIRECT_OVERFLOW+1 );
      nDirect = BT_MAX_DIRECT_OVERFLOW;
    }
    assert( nDirect>=1 );
    aOut[0] = (u8)nDirect | (u8)(nDepth<<4) ;
  }

  *ppOut = &aOut[1 + nDirect*4];
  return rc;
}

/*
** Argument pKV contains a key/value pair destined for a leaf page in
** a database with page size pgsz. Currently it is in KV_VALUE form.
** If the key/value pair is too large to fit entirely within a leaf







|


>












|
<
|

|



|







1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128

1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
          }

          if( rc!=SQLITE4_OK ){
            pgno = 0;
          }

          apHier[i].pPg = pNew;
          apHier[i].iCell = 1;
        }else{
          btPutU32(&a[apHier[i].iCell*4], pgno);
          apHier[i].iCell++;
          pgno = 0;
        }
      }
    }
  }

  for(i=0; i<nDepth; i++){
    int rc2 = sqlite4BtPageRelease(apHier[i].pPg);
    if( rc==SQLITE4_OK ) rc = rc2;
  }

  if( rc==SQLITE4_OK ){
    if( nDepth==0 ){

      nDirect--;
    }
    assert( nDirect>=0 );
    aOut[0] = (u8)nDirect | (u8)(nDepth<<4) ;
  }

  *ppOut = &aOut[1 + (nDirect+1)*4];
  return rc;
}

/*
** Argument pKV contains a key/value pair destined for a leaf page in
** a database with page size pgsz. Currently it is in KV_VALUE form.
** If the key/value pair is too large to fit entirely within a leaf
1161
1162
1163
1164
1165
1166
1167

1168
1169
1170
1171
1172
1173
1174
      );
      if( rc==SQLITE4_OK ){
        memset(pKV, 0, sizeof(*pKV));
        pKV->pV = pBuf;
        pKV->nV = pOut - pBuf;
        pKV->eType = KV_CELL;
        pBuf = 0;

      }
    }

    if( pBuf ){
      btFreeBuffer(db, pBuf);
    }
  }







>







1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
      );
      if( rc==SQLITE4_OK ){
        memset(pKV, 0, sizeof(*pKV));
        pKV->pV = pBuf;
        pKV->nV = pOut - pBuf;
        pKV->eType = KV_CELL;
        pBuf = 0;
        assert( pKV->nV<=nMaxSize );
      }
    }

    if( pBuf ){
      btFreeBuffer(db, pBuf);
    }
  }
Changes to test/simple3.test.
161
162
163
164
165
166
167




























168
169
170
    if {0==($iRow % $nStep)} {
      do_execsql_test 4.$tn.3.$iRow {
        SELECT count(*) FROM t1;
      } [expr $nRow - $iRow]
    }
  }
}





























finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
    if {0==($iRow % $nStep)} {
      do_execsql_test 4.$tn.3.$iRow {
        SELECT count(*) FROM t1;
      } [expr $nRow - $iRow]
    }
  }
}

proc bigstr {n} {
  set nRep [expr 1+($n/20)]
  string range [string repeat "abcdefghijklmnopqrstuvwxyz" $nRep] 0 [expr $n-1]
}

foreach {tn nStr} {
  1 3000
  2 30000
  3 300000
  4 3000000
  5 30000000
} {
  set big [bigstr $nStr]
  do_execsql_test 5.$tn.1 {
    DROP TABLE IF EXISTS t5;
    CREATE TABLE t5(a PRIMARY KEY, b VALUE);
    INSERT INTO t5 VALUES(1, $big);
  }

  do_execsql_test 5.$tn.2 {
    SELECT length(b) FROM t5;
  } $nStr

  do_execsql_test 5.$tn.3 {
    SELECT b FROM t5;
  } [list $big]
}

finish_test