SQLite4
Check-in [21db5f73f5d8ae85b917ebe63323b40af5fbd9ec]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
SHA1 Hash:21db5f73f5d8ae85b917ebe63323b40af5fbd9ec
Date: 2012-11-28 14:54:23
User: dan
Comment:Fix an mmap mode bug. Improve upon multi-threaded setups in lsmtest_tdb3.c.
Tags And Properties
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to lsm-test/lsmtest_tdb3.c

1
2
3
4
5
6
7
8
9
10
11
..
14
15
16
17
18
19
20













21
22
23
24
25
26
27
28
29
30
31
32

33
34
35
36
37
38
39
40
41
42
...
477
478
479
480
481
482
483
484
485
486
487
488







489
490
491
492
493
494
495
...
498
499
500
501
502
503
504

505
506
507
508
509
510
511
...
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
...
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
....
1023
1024
1025
1026
1027
1028
1029



1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042




1043


1044
1045
1046
1047
1048
1049
1050
1051
1052
1053


1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
....
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
....
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160




1161
1162
1163
1164
1165

1166
1167
1168
1169
1170
1171
1172
....
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194

1195

1196
1197

1198
1199


1200
1201
1202
1203


1204

1205
1206
1207
1208
1209

1210









1211
1212
1213
1214
1215
1216
1217

#include "lsmtest_tdb.h"
#include "lsm.h"

#include "lsmtest.h"

#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <stdio.h>
................................................................................

typedef struct LsmDb LsmDb;
typedef struct LsmWorker LsmWorker;
typedef struct LsmFile LsmFile;

#ifdef LSM_MUTEX_PTHREADS
#include <pthread.h>













struct LsmWorker {
  LsmDb *pDb;                     /* Main database structure */
  lsm_db *pWorker;                /* Worker database handle */
  pthread_t worker_thread;        /* Worker thread */
  pthread_cond_t worker_cond;     /* Condition var the worker waits on */
  pthread_mutex_t worker_mutex;   /* Mutex used with worker_cond */
  int bDoWork;                    /* Set to true by client when there is work */
  int worker_rc;                  /* Store error code here */

  int lsm_work_flags;             /* Flags to pass to lsm_work() */
  int lsm_work_npage;             /* nPage parameter to pass to lsm_work() */
  int bCkpt;                      /* True to call lsm_checkpoint() */

};
#else
struct LsmWorker { int worker_rc; };
#endif

static void mt_shutdown(LsmDb *);

lsm_env *tdb_lsm_env(void){
  static int bInit = 0;
  static lsm_env env;
................................................................................
  return rc;
}

static int test_lsm_write(
  TestDb *pTestDb, 
  void *pKey, 
  int nKey, 
  void *pVal, 
  int nVal
){
  LsmDb *pDb = (LsmDb *)pTestDb;








  if( pDb->aWorker ){
    int nLimit = -1;
    int nSleep = 0;
    lsm_config(pDb->db, LSM_CONFIG_AUTOFLUSH, &nLimit);
    do {
      int bOld, nNew, rc;
      rc = lsm_info(pDb->db, LSM_INFO_TREE_SIZE, &bOld, &nNew);
................................................................................
      usleep(1000);
      nSleep += 1;
    }while( 1 );
#if 0
    if( nSleep ) printf("nSleep=%d\n", nSleep);
#endif
  }


  return lsm_insert(pDb->db, pKey, nKey, pVal, nVal);
}

static int test_lsm_delete(TestDb *pTestDb, void *pKey, int nKey){
  LsmDb *pDb = (LsmDb *)pTestDb;
  return lsm_delete(pDb->db, pKey, nKey);
................................................................................
    lsm_config_log(pDb->db, xLog, 0);
    lsm_config_work_hook(pDb->db, xWorkHook, (void *)pDb);

    rc = test_lsm_config_str(pDb, pDb->db, 0, zCfg, &nThread);
    if( rc==LSM_OK ) rc = lsm_open(pDb->db, zFilename);

#ifdef LSM_MUTEX_PTHREADS
    if( rc==LSM_OK && (nThread==2 || nThread==3) ){
      testLsmStartWorkers(pDb, nThread-1, zFilename, zCfg);
    }
#endif

    if( rc!=LSM_OK ){
      test_lsm_close((TestDb *)pDb);
      pDb = 0;
    }
................................................................................
  }
  return 0;
}

void tdb_lsm_enable_log(TestDb *pDb, int bEnable){
  lsm_db *db = tdb_lsm(pDb);
  if( db ){
    LsmDb *p = (LsmDb *)pDb;
    int i;
    lsm_config_log(db, (bEnable ? xLog : 0), (void *)"client");
  }
}

void tdb_lsm_application_crash(TestDb *pDb){
  if( tdb_lsm(pDb) ){
    LsmDb *p = (LsmDb *)pDb;
................................................................................
  LsmWorker *p = &pDb->aWorker[iWorker];
  pthread_mutex_lock(&p->worker_mutex);
  p->bDoWork = 1;
  pthread_cond_signal(&p->worker_cond);
  pthread_mutex_unlock(&p->worker_mutex);
}




static void *worker_main(void *pArg){
  LsmWorker *p = (LsmWorker *)pArg;
  lsm_db *pWorker;                /* Connection to access db through */

  pthread_mutex_lock(&p->worker_mutex);
  while( (pWorker = p->pWorker) ){
    int rc = LSM_OK;
    int nCkpt = -1;

    /* Do some work. If an error occurs, exit. */
    pthread_mutex_unlock(&p->worker_mutex);

    if( p->bCkpt ){




      rc = lsm_checkpoint(pWorker, 0);


    }else{
      int nWrite = 0;             /* Pages written by lsm_work() call */
      int nAuto = -1;             /* Configured AUTOCHECKPOINT value */
      int nLimit = -1;            /* Configured AUTOFLUSH value */

      lsm_config(pWorker, LSM_CONFIG_AUTOFLUSH, &nLimit);
      lsm_config(pWorker, LSM_CONFIG_AUTOCHECKPOINT, &nAuto);
      do {
        int nSleep = 0;
        lsm_info(pWorker, LSM_INFO_CHECKPOINT_SIZE, &nCkpt);


        while( nAuto==0 && nCkpt>(nLimit*4) ){
          usleep(1000);
          mt_signal_worker(p->pDb, 1);
          nSleep++;
          lsm_info(pWorker, LSM_INFO_CHECKPOINT_SIZE, &nCkpt);
        }
#if 0
          if( nSleep ) printf("nLimit=%d nSleep=%d (worker)\n", nLimit, nSleep);
#endif

        rc = lsm_work(pWorker, p->lsm_work_flags, p->lsm_work_npage, &nWrite);
        if( nAuto==0 && nWrite && rc==LSM_OK ) mt_signal_worker(p->pDb, 1);
      }while( nWrite && p->pWorker );
    }
    pthread_mutex_lock(&p->worker_mutex);

    if( rc!=LSM_OK && rc!=LSM_BUSY ){
      p->worker_rc = rc;
      break;
................................................................................
** This callback is invoked by LSM when the client database writes to
** the database file (i.e. to flush the contents of the in-memory tree).
** This implies there may be work to do on the database, so signal
** the worker threads.
*/
static void mt_client_work_hook(lsm_db *db, void *pArg){
  LsmDb *pDb = (LsmDb *)pArg;     /* LsmDb database handle */
  int i;                          /* Iterator variable */

  /* Invoke the user level work-hook, if any. */
  if( pDb->xWork ) pDb->xWork(db, pDb->pWorkCtx);

  /* Signal the lsm_work() thread */
  mt_signal_worker(pDb, 0);
}
................................................................................
/*
** Launch worker thread iWorker for database connection pDb.
*/
static int mt_start_worker(
  LsmDb *pDb,                     /* Main database structure */
  int iWorker,                    /* Worker number to start */
  const char *zFilename,          /* File name of database to open */
  const char *zCfg,
  int flags,                      /* flags parameter to lsm_work() */
  int nPage,                      /* nPage parameter to lsm_work() */
  int bCkpt                       /* True to call lsm_checkpoint() */
){
  int rc = 0;                     /* Return code */
  LsmWorker *p;                   /* Object to initialize */

  assert( iWorker<pDb->nWorker );





  p = &pDb->aWorker[iWorker];
  p->lsm_work_flags = flags;
  p->lsm_work_npage = nPage;
  p->bCkpt = bCkpt;

  p->pDb = pDb;

  /* Open the worker connection */
  if( rc==0 ) rc = lsm_new(&pDb->env, &p->pWorker);
  if( zCfg ){
    test_lsm_config_str(pDb, p->pWorker, 1, zCfg, 0);
  }
................................................................................
  if( rc==0 ) rc = pthread_create(&p->worker_thread, 0, worker_main, (void *)p);

  return rc;
}


static int testLsmStartWorkers(
  LsmDb *pDb, int nWorker, const char *zFilename, const char *zCfg
){
  int rc;
  int bAutowork = 0;

  assert( nWorker==1 || nWorker==2 );


  /* Configure a work-hook for the client connection. */

  lsm_config_work_hook(pDb->db, mt_client_work_hook, (void *)pDb);



  pDb->aWorker = (LsmWorker *)testMalloc(sizeof(LsmWorker) * nWorker);
  memset(pDb->aWorker, 0, sizeof(LsmWorker) * nWorker);
  pDb->nWorker = nWorker;



  if( nWorker==1 ){

    rc = mt_start_worker(pDb, 0, zFilename, zCfg, 0, 256, 0);
  }else{
    rc = mt_start_worker(pDb, 0, zFilename, zCfg, 0, 256, 0);
    if( rc==LSM_OK ){
      rc = mt_start_worker(pDb, 1, zFilename, zCfg, 0, 0, 1);

    }









  }

  return rc;
}


int test_lsm_mt2(const char *zFilename, int bClear, TestDb **ppDb){



<







 







>
>
>
>
>
>
>
>
>
>
>
>
>








<
<
<
|
>


|







 







|




>
>
>
>
>
>
>







 







>







 







|
|







 







<
<







 







>
>
>












|
>
>
>
>
|
>
>








<

>
>






|
|
|
|
|
|







 







<







 







|
<
<
|





>
>
>
>


<
<
<
>







 







|


<
>
|
>

|
>


>
>
|
|
<

>
>
|
>
|
<
<
<
<
>
|
>
>
>
>
>
>
>
>
>







1
2
3

4
5
6
7
8
9
10
..
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40



41
42
43
44
45
46
47
48
49
50
51
52
...
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
...
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
...
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
...
951
952
953
954
955
956
957


958
959
960
961
962
963
964
....
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076

1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
....
1148
1149
1150
1151
1152
1153
1154

1155
1156
1157
1158
1159
1160
1161
....
1170
1171
1172
1173
1174
1175
1176
1177


1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189



1190
1191
1192
1193
1194
1195
1196
1197
....
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218

1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230

1231
1232
1233
1234
1235
1236




1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254

#include "lsmtest_tdb.h"
#include "lsm.h"

#include "lsmtest.h"

#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <stdio.h>
................................................................................

typedef struct LsmDb LsmDb;
typedef struct LsmWorker LsmWorker;
typedef struct LsmFile LsmFile;

#ifdef LSM_MUTEX_PTHREADS
#include <pthread.h>

#define LSMTEST_THREAD_CKPT      1
#define LSMTEST_THREAD_WORKER    2
#define LSMTEST_THREAD_WORKER_AC 3

/*
** There are several different types of worker threads that run in different
** test configurations, depending on the value of LsmWorker.eType.
**
**   1. Checkpointer.
**   2. Worker with auto-checkpoint.
**   3. Worker without auto-checkpoint.
*/
struct LsmWorker {
  LsmDb *pDb;                     /* Main database structure */
  lsm_db *pWorker;                /* Worker database handle */
  pthread_t worker_thread;        /* Worker thread */
  pthread_cond_t worker_cond;     /* Condition var the worker waits on */
  pthread_mutex_t worker_mutex;   /* Mutex used with worker_cond */
  int bDoWork;                    /* Set to true by client when there is work */
  int worker_rc;                  /* Store error code here */



  int eType;                      /* LSMTEST_THREAD_XXX constant */
  int bBlock;
};
#else
struct LsmWorker { int worker_rc; int bBlock; };
#endif

static void mt_shutdown(LsmDb *);

lsm_env *tdb_lsm_env(void){
  static int bInit = 0;
  static lsm_env env;
................................................................................
  return rc;
}

static int test_lsm_write(
  TestDb *pTestDb, 
  void *pKey, 
  int nKey, 
  void *pVal,
  int nVal
){
  LsmDb *pDb = (LsmDb *)pTestDb;

  int nSleep = 0;
  while( pDb->aWorker && pDb->aWorker[0].bBlock ){
    usleep(1000);
    nSleep++;
  }
#if 0
  if( nSleep ) printf("nSleep=%d\n", nSleep);
  if( pDb->aWorker ){
    int nLimit = -1;
    int nSleep = 0;
    lsm_config(pDb->db, LSM_CONFIG_AUTOFLUSH, &nLimit);
    do {
      int bOld, nNew, rc;
      rc = lsm_info(pDb->db, LSM_INFO_TREE_SIZE, &bOld, &nNew);
................................................................................
      usleep(1000);
      nSleep += 1;
    }while( 1 );
#if 0
    if( nSleep ) printf("nSleep=%d\n", nSleep);
#endif
  }
#endif

  return lsm_insert(pDb->db, pKey, nKey, pVal, nVal);
}

static int test_lsm_delete(TestDb *pTestDb, void *pKey, int nKey){
  LsmDb *pDb = (LsmDb *)pTestDb;
  return lsm_delete(pDb->db, pKey, nKey);
................................................................................
    lsm_config_log(pDb->db, xLog, 0);
    lsm_config_work_hook(pDb->db, xWorkHook, (void *)pDb);

    rc = test_lsm_config_str(pDb, pDb->db, 0, zCfg, &nThread);
    if( rc==LSM_OK ) rc = lsm_open(pDb->db, zFilename);

#ifdef LSM_MUTEX_PTHREADS
    if( rc==LSM_OK && nThread>1 ){
      testLsmStartWorkers(pDb, nThread, zFilename, zCfg);
    }
#endif

    if( rc!=LSM_OK ){
      test_lsm_close((TestDb *)pDb);
      pDb = 0;
    }
................................................................................
  }
  return 0;
}

void tdb_lsm_enable_log(TestDb *pDb, int bEnable){
  lsm_db *db = tdb_lsm(pDb);
  if( db ){


    lsm_config_log(db, (bEnable ? xLog : 0), (void *)"client");
  }
}

void tdb_lsm_application_crash(TestDb *pDb){
  if( tdb_lsm(pDb) ){
    LsmDb *p = (LsmDb *)pDb;
................................................................................
  LsmWorker *p = &pDb->aWorker[iWorker];
  pthread_mutex_lock(&p->worker_mutex);
  p->bDoWork = 1;
  pthread_cond_signal(&p->worker_cond);
  pthread_mutex_unlock(&p->worker_mutex);
}

/*
** This routine is used as the main() for all worker threads.
*/
static void *worker_main(void *pArg){
  LsmWorker *p = (LsmWorker *)pArg;
  lsm_db *pWorker;                /* Connection to access db through */

  pthread_mutex_lock(&p->worker_mutex);
  while( (pWorker = p->pWorker) ){
    int rc = LSM_OK;
    int nCkpt = -1;

    /* Do some work. If an error occurs, exit. */
    pthread_mutex_unlock(&p->worker_mutex);

    if( p->eType==LSMTEST_THREAD_CKPT ){
      int nByte = 0;
      rc = lsm_info(pWorker, LSM_INFO_CHECKPOINT_SIZE, &nByte);
      if( rc==LSM_OK && nByte>=(2*1024*1024) ){
        if( nByte>(8*1024*1024) ) p->bBlock = 1;
        rc = lsm_checkpoint(pWorker, 0);
        p->bBlock = 0;
      }
    }else{
      int nWrite = 0;             /* Pages written by lsm_work() call */
      int nAuto = -1;             /* Configured AUTOCHECKPOINT value */
      int nLimit = -1;            /* Configured AUTOFLUSH value */

      lsm_config(pWorker, LSM_CONFIG_AUTOFLUSH, &nLimit);
      lsm_config(pWorker, LSM_CONFIG_AUTOCHECKPOINT, &nAuto);
      do {

        lsm_info(pWorker, LSM_INFO_CHECKPOINT_SIZE, &nCkpt);
#if 0
        int nSleep = 0;
        while( nAuto==0 && nCkpt>(nLimit*4) ){
          usleep(1000);
          mt_signal_worker(p->pDb, 1);
          nSleep++;
          lsm_info(pWorker, LSM_INFO_CHECKPOINT_SIZE, &nCkpt);
        }
          if( nSleep ) printf("nLimit=%d nSleep=%d (worker)\n", nLimit, nSleep);
#endif
        rc = lsm_work(pWorker, 0, 256, &nWrite);
        if( p->eType==LSMTEST_THREAD_WORKER_AC && nWrite && rc==LSM_OK ){
          mt_signal_worker(p->pDb, 1);
        }
      }while( nWrite && p->pWorker );
    }
    pthread_mutex_lock(&p->worker_mutex);

    if( rc!=LSM_OK && rc!=LSM_BUSY ){
      p->worker_rc = rc;
      break;
................................................................................
** This callback is invoked by LSM when the client database writes to
** the database file (i.e. to flush the contents of the in-memory tree).
** This implies there may be work to do on the database, so signal
** the worker threads.
*/
static void mt_client_work_hook(lsm_db *db, void *pArg){
  LsmDb *pDb = (LsmDb *)pArg;     /* LsmDb database handle */


  /* Invoke the user level work-hook, if any. */
  if( pDb->xWork ) pDb->xWork(db, pDb->pWorkCtx);

  /* Signal the lsm_work() thread */
  mt_signal_worker(pDb, 0);
}
................................................................................
/*
** Launch worker thread iWorker for database connection pDb.
*/
static int mt_start_worker(
  LsmDb *pDb,                     /* Main database structure */
  int iWorker,                    /* Worker number to start */
  const char *zFilename,          /* File name of database to open */
  const char *zCfg,               /* Connection configuration string */


  int eType                       /* Type of worker thread */
){
  int rc = 0;                     /* Return code */
  LsmWorker *p;                   /* Object to initialize */

  assert( iWorker<pDb->nWorker );
  assert( eType==LSMTEST_THREAD_CKPT 
       || eType==LSMTEST_THREAD_WORKER 
       || eType==LSMTEST_THREAD_WORKER_AC 
  );

  p = &pDb->aWorker[iWorker];



  p->eType = eType;
  p->pDb = pDb;

  /* Open the worker connection */
  if( rc==0 ) rc = lsm_new(&pDb->env, &p->pWorker);
  if( zCfg ){
    test_lsm_config_str(pDb, p->pWorker, 1, zCfg, 0);
  }
................................................................................
  if( rc==0 ) rc = pthread_create(&p->worker_thread, 0, worker_main, (void *)p);

  return rc;
}


static int testLsmStartWorkers(
  LsmDb *pDb, int eModel, const char *zFilename, const char *zCfg
){
  int rc;


  if( eModel<1 || eModel>4 ) return 1;
  if( eModel==1 ) return 0;

  /* Configure a work-hook for the client connection. Worker 0 is signalled
  ** every time the users connection writes to the database.  */
  lsm_config_work_hook(pDb->db, mt_client_work_hook, (void *)pDb);

  /* Allocate space for two worker connections. They may not both be
  ** used, but both are allocated.  */
  pDb->aWorker = (LsmWorker *)testMalloc(sizeof(LsmWorker) * 2);
  memset(pDb->aWorker, 0, sizeof(LsmWorker) * 2);


  switch( eModel ){
    case 2:
      pDb->nWorker = 1;
      test_lsm_config_str(0, pDb->db, 0, "autocheckpoint=0", 0);
      rc = mt_start_worker(pDb, 0, zFilename, zCfg, LSMTEST_THREAD_CKPT);




      break;

    case 3:
      pDb->nWorker = 2;
      assert( 0 );
      break;

    case 4:
      pDb->nWorker = 2;
      assert( 0 );
      break;
  }

  return rc;
}


int test_lsm_mt2(const char *zFilename, int bClear, TestDb **ppDb){

Changes to src/lsm.h

392
393
394
395
396
397
398


399
400
401
402
403
404
405
#define LSM_INFO_ARRAY_STRUCTURE  5
#define LSM_INFO_PAGE_ASCII_DUMP  6
#define LSM_INFO_PAGE_HEX_DUMP    7
#define LSM_INFO_FREELIST         8
#define LSM_INFO_ARRAY_PAGES      9
#define LSM_INFO_CHECKPOINT_SIZE 10
#define LSM_INFO_TREE_SIZE       11




/* 
** CAPI: Opening and Closing Write Transactions
**
** These functions are used to open and close transactions and nested 
** sub-transactions.







>
>







392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
#define LSM_INFO_ARRAY_STRUCTURE  5
#define LSM_INFO_PAGE_ASCII_DUMP  6
#define LSM_INFO_PAGE_HEX_DUMP    7
#define LSM_INFO_FREELIST         8
#define LSM_INFO_ARRAY_PAGES      9
#define LSM_INFO_CHECKPOINT_SIZE 10
#define LSM_INFO_TREE_SIZE       11

#define LSM_INFO_FREELIST_SIZE   12


/* 
** CAPI: Opening and Closing Write Transactions
**
** These functions are used to open and close transactions and nested 
** sub-transactions.

Changes to src/lsmInt.h

786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
int lsmLogCommit(lsm_db *);
void lsmLogEnd(lsm_db *pDb, int bCommit);
void lsmLogTell(lsm_db *, LogMark *);
void lsmLogSeek(lsm_db *, LogMark *);

int lsmLogRecover(lsm_db *);
int lsmInfoLogStructure(lsm_db *pDb, char **pzVal);
int lsmInfoFreelist(lsm_db *, char **pzVal);


/**************************************************************************
** Functions from file "lsm_shared.c".
*/

int lsmDbDatabaseConnect(lsm_db*, const char *);







<







786
787
788
789
790
791
792

793
794
795
796
797
798
799
int lsmLogCommit(lsm_db *);
void lsmLogEnd(lsm_db *pDb, int bCommit);
void lsmLogTell(lsm_db *, LogMark *);
void lsmLogSeek(lsm_db *, LogMark *);

int lsmLogRecover(lsm_db *);
int lsmInfoLogStructure(lsm_db *pDb, char **pzVal);



/**************************************************************************
** Functions from file "lsm_shared.c".
*/

int lsmDbDatabaseConnect(lsm_db*, const char *);

Changes to src/lsm_ckpt.c

32
33
34
35
36
37
38

39
40
41
42
43
44
45
..
68
69
70
71
72
73
74


75
76
77
78
79
80
81
82
**     2. The checkpoint id LSW.
**     3. The number of integer values in the entire checkpoint, including 
**        the two checksum values.
**     4. The total number of blocks in the database.
**     5. The block size.
**     6. The number of levels.
**     7. The nominal database page size.

**
**   Log pointer:
**
**     1. The log offset MSW.
**     2. The log offset LSW.
**     3. Log checksum 0.
**     4. Log checksum 1.
................................................................................
**     9. Current pointer value (64-bits - 2 integers).
**
**   The in-memory freelist entries. Each entry is either an insert or a
**   delete. The in-memory freelist is to the free-block-list as the
**   in-memory tree is to the users database content.
**
**     1. Number of free-list entries stored in checkpoint header.


**     2. For each entry:
**        2a. Block number of free block.
**        2b. A 64-bit integer (MSW followed by LSW). -1 for a delete entry,
**            or the associated checkpoint id for an insert.
**
**   The checksum:
**
**     1. Checksum value 1.







>







 







>
>
|







32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
..
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
**     2. The checkpoint id LSW.
**     3. The number of integer values in the entire checkpoint, including 
**        the two checksum values.
**     4. The total number of blocks in the database.
**     5. The block size.
**     6. The number of levels.
**     7. The nominal database page size.
**     8. The number of pages (in total) written to the database file.
**
**   Log pointer:
**
**     1. The log offset MSW.
**     2. The log offset LSW.
**     3. Log checksum 0.
**     4. Log checksum 1.
................................................................................
**     9. Current pointer value (64-bits - 2 integers).
**
**   The in-memory freelist entries. Each entry is either an insert or a
**   delete. The in-memory freelist is to the free-block-list as the
**   in-memory tree is to the users database content.
**
**     1. Number of free-list entries stored in checkpoint header.
**     2. Number of free blocks (in total).
**     3. Total number of blocks freed during database lifetime.
**     4. For each entry:
**        2a. Block number of free block.
**        2b. A 64-bit integer (MSW followed by LSW). -1 for a delete entry,
**            or the associated checkpoint id for an insert.
**
**   The checksum:
**
**     1. Checksum value 1.

Changes to src/lsm_file.c

1114
1115
1116
1117
1118
1119
1120

1121
1122
1123
1124









1125
1126
1127
1128
1129
1130
1131
....
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990

1991
1992
1993


1994
1995
1996
1997
1998
1999
2000
....
2458
2459
2460
2461
2462
2463
2464






2465
2466
2467
2468
2469
2470
2471
  int rc = LSM_OK;

  assert( iPg>=fsFirstPageOnBlock(pFS, 1) );
  *ppPg = 0;

  assert( pFS->bUseMmap==0 || pFS->pCompress==0 );
  if( pFS->bUseMmap ){

    i64 iEnd = (i64)iPg * pFS->nPagesize;
    fsGrowMapping(pFS, iEnd, &rc);
    if( rc!=LSM_OK ) return rc;










    if( pFS->pFree ){
      p = pFS->pFree;
      pFS->pFree = p->pHashNext;
      assert( p->nRef==0 );
    }else{
      p = lsmMallocZeroRc(pFS->pEnv, sizeof(Page), &rc);
      if( rc ) return rc;
................................................................................
        ** lsmFsPagePersist() to write an out-of-order page. Instead a page 
        ** number is assigned here so that the page data will be appended
        ** to the current segment.
        */
        Page **pp;
        int iPrev = 0;
        int iNext = 0;
        int iHash;

        assert( pPg->pSeg->iFirst );
        assert( pPg->flags & PAGE_FREE );
        assert( (pPg->flags & PAGE_HASPREV)==0 );
        assert( pPg->nData==pFS->nPagesize-4 );

        rc = fsAppendPage(pFS, pPg->pSeg, &pPg->iPg, &iPrev, &iNext);
        if( rc!=LSM_OK ) return rc;


        iHash = fsHashKey(pFS->nHash, pPg->iPg);
        pPg->pHashNext = pFS->apHash[iHash];
        pFS->apHash[iHash] = pPg;



        if( iPrev ){
          assert( iNext==0 );
          memmove(&pPg->aData[4], pPg->aData, pPg->nData);
          lsmPutU32(pPg->aData, iPrev);
          pPg->flags |= PAGE_HASPREV;
          pPg->aData += 4;
................................................................................
  int i;
  int rc;
  Freelist freelist = {0, 0, 0};
  u8 *aUsed;
  Level *pLevel;
  Snapshot *pWorker = pDb->pWorker;
  int nBlock = pWorker->nBlock;







  aUsed = lsmMallocZero(pDb->pEnv, nBlock);
  if( aUsed==0 ){
    /* Malloc has failed. Since this function is only called within debug
    ** builds, this probably means the user is running an OOM injection test.
    ** Regardless, it will not be possible to run the integrity-check at this
    ** time, so assume the database is Ok and return non-zero. */







>




>
>
>
>
>
>
>
>
>







 







<









>
|
|
|
>
>







 







>
>
>
>
>
>







1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
....
1984
1985
1986
1987
1988
1989
1990

1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
....
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
  int rc = LSM_OK;

  assert( iPg>=fsFirstPageOnBlock(pFS, 1) );
  *ppPg = 0;

  assert( pFS->bUseMmap==0 || pFS->pCompress==0 );
  if( pFS->bUseMmap ){
    Page *pTest;
    i64 iEnd = (i64)iPg * pFS->nPagesize;
    fsGrowMapping(pFS, iEnd, &rc);
    if( rc!=LSM_OK ) return rc;

    p = 0;
    for(pTest=pFS->pWaiting; pTest; pTest=pTest->pNextWaiting){
      if( pTest->iPg==iPg ){
        p = pTest;
        p->nRef++;
        *ppPg = p;
        return LSM_OK;
      }
    }
    if( pFS->pFree ){
      p = pFS->pFree;
      pFS->pFree = p->pHashNext;
      assert( p->nRef==0 );
    }else{
      p = lsmMallocZeroRc(pFS->pEnv, sizeof(Page), &rc);
      if( rc ) return rc;
................................................................................
        ** lsmFsPagePersist() to write an out-of-order page. Instead a page 
        ** number is assigned here so that the page data will be appended
        ** to the current segment.
        */
        Page **pp;
        int iPrev = 0;
        int iNext = 0;


        assert( pPg->pSeg->iFirst );
        assert( pPg->flags & PAGE_FREE );
        assert( (pPg->flags & PAGE_HASPREV)==0 );
        assert( pPg->nData==pFS->nPagesize-4 );

        rc = fsAppendPage(pFS, pPg->pSeg, &pPg->iPg, &iPrev, &iNext);
        if( rc!=LSM_OK ) return rc;

        if( pFS->bUseMmap==0 ){
          int iHash = fsHashKey(pFS->nHash, pPg->iPg);
          pPg->pHashNext = pFS->apHash[iHash];
          pFS->apHash[iHash] = pPg;
          assert( pPg->pHashNext==0 || pPg->pHashNext->iPg!=pPg->iPg );
        }

        if( iPrev ){
          assert( iNext==0 );
          memmove(&pPg->aData[4], pPg->aData, pPg->nData);
          lsmPutU32(pPg->aData, iPrev);
          pPg->flags |= PAGE_HASPREV;
          pPg->aData += 4;
................................................................................
  int i;
  int rc;
  Freelist freelist = {0, 0, 0};
  u8 *aUsed;
  Level *pLevel;
  Snapshot *pWorker = pDb->pWorker;
  int nBlock = pWorker->nBlock;

#if 0 
  static int nCall = 0;
  nCall++;
  printf("%d calls\n", nCall);
#endif

  aUsed = lsmMallocZero(pDb->pEnv, nBlock);
  if( aUsed==0 ){
    /* Malloc has failed. Since this function is only called within debug
    ** builds, this probably means the user is running an OOM injection test.
    ** Regardless, it will not be possible to run the integrity-check at this
    ** time, so assume the database is Ok and return non-zero. */

Changes to src/lsm_main.c

421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
...
444
445
446
447
448
449
450



451
452
453
454
455
456
457
...
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550

static int infoFreelistCb(void *pCtx, int iBlk, i64 iSnapshot){
  LsmString *pStr = (LsmString *)pCtx;
  lsmStringAppendf(pStr, "%s{%d %lld}", (pStr->n?" ":""), iBlk, iSnapshot);
  return 0;
}

int lsmInfoFreelist(lsm_db *pDb, char **pzOut){
  Snapshot *pWorker;              /* Worker snapshot */
  int bUnlock = 0;
  LsmString s;
  int i;
  int rc;

  /* Obtain the worker snapshot */
................................................................................
    *pzOut = s.z;
  }

  /* Release the snapshot and return */
  infoFreeWorker(pDb, bUnlock);
  return rc;
}




static int infoTreeSize(lsm_db *db, int *pnOld, int *pnNew){
  ShmHeader *pShm = db->pShmhdr;
  TreeHeader *p = &pShm->hdr1;

  /* The following code suffers from two race conditions, as it accesses and
  ** trusts the contents of shared memory without verifying checksums:
................................................................................
      char **pzVal = va_arg(ap, char **);
      rc = lsmInfoLogStructure(pDb, pzVal);
      break;
    }

    case LSM_INFO_FREELIST: {
      char **pzVal = va_arg(ap, char **);
      rc = lsmInfoFreelist(pDb, pzVal);
      break;
    }

    case LSM_INFO_CHECKPOINT_SIZE: {
      int *pnByte = va_arg(ap, int *);
      rc = lsmCheckpointSize(pDb, pnByte);
      break;







|







 







>
>
>







 







|







421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
...
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
...
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553

static int infoFreelistCb(void *pCtx, int iBlk, i64 iSnapshot){
  LsmString *pStr = (LsmString *)pCtx;
  lsmStringAppendf(pStr, "%s{%d %lld}", (pStr->n?" ":""), iBlk, iSnapshot);
  return 0;
}

static int infoFreelist(lsm_db *pDb, char **pzOut){
  Snapshot *pWorker;              /* Worker snapshot */
  int bUnlock = 0;
  LsmString s;
  int i;
  int rc;

  /* Obtain the worker snapshot */
................................................................................
    *pzOut = s.z;
  }

  /* Release the snapshot and return */
  infoFreeWorker(pDb, bUnlock);
  return rc;
}

static int infoFreelistSize(lsm_db *pDb, int *pnFree, int *pnWaiting){
}

static int infoTreeSize(lsm_db *db, int *pnOld, int *pnNew){
  ShmHeader *pShm = db->pShmhdr;
  TreeHeader *p = &pShm->hdr1;

  /* The following code suffers from two race conditions, as it accesses and
  ** trusts the contents of shared memory without verifying checksums:
................................................................................
      char **pzVal = va_arg(ap, char **);
      rc = lsmInfoLogStructure(pDb, pzVal);
      break;
    }

    case LSM_INFO_FREELIST: {
      char **pzVal = va_arg(ap, char **);
      rc = infoFreelist(pDb, pzVal);
      break;
    }

    case LSM_INFO_CHECKPOINT_SIZE: {
      int *pnByte = va_arg(ap, int *);
      rc = lsmCheckpointSize(pDb, pnByte);
      break;

Changes to src/lsm_sorted.c

4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
....
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
    assert( rc!=LSM_OK || pDb->pWorker->freelist.nEntry==0 );
    lsmDbSnapshotSetLevel(pDb->pWorker, pNext);
    sortedFreeLevel(pDb->pEnv, pNew);
  }else{
    if( pDel ) pDel->iRoot = 0;

#if 0
    lsmSortedDumpStructure(pDb, pDb->pWorker, 1, 1, "new-toplevel");
#endif

    if( freelist.nEntry ){
      Freelist *p = &pDb->pWorker->freelist;
      lsmFree(pDb->pEnv, p->aEntry);
      memcpy(p, &freelist, sizeof(freelist));
      freelist.aEntry = 0;
................................................................................
      /* Clean up the MergeWorker object initialized above. If no error
      ** has occurred, invoke the work-hook to inform the application that
      ** the database structure has changed. */
      mergeWorkerShutdown(&mergeworker, &rc);
      if( rc==LSM_OK ) sortedInvokeWorkHook(pDb);

#if 0
      lsmSortedDumpStructure(pDb, pDb->pWorker, 1, 1, "work");
#endif
      assertBtreeOk(pDb, &pLevel->lhs);
      assertRunInOrder(pDb, &pLevel->lhs);

      /* If bFlush is true and the database is no longer considered "full",
      ** break out of the loop even if nRemaining is still greater than
      ** zero. The caller has an in-memory tree to flush to disk.  */







|







 







|







4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
....
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
    assert( rc!=LSM_OK || pDb->pWorker->freelist.nEntry==0 );
    lsmDbSnapshotSetLevel(pDb->pWorker, pNext);
    sortedFreeLevel(pDb->pEnv, pNew);
  }else{
    if( pDel ) pDel->iRoot = 0;

#if 0
    lsmSortedDumpStructure(pDb, pDb->pWorker, 0, 0, "new-toplevel");
#endif

    if( freelist.nEntry ){
      Freelist *p = &pDb->pWorker->freelist;
      lsmFree(pDb->pEnv, p->aEntry);
      memcpy(p, &freelist, sizeof(freelist));
      freelist.aEntry = 0;
................................................................................
      /* Clean up the MergeWorker object initialized above. If no error
      ** has occurred, invoke the work-hook to inform the application that
      ** the database structure has changed. */
      mergeWorkerShutdown(&mergeworker, &rc);
      if( rc==LSM_OK ) sortedInvokeWorkHook(pDb);

#if 0
      lsmSortedDumpStructure(pDb, pDb->pWorker, 0, 0, "work");
#endif
      assertBtreeOk(pDb, &pLevel->lhs);
      assertRunInOrder(pDb, &pLevel->lhs);

      /* If bFlush is true and the database is no longer considered "full",
      ** break out of the loop even if nRemaining is still greater than
      ** zero. The caller has an in-memory tree to flush to disk.  */

Changes to src/lsm_unix.c

186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
  off_t iSz;
  int prc;
  PosixFile *p = (PosixFile *)pFile;
  struct stat buf;

  if( p->pMap ){
    munmap(p->pMap, p->nMap);
    p->pMap = 0;
    p->nMap = 0;
  }

  memset(&buf, 0, sizeof(buf));
  prc = fstat(p->fd, &buf);
  if( prc!=0 ) return LSM_IOERR_BKPT;
  iSz = buf.st_size;
  if( iSz<iMin ){







|
|







186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
  off_t iSz;
  int prc;
  PosixFile *p = (PosixFile *)pFile;
  struct stat buf;

  if( p->pMap ){
    munmap(p->pMap, p->nMap);
    *ppOut = p->pMap = 0;
    *pnOut = p->nMap = 0;
  }

  memset(&buf, 0, sizeof(buf));
  prc = fstat(p->fd, &buf);
  if( prc!=0 ) return LSM_IOERR_BKPT;
  iSz = buf.st_size;
  if( iSz<iMin ){

Changes to tool/lsmperf.tcl

186
187
188
189
190
191
192
193
194

195
196
197
198
199
200
201
  append script $data3
  append script $data4

  append script "pause -1\n"
  exec_gnuplot_script $script $zPng
}

do_write_test x.png 100 50000 50000 20 {
  lsm safety=0

}


  #lsm "mmap=1 multi_proc=0 page_size=4096 block_size=2097152 autocheckpoint=4194000"
  #lsm-mt    "mmap=1 multi_proc=0 threads=2 autowork=0 autocheckpoint=4196000"

# lsm     "safety=1 multi_proc=0"







|
|
>







186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
  append script $data3
  append script $data4

  append script "pause -1\n"
  exec_gnuplot_script $script $zPng
}

do_write_test x.png 100 50000 0 20 {
  lsm-mt "threads=2 multi_proc=0"
  leveldb leveldb
}


  #lsm "mmap=1 multi_proc=0 page_size=4096 block_size=2097152 autocheckpoint=4194000"
  #lsm-mt    "mmap=1 multi_proc=0 threads=2 autowork=0 autocheckpoint=4196000"

# lsm     "safety=1 multi_proc=0"