/* ** 2016-06-07 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** ** This is a utility program that computes an SHA1 hash on the content ** of an SQLite database. ** ** The hash is computed over just the content of the database. Free ** space inside of the database file, and alternative on-disk representations ** of the same content (ex: UTF8 vs UTF16) do not affect the hash. So, ** for example, the database file page size, encoding, and auto_vacuum setting ** can all be changed without changing the hash. */ #include #include #include #include #include #include #include "sqlite3.h" /* Context for the SHA1 hash */ typedef struct SHA1Context SHA1Context; struct SHA1Context { unsigned int state[5]; unsigned int count[2]; unsigned char buffer[64]; }; /* ** All global variables are gathered into the "g" singleton. */ struct GlobalVars { const char *zArgv0; /* Name of program */ unsigned fDebug; /* Debug flags */ sqlite3 *db; /* The database connection */ SHA1Context cx; /* SHA1 hash context */ } g; /* ** Debugging flags */ #define DEBUG_FULLTRACE 0x00000001 /* Trace hash to stderr */ /****************************************************************************** ** The Hash Engine ** ** Modify these routines (and appropriate state fields in global variable 'g') ** in order to compute a different (better?) hash of the database. */ /* * blk0() and blk() perform the initial expand. * I got the idea of expanding during the round function from SSLeay * * blk0le() for little-endian and blk0be() for big-endian. */ #if __GNUC__ && (defined(__i386__) || defined(__x86_64__)) /* * GCC by itself only generates left rotates. Use right rotates if * possible to be kinder to dinky implementations with iterative rotate * instructions. */ #define SHA_ROT(op, x, k) \ ({ unsigned int y; asm(op " %1,%0" : "=r" (y) : "I" (k), "0" (x)); y; }) #define rol(x,k) SHA_ROT("roll", x, k) #define ror(x,k) SHA_ROT("rorl", x, k) #else /* Generic C equivalent */ #define SHA_ROT(x,l,r) ((x) << (l) | (x) >> (r)) #define rol(x,k) SHA_ROT(x,k,32-(k)) #define ror(x,k) SHA_ROT(x,32-(k),k) #endif #define blk0le(i) (block[i] = (ror(block[i],8)&0xFF00FF00) \ |(rol(block[i],8)&0x00FF00FF)) #define blk0be(i) block[i] #define blk(i) (block[i&15] = rol(block[(i+13)&15]^block[(i+8)&15] \ ^block[(i+2)&15]^block[i&15],1)) /* * (R0+R1), R2, R3, R4 are the different operations (rounds) used in SHA1 * * Rl0() for little-endian and Rb0() for big-endian. Endianness is * determined at run-time. */ #define Rl0(v,w,x,y,z,i) \ z+=((w&(x^y))^y)+blk0le(i)+0x5A827999+rol(v,5);w=ror(w,2); #define Rb0(v,w,x,y,z,i) \ z+=((w&(x^y))^y)+blk0be(i)+0x5A827999+rol(v,5);w=ror(w,2); #define R1(v,w,x,y,z,i) \ z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=ror(w,2); #define R2(v,w,x,y,z,i) \ z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=ror(w,2); #define R3(v,w,x,y,z,i) \ z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=ror(w,2); #define R4(v,w,x,y,z,i) \ z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=ror(w,2); /* * Hash a single 512-bit block. This is the core of the algorithm. */ #define a qq[0] #define b qq[1] #define c qq[2] #define d qq[3] #define e qq[4] void SHA1Transform(unsigned int state[5], const unsigned char buffer[64]){ unsigned int qq[5]; /* a, b, c, d, e; */ static int one = 1; unsigned int block[16]; memcpy(block, buffer, 64); memcpy(qq,state,5*sizeof(unsigned int)); /* Copy g.cx.state[] to working vars */ /* a = state[0]; b = state[1]; c = state[2]; d = state[3]; e = state[4]; */ /* 4 rounds of 20 operations each. Loop unrolled. */ if( 1 == *(unsigned char*)&one ){ Rl0(a,b,c,d,e, 0); Rl0(e,a,b,c,d, 1); Rl0(d,e,a,b,c, 2); Rl0(c,d,e,a,b, 3); Rl0(b,c,d,e,a, 4); Rl0(a,b,c,d,e, 5); Rl0(e,a,b,c,d, 6); Rl0(d,e,a,b,c, 7); Rl0(c,d,e,a,b, 8); Rl0(b,c,d,e,a, 9); Rl0(a,b,c,d,e,10); Rl0(e,a,b,c,d,11); Rl0(d,e,a,b,c,12); Rl0(c,d,e,a,b,13); Rl0(b,c,d,e,a,14); Rl0(a,b,c,d,e,15); }else{ Rb0(a,b,c,d,e, 0); Rb0(e,a,b,c,d, 1); Rb0(d,e,a,b,c, 2); Rb0(c,d,e,a,b, 3); Rb0(b,c,d,e,a, 4); Rb0(a,b,c,d,e, 5); Rb0(e,a,b,c,d, 6); Rb0(d,e,a,b,c, 7); Rb0(c,d,e,a,b, 8); Rb0(b,c,d,e,a, 9); Rb0(a,b,c,d,e,10); Rb0(e,a,b,c,d,11); Rb0(d,e,a,b,c,12); Rb0(c,d,e,a,b,13); Rb0(b,c,d,e,a,14); Rb0(a,b,c,d,e,15); } R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19); R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23); R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27); R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31); R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35); R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39); R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43); R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47); R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51); R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55); R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59); R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63); R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67); R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75); R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); /* Add the working vars back into context.state[] */ state[0] += a; state[1] += b; state[2] += c; state[3] += d; state[4] += e; } /* Initialize the SHA1 hash */ static void hash_init(void){ /* SHA1 initialization constants */ g.cx.state[0] = 0x67452301; g.cx.state[1] = 0xEFCDAB89; g.cx.state[2] = 0x98BADCFE; g.cx.state[3] = 0x10325476; g.cx.state[4] = 0xC3D2E1F0; g.cx.count[0] = g.cx.count[1] = 0; } /* Add new content to the SHA1 hash */ static void hash_step(const unsigned char *data, unsigned int len){ unsigned int i, j; j = g.cx.count[0]; if( (g.cx.count[0] += len << 3) < j ){ g.cx.count[1] += (len>>29)+1; } j = (j >> 3) & 63; if( (j + len) > 63 ){ (void)memcpy(&g.cx.buffer[j], data, (i = 64-j)); SHA1Transform(g.cx.state, g.cx.buffer); for(; i + 63 < len; i += 64){ SHA1Transform(g.cx.state, &data[i]); } j = 0; }else{ i = 0; } (void)memcpy(&g.cx.buffer[j], &data[i], len - i); } /* Add padding and compute and output the message digest. */ static void hash_finish(const char *zName){ unsigned int i; unsigned char finalcount[8]; unsigned char digest[20]; static const char zEncode[] = "0123456789abcdef"; char zOut[41]; for (i = 0; i < 8; i++){ finalcount[i] = (unsigned char)((g.cx.count[(i >= 4 ? 0 : 1)] >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */ } hash_step((const unsigned char *)"\200", 1); while ((g.cx.count[0] & 504) != 448){ hash_step((const unsigned char *)"\0", 1); } hash_step(finalcount, 8); /* Should cause a SHA1Transform() */ for (i = 0; i < 20; i++){ digest[i] = (unsigned char)((g.cx.state[i>>2] >> ((3-(i & 3)) * 8) ) & 255); } for(i=0; i<20; i++){ zOut[i*2] = zEncode[(digest[i]>>4)&0xf]; zOut[i*2+1] = zEncode[digest[i] & 0xf]; } zOut[i*2]= 0; printf("%s %s\n", zOut, zName); } /* End of the hashing logic *******************************************************************************/ /* ** Print an error resulting from faulting command-line arguments and ** abort the program. */ static void cmdlineError(const char *zFormat, ...){ va_list ap; fprintf(stderr, "%s: ", g.zArgv0); va_start(ap, zFormat); vfprintf(stderr, zFormat, ap); va_end(ap); fprintf(stderr, "\n\"%s --help\" for more help\n", g.zArgv0); exit(1); } /* ** Print an error message for an error that occurs at runtime, then ** abort the program. */ static void runtimeError(const char *zFormat, ...){ va_list ap; fprintf(stderr, "%s: ", g.zArgv0); va_start(ap, zFormat); vfprintf(stderr, zFormat, ap); va_end(ap); fprintf(stderr, "\n"); exit(1); } /* ** Prepare a new SQL statement. Print an error and abort if anything ** goes wrong. */ static sqlite3_stmt *db_vprepare(const char *zFormat, va_list ap){ char *zSql; int rc; sqlite3_stmt *pStmt; zSql = sqlite3_vmprintf(zFormat, ap); if( zSql==0 ) runtimeError("out of memory"); rc = sqlite3_prepare_v2(g.db, zSql, -1, &pStmt, 0); if( rc ){ runtimeError("SQL statement error: %s\n\"%s\"", sqlite3_errmsg(g.db), zSql); } sqlite3_free(zSql); return pStmt; } static sqlite3_stmt *db_prepare(const char *zFormat, ...){ va_list ap; sqlite3_stmt *pStmt; va_start(ap, zFormat); pStmt = db_vprepare(zFormat, ap); va_end(ap); return pStmt; } /* ** Compute the hash for all rows of the query formed from the printf-style ** zFormat and its argument. */ static void hash_one_query(const char *zFormat, ...){ va_list ap; sqlite3_stmt *pStmt; /* The query defined by zFormat and "..." */ int nCol; /* Number of columns in the result set */ int i; /* Loop counter */ /* Prepare the query defined by zFormat and "..." */ va_start(ap, zFormat); pStmt = db_vprepare(zFormat, ap); va_end(ap); nCol = sqlite3_column_count(pStmt); /* Compute a hash over the result of the query */ while( SQLITE_ROW==sqlite3_step(pStmt) ){ for(i=0; i=0; j--){ x[j] = u & 0xff; u >>= 8; } hash_step((const unsigned char*)"1",1); hash_step(x,8); if( g.fDebug & DEBUG_FULLTRACE ){ fprintf(stderr, "INT %s\n", sqlite3_column_text(pStmt,i)); } break; } case SQLITE_FLOAT: { sqlite3_uint64 u; int j; unsigned char x[8]; double r = sqlite3_column_double(pStmt,i); memcpy(&u, &r, 8); for(j=7; j>=0; j--){ x[j] = u & 0xff; u >>= 8; } hash_step((const unsigned char*)"2",1); hash_step(x,8); if( g.fDebug & DEBUG_FULLTRACE ){ fprintf(stderr, "FLOAT %s\n", sqlite3_column_text(pStmt,i)); } break; } case SQLITE_TEXT: { int n = sqlite3_column_bytes(pStmt, i); const unsigned char *z = sqlite3_column_text(pStmt, i); hash_step((const unsigned char*)"3", 1); hash_step(z, n); if( g.fDebug & DEBUG_FULLTRACE ){ fprintf(stderr, "TEXT '%s'\n", sqlite3_column_text(pStmt,i)); } break; } case SQLITE_BLOB: { int n = sqlite3_column_bytes(pStmt, i); const unsigned char *z = sqlite3_column_blob(pStmt, i); hash_step((const unsigned char*)"4", 1); hash_step(z, n); if( g.fDebug & DEBUG_FULLTRACE ){ fprintf(stderr, "BLOB (%d bytes)\n", n); } break; } } } } sqlite3_finalize(pStmt); } /* ** Print sketchy documentation for this utility program */ static void showHelp(void){ printf("Usage: %s [options] FILE ...\n", g.zArgv0); printf( "Compute a SHA1 hash on the content of database FILE. System tables such as\n" "sqlite_stat1, sqlite_stat4, and sqlite_sequence are omitted from the hash.\n" "Options:\n" " --debug N Set debugging flags to N (experts only)\n" " --like PATTERN Only hash tables whose name is LIKE the pattern\n" " --schema-only Only hash the schema - omit table content\n" " --without-schema Only hash table content - omit the schema\n" ); } int main(int argc, char **argv){ const char *zDb = 0; /* Name of the database currently being hashed */ int i; /* Loop counter */ int rc; /* Subroutine return code */ char *zErrMsg; /* Error message when opening database */ sqlite3_stmt *pStmt; /* An SQLite query */ const char *zLike = 0; /* LIKE pattern of tables to hash */ int omitSchema = 0; /* True to compute hash on content only */ int omitContent = 0; /* True to compute hash on schema only */ int nFile = 0; /* Number of input filenames seen */ g.zArgv0 = argv[0]; sqlite3_config(SQLITE_CONFIG_SINGLETHREAD); for(i=1; i