/ Check-in [c883c4d3]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Enhance the string formatter (used by printf()) so that the width and precision of string substitution operators refer to characters instead of bytes when the alternate-form-2 flag ("!") is used. Also fix the %c substitution to always work within unicode, regardless of the alternate-form-2 flag.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256:c883c4d33f4cd722982aca17eeb355d19eeed793991d923b5494d173b1d0c73a
User & Date: drh 2018-02-19 22:46:19
Context
2018-02-20
13:46
Remove a lot of the text describing extended format options from the documentation on sqlite3_mprintf() and friends, since that information is now covered by the separate printf.html document. Provide links to that other document. No changes to code. check-in: 99eec556 user: drh tags: trunk
2018-02-19
22:46
Enhance the string formatter (used by printf()) so that the width and precision of string substitution operators refer to characters instead of bytes when the alternate-form-2 flag ("!") is used. Also fix the %c substitution to always work within unicode, regardless of the alternate-form-2 flag. check-in: c883c4d3 user: drh tags: trunk
21:58
Printing a value of 0 using %c terminates the string. Closed-Leaf check-in: 255612f0 user: drh tags: printf-enhancement
16:34
Only try to use the geteuid() interface on unix if HAVE_FCHOWN is defined. This fixes the build for vxWorks, we are told. check-in: 38f654dc user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Show Whitespace Changes Patch

Changes to src/printf.c.

   620    620           buf[0] = '%';
   621    621           bufpt = buf;
   622    622           length = 1;
   623    623           break;
   624    624         case etCHARX:
   625    625           if( bArgList ){
   626    626             bufpt = getTextArg(pArgList);
   627         -          c = bufpt ? bufpt[0] : 0;
          627  +          length = 1;
          628  +          if( bufpt ){
          629  +            buf[0] = c = *(bufpt++);
          630  +            if( (c&0xc0)==0xc0 ){
          631  +              while( length<4 && (bufpt[0]&0xc0)==0x80 ){
          632  +                buf[length++] = *(bufpt++);
          633  +              }
          634  +            }
          635  +          }else{
          636  +            buf[0] = 0;
          637  +          }
          638  +        }else{
          639  +          unsigned int ch = va_arg(ap,unsigned int);
          640  +          if( ch<0x00080 ){
          641  +            buf[0] = ch & 0xff;
          642  +            length = 1;
          643  +          }else if( ch<0x00800 ){
          644  +            buf[0] = 0xc0 + (u8)((ch>>6)&0x1f);
          645  +            buf[1] = 0x80 + (u8)(ch & 0x3f);
          646  +            length = 2;
          647  +          }else if( ch<0x10000 ){
          648  +            buf[0] = 0xe0 + (u8)((ch>>12)&0x0f);
          649  +            buf[1] = 0x80 + (u8)((ch>>6) & 0x3f);
          650  +            buf[2] = 0x80 + (u8)(ch & 0x3f);
          651  +            length = 3;
   628    652           }else{
   629         -          c = va_arg(ap,int);
          653  +            buf[0] = 0xf0 + (u8)((ch>>18) & 0x07);
          654  +            buf[1] = 0x80 + (u8)((ch>>12) & 0x3f);
          655  +            buf[2] = 0x80 + (u8)((ch>>6) & 0x3f);
          656  +            buf[3] = 0x80 + (u8)(ch & 0x3f);
          657  +            length = 4;
          658  +          }
   630    659           }
   631    660           if( precision>1 ){
   632    661             width -= precision-1;
   633    662             if( width>1 && !flag_leftjustify ){
   634    663               sqlite3AppendChar(pAccum, width-1, ' ');
   635    664               width = 0;
   636    665             }
   637         -          sqlite3AppendChar(pAccum, precision-1, c);
          666  +          while( precision-- > 1 ){
          667  +            sqlite3StrAccumAppend(pAccum, buf, length);
   638    668           }
   639         -        length = 1;
   640         -        buf[0] = c;
          669  +        }
   641    670           bufpt = buf;
   642         -        break;
          671  +        flag_altform2 = 1;
          672  +        goto adjust_width_for_utf8;
   643    673         case etSTRING:
   644    674         case etDYNSTRING:
   645    675           if( bArgList ){
   646    676             bufpt = getTextArg(pArgList);
   647    677             xtype = etSTRING;
   648    678           }else{
   649    679             bufpt = va_arg(ap,char*);
................................................................................
   650    680           }
   651    681           if( bufpt==0 ){
   652    682             bufpt = "";
   653    683           }else if( xtype==etDYNSTRING ){
   654    684             zExtra = bufpt;
   655    685           }
   656    686           if( precision>=0 ){
          687  +          if( flag_altform2 ){
          688  +            /* Set length to the number of bytes needed in order to display
          689  +            ** precision characters */
          690  +            unsigned char *z = (unsigned char*)bufpt;
          691  +            while( precision-- > 0 && z[0] ){
          692  +              SQLITE_SKIP_UTF8(z);
          693  +            }
          694  +            length = (int)(z - (unsigned char*)bufpt);
          695  +          }else{
   657    696             for(length=0; length<precision && bufpt[length]; length++){}
          697  +          }
   658    698           }else{
   659    699             length = 0x7fffffff & (int)strlen(bufpt);
   660    700           }
          701  +      adjust_width_for_utf8:
          702  +        if( flag_altform2 && width>0 ){
          703  +          /* Adjust width to account for extra bytes in UTF-8 characters */
          704  +          int ii = length - 1;
          705  +          while( ii>=0 ) if( (bufpt[ii--] & 0xc0)==0x80 ) width++;
          706  +        }
   661    707           break;
   662         -      case etSQLESCAPE:           /* Escape ' characters */
   663         -      case etSQLESCAPE2:          /* Escape ' and enclose in '...' */
   664         -      case etSQLESCAPE3: {        /* Escape " characters */
          708  +      case etSQLESCAPE:           /* %q: Escape ' characters */
          709  +      case etSQLESCAPE2:          /* %Q: Escape ' and enclose in '...' */
          710  +      case etSQLESCAPE3: {        /* %w: Escape " characters */
   665    711           int i, j, k, n, isnull;
   666    712           int needQuote;
   667    713           char ch;
   668    714           char q = ((xtype==etSQLESCAPE3)?'"':'\'');   /* Quote character */
   669    715           char *escarg;
   670    716   
   671    717           if( bArgList ){
   672    718             escarg = getTextArg(pArgList);
   673    719           }else{
   674    720             escarg = va_arg(ap,char*);
   675    721           }
   676    722           isnull = escarg==0;
   677    723           if( isnull ) escarg = (xtype==etSQLESCAPE2 ? "NULL" : "(NULL)");
          724  +        /* For %q, %Q, and %w, the precision is the number of bytes (or
          725  +        ** characters if the ! flag is present) to use from the input.
          726  +        ** Because of the extra quoting characters inserted, the number
          727  +        ** of output characters may be larger than the precision.
          728  +        */
   678    729           k = precision;
   679    730           for(i=n=0; k!=0 && (ch=escarg[i])!=0; i++, k--){
   680    731             if( ch==q )  n++;
          732  +          if( flag_altform2 && (ch&0xc0)==0xc0 ){
          733  +            while( (escarg[i+1]&0xc0)==0x80 ){ i++; }
          734  +          }
   681    735           }
   682    736           needQuote = !isnull && xtype==etSQLESCAPE2;
   683    737           n += i + 3;
   684    738           if( n>etBUFSIZE ){
   685    739             bufpt = zExtra = sqlite3Malloc( n );
   686    740             if( bufpt==0 ){
   687    741               setStrAccumError(pAccum, STRACCUM_NOMEM);
................................................................................
   696    750           for(i=0; i<k; i++){
   697    751             bufpt[j++] = ch = escarg[i];
   698    752             if( ch==q ) bufpt[j++] = ch;
   699    753           }
   700    754           if( needQuote ) bufpt[j++] = q;
   701    755           bufpt[j] = 0;
   702    756           length = j;
   703         -        /* The precision in %q and %Q means how many input characters to
   704         -        ** consume, not the length of the output...
   705         -        ** if( precision>=0 && precision<length ) length = precision; */
   706         -        break;
          757  +        goto adjust_width_for_utf8;
   707    758         }
   708    759         case etTOKEN: {
   709    760           Token *pToken;
   710    761           if( (pAccum->printfFlags & SQLITE_PRINTF_INTERNAL)==0 ) return;
   711    762           pToken = va_arg(ap, Token*);
   712    763           assert( bArgList==0 );
   713    764           if( pToken && pToken->n ){
................................................................................
   738    789           assert( xtype==etINVALID );
   739    790           return;
   740    791         }
   741    792       }/* End switch over the format type */
   742    793       /*
   743    794       ** The text of the conversion is pointed to by "bufpt" and is
   744    795       ** "length" characters long.  The field width is "width".  Do
   745         -    ** the output.
          796  +    ** the output.  Both length and width are in bytes, not characters,
          797  +    ** at this point.  If the "!" flag was present on string conversions
          798  +    ** indicating that width and precision should be expressed in characters,
          799  +    ** then the values have been translated prior to reaching this point.
   746    800       */
   747    801       width -= length;
   748    802       if( width>0 ){
   749    803         if( !flag_leftjustify ) sqlite3AppendChar(pAccum, width, ' ');
   750    804         sqlite3StrAccumAppend(pAccum, bufpt, length);
   751    805         if( flag_leftjustify ) sqlite3AppendChar(pAccum, width, ' ');
   752    806       }else{

Changes to test/printf2.test.

   144    144   do_execsql_test printf2-4.9 {
   145    145     SELECT printf('|%,d|%,d|',123456789,-123456789);
   146    146   } {|123,456,789|-123,456,789|}
   147    147   do_execsql_test printf2-4.10 {
   148    148     SELECT printf('|%,d|%,d|',1234567890,-1234567890);
   149    149   } {|1,234,567,890|-1,234,567,890|}
   150    150   
          151  +# 2018-02-19.  Unicode characters with %c
          152  +do_execsql_test printf2-5.100 {
          153  +  SELECT printf('(%8c)',char(11106));
          154  +} {{(       ⭢)}}
          155  +do_execsql_test printf2-5.101 {
          156  +  SELECT printf('(%-8c)',char(11106));
          157  +} {{(⭢       )}}
          158  +do_execsql_test printf2-5.102 {
          159  +  SELECT printf('(%5.3c)',char(1492));
          160  +} {{(  ההה)}}
          161  +do_execsql_test printf2-5.103 {
          162  +  SELECT printf('(%-5.3c)',char(1492));
          163  +} {{(ההה  )}}
          164  +do_execsql_test printf2-5.104 {
          165  +  SELECT printf('(%3.3c)',char(1492));
          166  +} {{(ההה)}}
          167  +do_execsql_test printf2-5.105 {
          168  +  SELECT printf('(%-3.3c)',char(1492));
          169  +} {{(ההה)}}
          170  +do_execsql_test printf2-5.104 {
          171  +  SELECT printf('(%2c)',char(1513));
          172  +} {{( ש)}}
          173  +do_execsql_test printf2-5.106 {
          174  +  SELECT printf('(%-2c)',char(1513));
          175  +} {{(ש )}}
          176  +
          177  +# 2018-02-19.  Unicode characters with the "!" flag in %s and friends.
          178  +do_execsql_test printf2-6.100 {
          179  +  SELECT printf('(%!.3s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
          180  +} {(הנה)}
          181  +do_execsql_test printf2-6.101 {
          182  +  SELECT printf('(%.6s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
          183  +} {(הנה)}
          184  +do_execsql_test printf2-6.102 {
          185  +  SELECT printf('(%!5.3s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
          186  +} {{(  הנה)}}
          187  +do_execsql_test printf2-6.103 {
          188  +  SELECT printf('(%8.6s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
          189  +} {{(  הנה)}}
          190  +do_execsql_test printf2-6.104 {
          191  +  SELECT printf('(%!-5.3s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
          192  +} {{(הנה  )}}
          193  +do_execsql_test printf2-6.105 {
          194  +  SELECT printf('(%-8.6s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
          195  +} {{(הנה  )}}
          196  +do_execsql_test printf2-6.106 {
          197  +  SELECT printf('(%!.3Q)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
          198  +} {('הנה')}
          199  +do_execsql_test printf2-6.107 {
          200  +  SELECT printf('(%.6Q)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
          201  +} {('הנה')}
          202  +do_execsql_test printf2-6.108 {
          203  +  SELECT printf('(%!7.3Q)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
          204  +} {{(  'הנה')}}
          205  +do_execsql_test printf2-6.109 {
          206  +  SELECT printf('(%10.6Q)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
          207  +} {{(  'הנה')}}
   151    208   
   152    209   
   153    210   finish_test