/ Check-in [c883c4d3]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Enhance the string formatter (used by printf()) so that the width and precision of string substitution operators refer to characters instead of bytes when the alternate-form-2 flag ("!") is used. Also fix the %c substition to always work within unicode, regardless of the alternate-form-2 flag.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256:c883c4d33f4cd722982aca17eeb355d19eeed793991d923b5494d173b1d0c73a
User & Date: drh 2018-02-19 22:46:19
Context
2018-02-20
13:46
Remove a lot of the text describing extended format options from the documentation on sqlite3_mprintf() and friends, since that information is now covered by the separate printf.html document. Provide links to that other document. No changes to code. check-in: 99eec556 user: drh tags: trunk
2018-02-19
22:46
Enhance the string formatter (used by printf()) so that the width and precision of string substitution operators refer to characters instead of bytes when the alternate-form-2 flag ("!") is used. Also fix the %c substition to always work within unicode, regardless of the alternate-form-2 flag. check-in: c883c4d3 user: drh tags: trunk
21:58
Printing a value of 0 using %c terminates the string. Closed-Leaf check-in: 255612f0 user: drh tags: printf-enhancement
16:34
Only try to use the geteuid() interface on unix if HAVE_FCHOWN is defined. This fixes the build for vxWorks, we are told. check-in: 38f654dc user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/printf.c.

620
621
622
623
624
625
626

627






628



629



















630
631
632
633
634
635
636
637

638
639
640

641
642

643
644
645
646
647
648
649
...
650
651
652
653
654
655
656









657

658
659
660






661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677





678
679
680



681
682
683
684
685
686
687
...
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
...
738
739
740
741
742
743
744
745



746
747
748
749
750
751
752
        buf[0] = '%';
        bufpt = buf;
        length = 1;
        break;
      case etCHARX:
        if( bArgList ){
          bufpt = getTextArg(pArgList);

          c = bufpt ? bufpt[0] : 0;






        }else{



          c = va_arg(ap,int);



















        }
        if( precision>1 ){
          width -= precision-1;
          if( width>1 && !flag_leftjustify ){
            sqlite3AppendChar(pAccum, width-1, ' ');
            width = 0;
          }
          sqlite3AppendChar(pAccum, precision-1, c);

        }
        length = 1;
        buf[0] = c;

        bufpt = buf;
        break;

      case etSTRING:
      case etDYNSTRING:
        if( bArgList ){
          bufpt = getTextArg(pArgList);
          xtype = etSTRING;
        }else{
          bufpt = va_arg(ap,char*);
................................................................................
        }
        if( bufpt==0 ){
          bufpt = "";
        }else if( xtype==etDYNSTRING ){
          zExtra = bufpt;
        }
        if( precision>=0 ){









          for(length=0; length<precision && bufpt[length]; length++){}

        }else{
          length = 0x7fffffff & (int)strlen(bufpt);
        }






        break;
      case etSQLESCAPE:           /* Escape ' characters */
      case etSQLESCAPE2:          /* Escape ' and enclose in '...' */
      case etSQLESCAPE3: {        /* Escape " characters */
        int i, j, k, n, isnull;
        int needQuote;
        char ch;
        char q = ((xtype==etSQLESCAPE3)?'"':'\'');   /* Quote character */
        char *escarg;

        if( bArgList ){
          escarg = getTextArg(pArgList);
        }else{
          escarg = va_arg(ap,char*);
        }
        isnull = escarg==0;
        if( isnull ) escarg = (xtype==etSQLESCAPE2 ? "NULL" : "(NULL)");





        k = precision;
        for(i=n=0; k!=0 && (ch=escarg[i])!=0; i++, k--){
          if( ch==q )  n++;



        }
        needQuote = !isnull && xtype==etSQLESCAPE2;
        n += i + 3;
        if( n>etBUFSIZE ){
          bufpt = zExtra = sqlite3Malloc( n );
          if( bufpt==0 ){
            setStrAccumError(pAccum, STRACCUM_NOMEM);
................................................................................
        for(i=0; i<k; i++){
          bufpt[j++] = ch = escarg[i];
          if( ch==q ) bufpt[j++] = ch;
        }
        if( needQuote ) bufpt[j++] = q;
        bufpt[j] = 0;
        length = j;
        /* The precision in %q and %Q means how many input characters to
        ** consume, not the length of the output...
        ** if( precision>=0 && precision<length ) length = precision; */
        break;
      }
      case etTOKEN: {
        Token *pToken;
        if( (pAccum->printfFlags & SQLITE_PRINTF_INTERNAL)==0 ) return;
        pToken = va_arg(ap, Token*);
        assert( bArgList==0 );
        if( pToken && pToken->n ){
................................................................................
        assert( xtype==etINVALID );
        return;
      }
    }/* End switch over the format type */
    /*
    ** The text of the conversion is pointed to by "bufpt" and is
    ** "length" characters long.  The field width is "width".  Do
    ** the output.



    */
    width -= length;
    if( width>0 ){
      if( !flag_leftjustify ) sqlite3AppendChar(pAccum, width, ' ');
      sqlite3StrAccumAppend(pAccum, bufpt, length);
      if( flag_leftjustify ) sqlite3AppendChar(pAccum, width, ' ');
    }else{







>
|
>
>
>
>
>
>
|
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







|
>
|
<
<
>

|
>







 







>
>
>
>
>
>
>
>
>
|
>



>
>
>
>
>
>

|
|
|













>
>
>
>
>



>
>
>







 







|
<
<
<







 







|
>
>
>







620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668


669
670
671
672
673
674
675
676
677
678
679
...
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
...
750
751
752
753
754
755
756
757



758
759
760
761
762
763
764
...
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
        buf[0] = '%';
        bufpt = buf;
        length = 1;
        break;
      case etCHARX:
        if( bArgList ){
          bufpt = getTextArg(pArgList);
          length = 1;
          if( bufpt ){
            buf[0] = c = *(bufpt++);
            if( (c&0xc0)==0xc0 ){
              while( length<4 && (bufpt[0]&0xc0)==0x80 ){
                buf[length++] = *(bufpt++);
              }
            }
          }else{
            buf[0] = 0;
          }
        }else{
          unsigned int ch = va_arg(ap,unsigned int);
          if( ch<0x00080 ){
            buf[0] = ch & 0xff;
            length = 1;
          }else if( ch<0x00800 ){
            buf[0] = 0xc0 + (u8)((ch>>6)&0x1f);
            buf[1] = 0x80 + (u8)(ch & 0x3f);
            length = 2;
          }else if( ch<0x10000 ){
            buf[0] = 0xe0 + (u8)((ch>>12)&0x0f);
            buf[1] = 0x80 + (u8)((ch>>6) & 0x3f);
            buf[2] = 0x80 + (u8)(ch & 0x3f);
            length = 3;
          }else{
            buf[0] = 0xf0 + (u8)((ch>>18) & 0x07);
            buf[1] = 0x80 + (u8)((ch>>12) & 0x3f);
            buf[2] = 0x80 + (u8)((ch>>6) & 0x3f);
            buf[3] = 0x80 + (u8)(ch & 0x3f);
            length = 4;
          }
        }
        if( precision>1 ){
          width -= precision-1;
          if( width>1 && !flag_leftjustify ){
            sqlite3AppendChar(pAccum, width-1, ' ');
            width = 0;
          }
          while( precision-- > 1 ){
            sqlite3StrAccumAppend(pAccum, buf, length);
          }


        }
        bufpt = buf;
        flag_altform2 = 1;
        goto adjust_width_for_utf8;
      case etSTRING:
      case etDYNSTRING:
        if( bArgList ){
          bufpt = getTextArg(pArgList);
          xtype = etSTRING;
        }else{
          bufpt = va_arg(ap,char*);
................................................................................
        }
        if( bufpt==0 ){
          bufpt = "";
        }else if( xtype==etDYNSTRING ){
          zExtra = bufpt;
        }
        if( precision>=0 ){
          if( flag_altform2 ){
            /* Set length to the number of bytes needed in order to display
            ** precision characters */
            unsigned char *z = (unsigned char*)bufpt;
            while( precision-- > 0 && z[0] ){
              SQLITE_SKIP_UTF8(z);
            }
            length = (int)(z - (unsigned char*)bufpt);
          }else{
            for(length=0; length<precision && bufpt[length]; length++){}
          }
        }else{
          length = 0x7fffffff & (int)strlen(bufpt);
        }
      adjust_width_for_utf8:
        if( flag_altform2 && width>0 ){
          /* Adjust width to account for extra bytes in UTF-8 characters */
          int ii = length - 1;
          while( ii>=0 ) if( (bufpt[ii--] & 0xc0)==0x80 ) width++;
        }
        break;
      case etSQLESCAPE:           /* %q: Escape ' characters */
      case etSQLESCAPE2:          /* %Q: Escape ' and enclose in '...' */
      case etSQLESCAPE3: {        /* %w: Escape " characters */
        int i, j, k, n, isnull;
        int needQuote;
        char ch;
        char q = ((xtype==etSQLESCAPE3)?'"':'\'');   /* Quote character */
        char *escarg;

        if( bArgList ){
          escarg = getTextArg(pArgList);
        }else{
          escarg = va_arg(ap,char*);
        }
        isnull = escarg==0;
        if( isnull ) escarg = (xtype==etSQLESCAPE2 ? "NULL" : "(NULL)");
        /* For %q, %Q, and %w, the precision is the number of byte (or
        ** characters if the ! flags is present) to use from the input.
        ** Because of the extra quoting characters inserted, the number
        ** of output characters may be larger than the precision.
        */
        k = precision;
        for(i=n=0; k!=0 && (ch=escarg[i])!=0; i++, k--){
          if( ch==q )  n++;
          if( flag_altform2 && (ch&0xc0)==0xc0 ){
            while( (escarg[i+1]&0xc0)==0x80 ){ i++; }
          }
        }
        needQuote = !isnull && xtype==etSQLESCAPE2;
        n += i + 3;
        if( n>etBUFSIZE ){
          bufpt = zExtra = sqlite3Malloc( n );
          if( bufpt==0 ){
            setStrAccumError(pAccum, STRACCUM_NOMEM);
................................................................................
        for(i=0; i<k; i++){
          bufpt[j++] = ch = escarg[i];
          if( ch==q ) bufpt[j++] = ch;
        }
        if( needQuote ) bufpt[j++] = q;
        bufpt[j] = 0;
        length = j;
        goto adjust_width_for_utf8;



      }
      case etTOKEN: {
        Token *pToken;
        if( (pAccum->printfFlags & SQLITE_PRINTF_INTERNAL)==0 ) return;
        pToken = va_arg(ap, Token*);
        assert( bArgList==0 );
        if( pToken && pToken->n ){
................................................................................
        assert( xtype==etINVALID );
        return;
      }
    }/* End switch over the format type */
    /*
    ** The text of the conversion is pointed to by "bufpt" and is
    ** "length" characters long.  The field width is "width".  Do
    ** the output.  Both length and width are in bytes, not characters,
    ** at this point.  If the "!" flag was present on string conversions
    ** indicating that width and precision should be expressed in characters,
    ** then the values have been translated prior to reaching this point.
    */
    width -= length;
    if( width>0 ){
      if( !flag_leftjustify ) sqlite3AppendChar(pAccum, width, ' ');
      sqlite3StrAccumAppend(pAccum, bufpt, length);
      if( flag_leftjustify ) sqlite3AppendChar(pAccum, width, ' ');
    }else{

Changes to test/printf2.test.

144
145
146
147
148
149
150

























151































152

153
do_execsql_test printf2-4.9 {
  SELECT printf('|%,d|%,d|',123456789,-123456789);
} {|123,456,789|-123,456,789|}
do_execsql_test printf2-4.10 {
  SELECT printf('|%,d|%,d|',1234567890,-1234567890);
} {|1,234,567,890|-1,234,567,890|}




























































finish_test







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>

144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
do_execsql_test printf2-4.9 {
  SELECT printf('|%,d|%,d|',123456789,-123456789);
} {|123,456,789|-123,456,789|}
do_execsql_test printf2-4.10 {
  SELECT printf('|%,d|%,d|',1234567890,-1234567890);
} {|1,234,567,890|-1,234,567,890|}

# 2018-02-19.  Unicode characters with %c
do_execsql_test printf2-5.100 {
  SELECT printf('(%8c)',char(11106));
} {{(       ⭢)}}
do_execsql_test printf2-5.101 {
  SELECT printf('(%-8c)',char(11106));
} {{(⭢       )}}
do_execsql_test printf2-5.102 {
  SELECT printf('(%5.3c)',char(1492));
} {{(  ההה)}}
do_execsql_test printf2-5.103 {
  SELECT printf('(%-5.3c)',char(1492));
} {{(ההה  )}}
do_execsql_test printf2-5.104 {
  SELECT printf('(%3.3c)',char(1492));
} {{(ההה)}}
do_execsql_test printf2-5.105 {
  SELECT printf('(%-3.3c)',char(1492));
} {{(ההה)}}
do_execsql_test printf2-5.104 {
  SELECT printf('(%2c)',char(1513));
} {{( ש)}}
do_execsql_test printf2-5.106 {
  SELECT printf('(%-2c)',char(1513));
} {{(ש )}}

# 2018-02-19.  Unicode characters with the "!" flag in %s and friends.
do_execsql_test printf2-6.100 {
  SELECT printf('(%!.3s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
} {(הנה)}
do_execsql_test printf2-6.101 {
  SELECT printf('(%.6s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
} {(הנה)}
do_execsql_test printf2-6.102 {
  SELECT printf('(%!5.3s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
} {{(  הנה)}}
do_execsql_test printf2-6.103 {
  SELECT printf('(%8.6s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
} {{(  הנה)}}
do_execsql_test printf2-6.104 {
  SELECT printf('(%!-5.3s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
} {{(הנה  )}}
do_execsql_test printf2-6.105 {
  SELECT printf('(%-8.6s)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
} {{(הנה  )}}
do_execsql_test printf2-6.106 {
  SELECT printf('(%!.3Q)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
} {('הנה')}
do_execsql_test printf2-6.107 {
  SELECT printf('(%.6Q)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
} {('הנה')}
do_execsql_test printf2-6.108 {
  SELECT printf('(%!7.3Q)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
} {{(  'הנה')}}
do_execsql_test printf2-6.109 {
  SELECT printf('(%10.6Q)','הנה מה־טוב ומה־נעים שבת אחים גם־יחד');
} {{(  'הנה')}}


finish_test