まつもと ゆきひろです
In message "Re: [ruby-dev:33838] 文字列処理の高速化 ?"
on Mon, 18 Feb 2008 16:10:34 +0900, Martin D.
[email protected] writes:
|ただの一例ですが、先週の金曜日に松本さんに見せたときに
|50秒ぐらいかかったプログラムはその後の commit でおそくなり、
|65秒ぐらいかかっています。プログラムは以下の通り
|(以前 Wolfgang Na'dasi-Donner が ruby-core に出した
|ものがベースになっています。)
|特に金曜日の夕方あたり文字列処理や UTF-8 の処理を早くする
|commit が色々ありましたので、なぜで遅くなるのかをちょっと
|疑問に思っています。
strlene$B$G$Oe(Bcoderangee$B$,@_Dj$5$l$J$$$+$i7k6Ie(BUTF-8e$B$N:GE,2=$,F/$+e(B
e$B$J$$$+$i$N$h$&$G$9!#BP:v$H$7$F$Oe(B
- UTF-8e$B:GE,2=$re(BENC_CODERANGE_VALIDe$B$G$J$/$F$bF0$/$h$&$K$9$ke(B
(e$BFq$7$=$&e(B)
- strlene$B$Ge(Bcoderange_scan()e$BAjEv$rF1;~$K9T$&e(B
e$B$N$$$:$l$+$,9M$($i$l$^$9!#$H$j$"$($:!"8e<T$NBP:v$r<BAu$7$F$_e(B
e$B$?$H$3$m!"e(BMartine$B$5$s$NNc$G$Oe(B23.95se$B$,e(B2.89se$B$K$J$j$^$7$?!#e(B
e$B%Q%C%A$G$9!#$@$l$+$,%/%j!<%s%"%C%W$7$F$/$l$k$H$&$l$7$$!#e(B
diff --git a/string.c b/string.c
--- a/string.c
+++ b/string.c
@@ -619,10 +619,64 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
     return c;
 }
 
+long
+rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
+{
+    long c;
+    const char *q;
+    int ret;
+
+    *cr = 0;
+    if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
+        return (e - p) / rb_enc_mbminlen(enc);
+    }
+    else if (rb_enc_asciicompat(enc)) {
+        *cr = ENC_CODERANGE_7BIT;
+        c = 0;
+        while (p < e) {
+            if (ISASCII(*p)) {
+                q = search_nonascii(p, e);
+                if (!q) {
+                    return c + (e - p);
+                }
+                c += q - p;
+                p = q;
+            }
+            ret = rb_enc_precise_mbclen(p, e, enc);
+            if (MBCLEN_CHARFOUND_P(ret)) {
+                if (*cr != ENC_CODERANGE_BROKEN)
+                    *cr = ENC_CODERANGE_VALID;
+                p += MBCLEN_CHARFOUND_LEN(ret);
+            }
+            else {
+                *cr = ENC_CODERANGE_BROKEN;
+                p++;
+            }
+            c++;
+        }
+        return c;
+    }
+
+    for (c=0; p<e; c++) {
+        ret = rb_enc_precise_mbclen(p, e, enc);
+        if (MBCLEN_CHARFOUND_P(ret)) {
+            if (*cr != ENC_CODERANGE_BROKEN)
+                *cr = ENC_CODERANGE_VALID;
+            p += MBCLEN_CHARFOUND_LEN(ret);
+        }
+        else {
+            *cr = ENC_CODERANGE_BROKEN;
+            p++;
+        }
+    }
+    return c;
+}
+
 static long
 str_strlen(VALUE str, rb_encoding *enc)
 {
     const char *p, *e;
+    int n, cr;
 
     if (single_byte_optimizable(str)) return RSTRING_LEN(str);
     if (!enc) enc = STR_ENC_GET(str);
@@ -661,7 +715,11 @@ str_strlen(VALUE str, rb_encoding *enc)
return len;
}
#endif
- return rb_enc_strlen(p, e, enc);
/*