e$B$J$+$@$G$9!#e(B
String#encodee$B$r$?$a$7$K<BAu$7$F$_$^$7$?!#e(B
e$B$J$<$+e(B"sjis"e$B$GEPO?$5$l$F$$$k$N$Ke(B"Shift_JIS"e$B$H=P$F$-$?$j$9$k$N$G!“e(B
e$BJLL>$rIU$1$i$l$k$h$&$K$7$Fe(Brb_enc_name()e$B$K$”$o$;$F$_$^$7$?!#e(B
Index: encoding.c
--- encoding.c (revision 13347)
+++ encoding.c (working copy)
@@ -24,4 +24,5 @@ struct rb_encoding_entry {
static struct rb_encoding_entry *enc_table;
static int enc_table_size;
+static st_table *enc_table_alias;
void
@@ -44,10 +45,21 @@ rb_enc_register(const char *name, rb_enc
void
+rb_enc_alias(const char *alias, const char *orig)
+{
+if (!enc_table_alias) {
+enc_table_alias = st_init_strtable();
+}
+st_insert(enc_table_alias, (st_data_t)alias, (st_data_t)orig);
+}
+void
rb_enc_init(void)
{
rb_enc_register("ascii", ONIG_ENCODING_ASCII);
-rb_enc_register("sjis", ONIG_ENCODING_SJIS);
+rb_enc_register("shift_jis", ONIG_ENCODING_SJIS);
rb_enc_register("euc-jp", ONIG_ENCODING_EUC_JP);
rb_enc_register("utf-8", ONIG_ENCODING_UTF8);
+rb_enc_alias("binary", "ascii");
+rb_enc_alias("sjis", "shift_jis");
}
@@ -68,8 +80,10 @@ rb_enc_find(const char *name)
{
int i;
+st_data_t alias = 0;
if (!enc_table) {
rb_enc_init();
}
+find:
for (i=0; i<enc_table_size; i++) {
if (strcmp(name, enc_table[i].name) == 0) {
@@ -77,4 +91,10 @@ rb_enc_find(const char *name)
}
}
+if (!alias && enc_table_alias) {
+if (st_lookup(enc_table_alias, (st_data_t)name, &alias)) {
+name = (const char *)alias;
+goto find;
+}
+}
return ONIG_ENCODING_ASCII;
}
Index: string.c
===================================================================
--- string.c (revision 13347)
+++ string.c (working copy)
@@ -194,5 +194,5 @@ rb_tainted_str_new2(const char *ptr)
static VALUE
-str_new3(VALUE klass, VALUE str)
+str_new_shared(VALUE klass, VALUE str)
{
VALUE str2 = str_alloc(klass);
@@ -210,9 +210,17 @@ str_new3(VALUE klass, VALUE str)
FL_SET(str2, ELTS_SHARED);
}
-rb_enc_copy((VALUE)str2, str);
return str2;
}
+static VALUE
+str_new3(VALUE klass, VALUE str)
+{
+VALUE str2 = str_new_shared(klass, str);
+rb_enc_copy(str2, str);
+return str2;
+}
VALUE
rb_str_new3(VALUE str)
@@ -401,4 +409,21 @@ rb_str_init(int argc, VALUE *argv, VALUE
}
+static VALUE
+rb_str_encode(VALUE str, VALUE encode)
+{
+int idx = rb_enc_get_index(str);
+rb_encoding *enc = rb_enc_find(StringValueCStr(encode));
+VALUE str2;
+if (idx && rb_enc_from_index(idx) != enc) {
+rb_raise(rb_eArgError, "cannot convert encoding from %s to %s",
+rb_enc_name(rb_enc_from_index(idx)), rb_enc_name(enc));
+}
+str2 = str_new_shared(rb_obj_class(str), str);
+rb_enc_associate(str2, enc);
+OBJ_INFECT(str2, str);
+return str2;
+}
static long
str_strlen(VALUE str, rb_encoding *enc)
@@ -5429,4 +5454,5 @@ Init_String(void)
rb_define_method(rb_cString, "encoding", str_encoding, 0);
+rb_define_method(rb_cString, "encode", rb_str_encode, 1);
id_to_s = rb_intern("to_s");
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h (revision 13347)
+++ include/ruby/encoding.h (working copy)
@@ -28,4 +28,5 @@ typedef OnigEncodingType rb_encoding;
int rb_enc_to_index(rb_encoding*);
+int rb_enc_get_index(VALUE obj);
rb_encoding* rb_enc_get(VALUE);
rb_encoding* rb_enc_check(VALUE,VALUE);
@@ -65,4 +66,5 @@ int rb_enc_codelen(int, rb_encoding*);
#define rb_enc_prev_char(s,p,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)s,(UChar*)p)
+#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
#define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c)