Issue #9816 has been updated by Yui NARUSE.
熟考の結果、Gem::Versionと仕様をあわせました。
理由は、
- Gem::Versionでこれを使ってくれればオブジェクトの生成数が減る
- 2.2.0-preview1のようなRubyのバージョンの比較ができる
からです。
順序のイメージは Prereleases sort between real releases (newest to oldest)
のような感じです
- 1.0
- 1.0.b1
- 1.0.a.2
- 0.9
diff --git a/string.c b/string.c
index bec0bfd..e2b3c6f 100644
--- a/string.c
+++ b/string.c
@@ -2605,6 +2605,232 @@ rb_str_casecmp(VALUE str1, VALUE str2)
return INT2FIX(-1);
}
+/*
+ * Report whether +str+ has the syntax of a version string.
+ *
+ * Shape accepted by the scanner below:
+ *   - a leading run of one or more digits;
+ *   - zero or more ".segment" parts, each a non-empty alphanumeric run;
+ *   - optionally a '-' followed by '.'-separated segments, each a
+ *     non-empty run of alphanumerics and '-'.
+ * The string's encoding must be ASCII-compatible.
+ *
+ * Returns TRUE if +str+ matches, FALSE otherwise.
+ */
+static int
+version_string_p(VALUE str)
+{
+    const char *p = RSTRING_PTR(str);
+    const char *e = RSTRING_END(str);
+
+    if (!rb_enc_asciicompat(STR_ENC_GET(str))) return FALSE;
+
+    /* An empty string is never a version.  Reject it before looking at *p
+     * so the scan does not rely on a terminator byte at RSTRING_END. */
+    if (p >= e) return FALSE;
+
+    /* leading numeric segment */
+    if (!ISDIGIT(*p)) return FALSE;
+    do { if (++p >= e) return TRUE; } while (ISDIGIT(*p));
+
+    /* ".segment" parts: alphanumerics only */
+    while (*p == '.') {
+        if (++p >= e) return FALSE;            /* trailing '.' */
+        if (!ISALNUM(*p)) return FALSE;        /* empty segment */
+        do { if (++p >= e) return TRUE; } while (ISALNUM(*p));
+    }
+
+    /* anything left must be a '-' tail, whose segments may also hold '-' */
+    if (*p != '-') return FALSE;
+    do {
+        if (++p >= e) return FALSE;            /* trailing '-' or '.' */
+        if (!ISALNUM(*p) && *p != '-') return FALSE;
+        do { if (++p >= e) return TRUE; } while (ISALNUM(*p) || *p == '-');
+    } while (*p == '.');
+
+    return FALSE;
+}
+
+/*
+ * Narrow the range [*sp, *ep) to the significant digits at its start.
+ *
+ * On entry the range must be non-empty.  On return:
+ *   *sp: first character after any leading '0's
+ *   *ep: first non-digit at or after the new *sp, or the original end
+ * A run consisting only of '0's yields an empty range at the original end.
+ */
+static void
+search_numerical_str(const char **sp, const char **ep)
+{
+    const char *cur = *sp;
+    const char *const limit = *ep;
+
+    assert(cur < limit);
+
+    /* leading zeros carry no magnitude; step over them */
+    while (*cur == '0') {
+        if (++cur == limit) {
+            /* nothing but zeros */
+            *sp = cur;
+            *ep = cur;
+            return;
+        }
+    }
+    *sp = cur;
+    assert(cur < limit);
+
+    /* consume the remaining digits, never stepping past the limit */
+    while (ISDIGIT(*cur)) {
+        if (++cur == limit) break;
+    }
+    *ep = cur;
+}
+
+/*
+ * Compare the digit runs starting at *pp1 and *pp2 by numeric value.
+ *
+ * Leading zeros are ignored; the remaining digits are ordered first by
+ * their count and then bytewise.
+ *
+ * Returns INT2FIX(-1) or INT2FIX(1) when the values differ, leaving *pp1
+ * and *pp2 untouched.  When they are equal, stores the end of each digit
+ * run back into *pp1 / *pp2 and returns Qnil.
+ */
+static VALUE
+numerical_compare(const char **pp1, const char *p1end, const char **pp2, const char *p2end)
+{
+    const char *lhs_begin = *pp1, *lhs_end = p1end;
+    const char *rhs_begin = *pp2, *rhs_end = p2end;
+    ptrdiff_t lhs_len, rhs_len;
+    int order;
+
+    search_numerical_str(&lhs_begin, &lhs_end);
+    search_numerical_str(&rhs_begin, &rhs_end);
+
+    /* with leading zeros stripped, more digits means a larger number */
+    lhs_len = lhs_end - lhs_begin;
+    rhs_len = rhs_end - rhs_begin;
+    if (lhs_len < rhs_len) return INT2FIX(-1);
+    if (lhs_len > rhs_len) return INT2FIX(1);
+
+    /* same digit count: bytewise order equals numeric order */
+    order = memcmp(lhs_begin, rhs_begin, lhs_len);
+    if (order < 0) return INT2FIX(-1);
+    if (order > 0) return INT2FIX(1);
+
+    *pp1 = lhs_end;
+    *pp2 = rhs_end;
+    return Qnil;
+}
+
+/*
+ * call-seq:
+ * str.versioncmp(other_str) -> -1, 0 or +1
+ *
+ * Compares +str+ with +other_str+ as version strings, segment by segment.
+ * Numeric segments are compared by value, so "10" sorts after "2".
+ * A version that continues with a further numeric segment is greater than
+ * its prefix; one that continues with an alphabetic or '-' segment
+ * (a prerelease) is smaller than its prefix.
+ *
+ * Raises ArgumentError if either string is rejected by version_string_p
+ * (for example, a string that does not start with a digit).
+ *
+ * "1.0".versioncmp("1.0") #=> 0
+ * "2.1.2".versioncmp("2.1.10") #=> -1
+ * "2.1.10".versioncmp("2.1.2") #=> 1
+ * "1.0".versioncmp("1.0.b1") #=> 1
+ */
+
+static VALUE
+rb_str_versioncmp(VALUE str1, VALUE str2)
+{
+ const char *p, *pe, *q, *qe;
+
+ /* both operands must be syntactically valid versions before scanning */
+ StringValue(str2);
+ if (!version_string_p(str1)) {
+ rb_raise(rb_eArgError, "receiver is not version string
'%+"PRIsVALUE"'", str1);
+ }
+ if (!version_string_p(str2)) {
+ rb_raise(rb_eArgError, "argument is not version string
'%+"PRIsVALUE"'", str2);
+ }
+
+ /* p/pe walk the receiver, q/qe walk the argument */
+ p = RSTRING_PTR(str1); pe = RSTRING_END(str1);
+ q = RSTRING_PTR(str2); qe = RSTRING_END(str2);
+
+ for (;;) {
+ if (*p == '-') {
+ /* Receiver is at '-'.  Unless the argument is at '-' too, the
+ * argument has to spell "pre" here (after any dots) for the two
+ * sides to stay tied; any mismatch decides the result.
+ * NOTE(review): this looks like it mirrors Gem::Version treating
+ * '-' like ".pre." -- confirm against rubygems. */
+hyphen_left:
+ if (*q == '-') goto next_char;
+ while (*q == '.') {
+ if (++q == qe) return INT2FIX(1);
+ }
+ if (*q != 'p') return INT2FIX(ISDIGIT(*q) || 'p' < *q ? -1 : 1);
+ if (++q == qe) return INT2FIX(1);
+ if (*q != 'r') return INT2FIX(ISDIGIT(*q) || 'r' < *q ? -1 : 1);
+ if (++q == qe) return INT2FIX(1);
+ if (*q != 'e') return INT2FIX(ISDIGIT(*q) || 'e' < *q ? -1 : 1);
+ if (++q == qe) return INT2FIX(1);
+ if (*q != '.') {
+ if (*q == '-') {
+ p++;
+ goto hyphen_right;
+ }
+ else if (ISALPHA(*q)) return INT2FIX(-1);
+ /* step back so the q++ at next_char leaves q on this digit */
+ q--; /* DIGIT */
+ }
+ }
+ else if (*q == '-') {
+ /* mirror image of the branch above with the roles of p and q swapped */
+hyphen_right:
+ if (*p == '-') goto next_char;
+ while (*p == '.') {
+ if (++p == pe) return INT2FIX(-1);
+ }
+ if (*p != 'p') return INT2FIX(ISDIGIT(*p) || 'p' < *p ? 1 : -1);
+ if (++p == pe) return INT2FIX(-1);
+ if (*p != 'r') return INT2FIX(ISDIGIT(*p) || 'r' < *p ? 1 : -1);
+ if (++p == pe) return INT2FIX(-1);
+ if (*p != 'e') return INT2FIX(ISDIGIT(*p) || 'e' < *p ? 1 : -1);
+ if (++p == pe) return INT2FIX(-1);
+ if (*p == '-') {
+ q++;
+ goto hyphen_left;
+ }
+ else if (ISALPHA(*p)) return INT2FIX(1);
+ else if (ISDIGIT(*p)) {
+ /* step back so the p++ at next_char leaves p on this digit */
+ p--; /* DIGIT */
+ }
+ }
+ else if (ISDIGIT(*p)) {
+ /* digit runs compare by value; a digit outranks a non-digit */
+ if (ISDIGIT(*q)) {
+ VALUE r = numerical_compare(&p, pe, &q, qe);
+ if(!NIL_P(r)) return r;
+ /* numerical_compare already moved p and q past the digits */
+ goto incremented;
+ }
+ else {
+ return INT2FIX(1);
+ }
+ }
+ else if (ISDIGIT(*q)) {
+ return INT2FIX(-1);
+ }
+ else if (ISALPHA(*p)) {
+ if (ISALPHA(*q)) {
+ /* compare the two alphabetic runs byte by byte; when one run
+ * ends first, the side with letters remaining is the greater */
+ for (;;) {
+ if (*p != *q) return INT2FIX(*p < *q ? -1 : 1);
+ p++;
+ q++;
+ if (p == pe) {
+ if (q == qe) return INT2FIX(0);
+ if (ISALPHA(*q)) return INT2FIX(-1);
+ goto incremented;
+ }
+ else if (q == qe) {
+ if (ISALPHA(*p)) return INT2FIX(1);
+ goto incremented;
+ }
+ else if (ISALPHA(*p)) {
+ if (!ISALPHA(*q)) return INT2FIX(1);
+ }
+ else if (ISALPHA(*q)) return INT2FIX(-1);
+ else goto incremented;
+ }
+ continue;
+ }
+ else return INT2FIX(1);
+ }
+ else if (ISALPHA(*q)) {
+ return INT2FIX(-1);
+ }
+ /* neither side is at '-', a digit or a letter */
+ else rb_bug("%s %s",p,q);
+
+next_char:
+ p++;
+ q++;
+
+incremented:
+ /* skip segment separators, then settle the result if either side ended */
+ while (*p == '.' && ++p != pe);
+ while (*q == '.' && ++q != qe);
+ if (p == pe) {
+ if (q == qe) return INT2FIX(0);
+ /* argument continues: a numeric segment makes it greater,
+ * anything else makes it smaller */
+ if (ISDIGIT(*q)) {
+ return INT2FIX(-1);
+ }
+ else /*if (ISALPHA(*q) || *q == '-')*/ {
+ return INT2FIX(1);
+ }
+ }
+ else if (q == qe) {
+ /* receiver continues: same rule with the sign flipped */
+ if (ISDIGIT(*p)) {
+ return INT2FIX(1);
+ }
+ else /*if (ISALPHA(*p) || *p == '-')*/ {
+ return INT2FIX(-1);
+ }
+ }
+ }
+ UNREACHABLE;
+}
+
#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub,
offset, 0)
static long
@@ -8778,6 +9004,7 @@ Init_String(void)
rb_define_method(rb_cString, "eql?", rb_str_eql, 1);
rb_define_method(rb_cString, "hash", rb_str_hash_m, 0);
rb_define_method(rb_cString, "casecmp", rb_str_casecmp, 1);
+ rb_define_method(rb_cString, "versioncmp", rb_str_versioncmp, 1);
rb_define_method(rb_cString, "+", rb_str_plus, 1);
rb_define_method(rb_cString, "*", rb_str_times, 1);
rb_define_method(rb_cString, "%", rb_str_format_m, 1);
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index e8decc0..9e92fb7 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2112,6 +2112,41 @@ def test_casecmp
assert_equal(1, "\u3042B".casecmp("\u3042a"))
end
+ def test_versioncmp
+ require "rubygems"
+ ary = %w[
+ 1
+ 2
+ 10
+ 1.a
+ 1.a-a
+ 1.a--
+ 1.a--.-
+ 1.a-1
+ 1.a.q
+ 1.a--a
+ 1.a--1
+ 1.a.pre.a
+ 1.a-pre.a
+ 1.a.pre-a
+ 1.a1
+ 1.a2
+ 1.aa
+ 1.b
+ 1.01
+ 1.1
+ 1.1a
+ 1.1-a
+ 1.1-b
+ 1.1q
+ 1.2
+ 1.10
+ ]
+ ary.product(ary) do |a, b|
+ assert_equal(Gem::Version.new(a)<=>Gem::Version.new(b),
a.versioncmp(b), "#{a.dump}, #{b.dump}")
+ end
+ end
+
def test_upcase2
assert_equal("\u3042AB", "\u3042aB".upcase)
end
Feature #9816: 文字列内の数字を数値として比較するメソッド
- Author: Yui NARUSE
- Status: Assigned
- Priority: Normal
- Assignee: Yukihiro M.
- Category: core
- Target version:
文字列内の数字を数値として比較するメソッドを追加しませんか
そのような比較は一般的な用途としてはGUIシェルのファイラーが比較に用いており、
Windows では StrCmpLogicalW が、OS X では
NSString:compare:options:へのNSNumericSearch定数が提供されています。
上記のような処理自体はさほど難しいものではありませんが、Rubyレベルで実装すると大量のオブジェクトを作ってしまいます。
例えば
Gem::Version.new("2.1.10".freeze)<=>Gem::Version.new("2.1.9".freeze)
は47個、
"2.1.10".freeze.split('.').map(&:to_i)<=>"2.1.9".freeze.split('.').map(&:to_i)
だと16個のオブジェクトを作ります。
"2.1.10".freeze.numericcmp"2.1.9".freeze
ならば、もちろんオブジェクトは一つも作りません。
なお、上記の例でも示唆していますが、本メソッドは Ruby のバージョン表記の TEENY が2桁になった場合の比較に用いることができます。
パッチは以下の通りです。
なお、メソッド名は String#numericcmp としています。
(String#casecmpを念頭に置いた)
diff --git a/string.c b/string.c
index c589c80..66f667f 100644
--- a/string.c
+++ b/string.c
@@ -2569,6 +2569,131 @@ rb_str_casecmp(VALUE str1, VALUE str2)
return INT2FIX(-1);
}
+/*
+ * Compare the decimal digit runs starting at *pp1 and *pp2.
+ *
+ * Leading '0's are skipped, then the remaining digits are ordered first by
+ * their count and then bytewise.  If the values are equal, the run with
+ * fewer leading zeros sorts first ("1" < "01").
+ *
+ * Returns INT2FIX(-1) or INT2FIX(1) when the runs differ, leaving *pp1 and
+ * *pp2 untouched.  When the runs are identical, advances *pp1 and *pp2
+ * past the digits and returns Qnil.
+ *
+ * File-local helper, hence static.
+ */
+static VALUE
+numerical_compare(const char **pp1, const char *p1end, const char **pp2, const char *p2end)
+{
+    const char *s1 = *pp1, *p1, *s2 = *pp2, *p2;
+    ptrdiff_t len1, len2;
+    int r;
+
+    /* s1..p1: significant digits of the first run */
+    while (s1 < p1end && *s1 == '0') s1++;
+    p1 = s1;
+    while (p1 < p1end && ISDIGIT(*p1)) p1++;
+    len1 = p1 - s1;
+
+    /* s2..p2: significant digits of the second run */
+    while (s2 < p2end && *s2 == '0') s2++;
+    p2 = s2;
+    while (p2 < p2end && ISDIGIT(*p2)) p2++;
+    len2 = p2 - s2;
+
+    /* more significant digits means a larger number */
+    if (len1 != len2) {
+        return INT2FIX(len1 < len2 ? -1 : 1);
+    }
+
+    /* same digit count: bytewise order equals numeric order */
+    r = memcmp(s1, s2, len1);
+    if (r) return r < 0 ? INT2FIX(-1) : INT2FIX(1);
+
+    /* equal values: break the tie on the number of leading zeros */
+    len1 = s1 - *pp1;
+    len2 = s2 - *pp2;
+    if (len1 != len2) {
+        return INT2FIX(len1 < len2 ? -1 : 1);
+    }
+
+    *pp1 = p1;
+    *pp2 = p2;
+    return Qnil;
+}
+
+/*
+ *  call-seq:
+ *     str.numericcmp(other_str)   -> -1, 0, +1 or nil
+ *
+ *  Variant of <code>String#<=></code> which treats runs of digits in the
+ *  strings as numeric values.  A digit sorts before any non-digit
+ *  character.  Returns +nil+ if the two strings have incompatible
+ *  encodings.
+ *
+ *     "a1".numericcmp("a1")           #=> 0
+ *     "aa".numericcmp("a1")           #=> 1
+ *     "a1".numericcmp("aa")           #=> -1
+ *     "a1".numericcmp("a01")          #=> -1
+ *     "2.1.2".numericcmp("2.1.10")    #=> -1
+ */
+
+static VALUE
+rb_str_numericcmp(VALUE str1, VALUE str2)
+{
+    long len;
+    rb_encoding *enc;
+    const char *p1, *p1end, *p2, *p2end;
+
+    StringValue(str2);
+    enc = rb_enc_compatible(str1, str2);
+    if (!enc) {
+        return Qnil;
+    }
+
+    p1 = RSTRING_PTR(str1); p1end = RSTRING_END(str1);
+    p2 = RSTRING_PTR(str2); p2end = RSTRING_END(str2);
+    if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
+        while (p1 < p1end && p2 < p2end) {
+            if (ISDIGIT(*p1)) {
+                if (ISDIGIT(*p2)) {
+                    VALUE r = numerical_compare(&p1, p1end, &p2, p2end);
+                    if (!NIL_P(r)) return r;
+                    /* Both cursors already sit past the digit runs and may
+                     * be at the end; re-check the bounds before reading. */
+                    continue;
+                }
+                return INT2FIX(-1);
+            }
+            else if (ISDIGIT(*p2)) {
+                return INT2FIX(1);
+            }
+            /* compare as unsigned bytes so that bytes >= 0x80 order the
+             * same way regardless of the signedness of plain char */
+            if (*p1 != *p2) {
+                return INT2FIX((unsigned char)*p1 < (unsigned char)*p2 ? -1 : 1);
+            }
+            p1++;
+            p2++;
+        }
+    }
+    else {
+        while (p1 < p1end && p2 < p2end) {
+            int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
+            int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
+
+            if (0 <= c1 && 0 <= c2) {
+                /* both sides are at an ASCII character */
+                if (ISDIGIT(c1)) {
+                    if (ISDIGIT(c2)) {
+                        VALUE r = numerical_compare(&p1, p1end, &p2, p2end);
+                        if (!NIL_P(r)) return r;
+                        /* numerical_compare advanced both cursors itself;
+                         * skip the per-character advance below */
+                        continue;
+                    }
+                    return INT2FIX(-1);
+                }
+                else if (ISDIGIT(c2)) {
+                    return INT2FIX(1);
+                }
+                if (c1 != c2) return INT2FIX(c1 < c2 ? -1 : 1);
+                /* equal: the shared advance below steps over the character
+                 * once (l1 == l2 here) */
+            }
+            else {
+                /* at least one side is at a non-ASCII character:
+                 * compare the encoded bytes */
+                int r;
+                l1 = rb_enc_mbclen(p1, p1end, enc);
+                l2 = rb_enc_mbclen(p2, p2end, enc);
+                len = l1 < l2 ? l1 : l2;
+                r = memcmp(p1, p2, len);
+                if (r != 0)
+                    return INT2FIX(r < 0 ? -1 : 1);
+                if (l1 != l2)
+                    return INT2FIX(l1 < l2 ? -1 : 1);
+            }
+            p1 += l1;
+            p2 += l2;
+        }
+    }
+    /* one side is exhausted after an equal prefix: the shorter string is smaller */
+    if (RSTRING_LEN(str1) == RSTRING_LEN(str2)) return INT2FIX(0);
+    if (RSTRING_LEN(str1) > RSTRING_LEN(str2)) return INT2FIX(1);
+    return INT2FIX(-1);
+}
+
static long
rb_str_index(VALUE str, VALUE sub, long offset)
{
@@ -8721,6 +8846,7 @@ Init_String(void)
rb_define_method(rb_cString, "eql?", rb_str_eql, 1);
rb_define_method(rb_cString, "hash", rb_str_hash_m, 0);
rb_define_method(rb_cString, "casecmp", rb_str_casecmp, 1);
+ rb_define_method(rb_cString, "numericcmp", rb_str_numericcmp, 1);
rb_define_method(rb_cString, "+", rb_str_plus, 1);
rb_define_method(rb_cString, "*", rb_str_times, 1);
rb_define_method(rb_cString, "%", rb_str_format_m, 1);
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 8366424..f9c788b 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2104,6 +2104,29 @@ class TestString < Test::Unit::TestCase
assert_equal(1, "\u3042B".casecmp("\u3042a"))
end
+ def test_numericcmp
+ assert_equal(-1, "2.1.0".numericcmp("2.1.1"))
+ assert_equal(-1, "2.1.9".numericcmp("2.1.10"))
+ assert_equal( 0, "a1".numericcmp("a1"))
+ assert_equal( 1, "aa".numericcmp("a1"))
+ assert_equal(-1, "a1".numericcmp("aa"))
+ assert_equal(-1, "a1".numericcmp("a01"))
+ assert_equal(-1, "a0001".numericcmp("a00001"))
+ assert_equal( 0, "a1a".numericcmp("a1a"))
+ assert_equal( 1, "a1b".numericcmp("a1a"))
+ assert_equal(-1, "a9a".numericcmp("a10a"))
+ assert_equal( 1, "b".numericcmp("a"))
+ assert_equal( 0, "\u30421".numericcmp("\u30421"))
+ assert_equal( 1, "\u3042\u3042".numericcmp("\u30421"))
+ assert_equal(-1, "\u30421".numericcmp("\u3042\u3042"))
+ assert_equal(-1, "\u30421".numericcmp("\u304201"))
+ assert_equal(-1, "\u30420001".numericcmp("\u304200001"))
+ assert_equal( 0, "\u30421\u3042".numericcmp("\u30421\u3042"))
+ assert_equal( 1, "\u30421\u3044".numericcmp("\u30421\u3042"))
+ assert_equal(-1, "\u30429\u3042".numericcmp("\u304210\u3042"))
+ assert_equal( 1, "\u3044".numericcmp("\u3042"))
+ end
+
def test_upcase2
assert_equal("\u3042AB", "\u3042aB".upcase)
end