Enumerable#gather_each

In article [email protected],
Tanaka A. [email protected] writes:

とあるので、slice も削除の意味に使うことはあるようです。
メソッド名を

  • Enumerable#slice_by
  • Enumerable#slice_before
    に変えたパッチをつけます。

このパッチでは、slice_by の要素の除去は nil と
:_separator
に割り当ててあります。false は外してみました。

あと、singleton という語は他の意味に使うことが多いので
:_alone に変えました。

% svn diff --diff-cmd diff -x ‘-u -p’
Index: enum.c

— enum.c (revision 23381)
+++ enum.c (working copy)
@@ -1793,6 +1793,301 @@ enum_cycle(int argc, VALUE argv, VALUE
return Qnil; /
not reached */
}

+struct sliceby_arg {

  • VALUE categorize;
  • VALUE state;
  • VALUE prev_value;
  • VALUE prev_elts;
  • VALUE yielder;
    +};

+static VALUE
+sliceby_ii(VALUE i, VALUE _argp, int argc, VALUE *argv)
+{

  • struct sliceby_arg *argp = (struct sliceby_arg *)_argp;
  • VALUE v;
  • VALUE alone = ID2SYM(rb_intern(“_alone”));
  • VALUE separator = ID2SYM(rb_intern(“_separator”));
  • ENUM_WANT_SVALUE();
  • if (NIL_P(argp->state))
  •    v = rb_funcall(argp->categorize, rb_intern("call"), 1, i);
    
  • else
  •    v = rb_funcall(argp->categorize, rb_intern("call"), 2, i, 
    

argp->state);
+

  • if (v == alone) {
  •    if (!NIL_P(argp->prev_value)) {
    
  •        rb_funcall(argp->yielder, rb_intern("<<"), 1, 
    

rb_assoc_new(argp->prev_value, argp->prev_elts));

  •        argp->prev_value = argp->prev_elts = Qnil;
    
  •    }
    
  •    rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(v, 
    

rb_ary_new3(1, i)));

  • }
  • else if (NIL_P(v) || v == separator) {
  •    if (!NIL_P(argp->prev_value)) {
    
  •        rb_funcall(argp->yielder, rb_intern("<<"), 1, 
    

rb_assoc_new(argp->prev_value, argp->prev_elts));

  •        argp->prev_value = argp->prev_elts = Qnil;
    
  •    }
    
  • }
  • else if (SYMBOL_P(v) && rb_id2name(SYM2ID(v))[0] == ‘_’) {
  • rb_raise(rb_eRuntimeError, “symbol begins with an underscore is
    reserved”);
  • }
  • else {
  •    if (NIL_P(argp->prev_value)) {
    
  •        argp->prev_value = v;
    
  •        argp->prev_elts = rb_ary_new3(1, i);
    
  •    }
    
  •    else {
    
  •        if (rb_equal(argp->prev_value, v)) {
    
  •            rb_ary_push(argp->prev_elts, i);
    
  •        }
    
  •        else {
    
  •            rb_funcall(argp->yielder, rb_intern("<<"), 1, 
    

rb_assoc_new(argp->prev_value, argp->prev_elts));

  •            argp->prev_value = v;
    
  •            argp->prev_elts = rb_ary_new3(1, i);
    
  •        }
    
  •    }
    
  • }
  • return Qnil;
    +}

+static VALUE
+sliceby_i(VALUE yielder, VALUE enumerator, int argc, VALUE *argv)
+{

  • VALUE enumerable;
  • struct sliceby_arg arg;
  • enumerable = rb_ivar_get(enumerator,
    rb_intern(“sliceby_enumerable”));
  • arg.categorize = rb_ivar_get(enumerator,
    rb_intern(“sliceby_categorize”));
  • arg.state = rb_ivar_get(enumerator,
    rb_intern(“sliceby_initial_state”));
  • arg.prev_value = Qnil;
  • arg.prev_elts = Qnil;
  • arg.yielder = yielder;
  • if (!NIL_P(arg.state))
  •    arg.state = rb_obj_dup(arg.state);
    
  • rb_block_call(enumerable, id_each, 0, 0, sliceby_ii, (VALUE)&arg);
  • if (!NIL_P(arg.prev_elts))
  •    rb_funcall(arg.yielder, rb_intern("<<"), 1, 
    

rb_assoc_new(arg.prev_value, arg.prev_elts));

  • return Qnil;
    +}

+/*

    • call-seq:
    • enum.slice_by {|elt| ... } => enumerator
      
    • enum.slice_by(initial_state) {|elt, state| ... } => enumerator
      
    • Creates an enumerator for each chunked elements.
    • The elements which have same block value are chunked.
    • The result enumerator yields the block value and an array of
      chunked elements.
    • So “each” method can be called as follows.
    • enum.slice_by {|elt| key }.each {|key, ary| … }
    • For example, consecutive even numbers and odd numbers can be
    • splitted as follows.
    • [5, 3, 3, 5, 2, 8, 0, 6, 0, 3].slice_by {|n|
    •  n.even?
      
    • }.each {|even, ary|
    •  p [even, ary]
      
    • }
    • #=> [false, [5, 3, 3, 5]]
    • [true, [2, 8, 0, 6, 0]]

    • [false, [3]]

    • The following key values has special meaning:
      • nil and :_separator specifies that the elements are dropped.
      • :_alone specifies that the element should be chunked as a
        singleton.
    • Other symbols which begins an underscore are reserved.
    • nil and :_separator can be used to ignore some elements.
    • For example, the sequence of hyphens in svn log can be eliminated
      as follows.
    • sep = “-”*72 + “\n”
    • IO.popen(“svn log README”) {|f|
    •  f.slice_by {|line|
      
    •    line != sep || nil
      
    •  }.each {|_, lines|
      
    •    pp lines
      
    •  }
      
    • }
    • #=> [“r20018 | knu | 2008-10-29 13:20:42 +0900 (Wed, 29 Oct 2008)
      | 2 lines\n”,
    • “\n”,

    • “* README, README.ja: Update the portability section.\n”,

    • “\n”]

    • ["r16725 | knu | 2008-05-31 23:34:23 +0900 (Sat, 31 May 2008)

| 2 lines\n",

    • “\n”,

    • “* README, README.ja: Add a note about default C flags.\n”,

    • “\n”]

    • If the block needs to maintain state over multiple elements,
    • initial_state argument can be used.
    • If non-nil value is given,
    • it is duplicated for each “each” method invocation of the
      enumerator.
    • The duplicated object is passed to 2nd argument of the block for
      “slice_by” method…
  • */
    +static VALUE
    +enum_slice_by(int argc, VALUE *argv, VALUE enumerable)
    +{
  • VALUE initial_state;
  • VALUE enumerator;
  • rb_scan_args(argc, argv, “01”, &initial_state);
  • enumerator = rb_obj_alloc(rb_cEnumerator);
  • rb_ivar_set(enumerator, rb_intern(“sliceby_enumerable”),
    enumerable);
  • rb_ivar_set(enumerator, rb_intern(“sliceby_categorize”),
    rb_block_proc());
  • rb_ivar_set(enumerator, rb_intern(“sliceby_initial_state”),
    initial_state);
  • rb_block_call(enumerator, rb_intern(“initialize”), 0, 0, sliceby_i,
    enumerator);
  • return enumerator;
    +}

+struct slicebefore_arg {

  • VALUE separator_p;
  • VALUE state;
  • VALUE prev_elts;
  • VALUE yielder;
    +};

+static VALUE
+slicebefore_ii(VALUE i, VALUE _argp, int argc, VALUE *argv)
+{

  • struct slicebefore_arg *argp = (struct slicebefore_arg *)_argp;
  • VALUE header_p;
  • ENUM_WANT_SVALUE();
  • if (NIL_P(argp->state))
  •    header_p = rb_funcall(argp->separator_p, rb_intern("call"), 1, 
    

i);

  • else
  •    header_p = rb_funcall(argp->separator_p, rb_intern("call"), 2, 
    

i, argp->state);

  • if (RTEST(header_p)) {
  •    if (!NIL_P(argp->prev_elts))
    
  •        rb_funcall(argp->yielder, rb_intern("<<"), 1, 
    

argp->prev_elts);

  •    argp->prev_elts = rb_ary_new3(1, i);
    
  • }
  • else {
  •    if (NIL_P(argp->prev_elts))
    
  •        argp->prev_elts = rb_ary_new3(1, i);
    
  •    else
    
  •        rb_ary_push(argp->prev_elts, i);
    
  • }
  • return Qnil;
    +}

+static VALUE
+slicebefore_i(VALUE yielder, VALUE enumerator, int argc, VALUE *argv)
+{

  • VALUE enumerable;
  • struct slicebefore_arg arg;
  • enumerable = rb_ivar_get(enumerator,
    rb_intern(“slicebefore_enumerable”));
  • arg.separator_p = rb_ivar_get(enumerator,
    rb_intern(“slicebefore_separator_p”));
  • arg.state = rb_ivar_get(enumerator,
    rb_intern(“slicebefore_initial_state”));
  • arg.prev_elts = Qnil;
  • arg.yielder = yielder;
  • if (!NIL_P(arg.state))
  •    arg.state = rb_obj_dup(arg.state);
    
  • rb_block_call(enumerable, id_each, 0, 0, slicebefore_ii,
    (VALUE)&arg);
  • if (!NIL_P(arg.prev_elts))
  •    rb_funcall(arg.yielder, rb_intern("<<"), 1, arg.prev_elts);
    
  • return Qnil;
    +}

+/*

    • call-seq:
    • enum.slice_before {|elt| ... } => enumerator
      
    • enum.slice_before(initial_state) {|elt, state| ... } => 
      

enumerator

    • Creates an enumerator for each chunked elements.
    • The chunked elements begins an element which the block returns true
      value.
    • The result enumerator yields the chunked elements as an array.
    • So “each” method can be called as follows.
    • enum.slice_before {|elt| bool }.each {|ary| … }
    • For example, iteration over ChangeLog entries can be implemented as
      follows.
    • iterate over ChangeLog entries.

    • open(“ChangeLog”) {|f|
    •  f.slice_before {|line| /\A\S/ =~ line }.each {|e| pp e}
      
    • }
    • If the block needs to maintain state over multiple elements,
    • initial_state argument can be used.
    • If non-nil value is given,
    • it is duplicated for each “each” method invocation of the
      enumerator.
    • The duplicated object is passed to 2nd argument of the block for
      “slice_before” method…
    • For example, monotonically increasing elements can be chunked as
      follows.
    • a = [2, 5, 2, 1, 4, 3, 1, 2, 8, 0]
    • enum = a.slice_before(n: 0) {|elt, h|
    •  prev = h[:n]
      
    •  h[:n] = elt
      
    •  prev > elt
      
    • }
    • enum.each {|ary| p ary }
    • #=> [2, 5]
    • [2]

    • [1, 4]

    • [3]

    • [1, 2, 8]

    • [0]

    • parse mbox

    • open(“mbox”) {|f|
    •  f.slice_before {|line|
      
    •    line.start_with? "From "
      
    •  }.each {|mail|
      
    •    unix_from = mail.shift
      
    •    i = mail.index("\n")
      
    •    header = mail[0...i]
      
    •    body = mail[(i+1)..-1]
      
    •    fields = header.slice_before {|line| !" \t".include?(line[0]) 
      

}.to_a

    •    p unix_from
      
    •    pp fields
      
    •    pp body
      
    •  }
      
    • }
    • split mails in mbox (slice before Unix From line after an empty

line)

    • open(“mbox”) {|f|
    •  f.slice_before(emp: true) {|line,h|
      
    •  prevemp = h[:emp]
      
    •  h[:emp] = line == "\n"
      
    •  prevemp && line.start_with?("From ")
      
    • }.each {|mail|
    •  pp mail
      
    • }
  • */
    +static VALUE
    +enum_slice_before(int argc, VALUE *argv, VALUE enumerable)
    +{
  • VALUE initial_state, enumerator;
  • rb_scan_args(argc, argv, “01”, &initial_state);
  • enumerator = rb_obj_alloc(rb_cEnumerator);
  • rb_ivar_set(enumerator, rb_intern(“slicebefore_enumerable”),
    enumerable);
  • rb_ivar_set(enumerator, rb_intern(“slicebefore_separator_p”),
    rb_block_proc());
  • rb_ivar_set(enumerator, rb_intern(“slicebefore_initial_state”),
    initial_state);
  • rb_block_call(enumerator, rb_intern(“initialize”), 0, 0,
    slicebefore_i, enumerator);
  • return enumerator;
    +}

/*

  • The Enumerable mixin provides collection classes with
  • several traversal and searching methods, and with the ability to
    @@ -1852,6 +2147,8 @@ Init_Enumerable(void)
    rb_define_method(rb_mEnumerable, “drop”, enum_drop, 1);
    rb_define_method(rb_mEnumerable, “drop_while”, enum_drop_while, 0);
    rb_define_method(rb_mEnumerable, “cycle”, enum_cycle, -1);
  • rb_define_method(rb_mEnumerable, “slice_by”, enum_slice_by, -1);

  • rb_define_method(rb_mEnumerable, “slice_before”, enum_slice_before,
    -1);

    id_eqq = rb_intern(“===”);
    id_each = rb_intern(“each”);

In article [email protected],
Yukihiro M. [email protected] writes:

slice_bye$B$H$$$&%a%=%C%IL>$K0[O@$O$“$j$^$;$s!#e(B
:_separatore$B$H$+e(B:_alonee$B$H$+$O$A$g$C$H0cOB46$,$”$j$^$9!#e(B
e$B%I%-%e%a%s%H$KNc$,:$C$F$$$J$$$+$i$+$b$7$l$^$;$s!#e(B

:_separator と nil は同じ機能で、nil
を使う例は載っています。
:_separator に書き換えるとこうなります。

 sep = "-"*72 + "\n"
 IO.popen("svn log README") {|f|
   f.slice_by {|line|
     line != sep || :_separator
   }.each {|_, lines|
     pp lines
   }
 }

なお、separator という語を選ぶにあたっては delete や
reject
などいくつか考えましたが、入力を破壊的に書き換えるものではな
いという点と、入力の要素が単に消えるのではなく、その要素をは
さんだ要素をまとめることはないという点を考慮しています。

:_alone の例だと、たとえば、chkbuild のログを diff
するの
に必要になったものを簡略化すれば、

pat = /\A[A-Z][A-Za-z0-9_]+(::[A-Z][A-Za-z0-9_]+)*#/
open(filename) {|f|
f.slice_by {|line| pat =~ line ? $& : :_alone }.each {|key, lines|
if key != :_alone
print lines.sort.join(‘’)
else
print lines.join(‘’)
end
}
}

というところでしょうか。
これは、pat にマッチするものが等しい連続した行を sort
するこ
とによって、 minitest がテストの順序をランダムに変えても
diff で変化を検出しなくするようにする、というものです。

例が載っていない他に違和感はありますか?

slice_beforee$B$K$b0cOB46$,$"$j$^$9!#e(B
ruby-coree$B$GJ9$$$F$_$k$Y$-$J$N$+$7$i!#$=$l$H$be(Bbikeshede$B$G<}=&e(B
e$B$,$D$+$J$/$J$C$A$c$&$+$7$i!#e(B

どのような違和感ですか?

そこが不明だと、どういうものを狙って探せばいいのかわかりませ
ん。

まつもと ゆきひろです

In message “Re: [ruby-dev:38501] Re: Enumerable#gather_each”
on Wed, 20 May 2009 00:13:51 +0900, Tanaka A. [email protected]
writes:

|メソッド名を
|* Enumerable#slice_by
|* Enumerable#slice_before
|に変えたパッチをつけます。
|
|このパッチでは、slice_by の要素の除去は nil と :_separator
|に割り当ててあります。false は外してみました。
|
|あと、singleton という語は他の意味に使うことが多いので
|:_alone に変えました。

slice_bye$B$H$$$&%a%=%C%IL>$K0[O@$O$“$j$^$;$s!#e(B
:_separatore$B$H$+e(B:_alonee$B$H$+$O$A$g$C$H0cOB46$,$”$j$^$9!#e(B
e$B%I%-%e%a%s%H$KNc$,:$C$F$$$J$$$+$i$+$b$7$l$^$;$s!#e(B

slice_beforee$B$K$b0cOB46$,$"$j$^$9!#e(B
ruby-coree$B$GJ9$$$F$_$k$Y$-$J$N$+$7$i!#$=$l$H$be(Bbikeshede$B$G<}=&e(B
e$B$,$D$+$J$/$J$C$A$c$&$+$7$i!#e(B

In article [email protected],
Yukihiro M. [email protected] writes:

e$B$3$l$K$D$$$F$OH?BP$7$^$;$s$N$G!“$3$l$i$be(BRDoce$B$KDI2C$7$F$b$i$(e(B
e$B$^$;$s$+!#$A$g$C$H>iD92a$.$k$+$J$”!#e(B

ドキュメントに足すのは問題ありません。

e$B$=$&$G$9$M!#e(B_bye$B7O$G%7%s%%k$G@)8f$9$k$H8@$&A0Nc$,$J$$!"$H$$e(B
e$B$&$3$H0J30$K$O!“5$$K$J$kE@$O$”$j$^$;$s!#e(B

たしかにその前例はない気がしますね。

e$B$=$&$G$9$h$M!#BP1~$9$k$b$N$,e(Bslice_bye$B$G$"$k;~$K!"e(B_bye$B$He(B_before
e$B$H$$$&$N$,!"0UL#E*$KBP1~$7$J$$5$$,$9$k$H$$$&$N$,0cOB46$N:,8;e(B
e$B$N$h$&$J5$$,$7$^$9!#e(B

では、どちらかを slice じゃなくて chunk
にするのはどうでしょうか。

chunk_by と slice_before
あるいは
slice_by と chunk_before
ということですが。

まつもと ゆきひろ＠上海です

In message “Re: [ruby-dev:38512] Re: Enumerable#gather_each”
on Thu, 21 May 2009 16:28:17 +0900, Tanaka A. [email protected]
writes:

|In article [email protected],
| Yukihiro M. [email protected] writes:
|
|> slice_bye$B$H$$$&%a%=%C%IL>$K0[O@$O$“$j$^$;$s!#e(B
|> :_separatore$B$H$+e(B:_alonee$B$H$+$O$A$g$C$H0cOB46$,$”$j$^$9!#e(B
|> e$B%I%-%e%a%s%H$KNc$,:$C$F$$$J$$$+$i$+$b$7$l$^$;$s!#e(B
|
|:_separator と nil は同じ機能で、nil を使う例は載っています。
|:_separator に書き換えるとこうなります。
|
| sep = "-"*72 + "\n"
| IO.popen("svn log README") {|f|
| f.slice_by {|line|
| line != sep || :_separator
| }.each {|_, lines|
| pp lines
| }
| }
|
|なお、separator という語を選ぶにあたっては delete や reject
|などいくつか考えましたが、入力を破壊的に書き換えるものではな
|いという点と、入力の要素が単に消えるのではなく、その要素をは
|さんだ要素をまとめることはないという点を考慮しています。
|
|:_alone の例だと、たとえば、chkbuild のログを diff するの
|に必要になったものを簡略化すれば、
|
| pat = /\A[A-Z][A-Za-z0-9_]+(::[A-Z][A-Za-z0-9_]+)*#/
| open(filename) {|f|
| f.slice_by {|line| pat =~ line ? $& : :_alone }.each {|key, lines|
| if key != :_alone
| print lines.sort.join(‘’)
| else
| print lines.join(‘’)
| end
| }
| }
|
|というところでしょうか。

e$B$3$l$K$D$$$F$OH?BP$7$^$;$s$N$G!“$3$l$i$be(BRDoce$B$KDI2C$7$F$b$i$(e(B
e$B$^$;$s$+!#$A$g$C$H>iD92a$.$k$+$J$”!#e(B

|例が載っていない他に違和感はありますか?

e$B$=$&$G$9$M!#e(B_bye$B7O$G%7%s%%k$G@)8f$9$k$H8@$&A0Nc$,$J$$!"$H$$e(B
e$B$&$3$H0J30$K$O!“5$$K$J$kE@$O$”$j$^$;$s!#e(B

|> slice_beforee$B$K$b0cOB46$,$“$j$^$9!#e(B
|> ruby-coree$B$GJ9$$$F$_$k$Y$-$J$N$+$7$i!#$=$l$H$be(Bbikeshede$B$G<}=&e(B
|> e$B$,$D$+$J$/$J$C$A$c$&$+$7$i!#e(B
|
|どのような違和感ですか?
|
|そこが不明だと、どういうものを狙って探せばいいのかわかりませ
|ん。

e$B$=$&$G$9$h$M!#BP1~$9$k$b$N$,e(Bslice_bye$B$G$"$k;~$K!"e(B_bye$B$He(B_before
e$B$H$$$&$N$,!"0UL#E*$KBP1~$7$J$$5$$,$9$k$H$$$&$N$,0cOB46$N:,8;e(B
e$B$N$h$&$J5$$,$7$^$9!#e(B

                            e$B$^$D$b$He(B e$B$f$-$R$me(B /:|)

In article [email protected],
Tanaka A. [email protected] writes:

時間が空いてしまったんですが、例を足して、名前を調整してみました。

enum.chunk {|elt| … } => enumerator
enum.chunk(initial_state) {|elt, state| … } => enumerator
enum.slice_before {|elt| … } => enumerator
enum.slice_before(initial_state) {|elt, state| … } => enumerator

という API になっています。

しばらく待って反対はないようなので、入れました。

In article [email protected],
Yukihiro M. [email protected] writes:

e$B$3$l$K$D$$$F$OH?BP$7$^$;$s$N$G!“$3$l$i$be(BRDoce$B$KDI2C$7$F$b$i$(e(B
e$B$^$;$s$+!#$A$g$C$H>iD92a$.$k$+$J$”!#e(B

時間が空いてしまったんですが、例を足して、名前を調整してみました。

enum.chunk {|elt| … } => enumerator
enum.chunk(initial_state) {|elt, state| … } => enumerator
enum.slice_before {|elt| … } => enumerator
enum.slice_before(initial_state) {|elt, state| … } => enumerator

という API になっています。

どうでしょう?

なお、chunk
という語を動詞として使う用法は稀だけどあるようです。
wordnet
でそういうことを調べることができることに気がついたんですが、
wn chunk -famlv とすると
chunk used as a verb is rare (polysemy count = 2)
と出てきます。(partition, sort, zip と同じ値です。)

関数名がメソッド名とあってないのは後で直す予定です。

% svn diff --diff-cmd diff -x ‘-u -p’
Index: enum.c

— enum.c (revision 24769)
+++ enum.c (working copy)
@@ -1802,6 +1802,316 @@ enum_cycle(int argc, VALUE argv, VALUE
return Qnil; /
not reached */
}

+struct sliceby_arg {

  • VALUE categorize;
  • VALUE state;
  • VALUE prev_value;
  • VALUE prev_elts;
  • VALUE yielder;
    +};

+static VALUE
+sliceby_ii(VALUE i, VALUE _argp, int argc, VALUE *argv)
+{

  • struct sliceby_arg *argp = (struct sliceby_arg *)_argp;
  • VALUE v;
  • VALUE alone = ID2SYM(rb_intern(“_alone”));
  • VALUE separator = ID2SYM(rb_intern(“_separator”));
  • ENUM_WANT_SVALUE();
  • if (NIL_P(argp->state))
  •    v = rb_funcall(argp->categorize, rb_intern("call"), 1, i);
    
  • else
  •    v = rb_funcall(argp->categorize, rb_intern("call"), 2, i, 
    

argp->state);
+

  • if (v == alone) {
  •    if (!NIL_P(argp->prev_value)) {
    
  •        rb_funcall(argp->yielder, rb_intern("<<"), 1, 
    

rb_assoc_new(argp->prev_value, argp->prev_elts));

  •        argp->prev_value = argp->prev_elts = Qnil;
    
  •    }
    
  •    rb_funcall(argp->yielder, rb_intern("<<"), 1, rb_assoc_new(v, 
    

rb_ary_new3(1, i)));

  • }
  • else if (NIL_P(v) || v == separator) {
  •    if (!NIL_P(argp->prev_value)) {
    
  •        rb_funcall(argp->yielder, rb_intern("<<"), 1, 
    

rb_assoc_new(argp->prev_value, argp->prev_elts));

  •        argp->prev_value = argp->prev_elts = Qnil;
    
  •    }
    
  • }
  • else if (SYMBOL_P(v) && rb_id2name(SYM2ID(v))[0] == ‘_’) {
  • rb_raise(rb_eRuntimeError, “symbol begins with an underscore is
    reserved”);
  • }
  • else {
  •    if (NIL_P(argp->prev_value)) {
    
  •        argp->prev_value = v;
    
  •        argp->prev_elts = rb_ary_new3(1, i);
    
  •    }
    
  •    else {
    
  •        if (rb_equal(argp->prev_value, v)) {
    
  •            rb_ary_push(argp->prev_elts, i);
    
  •        }
    
  •        else {
    
  •            rb_funcall(argp->yielder, rb_intern("<<"), 1, 
    

rb_assoc_new(argp->prev_value, argp->prev_elts));

  •            argp->prev_value = v;
    
  •            argp->prev_elts = rb_ary_new3(1, i);
    
  •        }
    
  •    }
    
  • }
  • return Qnil;
    +}

+static VALUE
+sliceby_i(VALUE yielder, VALUE enumerator, int argc, VALUE *argv)
+{

  • VALUE enumerable;
  • struct sliceby_arg arg;
  • enumerable = rb_ivar_get(enumerator,
    rb_intern(“sliceby_enumerable”));
  • arg.categorize = rb_ivar_get(enumerator,
    rb_intern(“sliceby_categorize”));
  • arg.state = rb_ivar_get(enumerator,
    rb_intern(“sliceby_initial_state”));
  • arg.prev_value = Qnil;
  • arg.prev_elts = Qnil;
  • arg.yielder = yielder;
  • if (!NIL_P(arg.state))
  •    arg.state = rb_obj_dup(arg.state);
    
  • rb_block_call(enumerable, id_each, 0, 0, sliceby_ii, (VALUE)&arg);
  • if (!NIL_P(arg.prev_elts))
  •    rb_funcall(arg.yielder, rb_intern("<<"), 1, 
    

rb_assoc_new(arg.prev_value, arg.prev_elts));

  • return Qnil;
    +}

+/*

    • call-seq:
    • enum.chunk {|elt| ... } => enumerator
      
    • enum.chunk(initial_state) {|elt, state| ... } => enumerator
      
    • Creates an enumerator for each chunked elements.
    • The elements which have same block value are chunked.
    • The result enumerator yields the block value and an array of
      chunked elements.
    • So “each” method can be called as follows.
    • enum.chunk {|elt| key }.each {|key, ary| … }
    • For example, consecutive even numbers and odd numbers can be
    • splitted as follows.
    • [5, 3, 3, 5, 2, 8, 0, 6, 0, 3].chunk {|n|
    •  n.even?
      
    • }.each {|even, ary|
    •  p [even, ary]
      
    • }
    • #=> [false, [5, 3, 3, 5]]
    • [true, [2, 8, 0, 6, 0]]

    • [false, [3]]

    • The following key values has special meaning:
      • nil and :_separator specifies that the elements are dropped.
      • :_alone specifies that the element should be chunked as a
        singleton.
    • Other symbols which begins an underscore are reserved.
    • nil and :_separator can be used to ignore some elements.
    • For example, the sequence of hyphens in svn log can be eliminated
      as follows.
    • sep = “-”*72 + “\n”
    • IO.popen(“svn log README”) {|f|
    •  f.chunk {|line|
      
    •    line != sep || nil
      
    •  }.each {|_, lines|
      
    •    pp lines
      
    •  }
      
    • }
    • #=> [“r20018 | knu | 2008-10-29 13:20:42 +0900 (Wed, 29 Oct 2008)
      | 2 lines\n”,
    • “\n”,

    • “* README, README.ja: Update the portability section.\n”,

    • “\n”]

    • ["r16725 | knu | 2008-05-31 23:34:23 +0900 (Sat, 31 May 2008)

| 2 lines\n",

    • “\n”,

    • “* README, README.ja: Add a note about default C flags.\n”,

    • “\n”]

    • :_alone can be used to pass through bunch of elements.
    • For example, sort consective lines formed as Foo#bar and
    • pass other lines, chunk can be used as follows.
    • pat = /\A[A-Z][A-Za-z0-9_]+#/
    • open(filename) {|f|
    •  f.chunk {|line| pat =~ line ? $& : :_alone }.each {|key, lines|
      
    •    if key != :_alone
      
    •      print lines.sort.join('')
      
    •    else
      
    •      print lines.join('')
      
    •    end
      
    •  }
      
    • }
    • If the block needs to maintain state over multiple elements,
    • initial_state argument can be used.
    • If non-nil value is given,
    • it is duplicated for each “each” method invocation of the
      enumerator.
    • The duplicated object is passed to 2nd argument of the block for
      “chunk” method…
  • */
    +static VALUE
    +enum_chunk(int argc, VALUE *argv, VALUE enumerable)
    +{
  • VALUE initial_state;
  • VALUE enumerator;
  • rb_scan_args(argc, argv, “01”, &initial_state);
  • enumerator = rb_obj_alloc(rb_cEnumerator);
  • rb_ivar_set(enumerator, rb_intern(“sliceby_enumerable”),
    enumerable);
  • rb_ivar_set(enumerator, rb_intern(“sliceby_categorize”),
    rb_block_proc());
  • rb_ivar_set(enumerator, rb_intern(“sliceby_initial_state”),
    initial_state);
  • rb_block_call(enumerator, rb_intern(“initialize”), 0, 0, sliceby_i,
    enumerator);
  • return enumerator;
    +}

+struct slicebefore_arg {

  • VALUE separator_p;
  • VALUE state;
  • VALUE prev_elts;
  • VALUE yielder;
    +};

+static VALUE
+slicebefore_ii(VALUE i, VALUE _argp, int argc, VALUE *argv)
+{

  • struct slicebefore_arg *argp = (struct slicebefore_arg *)_argp;
  • VALUE header_p;
  • ENUM_WANT_SVALUE();
  • if (NIL_P(argp->state))
  •    header_p = rb_funcall(argp->separator_p, rb_intern("call"), 1, 
    

i);

  • else
  •    header_p = rb_funcall(argp->separator_p, rb_intern("call"), 2, 
    

i, argp->state);

  • if (RTEST(header_p)) {
  •    if (!NIL_P(argp->prev_elts))
    
  •        rb_funcall(argp->yielder, rb_intern("<<"), 1, 
    

argp->prev_elts);

  •    argp->prev_elts = rb_ary_new3(1, i);
    
  • }
  • else {
  •    if (NIL_P(argp->prev_elts))
    
  •        argp->prev_elts = rb_ary_new3(1, i);
    
  •    else
    
  •        rb_ary_push(argp->prev_elts, i);
    
  • }
  • return Qnil;
    +}

+static VALUE
+slicebefore_i(VALUE yielder, VALUE enumerator, int argc, VALUE *argv)
+{

  • VALUE enumerable;
  • struct slicebefore_arg arg;
  • enumerable = rb_ivar_get(enumerator,
    rb_intern(“slicebefore_enumerable”));
  • arg.separator_p = rb_ivar_get(enumerator,
    rb_intern(“slicebefore_separator_p”));
  • arg.state = rb_ivar_get(enumerator,
    rb_intern(“slicebefore_initial_state”));
  • arg.prev_elts = Qnil;
  • arg.yielder = yielder;
  • if (!NIL_P(arg.state))
  •    arg.state = rb_obj_dup(arg.state);
    
  • rb_block_call(enumerable, id_each, 0, 0, slicebefore_ii,
    (VALUE)&arg);
  • if (!NIL_P(arg.prev_elts))
  •    rb_funcall(arg.yielder, rb_intern("<<"), 1, arg.prev_elts);
    
  • return Qnil;
    +}

+/*

    • call-seq:
    • enum.slice_before {|elt| ... } => enumerator
      
    • enum.slice_before(initial_state) {|elt, state| ... } => 
      

enumerator

    • Creates an enumerator for each chunked elements.
    • The chunked elements begins an element which the block returns true
      value.
    • The result enumerator yields the chunked elements as an array.
    • So “each” method can be called as follows.
    • enum.slice_before {|elt| bool }.each {|ary| … }
    • For example, iteration over ChangeLog entries can be implemented as
      follows.
    • iterate over ChangeLog entries.

    • open(“ChangeLog”) {|f|
    •  f.slice_before {|line| /\A\S/ =~ line }.each {|e| pp e}
      
    • }
    • If the block needs to maintain state over multiple elements,
    • initial_state argument can be used.
    • If non-nil value is given,
    • it is duplicated for each “each” method invocation of the
      enumerator.
    • The duplicated object is passed to 2nd argument of the block for
      “slice_before” method…
    • For example, monotonically increasing elements can be chunked as
      follows.
    • a = [2, 5, 2, 1, 4, 3, 1, 2, 8, 0]
    • enum = a.slice_before(n: 0) {|elt, h|
    •  prev = h[:n]
      
    •  h[:n] = elt
      
    •  prev > elt
      
    • }
    • enum.each {|ary| p ary }
    • #=> [2, 5]
    • [2]

    • [1, 4]

    • [3]

    • [1, 2, 8]

    • [0]

    • parse mbox

    • open(“mbox”) {|f|
    •  f.slice_before {|line|
      
    •    line.start_with? "From "
      
    •  }.each {|mail|
      
    •    unix_from = mail.shift
      
    •    i = mail.index("\n")
      
    •    header = mail[0...i]
      
    •    body = mail[(i+1)..-1]
      
    •    fields = header.slice_before {|line| !" \t".include?(line[0]) 
      

}.to_a

    •    p unix_from
      
    •    pp fields
      
    •    pp body
      
    •  }
      
    • }
    • split mails in mbox (slice before Unix From line after an empty

line)

    • open(“mbox”) {|f|
    •  f.slice_before(emp: true) {|line,h|
      
    •  prevemp = h[:emp]
      
    •  h[:emp] = line == "\n"
      
    •  prevemp && line.start_with?("From ")
      
    • }.each {|mail|
    •  pp mail
      
    • }
  • */
    +static VALUE
    +enum_slice_before(int argc, VALUE *argv, VALUE enumerable)
    +{
  • VALUE initial_state, enumerator;
  • rb_scan_args(argc, argv, “01”, &initial_state);
  • enumerator = rb_obj_alloc(rb_cEnumerator);
  • rb_ivar_set(enumerator, rb_intern(“slicebefore_enumerable”),
    enumerable);
  • rb_ivar_set(enumerator, rb_intern(“slicebefore_separator_p”),
    rb_block_proc());
  • rb_ivar_set(enumerator, rb_intern(“slicebefore_initial_state”),
    initial_state);
  • rb_block_call(enumerator, rb_intern(“initialize”), 0, 0,
    slicebefore_i, enumerator);
  • return enumerator;
    +}

/*

  • call-seq:
  • enum.join(sep=$,)    -> str
    

@@ -1881,6 +2191,8 @@ Init_Enumerable(void)
rb_define_method(rb_mEnumerable, “drop_while”, enum_drop_while, 0);
rb_define_method(rb_mEnumerable, “cycle”, enum_cycle, -1);
rb_define_method(rb_mEnumerable, “join”, enum_join, -1);

  • rb_define_method(rb_mEnumerable, “chunk”, enum_chunk, -1);

  • rb_define_method(rb_mEnumerable, “slice_before”, enum_slice_before,
    -1);

    id_eqq = rb_intern(“===”);
    id_each = rb_intern(“each”);
    Index: test/ruby/test_enum.rb
    ===================================================================
    — test/ruby/test_enum.rb (revision 24769)
    +++ test/ruby/test_enum.rb (working copy)
    @@ -315,4 +315,74 @@ class TestEnumerable < Test::Unit::TestC
    ensure
    $, = ofs
    end

  • def test_chunk

  • e = [].chunk {|elt| true }

  • assert_equal([], e.to_a)

  • e = @obj.chunk {|elt| elt & 2 == 0 ? false : true }

  • assert_equal([[false, [1]], [true, [2, 3]], [false, [1]], [true,
    [2]]], e.to_a)

  • e = @obj.chunk(acc: 0) {|elt, h| h[:acc] += elt; h[:acc].even? }

  • assert_equal([[false, [1,2]], [true, [3]], [false, [1,2]]], e.to_a)

  • assert_equal([[false, [1,2]], [true, [3]], [false, [1,2]]], e.to_a)

this tests h is duplicated.

  • hs = [{}]
  • e = [:foo].chunk(hs[0]) {|elt, h|
  •  hs << h
    
  •  true
    
  • }
  • assert_equal([[true, [:foo]]], e.to_a)
  • assert_equal([[true, [:foo]]], e.to_a)
  • assert_equal([{}, {}, {}], hs)
  • assert_not_same(hs[0], hs[1])
  • assert_not_same(hs[0], hs[2])
  • assert_not_same(hs[1], hs[2])
  • e = @obj.chunk {|elt| elt < 3 ? :_alone : true }
  • assert_equal([[:_alone, [1]],
  •              [:_alone, [2]],
    
  •              [true, [3]],
    
  •              [:_alone, [1]],
    
  •              [:_alone, [2]]], e.to_a)
    
  • e = @obj.chunk {|elt| elt == 3 ? :_separator : true }
  • assert_equal([[true, [1, 2]],
  •              [true, [1, 2]]], e.to_a)
    
  • e = @obj.chunk {|elt| elt == 3 ? nil : true }
  • assert_equal([[true, [1, 2]],
  •              [true, [1, 2]]], e.to_a)
    
  • e = @obj.chunk {|elt| :_foo }
  • assert_raise(RuntimeError) { e.to_a }
  • end
  • def test_slice_before
  • e = [].slice_before {|elt| true }
  • assert_equal([], e.to_a)
  • e = @obj.slice_before {|elt| elt.even? }
  • assert_equal([[1], [2,3,1], [2]], e.to_a)
  • e = @obj.slice_before {|elt| elt.odd? }
  • assert_equal([[1,2], [3], [1,2]], e.to_a)
  • e = @obj.slice_before(acc: 0) {|elt, h| h[:acc] += elt;
    h[:acc].even? }
  • assert_equal([[1,2], [3,1,2]], e.to_a)
  • assert_equal([[1,2], [3,1,2]], e.to_a) # this tests h is
    duplicated.
  • hs = [{}]
  • e = [:foo].slice_before(hs[0]) {|elt, h|
  •  hs << h
    
  •  true
    
  • }
  • assert_equal([[:foo]], e.to_a)
  • assert_equal([[:foo]], e.to_a)
  • assert_equal([{}, {}, {}], hs)
  • assert_not_same(hs[0], hs[1])
  • assert_not_same(hs[0], hs[2])
  • assert_not_same(hs[1], hs[2])
  • end

end