Simply writing Java that converts the double-byte Unicode Java
outputStrings to single-byte /once/ instead of everytime there’s a
print statement, and following the usual Java pattern of wrapping
output with a BufferedOutputStream, reduces that to
Again, OCaml is basically as fast as C. Note that C and OCaml get slower
moving to 64-bit but Java gets >7x faster. Amazingly, OCaml’s
interpreted
bytecode is faster than Java on 32-bit, whilst being >160x faster to
compile!
Here’s my latest OCaml (I think we could revert to the simpler
list-based
permuter because no significant time is spent generating the
permutations):
let rec fact n = if n=0 then 1 else n*fact(n-1)
let size = 5
(* Permutation generator *)
let p = Array.make size 0
let xx = Array.init size (fun i -> if i<size-1 then 0 else -1)
let gen_perm() =
let i = ref (size - 1) in
while !i > 0 && xx.(!i) = !i do
xx.(!i) <- 0;
decr i
done;
if !i = 0 then 1 else begin
xx.(!i) <- xx.(!i) + 1;
p.(0) <- 1;
for i=0 to size - 1 do
p.(i) <- p.(i - xx.(i));
p.(i - xx.(i)) <- i+1
done;
0
end
let n = fact size
(* Permutations *)
let perms = Array.init n (fun _ -> ignore(gen_perm()); Array.copy p)
let incompat =
let rec aux (px : int array) py i =
i<size && (px.(i) = py.(i) || aux px py (i+1)) in
Array.init n (fun x -> Array.init n (fun y -> aux perms.(x) perms.(y)
0))
let join list = String.concat “” (Array.to_list (Array.map string_of_int
list))
let output_strings = Array.map join perms
let board = Array.make size 0
let op = String.make (size*(size+1)) ‘:’
let rec add_a_row row =
if row=size then begin
for i=0 to size-1 do
String.blit output_strings.(board.(i)) 0 op (i*(size+1)) size
done;
print_string op
end else
for latest = 0 to n - 1 do
let prev_row = ref 0 in
let incompat = incompat.(latest) in
while !prev_row < row && not incompat.(board.(!prev_row)) do
incr prev_row;
done;
if !prev_row = row then begin
board.(row) <- latest;
add_a_row (row + 1)
end
done
let () =
op.[size*(size+1)-1] <- ‘\n’;
add_a_row 0
let gen_perm() =
p.(i - xx.(i)) <- i+1
let rec aux (px : int array) py i =
let rec add_a_row row =
incr prev_row;
done;
if !prev_row = row then begin
board.(row) <- latest;
add_a_row (row + 1)
end
done
let () =
op.[size*(size+1)-1] <- ‘\n’;
add_a_row 0
Here’s a faster version of my program.
Eliminated a “not” in a loop by replacing the array of booleans
that shows incompatibility of two rows with an array that shows
compatibility.
Borrowed you idea of creating an output line ahead of time and
modifying it in place.
(* permutation code by Eric C. Cooper *)
let rec distribute elt = function
| (hd :: tl) as list -> (elt :: list) ::
(List.map (fun x -> hd :: x) (distribute elt tl))
| [] -> [ [elt] ]
let rec permute = function
| x :: rest -> List.flatten (List.map (distribute x) (permute rest))
| [] -> [ [] ] ;;
let list = [ 1; 2; 3; 4; 5 ]
let size = List.length list
let perms = Array.of_list (permute list)
let n = Array.length perms
(* Used to determine if one row is compatible with another. *)
let compatible = Array.make_matrix n n true ;;
for x = 0 to n - 1 do
for y = 0 to n - 1 do
compatible.(x).(y) <-
List.for_all2 ( <> ) perms.(x) perms.(y)
done
done
let join list = String.concat “” (List.map string_of_int list)
let output_strings = Array.map join perms
(* For speed, create a string that’s the length of the lines
that we’ll print; the :'s that aren’t needed as separators
will later be overwritten. )
let output_line = String.make (size(size+1)-1) ‘:’ ^ “\n”
let board = Array.make size 0
let rec add_a_row row =
if row = size then
( for i=0 to size-1 do
String.blit
output_strings.(board.(i)) 0 (* source )
output_line (i(size+1)) (* dest )
size
done;
print_string output_line
)
else
for latest = 0 to n - 1 do
( Create a changeable thing (variable). )
let prev_row = ref 0 in
( The ! below fetches the variable’s value. *)
while (!prev_row < row) &&
(compatible.(latest).(board.(!prev_row))) do
incr prev_row
done;
if !prev_row = row then
( board.(row) <- latest ; add_a_row (row + 1) )
done
;;
That version is on Peter H.'s website, perhaps he’ll replace it
with the newer version on Friday.
Curiously the two Java implementations on Peter’s site perform the same
on
my machine in x86 and they are >32x slower than OCaml. I know Java is
slow
but that is ridiculous…
On Fri, 04 Aug 2006 01:53:41 +0900, Peter H. wrote:
have will be enough so I am looking at removing the rotational and
mirrored versions of the grids from the output.
Why would you want to store all this data? If you want to store the 7x7
version, you will need 3187.2 TB to store all the squares. To store
only the reduced squares you still need 920 MB. To store the reduced
squares for 8x8 you will need 35.6TB. (I used the wikipedia article
on Latin square - Wikipedia to compute these numbers).
My program can easily show only the reduced squares by removing the
permutations from the solutions method, and it will be a lot faster too.
If you want to do this for large squares it would be better to rewrite
my
algorithm in C. But I am interested how this data could ever be useful.
Eliminated a “not” in a loop by replacing the array of booleans
that shows incompatibility of two rows with an array that shows
compatibility.
Heh, should’ve thought of that.
Borrowed you idea of creating an output line ahead of time and
modifying it in place.
Timings:
1.11 yours
1.04 mine
I get the opposite order:
0.394s yours
0.390s mine
but your timings are probably more accurate because your computer is so
slow.
You can make it slightly faster by factoring out the CSE “compatible
(latest)”, giving:
0.382s
That’s actually faster than the C here. Woohoo!
Using bitvectors may also improve things, depending if skipping a bunch
of
comparisons in that loop is significant. However, I think it is time we
searched for a new task to optimise. Ray tracer anyone?
(* permutation code by Eric C. Cooper *)
let rec distribute elt = function
(hd :: tl) as list -> (elt :: list) ::
(List.map (fun x -> hd :: x) (distribute elt tl))
| [] -> [ [elt] ]
let rec permute = function
x :: rest -> List.flatten (List.map (distribute x) (permute rest))
| [] -> [ [] ]
let list = [ 1; 2; 3; 4; 5 ]
let size = List.length list
let perms = Array.of_list (permute list)
let n = Array.length perms
(* Boolean array used to determine if one row is
compatible with another. *)
let compatible = Array.make_matrix n n true ;;
Array.iteri (fun x ex ->
Array.iteri (fun y ey ->
compatible.(x).(y) <- List.for_all2 (<>) ex ey) perms ) perms
let join list = String.concat “” (List.map string_of_int list)
let output_strings = Array.map join perms
(* For speed, create a string that’s the length of the lines
that we’ll print; the :'s that aren’t needed as separators
will later be overwritten. )
let output_line = String.make (size(size+1)-1) ‘:’ ^ “\n”
let board = Array.make size 0
(* A recursive function. )
let rec add_a_row row =
if row = size then
( for i=0 to size-1 do
String.blit
output_strings.(board.(i)) 0 ( source )
output_line (i(size+1)) (* dest )
size
done;
print_string output_line
)
else
for latest = 0 to n - 1 do
let compatible_slice = compatible.(latest) in
( Create a changeable thing (variable). )
let prev_row = ref 0 in
( The ! below fetches the variable’s value. *)
while !prev_row < row &&
compatible_slice.(board.(!prev_row)) do
incr prev_row
done;
if !prev_row = row then
( board.(row) <- latest ; add_a_row (row + 1) )
done
;;
Peter H.'s measurements were displayed on his website before you
asked for the Java source code - they show the current Java program
taking ~2.15x the time of the current OCaml program, on his machine.
That version is on Peter H.'s website, perhaps he’ll replace it
with the newer version on Friday.
Curiously the two Java implementations on Peter’s site perform the same on
my machine in x86 and they are >32x slower than OCaml. I know Java is slow
but that is ridiculous…