Merge sort with erlang, elixir, python

Source code examples on Github Wikipedia article about merge sort algorithm.

Erlang implementation

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/env escript
%% -*- erlang -*-
%%! -smp enable -sname p9
% vim:syn=erlang

-mode(compile).

main(_) ->
    {A,B,C} = erlang:timestamp(),
    random:seed(A,B,C),
    L = [random:uniform(10000)||_<-lists:seq(1,100000)],
    m(L),
    io:format("Done").

%--------------- merge sort implementation --------------------------

m([L]) -> [L]; 
m(L)   ->
    {L1,L2} = lists:split(length(L) div 2, L),
    merge(m(L1), m(L2)).

merge(L1, L2)    -> merge(L1, L2, []).
merge([], L2, A) -> A++L2;
merge(L1, [], A) -> A++L1;
merge([H1|T1], [H2|T2], A) when H2>=H1 -> merge(T1, [H2|T2], A++[H1]);
merge([H1|T1], [H2|T2], A) when H1>H2  -> merge([H1|T1], T2, A++[H2]).

erlang has efficient built-in merge function for sorted lists and we can rewrite implementation:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!/usr/bin/env escript
%% -*- erlang -*-
%%! -smp enable -sname p9
% vim:syn=erlang

-mode(compile).

main(_) ->
    {A,B,C} = erlang:timestamp(),
    random:seed(A,B,C),
    L = [random:uniform(10000)||_<-lists:seq(1,5000000)],
    M = m(L),
    io:format("Done ~p", [M]).

%--------------- merge sort implementation --------------------------

m([L]) -> [L]; 
m(L)   ->
    {L1,L2} = lists:split(length(L) div 2, L),
    lists:merge(m(L1), m(L2)).

almost same implementation with Elixir:

1
2
3
4
5
6
7
8
9
10
defmodule Sort do

    def m_s([l]), do: [l]
    def m_s(list) do
        {l, r} = Enum.split(list, div(length(list), 2))        
        :lists.merge(m_s(l), m_s(r))
    end

end

Merge sort algorith easily parallelizes. This is erlang implementation of merge sort, it using same number of concurrent processes as number of system processors:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/env escript
%% -*- erlang -*-
%%! -smp enable -sname p9
% vim:syn=erlang

-mode(compile).

main(_) ->
    {A,B,C} = erlang:timestamp(),
    random:seed(A,B,C),
    L = [random:uniform(10000)||_<-lists:seq(1,5000000)],
    _M = m(L, erlang:system_info(schedulers) ),
    io:format("Done ~p", ["M"]).

%---------------parallel merge sort implementation --------------------------

m([L],_) -> [L]; 
m(L, N) when N > 1  -> 
    {L1,L2} = lists:split(length(L) div 2, L),
    {Parent, Ref} = {self(), make_ref()},
    spawn(fun()-> Parent ! {l1, Ref, m(L1, N-2)} end), 
    spawn(fun()-> Parent ! {l2, Ref, m(L2, N-2)} end), 
    {L1R, L2R} = receive_results(Ref, undefined, undefined),
    lists:merge(L1R, L2R);
m(L, _) -> {L1,L2} = lists:split(length(L) div 2, L), lists:merge(m(L1, 0), m(L2, 0)).

receive_results(Ref, L1, L2) ->
    receive
        {l1, Ref, L1R} when L2 == undefined -> receive_results(Ref, L1R, L2);
        {l2, Ref, L2R} when L1 == undefined -> receive_results(Ref, L1, L2R);
        {l1, Ref, L1R} -> {L1R, L2};
        {l2, Ref, L2R} -> {L1, L2R}
    after 5000 -> receive_results(Ref, L1, L2)
    end.

so, i have virtual machine with 2 cores and this are measurements for regular and parallel merge sorts erlang implementations (sorting 5 million list of integers):

1
2
3
4
5
6
7
8
9
vagrant@mblog:~/pr_euler/algorithms/mergesort$ time ./merge2.erl
real    0m9.124s
user    0m7.708s
sys     0m0.408s
vagrant@mblog:~/pr_euler/algorithms/mergesort$ time ./merge3.erl
real    0m5.245s
user    0m6.560s
sys     0m0.444s
vagrant@mblog:~/pr_euler/algorithms/mergesort$

and Python recursive implementation of merge sort:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#!/usr/bin/python3

import random
from heapq import merge

def m(l):
    if len(l) <= 1: return l
    middle = len(l) // 2
    left, right = l[:middle], l[middle:]
    left  = m(left)
    right = m(right) 

    return list(merge(left, right))

l = [random.randint(1,10000) for _ in range(1,5000000)]

m(l)
print("Done")

my python recursive implementation very slow :

1
2
3
4
5
6
vagrant@mblog:~/pr_euler/algorithms/mergesort$ time ./merge1.py
Done

real    0m38.014s
user    0m37.480s
sys     0m0.252s