%
% Dividing into roughly equal sized groups in in MiniZinc.
%
% From or-exchange:
% "dividing into roughly equal sized groups, with a sorted list"
% http://www.or-exchange.com/questions/4398/dividing-into-roughly-equal-sized-groups-with-a-sorted-list
% """
% I have a problem, and it seems like it should be something that someone
% has studied before. I have a sorted list of N elements, and I want to divide
% them into K groups, by choosing K-1 split points between them. There may be
% elements with the same value, and we want to have items with same value in
% the same group. Find K groups as close in size to round(N/K) as possible.
%
% For example, divide these 32 elements in to 4 groups of size 8:
%
% 1 1 1 1 2 2 3 3 3 3 3 3 3 3 4 4 4 4 5 5 5 5 5 6 6 6 6 7 8 9 10 10
%
% One solution would be these 3 break points:
%
% 1 1 1 1 2 2 | 3 3 3 3 3 3 3 3 | 4 4 4 4 5 5 5 5 5 | 6 6 6 6 7 8 9 10 10
%[ 6 14 23 ]
%[ 6 elts 8 elts 9 elts 9 elts ]
%
% 1 1 1 1 2 2 = 6 elements, error = abs(8-6)=2
% 3 3 3 3 3 3 3 3 = 8 elements, error = abs(8-8)=0
% 4 4 4 4 5 5 5 5 5 = 9 elements, error = abs(8-9)=1
% 6 6 6 6 7 8 9 10 10 = 9 elements, error = abs(8-9)=1
%
% total error = 4
%
% Does this look familiar to anyone? I'd like an approximation algorithm if possible.
%
% Thanks, Craig Schmidt
% """
%
% Note: It's not completely clear in the problem description if k should be
% fixed or variable. This model assumes that k is fixed.
%
%
%
% This MiniZinc model was created by Hakan Kjellerstrand, hakank@bonetmail.com
% See also my MiniZinc page: http://www.hakank.org/minizinc/
%
include "globals.mzn";
int: n;
array[1..n] of int: a;
int: k; % k groups
int: gsize = round(int2float(n) / int2float(k)); % group size
% Get the occurrences of each values (for the nval check).
array[min(a)..max(a)] of int: occur = array1d(min(a)..max(a), [ sum([ 1 | j in 1..n where a[j] = i]) | i in min(a)..max(a) ]);
set of int: occur2 = { i | i in min(a)..max(a) where occur[i] > 0 };
% number of distinct values in a
int: nval = card(occur2);
% The valid breaks:
set of int: valid_breaks = {j-1 | j in 2..n where a[j] != a[j-1]};
% Non-valid breaks
set of int: non_valid_breaks = {j-1 | j in 2..n where a[j] = a[j-1]};
% constraint
% trace("occur:" ++ show(occur) ++ "\n" ++
% "occur2: " ++ show(occur2) ++ "\n"
% % ++ "valid_breaks: " ++ show(valid_breaks) ++ "\n"
% % ++ "non_valid_breaks: " ++ show(non_valid_breaks) ++ "\n"
% , 1=1)
% ;
%
% decision variables
%
% Use this with Chuffed:
% array[1..k-1] of var 1..n: x; % where the break points should be.
% Using this is much faster for some solvers (e.g. G12/LazyFD),
% though Chuffed don't like it.
array[1..k-1] of var valid_breaks: x; % where the break points should be.
% Note: We can not assume anything about the upper bounds
% of s and z, since there might be just n elements of the same
% value in a. In that case there can be no solution since the
% description have a hard constraint that elements with the same
% values should be in the same bucket. See below (nval) for a fix
% for this.
% Later note: Using an upper bound like 2*gsize give much better results.
% array[1..k] of var 1..2*gsize: s; % number of elements in each group
array[1..k] of var 1..n: s; % number of elements in each group
% sum of errors
% Using an upper bound like 2*gsize give much better results
% var 0..2*gsize: z = sum(i in 1..k) ( abs(s[i]-gsize) );
var 0..n: z = sum(i in 1..k) ( abs(s[i]-gsize) );
% solve satisfy;
% solve minimize z; % Chuffed
% solve minimize max([abs(s[i]) | i in 1..k ]);
solve :: int_search(x, first_fail, indomain_split, complete) minimize z; % Gecode, JaCoP, etc
% solve :: int_search(x, first_fail, indomain_min, complete) minimize z;
% solve :: int_search(x, smallest, indomain_min, complete) minimize z;
% solve :: seq_search(
% [
% int_search(x, smallest, indomain_split, complete),
% int_search(s, first_fail, indomain_split, complete),
% ])
% minimize z;
%
% sanity check of input
%
constraint
assert(forall(i in 2..n) (a[i-1]<= a[i]), "input array a must be sorted")
;
constraint
trace("nval: " ++ show(nval) ++ "\n", 1=1)
;
%
% Count the number of elements in the groups.
%
constraint
s[1] = x[1]
/\
forall(i in 2..k-1) (
s[i] = x[i]-x[i-1]
)
/\
s[k] = n-x[k-1]
;
%
% all elements with the same values must be in the same bucket.
% Note: This is a hard constraint, which may - as we noted above -
% give some problems if the number of unique values < k.
%
% We can fix that with the requirement that there must be k or more
% different values.
constraint
% count the number of different values in a
if nval >= k then
% forall(j in 2..n) (
% a[j-1] = a[j] -> not(exists(p in 1..k-1) ( x[p] = j-1) )
% % a[j-1] = a[j] -> forall(p in 1..k-1) (x[p] != j-1 )
% )
% /\
forall(j in non_valid_breaks, p in 1..k-1) (
x[p] != j
)
else
trace("There are less distinct values in a than k.\n", 1=1)
/\
true
endif
;
%% some redundant constraints
% constraint
% sum(s) = n
% /\ alldifferent(x)
% /\ increasing(x)
% ;
output [
"gsize: " ++ show(gsize) ++ "\n" ++
"nval: " ++ show(nval) ++ "\n" ++
"z: " ++ show(z) ++ "\n" ++
"x: " ++ show(x) ++ "\n" ++
"s: " ++ show(s) ++ "\n"
]
++
[
"s[" ++ show(i) ++ "]:" ++ show(s[i]) ++ " (diff:" ++ show(fix(s[i]) - gsize) ++ ")\n"
| i in 1..k
]
++
[
if j = 1 then show(a[i]) ++ " " else "" endif ++
if i = fix(x[j]) then "\n" else "" endif
| i in 1..n, j in 1..k-1
]
++ ["\n"]
;
%
% data
%
% optimal z (for k=4): 4; % This is a unique solution.
% n = 32;
% a = [1,1,1,1,2,2,3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5,5,6,6,6,6,7,8,9,10,10];
% k = 4;
% constraint z = 4;
% Here we have 2 different values but 3 buckets so there cannot be
% all same values in the buckets. This can be fixed if we activate
% the nval constraints.
% optimal z (for k = 3): 1
% There are 3 solutions for k=3 and z = 1.
% n = 10;
% a = [1,1,1,1,1,1,1,2,2,2];
% k = 3;
% constraint z = 1;
%
% Some random problems
%
% perl -le 'my @a = (); for(1..100) { push @a, 1+int rand(100); } print join ",",sort {$a <=> $b} @a'
% k -x '{x@