/* 

  Difference dice in Picat.

  From https://www.reddit.com/r/Probability/comments/1e0sb4j/if_i_roll_20_dice_and_i_want_the_probability_of/
  """
  If I roll 20 dice and I want the probability of all values being different to be greater 
  than 99%, what is the lowest number of sides the dice should have? 

  [From a comment by icuepawns: 
  
   Let the desired number of sides be n. There are n^20 possible outcomes of your 
   20 rolls. The number of outcomes of where all values are different is 
     (n P 20)=n!/(n-20)!

   So we need to find the smallest n such that (n P 20)/(n^20) ≥ 0.99.

   I can't think of a way to quickly compute this by hand, but it's rather 
   easy to write some code to simulate it. The answer I got was 18,912. 

  ]
  """

  This is a hard problem, and I'm not sure this model is correct,
  or - better - what conclusions to draw. So it's perhaps a TODO...


  This program was created by Hakan Kjellerstrand, hakank@gmail.com
  See also my Picat page: http://www.hakank.org/picat/

*/

import ppl_distributions, ppl_utils.
import util.

main => go.

/*

  * Full problem (20 rolls), with MaN is 10_000, and min_accepted_samples=100.
    It took 1min56s.
    
    ...
    Num accepted samples: 95  Total samples: 832  (0.114%)
    Num accepted samples: 96  Total samples: 858  (0.112%)
    Num accepted samples: 97  Total samples: 861  (0.113%)
    Num accepted samples: 98  Total samples: 871  (0.113%)
    Num accepted samples: 99  Total samples: 874  (0.113%)
    Num accepted samples: 100  Total samples: 881  (0.114%)

    var : d
    Probabilities (truncated):
    9448: 0.0200000000000000
    9908: 0.0100000000000000
    9901: 0.0100000000000000
    9871: 0.0100000000000000
    .........
    4442: 0.0100000000000000
    4352: 0.0100000000000000
    2741: 0.0100000000000000
    2740: 0.0100000000000000
    mean = 7461.37
    min = 2740

    var : rolls pct different
    Probabilities:
    0.99: 1.0000000000000000
    mean = 0.99
    min = 0.99

  * Another run of 20 dice runs but reducing the max number to 5000

    Num accepted samples: 96  Total samples: 4179  (0.023%)
    3952 = 0.99
    Num accepted samples: 97  Total samples: 4359  (0.022%)
    4942 = 0.99
    Num accepted samples: 98  Total samples: 4414  (0.022%)
    2933 = 0.99
    Num accepted samples: 99  Total samples: 4569  (0.022%)
    4491 = 0.99
    Num accepted samples: 100  Total samples: 4582  (0.022%)
    3335 = 0.99
    var : d
    Probabilities (truncated):
    4935: 0.0200000000000000
    4783: 0.0200000000000000
    4695: 0.0200000000000000
    4634: 0.0200000000000000
    .........
    3116: 0.0100000000000000
    2933: 0.0100000000000000
    2708: 0.0100000000000000
    2291: 0.0100000000000000
    mean = 4212.59
    min = 2291
    Percentiles:
    (0.001 2332.2829999999999)
    (0.01 2703.8299999999999)
    (0.025 3019.9250000000002)
    (0.05 3176.5500000000002)
    (0.1 3427.5999999999999)
    (0.25 3745.25)
    (0.5 4393.5)
    (0.75 4695)
    (0.84 4783)
    (0.9 4822.6999999999998)
    (0.95 4909.3500000000004)
    (0.975 4938.6750000000002)
    (0.99 4974.0900000000001)
    (0.999 4982.1090000000004)
    (0.9999 4982.9108999999999)
    (0.99999 4982.9910899999995)

    var : rolls pct different
    Probabilities:
    0.99: 1.0000000000000000

  % Using a range of 20..100_000 and >= 99% (instead of = 99%).

    Num accepted samples: 98  Total samples: 127  (0.772%)
    25441 = 0.99
    Num accepted samples: 99  Total samples: 128  (0.773%)
    16627 = 1.0
    Num accepted samples: 100  Total samples: 129  (0.775%)
    70633 = 1.0
    var : d
    Probabilities (truncated):
    99573: 0.0100000000000000
    98127: 0.0100000000000000
    97916: 0.0100000000000000
    96401: 0.0100000000000000
    .........
    12732: 0.0100000000000000
    12565: 0.0100000000000000
    10611: 0.0100000000000000
    4138: 0.0100000000000000
    mean = 55149.8
    min = 4138
    Percentiles:
    (0.001 4778.8270000000002)
    (0.01 10546.27)
    (0.025 12644.325000000001)
    (0.05 13680.25)
    (0.1 16414.5)
    (0.25 31421.75)
    (0.5 58694.5)
    (0.75 78058)
    (0.84 85141.399999999994)
    (0.9 90103.400000000009)
    (0.95 94139.25)
    (0.975 97196.374999999985)
    (0.99 98141.460000000006)
    (0.999 99429.84599999999)
    (0.9999 99558.684599999993)
    (0.99999 99571.568459999995)

    var : rolls pct different
    Probabilities:
    1.0: 0.7200000000000000
    0.99: 0.2800000000000000
    mean = 0.9972


*/
go ?=>
  reset_store,
  run_model(10_000,$model,[show_probs_trunc,mean,min,show_percentiles,
                           min_accepted_samples=100,show_accepted_samples=true
                           ]),
  nl,
  show_store_lengths,
  % fail,
  nl.
go => true.
  
% This quite slow, but we need at least 100 runs
% to get at least 99%.
roll_dice(D,NumRolls) = Res =>
  N = 100, % Number of runs
  OK = 0,
  Missing = 0, % Speeding up the loop by early fail
  Loop = true,
  foreach(_ in 1..N, break(Loop==false))
    Rolls = dice_n(D,NumRolls),
    if Rolls.remove_dups.len == Rolls.len then
      OK := OK + 1
    else
      Missing := Missing + 1,
      % Fail early.
      % Here we've seen 3 misses, so we can get 99%.
      if Missing > 2 then
        Loop := false
      end
    end
  end,
  Res = OK / N.


model() =>
  NumRolls = 20, % Original problem
  % NumRolls = 10, % Simpler version
  
  % Number of sides i.e. d_<D>
  % D = random_integer1(20000),
  D = discrete_uniform_dist(20,100_000),
  % D = random_integer1(10000),
  % D = random_integer1(5000),

 
  RollsPct = roll_dice(D,NumRolls),

  % observe(abs(RollsPct-0.99) <= 0.01),
  % "probability of all values being different to be greater than 99"
  % Let's make that greater or equal to 99%.
  observe(RollsPct >=0.99 ),


  if observed_ok then
    println(D=RollsPct),  
    add("d",D),
    add("rolls pct different",RollsPct)
  end.


%
% (n P 20)/(n^20) ≥ 0.99.
% 
go_ptest =>
  T = 3,
  OK = false,
  N = T,
  while(N<=20000,OK == false)
    B = binomialf_float(N,T),
    % B = binomialf(N,T), % Exact but too slow...
    NT = N**T,
    Res = B/NT,
    if N mod 1000 == 0 then
      println(N=B=NT=Res)
    end,
    if Res >= 0.99 then
      OK := N
    end,
    N := N + 1
  end,
  println(found=OK),
  nl.