/* 

  Pascal distribution - number of fair coin flips before n heads in Picat.

  From Mathematica PascalDistribution
  """
  The number of fair coin flips before 3 heads:
  heads3 = PascalDistribution[3, 1/2];
  ...
  Find the probability of getting 3 heads in no more than 6 flips:
  Probability[x <= 6, x \[Distributed] heads3]
  -> 
  21/32
  0.65625

  Find the average number of flips before getting 3 heads:
  Mean[heads3]
  -> 
  6
  """

  * Find the probability of getting 3 heads in no more than 6 flips:

    Picat> X = pascal_dist_cdf(3,1/2,6)
    X = 0.65625

  * Find the average number of flips before getting 3 heads:

    Picat> X = pascal_dist_mean(3,1/2)
    X = 6.0

  Cf my Gamble model gamble_pascal_number_of_fair_coin_flips_before_n_heads.rkt

  This program was created by Hakan Kjellerstrand, hakank@gmail.com
  See also my Picat page: http://www.hakank.org/picat/

*/

import ppl_distributions, ppl_utils, ppl_common_utils.
import util.
% import ordset.

main => go.

/*

  var : len
  Probabilities:
  5: 0.1961000000000000
  4: 0.1870000000000000
  6: 0.1525000000000000
  3: 0.1222000000000000
  7: 0.1164000000000000
  8: 0.0823000000000000
  9: 0.0504000000000000
  10: 0.0367000000000000
  11: 0.0222000000000000
  12: 0.0137000000000000
  13: 0.0082000000000000
  14: 0.0060000000000000
  15: 0.0026000000000000
  16: 0.0013000000000000
  18: 0.0008000000000000
  17: 0.0007000000000000
  20: 0.0003000000000000
  19: 0.0003000000000000
  23: 0.0002000000000000
  21: 0.0001000000000000
  mean = 6.0076

  var : prob
  Probabilities:
  true: 0.6578000000000001
  false: 0.3422000000000000
  mean = 0.6578


  theoretical_mean = 6.0
  theoretical_prob = 0.65625

  PDF:
   5 0.1875 
   4 0.1875 
   6 0.15625 
   3 0.125 
   7 0.1171875 
   8 0.08203125 
   9 0.0546875 
  10 0.03515625 
  11 0.02197265625 
  12 0.013427734375 
  13 0.008056640625 
  14 0.0047607421875 
  15 0.002777099609375 
  16 0.001602172851562 
  17 0.00091552734375 
  18 0.000518798828125 
  19 0.00029182434082 
  20 0.000163078308105 
  21 0.000090599060059 
  22 0.000050067901611 
  23 0.000027537345886 
  24 0.000015079975128 
  25 0.000008225440979 
  26 0.000004470348358 
  27 0.000002421438694 
  28 0.000001307576895 
  29 0.000000704079866 


*/
go ?=>
  N = 3,
  P = 1/2,  
  Check = 6,
  println([n=N,p=P,check=Check]),
  reset_store,
  run_model(10_000,$model(N,P,Check),[show_probs,mean]),
  nl,
  println(theoretical_mean=pascal_dist_mean(N,P)),
  println(theoretical_prob=pascal_dist_cdf(N,P,Check)),
  println("PDF:"),
  pdf_all($pascal_dist(N,P),0.00001,0.999999).sort_down(2).printf_list,
  nl,
  % show_store_lengths,nl,
  % fail,
  nl.
go => true.
  

f(A,P,N) = Res =>
  if A.sum >= N then
    Res = A
  else
    Res = f(A ++ [bern(P)],P,N)
  end.
  
model(N,P,Check) =>
  A = f([],P,N),
  Len = A.len,

  Prob = check(Len <= Check),
  
  add("len",Len),
  add("prob",Prob).


/*
  From Mathematica PascalDistribution
  """
  A coin was flipped 10 times and the 7th head occurred at the 10th flip. 
  Find the probability of such an event if the coin is fair:

  fairD = PascalDistribution[7, 1/2]
  Probability[x == 10, x in fairD]
  -> 
  21/256 
  0.0820313  
  """

  Picat> X = pascal_dist_pdf(7,1/2,10)
  X = 0.08203125


  var : pos 7
  Probabilities:
  0: 0.8248000000000000
  10: 0.0813000000000000
  9: 0.0580000000000000
  8: 0.0275000000000000
  7: 0.0084000000000000
  mean = 1.6138
  HPD intervals:
  HPD interval (0.84): 0.00000000000000..8.00000000000000
  HPD interval (0.9): 0.00000000000000..9.00000000000000
  HPD interval (0.99): 0.00000000000000..10.00000000000000
  HPD interval (0.99999): 0.00000000000000..10.00000000000000

  var : sum
  Probabilities:
  5: 0.2412000000000000
  6: 0.2057000000000000
  4: 0.2041000000000000
  3: 0.1197000000000000
  7: 0.1174000000000000
  8: 0.0471000000000000
  2: 0.0425000000000000
  1: 0.0106000000000000
  9: 0.0096000000000000
  10: 0.0011000000000000
  0: 0.0010000000000000
  mean = 5.0073
  HPD intervals:
  HPD interval (0.84): 3.00000000000000..7.00000000000000
  HPD interval (0.9): 2.00000000000000..7.00000000000000
  HPD interval (0.99): 1.00000000000000..9.00000000000000
  HPD interval (0.99999): 0.00000000000000..10.00000000000000

  var : prob
  Probabilities:
  false: 0.9187000000000000
  true: 0.0813000000000000
  mean = 0.0813
  HPD intervals:
  []

  theoretical_prob = 0.0820312


*/
go2 ?=>
  reset_store,
  run_model(10_000,$model2,[show_probs,mean,
                            show_hpd_intervals,hpd_intervals=[0.84,0.9,0.99,0.99999]]),
  nl,
  println(theoretical_prob=pascal_dist_pdf(7,1/2,10)),
  % fail,
  nl.
go2 => true.
  
 
model2() =>
  N = 10,
  X = bern_n(1/2,N).asum,

  % Find the first position of 7 (or 0 of not found)
  Pos7 = find_first(X,7,0),
  Prob = check(Pos7 == 10),
  
  add("pos 7",Pos7),
  add("sum",X.last),
  add("prob",Prob).


/*
  Mathematica PascalDistribution

  """
  Assuming the coin may not be fair, find the most likely value for p:

  d = PascalDistribution[7, p];
  FindMaximum[PDF[\[ScriptCapitalD], 10], {p, 0.9}]
  ->
  {0.18678, {p -> 0.7}}
  """

  First, a Picat PPL model, with a slightly lower value: 0.66 instead of 0.7.
  (See go4/0 for an alternative version.)

  var : p
  Probabilities (truncated):
  0.971519739690613: 0.0010000000000000
  0.937737726845083: 0.0010000000000000
  0.936471291008889: 0.0010000000000000
  0.935536940574735: 0.0010000000000000
  .........
  0.276420055514606: 0.0010000000000000
  0.274322774361096: 0.0010000000000000
  0.267722446203532: 0.0010000000000000
  0.265853202835762: 0.0010000000000000
  mean = 0.664119
  HPD intervals:
  HPD interval (0.84): 0.48386806331270..0.84779861111626
  HPD interval (0.9): 0.46603181519550..0.88631175834736
  HPD interval (0.99): 0.31052290157043..0.93773772684508
  HPD interval (0.99999): 0.26585320283576..0.97151973969061

  var : post
  Probabilities (truncated):
  12: 0.1230000000000000
  13: 0.1120000000000000
  15: 0.1060000000000000
  11: 0.1060000000000000
  .........
  35: 0.0010000000000000
  33: 0.0010000000000000
  32: 0.0010000000000000
  30: 0.0010000000000000
  mean = 15.896
  HPD intervals:
  HPD interval (0.84): 10.00000000000000..20.00000000000000
  HPD interval (0.9): 10.00000000000000..23.00000000000000
  HPD interval (0.99): 10.00000000000000..38.00000000000000
  HPD interval (0.99999): 10.00000000000000..57.00000000000000


*/
go3 ?=>
  reset_store,
  run_model(10_000,$model3,[show_probs_trunc,mean,
                            show_hpd_intervals,hpd_intervals=[0.84,0.9,0.99,0.99999],
                            min_accepted_samples=1000 % ,show_accepted_samples=true
                            ]),
  nl,
  nl.
go3 => true.
  
 
model3() =>
  N = 10,
  T = 7,

  % P is the unknown and we try to restore it
  P = beta_dist(1,1),

  X = bern_n(P,N).asum,
  Pos7 = find_first(X,T,0),

  observe(Pos7 == 10),

  Post = pascal_dist(N,P),

  if observed_ok then
    add("p",P),
    add("post",Post)
  end.

/*

  Same problem as in model3 but we use pascal_pdf and argmax to get the optimal p.

  Optimal value of p.
  argmax: 0.700 p: 0.186775110949148

  Checking P: 0.65 .. 0.75
  0.65: 0.1765537374808594
  0.66: 0.1801040944032162
  0.67: 0.1829551860868139
  0.68: 0.1850510739859990
  0.69: 0.1863408201882183
  0.70: 0.1867795524000000
  0.71: 0.1863295438561184
  0.72: 0.1849612940029093
  0.73: 0.1826545938431644
  0.74: 0.1793995577460826

*/
go4 =>
  println("Optimal value of p."),
  N = 1000,
  Ps = [pascal_dist_pdf(7,P/N,10) : P in 0..N],
  ArgMax = argmax(Ps).first-1,
  printf("argmax: %.3f p: %.15f\n",ArgMax/N, Ps[ArgMax]),
  nl,
  println("Checking P: 0.65 .. 0.75"),
  foreach(P in 0.65..0.01..0.75)
    printf("%.2f: %.16f\n",P,pascal_dist_pdf(7,P,10))
  end,

  nl.