/* 

  Matching distribution in Picat.

  https://ora.ox.ac.uk/objects/uuid:478fa6d8-bc7f-458d-92f5-7e5d63d9824a/files/mf8a85f79596ca2150078192a755dc020
  """
  The matching distribution
  Tom Fanshawe introduces the probability distribution that forms the basis of 
  elementary card games and tests of extra-sensory perception.

  
  At its most basic, the matching distribution relates to the following scenario. 
  A set of cards numbered 1, 2,…, N is shuffled and dealt in a random order: 
  how many cards occupy their correct position in the sequence (for example, 
  card number 4 being dealt in fourth position)? It arises in other nominal "real
  life" situations when matching up two sets of items: for example, diners in a 
  restaurant with their dinners, hats with hat-wearers, or letters with their 
  intended recipients. 
  ...
  
  Like the Binomial distribution, the matching distribution can take whole number 
  (integer) values between zero and N. However, the lack of independence in the way 
  it is constructed (each card must be used exactly once) distinguishes it from the 
  Binomial and gives rise to some special properties. In its simplest form, with 
  no cards repeated, the probability of observing r correct matches out of N is:

    p(r) = 1/r! * sum(t=0..N-r, (-1)^t/t!

  ... 
  It is impossible to obtain exactly N−1 correct matches. 

  ...

  As N increases, the matching distribution very quickly converges to a special 
  case of another distribution whose mean equals its variance: 
  the Poisson distribution with mean 1.

  ...

  Thus, in an elementary matching task being performed by a human participant, 
  we might wish to see at least four matches, irrespective of N, in order to 
  be persuaded that the participant might not be matching the items completely at
  random. In our earlier example using Zener cards, provided the subject is 
  aware of the composition of the pack, the expected number of matches is 
  five and we would probably need to see at least 10 correct matches before 
  even considering any claims of ESP.5

  """

  Also, see
  Siegrist "Probability Mathematical Statistics and Stochastic Processes",
  section "12.5: The Matching Problem"
  https://stats.libretexts.org/Bookshelves/Probability_Theory/Probability_Mathematical_Statistics_and_Stochastic_Processes_(Siegrist)/12:_Finite_Sampling_Models/12.05:_The_Matching_Problem
  
  See some different runs/models below. 

  Note that for the ESP deck, the distribution is NOT according 
  to this matching distribution: it's 5 sets of 5 different cards so 
  correct guessing is easier.See model4 below for more.

  Cf my Gamble model gamble_matching_distribution.rkt

  This program was created by Hakan Kjellerstrand, hakank@gmail.com
  See also my Picat page: http://www.hakank.org/picat/

*/

import ppl_distributions, ppl_utils.
import util.

main => go.

/*

  N = 25
  PDF:
  matching_dist_pdf(25, 0): 0.367879441171442
  matching_dist_pdf(25, 1): 0.367879441171442
  matching_dist_pdf(25, 2): 0.183939720585721
  matching_dist_pdf(25, 3): 0.061313240195240
  matching_dist_pdf(25, 4): 0.015328310048810
  matching_dist_pdf(25, 5): 0.003065662009762
  matching_dist_pdf(25, 6): 0.000510943668294
  matching_dist_pdf(25, 7): 0.000072991952613
  matching_dist_pdf(25, 8): 0.000009123994077
  matching_dist_pdf(25, 9): 0.000001013777120
  matching_dist_pdf(25,10): 0.000000101377712
  matching_dist_pdf(25,11): 0.000000009216156
  matching_dist_pdf(25,12): 0.000000000768013
  matching_dist_pdf(25,13): 0.000000000059078
  matching_dist_pdf(25,14): 0.000000000004220
  matching_dist_pdf(25,15): 0.000000000000281
  matching_dist_pdf(25,16): 0.000000000000018
  matching_dist_pdf(25,17): 0.000000000000001
  matching_dist_pdf(25,18): 0.000000000000000
  matching_dist_pdf(25,19): 0.000000000000000
  matching_dist_pdf(25,20): 0.000000000000000
  matching_dist_pdf(25,21): 0.000000000000000
  matching_dist_pdf(25,22): 0.000000000000000
  matching_dist_pdf(25,23): 0.000000000000000
  matching_dist_pdf(25,24): 0.000000000000000
  matching_dist_pdf(25,25): 0.000000000000000

  CDF:
  matching_dist_cdf(25, 0): 0.367879441171442
  matching_dist_cdf(25, 1): 0.735758882342885
  matching_dist_cdf(25, 2): 0.919698602928606
  matching_dist_cdf(25, 3): 0.981011843123846
  matching_dist_cdf(25, 4): 0.996340153172657
  matching_dist_cdf(25, 5): 0.999405815182419
  matching_dist_cdf(25, 6): 0.999916758850712
  matching_dist_cdf(25, 7): 0.999989750803326
  matching_dist_cdf(25, 8): 0.999998874797402
  matching_dist_cdf(25, 9): 0.999999888574522
  matching_dist_cdf(25,10): 0.999999989952234
  matching_dist_cdf(25,11): 0.999999999168389
  matching_dist_cdf(25,12): 0.999999999936402
  matching_dist_cdf(25,13): 0.999999999995480
  matching_dist_cdf(25,14): 0.999999999999700
  matching_dist_cdf(25,15): 0.999999999999982
  matching_dist_cdf(25,16): 0.999999999999999
  matching_dist_cdf(25,17): 1.000000000000000
  matching_dist_cdf(25,18): 1.000000000000000
  matching_dist_cdf(25,19): 1.000000000000000
  matching_dist_cdf(25,20): 1.000000000000000
  matching_dist_cdf(25,21): 1.000000000000000
  matching_dist_cdf(25,22): 1.000000000000000
  matching_dist_cdf(25,23): 1.000000000000000
  matching_dist_cdf(25,24): 1.000000000000000
  matching_dist_cdf(25,25): 1.000000000000000

  Quantile:
  matching_dist_quantile(25,0.0001000000): 0.000000000000000
  matching_dist_quantile(25,0.0010000000): 0.000000000000000
  matching_dist_quantile(25,0.0100000000): 0.000000000000000
  matching_dist_quantile(25,0.0500000000): 0.000000000000000
  matching_dist_quantile(25,0.2500000000): 0.000000000000000
  matching_dist_quantile(25,0.5000000000): 1.000000000000000
  matching_dist_quantile(25,0.7500000000): 2.000000000000000
  matching_dist_quantile(25,0.9000000000): 2.000000000000000
  matching_dist_quantile(25,0.9500000000): 3.000000000000000
  matching_dist_quantile(25,0.9900000000): 4.000000000000000
  matching_dist_quantile(25,0.9990000000): 5.000000000000000
  matching_dist_quantile(25,0.9999000000): 6.000000000000000
  matching_dist_quantile(25,0.9999900000): 8.000000000000000
  matching_dist_quantile(25,0.9999990000): 9.000000000000000
  matching_dist_quantile(25,0.9999999000): 10.000000000000000
  matching_dist_quantile(25,0.9999999900): 11.000000000000000

  Mean: = 1

  Variance: = 1


*/
go ?=>
  N = 25,
  println('N'=N),
  println("PDF:"),
  foreach(I in 0..N)
    printf("matching_dist_pdf(%2d,%2d): %.15f\n",N,I,matching_dist_pdf(N,I))
  end,
  println("\nCDF:"),
  foreach(I in 0..N)
    printf("matching_dist_cdf(%2d,%2d): %.15f\n",N,I,matching_dist_cdf(N,I))
  end,
  println("\nQuantile:"),
  Qs = [0.0001,0.001,0.01,0.05,0.25,0.5,0.75,0.9,0.95,0.99,0.999,0.9999,0.99999,0.999999,0.9999999,0.99999999],
  foreach(Q in Qs)
    printf("matching_dist_quantile(%d,%.10f): %.15f\n",N,Q,matching_dist_quantile(N,Q))
  end,
  println("\nMean:"=matching_dist_mean(N)),
  println("\nVariance:"=matching_dist_variance(N)),  
  nl.
go => true.


/*
  A simple simulation for n=25 and comparing with poisson 1

  var : d
  Probabilities:
  0: 0.3677000000000000
  1: 0.3635000000000000
  2: 0.1907000000000000
  3: 0.0594000000000000
  4: 0.0143000000000000
  5: 0.0033000000000000
  6: 0.0010000000000000
  7: 0.0001000000000000
  mean = 1.0035

  var : p
  Probabilities:
  false: 1.0000000000000000
  mean = [false = 1.0]

  var : pois
  Probabilities:
  0: 0.3732000000000000
  1: 0.3615000000000000
  2: 0.1858000000000000
  3: 0.0636000000000000
  4: 0.0128000000000000
  5: 0.0029000000000000
  6: 0.0002000000000000
  mean = 0.9908


  Note the connection between the matching dist and poisson_dist(1):
  
  Picat> X=[poisson_dist_pdf(1,V)-matching_dist_pdf(100,V) : V in 0..20]
  X = [0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,
       -0.0,0.0,-0.0,-0.0,0.0,-0.0,-0.0]

*/
go2 =>
  reset_store,
  run_model(10_000,$model2,[show_probs_trunc,mean]),
  nl,
  % show_store_lengths,
  % fail,
  nl.
go2 => true.
  
model2() =>
  N = 25,
  D = matching_dist(N),
  Pois = poisson_dist(1),
  P = check(D >= 10),
  add("d",D),
  add("pois",Pois),
  add("p",P).


/*
  Simulation of the matching task: 
  How many guesses are correct?

  Here we see that the convergence to poisson 1 is quite fast.

  n = 1
  var : num_correct
  Probabilities:
  1: 1.0000000000000000
  mean = 1.0

  var : p
  Probabilities:
  true: 1.0000000000000000
  mean = [true = 1.0]


  n = 2
  var : num_correct
  Probabilities:
  2: 0.5031000000000000
  0: 0.4969000000000000
  mean = 1.0062

  var : p
  Probabilities:
  true: 0.5031000000000000
  false: 0.4969000000000000
  mean = [true = 0.5031,false = 0.4969]


  n = 3
  var : num_correct
  Probabilities:
  1: 0.4981000000000000
  0: 0.3285000000000000
  3: 0.1734000000000000
  mean = 1.0183

  var : p
  Probabilities:
  true: 0.6715000000000000
  false: 0.3285000000000000
  mean = [true = 0.6715,false = 0.3285]


  n = 4
  var : num_correct
  Probabilities:
  0: 0.3710000000000000
  1: 0.3364000000000000
  2: 0.2493000000000000
  4: 0.0433000000000000
  mean = 1.0082

  var : p
  Probabilities:
  true: 0.6290000000000000
  false: 0.3710000000000000
  mean = [true = 0.629,false = 0.371]


  n = 5
  var : num_correct
  Probabilities:
  1: 0.3867000000000000
  0: 0.3672000000000000
  2: 0.1605000000000000
  3: 0.0784000000000000
  5: 0.0072000000000000
  mean = 0.9789

  var : p
  Probabilities:
  true: 0.6328000000000000
  false: 0.3672000000000000
  mean = [true = 0.6328,false = 0.3672]


  n = 6
  var : num_correct
  Probabilities:
  0: 0.3710000000000000
  1: 0.3604000000000000
  2: 0.1868000000000000
  3: 0.0594000000000000
  4: 0.0207000000000000
  6: 0.0017000000000000
  mean = 1.0052

  var : p
  Probabilities:
  true: 0.6290000000000000
  false: 0.3710000000000000
  mean = [true = 0.629,false = 0.371]


  n = 7
  var : num_correct
  Probabilities:
  1: 0.3785000000000000
  0: 0.3621000000000000
  2: 0.1853000000000000
  3: 0.0589000000000000
  4: 0.0115000000000000
  5: 0.0034000000000000
  7: 0.0003000000000000
  mean = 0.9909

  var : p
  Probabilities:
  true: 0.6379000000000000
  false: 0.3621000000000000
  mean = [true = 0.6379,false = 0.3621]


  n = 8
  var : num_correct
  Probabilities:
  1: 0.3734000000000000
  0: 0.3667000000000000
  2: 0.1815000000000000
  3: 0.0608000000000000
  4: 0.0138000000000000
  5: 0.0031000000000000
  6: 0.0007000000000000
  mean = 0.9937

  var : p
  Probabilities:
  true: 0.6333000000000000
  false: 0.3667000000000000
  mean = [true = 0.6333,false = 0.3667]


  n = 9
  var : num_correct
  Probabilities:
  0: 0.3709000000000000
  1: 0.3675000000000000
  2: 0.1787000000000000
  3: 0.0655000000000000
  4: 0.0136000000000000
  5: 0.0035000000000000
  6: 0.0003000000000000
  mean = 0.9951

  var : p
  Probabilities:
  true: 0.6291000000000000
  false: 0.3709000000000000
  mean = [true = 0.6291,false = 0.3709]


  n = 10
  var : num_correct
  Probabilities:
  1: 0.3709000000000000
  0: 0.3650000000000000
  2: 0.1843000000000000
  3: 0.0600000000000000
  4: 0.0166000000000000
  5: 0.0026000000000000
  6: 0.0005000000000000
  7: 0.0001000000000000
  mean = 1.0026

  var : p
  Probabilities:
  true: 0.6350000000000000
  false: 0.3650000000000000
  mean = [true = 0.635,false = 0.365]


  n = 25
  var : num_correct
  Probabilities:
  0: 0.3645000000000000
  1: 0.3639000000000000
  2: 0.1899000000000000
  3: 0.0621000000000000
  4: 0.0164000000000000
  5: 0.0026000000000000
  6: 0.0005000000000000
  8: 0.0001000000000000
  mean = 1.0124

  var : p
  Probabilities:
  true: 0.6355000000000000
  false: 0.3645000000000000
  mean = [true = 0.6355,false = 0.3645]


  n = 100
  var : num_correct
  Probabilities:
  1: 0.3757000000000000
  0: 0.3709000000000000
  2: 0.1735000000000000
  3: 0.0621000000000000
  4: 0.0142000000000000
  5: 0.0027000000000000
  6: 0.0008000000000000
  7: 0.0001000000000000
  mean = 0.9848

  var : p
  Probabilities:
  true: 0.6291000000000000
  false: 0.3709000000000000
  mean = [true = 0.6291,false = 0.3709]

*/
go3 =>
  member(N,1..10++[25,100]),
  println(n=N),
  reset_store,
  run_model(10_000,$model3(N),[show_probs_trunc,mean]),
  nl,
  % show_store_lengths,
  fail,
  nl.
go3 => true.


model3(N) =>
  True = 1..N,
  % Guess = shuffle(1..N),
  Guess = draw_without_replacement(N,1..N),
  NumCorrect = [1 : I in 1..N, True[I] == Guess[I]].sum,
  P = check(NumCorrect > 0),
  add("num_correct",NumCorrect),
  add("p",P).


/*
  ESP cards

  The ESP deck contains of 5 sets of 5 symbols, so it's more likely to get a match
  by pure guessing. 
  This is an example of the matching problem that is NOT supported by the 
  matching distribution.

  As the simulation shows the expected number of correct guesses
  in this variant is 5, not 1 as in the "pure" variant of 1..25 
  different cards.

  Note: To show that ESP is probable, it would probably requires about 15 or more
  correct guesses. Using the 0.99 quantile, i.e. 10 correct guesses, will 
  not be enough. One should be looking at something like the 0.9999999 quantile
  instead.
  Also, 0 correct guesses is not enough either, since the probability 
  is about 0.0046 for this.

esp_cards = [1,2,3,4,5,1,2,3,4,5,1,2,3,4,5,1,2,3,4,5,1,2,3,4,5]
var : num_correct
Probabilities:
5: 0.1942000000000000
4: 0.1792400000000000
6: 0.1618800000000000
3: 0.1367200000000000
7: 0.1111000000000000
2: 0.0726400000000000
8: 0.0640200000000000
9: 0.0316400000000000
1: 0.0253000000000000
10: 0.0122800000000000
11: 0.0046000000000000
0: 0.0044000000000000
12: 0.0015600000000000
13: 0.0003400000000000
15: 0.0000400000000000
14: 0.0000400000000000
mean = 5.0123
Percentiles:
(0.0001 0)
(0.001 0)
(0.01 1)
(0.05 2)
(0.25 4)
(0.5 5)
(0.75 6)
(0.9 8)
(0.95 9)
(0.99 10)
(0.999 12)
(0.9999 13)
(0.99999 15)
(0.999999 15)
(0.9999999 15)
(0.999999999 15)
Histogram:
      0:   220 #                                                            (0.004 / 0.004)
      1:  1265 ########                                                     (0.025 / 0.030)
      2:  3632 ######################                                       (0.073 / 0.102)
      3:  6836 ##########################################                   (0.137 / 0.239)
      4:  8962 #######################################################      (0.179 / 0.418)
      5:  9710 ############################################################ (0.194 / 0.613)
      6:  8094 ##################################################           (0.162 / 0.774)
      7:  5555 ##################################                           (0.111 / 0.885)
      8:  3201 ####################                                         (0.064 / 0.950)
      9:  1582 ##########                                                   (0.032 / 0.981)
     10:   614 ####                                                         (0.012 / 0.993)
     11:   230 #                                                            (0.005 / 0.998)
     12:    78                                                              (0.002 / 1.000)
     13:    17                                                              (0.000 / 1.000)
     14:     2                                                              (0.000 / 1.000)
     15:     2                                                              (0.000 / 1.000)


*/
go4 ?=>
  ESPCards = flatten(rep(5,1..5)),
  println(esp_cards=ESPCards),
  reset_store,
  Percentiles = [0.0001,0.001,0.01,0.05,0.25,0.5,0.75,0.9,0.95,0.99,0.999,0.9999,0.99999,0.999999,0.9999999,0.999999999],
  run_model(50_000,$model4(ESPCards),[show_probs,mean,
                                      show_percentiles,percentiles=Percentiles,
                                      show_histogram]),
  nl.
go4 => true.

model4(ESPCards) =>
  N = ESPCards.len,
  True = draw_without_replacement(N,ESPCards),
  Guess = draw_without_replacement(N,ESPCards),
  NumCorrect = [1 : I in 1..N, True[I] == Guess[I]].sum,
  add("num_correct",NumCorrect).