/* 

  Coincidences II in Picat.

  This is a port (or at least inspired) by old simulations in R
  written in 2003 (in Swedish):
  http://www.hakank.org/sims/coincidence_simulating.html
  and the Swedish blog post "Sammanträffanden - anteckningar vid läsning 
  av Diaconis och Mosteller 'Methods for Studying Coincidences'" 
  ("Coincidences - note from a reading of Diaconis and Mosteller 'Methods for Studying Coincidences'")
  http://www.hakank.org/webblogg/archives/000216.html

  (translated from Swedish)
  """
  The study of coincidences is related to cognitive illusions (which is the current interest). 
  We have bad intuition regarding coincidences which the birthday problems shows: 
  It takes about 23 person for it to be a 50% probability that two persons in this group 
  shares the birthday. Surprising? A common intuition is that it require many more people .
  ...
  Here I simulate some of the most interesting sections in Diaconis' and Mosteller's paper
  'Methods for Studying Coincidences', section "7.1 General-Purpose Models: Birthday Problems" (857ff).
  """ 

  Note: This model implements the more fancy variation where people has more than  
  one attribute that they can have in common.

  Here we show an example with 100 people with the following attributes:
  - the birthday (1..365)
  - number of theater performances during a year (1..500)
  - attending school (1..1000)

  How many of these people has at least one common attribute with some other person?

  The theoretical values (number of persons required for 50% and 95% chance)
  according to Diaconics & Mosteller "Methods for Studying Coincidences"

  p.50 <- 1.2*sqrt(1/sum([1/V : V in Vals]))
  p.95 <- 1.2*sqrt(1/sum([1/V : V in Vals]))
 
  p.50
  Picat> 
  X = 15.83928833289556

  p.95
  Picat> X= 2.5*sqrt(1/sum([1/V : V in [365,500,1000]]))
  X = 32.998517360199088

  For the "birthday paradox" the theoretical values are [22.926,47.7624],
  i.e. about 23 persons are needed for a 50% change of at least one common
  birhtday.

  Cf my Gamble model gamble_coincidences2.rkt

  This program was created by Hakan Kjellerstrand, hakank@gmail.com
  See also my Picat page: http://www.hakank.org/picat/

*/

import ppl_distributions, ppl_utils.
import util.

main => go.


/*
  The theoretical values of the number of persons required for 50% and 95% chance 
  or a coincidence is according to Diaconics & Mosteller "Methods for Studying Coincidences"
  R code:
    p.50 <- 1.2*sqrt(1/sum(1/num.vals))
    p.95 <- 2.5*sqrt(1/sum(1/num.vals))
*/
theoretical(Ns) = [1.2*T,2.5*T]  =>
  T = sqrt(1/sum([1/V : V in Ns])).


/*
  Here we show an example with 2, 16, 23, 33, and 100 people with the following attributes:
  - the birthday (1..365)
  - number of theater performances during a year (1..500)
  - attending school (1..1000)

  For 16 people it's about 50% chance of at least one coincidence, for 33 people 
  about 95% chance, and for 100 people about 100%.
  One can note that for just 2 people there is a chance - albeit small -
  of at least one coincidence: about 0.0065 (i.e. one hit in 153.8 meetings),
  and in a (busy) day or week there's a lot of meetings...


  theoretical = [15.8393,32.9985]

  num_people = 2
  var : c
  Probabilities:
  0: 0.9931000000000000
  1: 0.0069000000000000
  mean = 0.0069
  HPD intervals:
  HPD interval (0.5): 0.00000000000000..0.00000000000000
  HPD interval (0.84): 0.00000000000000..0.00000000000000
  HPD interval (0.95): 0.00000000000000..0.00000000000000
  HPD interval (0.99): 0.00000000000000..0.00000000000000

  var : p
  Probabilities:
  false: 0.9931000000000000
  true: 0.0069000000000000
  mean = [false = 0.9931,true = 0.0069]
  HPD intervals:
  show_hpd_intervals: data is not numeric


  num_people = 16
  var : c
  Probabilities:
  0: 0.5004000000000000
  1: 0.3519000000000000
  2: 0.1159000000000000
  3: 0.0249000000000000
  4: 0.0052000000000000
  5: 0.0011000000000000
  6: 0.0004000000000000
  8: 0.0001000000000000
  7: 0.0001000000000000
  mean = 0.6886
  HPD intervals:
  HPD interval (0.5): 0.00000000000000..0.00000000000000
  HPD interval (0.84): 0.00000000000000..1.00000000000000
  HPD interval (0.95): 0.00000000000000..2.00000000000000
  HPD interval (0.99): 0.00000000000000..3.00000000000000

  var : p
  Probabilities:
  false: 0.5004000000000000
  true: 0.4996000000000000
  mean = [false = 0.5004,true = 0.4996]
  HPD intervals:
  show_hpd_intervals: data is not numeric


  num_people = 23
  var : c
  Probabilities:
  1: 0.3516000000000000
  2: 0.2440000000000000
  0: 0.2289000000000000
  3: 0.1177000000000000
  4: 0.0396000000000000
  5: 0.0136000000000000
  6: 0.0038000000000000
  7: 0.0005000000000000
  9: 0.0002000000000000
  8: 0.0001000000000000
  mean = 1.448
  HPD intervals:
  HPD interval (0.5): 0.00000000000000..1.00000000000000
  HPD interval (0.84): 0.00000000000000..3.00000000000000
  HPD interval (0.95): 0.00000000000000..4.00000000000000
  HPD interval (0.99): 0.00000000000000..5.00000000000000

  var : p
  Probabilities:
  true: 0.7711000000000000
  false: 0.2289000000000000
  mean = [true = 0.7711,false = 0.2289]
  HPD intervals:
  show_hpd_intervals: data is not numeric


  num_people = 33
  var : c
  Probabilities:
  2: 0.2305000000000000
  3: 0.2288000000000000
  4: 0.1663000000000000
  1: 0.1443000000000000
  5: 0.0962000000000000
  6: 0.0522000000000000
  0: 0.0435000000000000
  7: 0.0246000000000000
  8: 0.0083000000000000
  9: 0.0032000000000000
  10: 0.0014000000000000
  11: 0.0006000000000000
  12: 0.0001000000000000
  mean = 3.0403
  HPD intervals:
  HPD interval (0.5): 1.00000000000000..3.00000000000000
  HPD interval (0.84): 1.00000000000000..5.00000000000000
  HPD interval (0.95): 0.00000000000000..6.00000000000000
  HPD interval (0.99): 0.00000000000000..8.00000000000000

  var : p
  Probabilities:
  true: 0.9565000000000000
  false: 0.0435000000000000
  mean = [true = 0.9565,false = 0.0435]
  HPD intervals:
  show_hpd_intervals: data is not numeric

  num_people = 100

  var : c
  Probabilities:
  27: 0.0749000000000000
  29: 0.0743000000000000
  28: 0.0727000000000000
  26: 0.0702000000000000
  25: 0.0685000000000000
  30: 0.0665000000000000
  31: 0.0636000000000000
  24: 0.0593000000000000
  32: 0.0561000000000000
  23: 0.0482000000000000
  33: 0.0446000000000000
  22: 0.0418000000000000
  34: 0.0405000000000000
  35: 0.0307000000000000
  21: 0.0295000000000000
  36: 0.0229000000000000
  37: 0.0222000000000000
  20: 0.0220000000000000
  38: 0.0145000000000000
  19: 0.0141000000000000
  39: 0.0127000000000000
  18: 0.0107000000000000
  40: 0.0075000000000000
  41: 0.0060000000000000
  17: 0.0060000000000000
  42: 0.0048000000000000
  43: 0.0030000000000000
  16: 0.0025000000000000
  15: 0.0020000000000000
  44: 0.0018000000000000
  14: 0.0015000000000000
  46: 0.0012000000000000
  45: 0.0012000000000000
  48: 0.0006000000000000
  13: 0.0004000000000000
  49: 0.0003000000000000
  47: 0.0003000000000000
  12: 0.0002000000000000
  56: 0.0001000000000000
  52: 0.0001000000000000
  mean = 28.4329
  HPD intervals:
  HPD interval (0.5): 22.00000000000000..29.00000000000000
  HPD interval (0.84): 21.00000000000000..35.00000000000000
  HPD interval (0.95): 17.00000000000000..38.00000000000000
  HPD interval (0.99): 16.00000000000000..43.00000000000000

  var : p
  Probabilities:
  true: 1.0000000000000000
  mean = [true = 1.0]
  HPD intervals:
  show_hpd_intervals: data is not numeric

*/
go ?=>
  Vals = [365,500,1000],
  % The "birthday paradox": for 23 people there's about 50% change that
  % there's at least one common birthday
  % Vals = [365], 
  println(theoretical=theoretical(Vals)),  
  member(NumPeople,[2,16,23,33,100]),
  println(num_people=NumPeople),
  reset_store,
  run_model(10_000,$model(Vals,NumPeople),
            [show_probs,mean,show_hpd_intervals,hpd_intervals=[0.5,0.84,0.95,0.99]]),
  nl,
  fail,
  nl.
go => true.
  
model(Vals,NumPeople) =>
  NumAttributes = Vals.len,
  
  X = [ [ random_integer(Vals[K]) : K in 1..NumAttributes] : _ in 1..NumPeople],
  
  % check all pairs (i < j) and count the number of coincidence.
  C = 0,
  foreach(I in 1..NumPeople)
    foreach(J in I+1..NumPeople)
      foreach(K in 1..NumAttributes)
        if X[I,K] == X[J,K] then
          C := C + 1
        end
      end
    end
  end,

  % Did we get at least one coincidence?
  P = cond(C > 0,true,false),

  add("p",P),
  add("c",C).