/* 

  Clan size in Picat.

  https://brainstellar.com/puzzles/probability/1009
  """
  The people in a country are partitioned into clans. In order to estimate 
  the average size of a clan, a survey is conducted where 1000 randomly 
  selected people are asked to state the size of the clan to which they belong. 
  How does one compute an estimate average clan size from the data collected?

  Solution: This is more of a puzzle-to-ponder rather than a puzzle to learn. 
  In my opinion, best estimator for average is sum( n )/ sum( #n/n), where 
  #n is the number of people with clan size as 'n', and this sum is over all 
  the values of 'n' we receive.
  """

  If a poisson distribution is used, it's enough to pick just one card to 
  get a fairly good estimate (at least on average). Below are experiments
  using 100 people where we look at just one (or three) of the cards.


  Cf my Gamble model gamble_clan_size.rkt

  This program was created by Hakan Kjellerstrand, hakank@gmail.com
  See also my Picat page: http://www.hakank.org/picat/

*/

import ppl_distributions, ppl_utils.
import util.

% Program entry point: runs go/0 (the experiment driven by model/2).
% go2/0 is not called from here and has to be invoked explicitly.
main => go.

/*
  Using 1+poisson_dist(100) as the clan size:

  [num_people = 100,num_to_pick = 1]
  var : real avg
  Probabilities (truncated):
  101.370000000000005: 0.0054000000000000
  100.879999999999995: 0.0053000000000000
  101.010000000000005: 0.0051000000000000
  100.980000000000004: 0.0051000000000000
  .........
  97.480000000000004: 0.0001000000000000
  97.450000000000003: 0.0001000000000000
  97.329999999999998: 0.0001000000000000
  96.329999999999998: 0.0001000000000000
  mean = 100.997
  HPD intervals:
  HPD interval (0.84): 99.62000000000000..102.44000000000000

  var : est avg
  Probabilities (truncated):
  103.0: 0.0419000000000000
  101.0: 0.0414000000000000
  102.0: 0.0409000000000000
  100.0: 0.0405000000000000
  .........
  133.0: 0.0001000000000000
  68.0: 0.0001000000000000
  66.0: 0.0001000000000000
  65.0: 0.0001000000000000
  mean = 101.036
  HPD intervals:
  HPD interval (0.84): 85.00000000000000..113.00000000000000

  var : est diff
  Probabilities (truncated):
  2.019999999999996: 0.0017000000000000
  0.349999999999994: 0.0017000000000000
  1.890000000000001: 0.0016000000000000
  2.549999999999997: 0.0015000000000000
  .........
  5.840000000000003: 0.0001000000000000
  5.760000000000005: 0.0001000000000000
  3.400000000000006: 0.0001000000000000
  0.319999999999993: 0.0001000000000000
  mean = 7.93934
  HPD intervals:
  HPD interval (0.84): 0.01000000000001..14.06000000000000

  var : formula
  Probabilities (truncated):
  103.0: 0.0419000000000000
  101.0: 0.0414000000000000
  102.0: 0.0409000000000000
  100.0: 0.0405000000000000
  .........
  133.0: 0.0001000000000000
  68.0: 0.0001000000000000
  66.0: 0.0001000000000000
  65.0: 0.0001000000000000
  mean = 101.036
  HPD intervals:
  HPD interval (0.84): 85.00000000000000..113.00000000000000


  [num_people = 100,num_to_pick = 3]
  var : real avg
  Probabilities (truncated):
  101.140000000000001: 0.0060000000000000
  101.25: 0.0051000000000000
  101.359999999999999: 0.0050000000000000
  101.299999999999997: 0.0050000000000000
  .........
  97.370000000000005: 0.0001000000000000
  97.219999999999999: 0.0001000000000000
  97.189999999999998: 0.0001000000000000
  97.140000000000001: 0.0001000000000000
  mean = 100.999
  HPD intervals:
  HPD interval (0.84): 99.62000000000000..102.39000000000000

  var : est avg
  Probabilities (truncated):
  99.0: 0.0247000000000000
  102.0: 0.0239000000000000
  101.666666666666671: 0.0237000000000000
  101.333333333333329: 0.0233000000000000
  .........
  116.0: 0.0001000000000000
  84.0: 0.0001000000000000
  81.666666666666671: 0.0001000000000000
  79.333333333333329: 0.0001000000000000
  mean = 100.96
  HPD intervals:
  HPD interval (0.84): 92.00000000000000..108.00000000000000

  var : est diff
  Probabilities (truncated):
  1.963333333333324: 0.0011000000000000
  1.223333333333329: 0.0011000000000000
  5.556666666666672: 0.0010000000000000
  5.273333333333326: 0.0010000000000000
  .........
  0.24666666666667: 0.0001000000000000
  0.189999999999998: 0.0001000000000000
  0.086666666666673: 0.0001000000000000
  0.0: 0.0001000000000000
  mean = 4.53313
  HPD intervals:
  HPD interval (0.84): 0.00666666666667..7.95333333333333

  var : formula
  Probabilities (truncated):
  99.0: 0.0247000000000000
  102.0: 0.0239000000000000
  101.666666666666671: 0.0237000000000000
  101.333333333333329: 0.0233000000000000
  .........
  116.0: 0.0001000000000000
  84.0: 0.0001000000000000
  81.666666666666671: 0.0001000000000000
  79.333333333333329: 0.0001000000000000
  mean = 100.96
  HPD intervals:
  HPD interval (0.84): 92.00000000000000..108.00000000000000


*/
% go/0
%
% Driver for model/2. For each [NumPeople,NumToPick] setting it prints the
% setting, clears the store with reset_store and calls run_model on
% model/2 (10_000 is presumably the number of simulation runs), asking for
% truncated probabilities, the mean and the 0.84 HPD interval.
%
% This is a failure-driven loop: member/2 enumerates the settings and the
% trailing fail forces backtracking into the next one. When the settings
% are exhausted the first rule fails and the second rule makes go succeed.
go ?=>
  Settings = [[100,1],[100,3]],
  ReportOpts = [show_probs_trunc,mean,show_hpd_intervals,hpd_intervals=[0.84]],
  member([People,Picks],Settings),
  println([num_people=People,num_to_pick=Picks]),
  reset_store,
  run_model(10_000,$model(People,Picks),ReportOpts),
  nl,
  % show_store_lengths,nl,
  fail.
go => true.
  
  
% model(NumPeople,NumToPick)
%
% One run of the clan-size experiment with Poisson-distributed clan sizes.
%   NumPeople : how many clan sizes are generated
%   NumToPick : how many of the generated sizes are looked at
%
% Records four values with add/2: "real avg", "est avg", "est diff" and
% "formula".
model(NumPeople,NumToPick) =>

  % One reported clan size per person; the 1+ keeps every size >= 1.
  Sizes = [1+poisson_dist(100) : _ in 1..NumPeople],
  TrueAvg = avg(Sizes),

  % The cards we actually inspect: NumToPick of the reported sizes.
  Picked = take(Sizes,NumToPick),
  SampleAvg = mean(Picked),

  AbsError = abs(SampleAvg-TrueAvg),

  % Formula from the Solution part of the puzzle text.
  % It gives the same result as the sample average in the recorded runs
  % (presumably collect/1 maps each distinct size to its count, so the
  % denominator is the sample size -- TODO confirm).
  Denominator = sum(values(collect(Picked))),
  ByFormula = sum(Picked) / Denominator,

  add("real avg",TrueAvg),
  add("est avg",SampleAvg),
  add("est diff",AbsError),
  add("formula",ByFormula).


/*
  Using random_integer1(200) (about the same mean as 1+poisson_dist(100))

  [num_people = 100,num_to_pick = 1]
  var : real avg
  Probabilities (truncated):
  104.799999999999997: 0.0014000000000000
  103.049999999999997: 0.0014000000000000
  102.859999999999999: 0.0014000000000000
  102.439999999999998: 0.0014000000000000
  .........
  80.549999999999997: 0.0001000000000000
  80.290000000000006: 0.0001000000000000
  80.230000000000004: 0.0001000000000000
  77.709999999999994: 0.0001000000000000
  mean = 100.555
  HPD intervals:
  HPD interval (0.84): 92.17000000000000..108.25000000000000

  var : est avg
  Probabilities (truncated):
  66.0: 0.0068000000000000
  46.0: 0.0067000000000000
  167.0: 0.0066000000000000
  75.0: 0.0065000000000000
  .........
  174.0: 0.0035000000000000
  110.0: 0.0034000000000000
  16.0: 0.0033000000000000
  144.0: 0.0030000000000000
  mean = 99.8411
  HPD intervals:
  HPD interval (0.84): 1.00000000000000..169.00000000000000

  var : est diff
  Probabilities (truncated):
  80.590000000000003: 0.0006000000000000
  63.879999999999995: 0.0006000000000000
  58.980000000000004: 0.0006000000000000
  49.450000000000003: 0.0006000000000000
  .........
  0.060000000000002: 0.0001000000000000
  0.049999999999997: 0.0001000000000000
  0.030000000000001: 0.0001000000000000
  0.019999999999996: 0.0001000000000000
  mean = 49.9975
  HPD intervals:
  HPD interval (0.84): 0.53000000000000..83.73999999999999

  var : formula
  Probabilities (truncated):
  66.0: 0.0068000000000000
  46.0: 0.0067000000000000
  167.0: 0.0066000000000000
  75.0: 0.0065000000000000
  .........
  174.0: 0.0035000000000000
  110.0: 0.0034000000000000
  16.0: 0.0033000000000000
  144.0: 0.0030000000000000
  mean = 99.8411
  HPD intervals:
  HPD interval (0.84): 1.00000000000000..169.00000000000000


  [num_people = 100,num_to_pick = 3]
  var : real avg
  Probabilities (truncated):
  97.920000000000002: 0.0016000000000000
  100.230000000000004: 0.0015000000000000
  101.640000000000001: 0.0014000000000000
  101.629999999999995: 0.0014000000000000
  .........
  80.030000000000001: 0.0001000000000000
  79.689999999999998: 0.0001000000000000
  79.310000000000002: 0.0001000000000000
  75.829999999999998: 0.0001000000000000
  mean = 100.359
  HPD intervals:
  HPD interval (0.84): 92.50000000000000..108.69000000000000

  var : est avg
  Probabilities (truncated):
  88.0: 0.0053000000000000
  112.0: 0.0050000000000000
  103.666666666666671: 0.0049000000000000
  93.333333333333329: 0.0049000000000000
  .........
  11.333333333333334: 0.0001000000000000
  8.333333333333334: 0.0001000000000000
  7.666666666666667: 0.0001000000000000
  7.333333333333333: 0.0001000000000000
  mean = 100.524
  HPD intervals:
  HPD interval (0.84): 47.33333333333334..143.66666666666666

  var : est diff
  Probabilities (truncated):
  34.569999999999993: 0.0005000000000000
  24.469999999999999: 0.0005000000000000
  20.693333333333328: 0.0005000000000000
  19.206666666666678: 0.0005000000000000
  .........
  0.019999999999996: 0.0001000000000000
  0.006666666666675: 0.0001000000000000
  0.00333333333333: 0.0001000000000000
  0.0: 0.0001000000000000
  mean = 26.6404
  HPD intervals:
  HPD interval (0.84): 0.00000000000000..47.09999999999999

  var : formula
  Probabilities (truncated):
  88.0: 0.0053000000000000
  112.0: 0.0050000000000000
  103.666666666666671: 0.0049000000000000
  93.333333333333329: 0.0049000000000000
  .........
  11.333333333333334: 0.0001000000000000
  8.333333333333334: 0.0001000000000000
  7.666666666666667: 0.0001000000000000
  7.333333333333333: 0.0001000000000000
  mean = 100.524
  HPD intervals:
  HPD interval (0.84): 47.33333333333334..143.66666666666666

*/
% go2/0
%
% Driver for model2/2. For each [NumPeople,NumToPick] setting it prints
% the setting, clears the store with reset_store and calls run_model on
% model2/2 (10_000 is presumably the number of simulation runs), asking
% for truncated probabilities, the mean and the 0.84 HPD interval.
%
% This is a failure-driven loop: member/2 enumerates the settings and the
% trailing fail forces backtracking into the next one. When the settings
% are exhausted the first rule fails and the second rule makes go2 succeed.
go2 ?=>
  Settings = [[100,1],[100,3]],
  ReportOpts = [show_probs_trunc,mean,show_hpd_intervals,hpd_intervals=[0.84]],
  member([People,Picks],Settings),
  println([num_people=People,num_to_pick=Picks]),
  reset_store,
  run_model(10_000,$model2(People,Picks),ReportOpts),
  nl,
  % show_store_lengths,nl,
  fail.
go2 => true.
  
  
% model2(NumPeople,NumToPick)
%
% One run of the clan-size experiment where each clan size is drawn with
% random_integer1(200) instead of a Poisson distribution.
%   NumPeople : how many clan sizes are generated
%   NumToPick : how many of the generated sizes are looked at
%
% Records four values with add/2: "real avg", "est avg", "est diff" and
% "formula".
model2(NumPeople,NumToPick) =>

  % One reported clan size per person.
  Sizes = [random_integer1(200) : _ in 1..NumPeople],
  TrueAvg = avg(Sizes),

  % The cards we actually inspect: NumToPick of the reported sizes.
  Picked = take(Sizes,NumToPick),
  SampleAvg = mean(Picked),

  AbsError = abs(SampleAvg-TrueAvg),

  % Formula from the Solution part of the puzzle text.
  % It gives the same result as the sample average in the recorded runs
  % (presumably collect/1 maps each distinct size to its count, so the
  % denominator is the sample size -- TODO confirm).
  Denominator = sum(values(collect(Picked))),
  ByFormula = sum(Picked) / Denominator,

  add("real avg",TrueAvg),
  add("est avg",SampleAvg),
  add("est diff",AbsError),
  add("formula",ByFormula).