/* 

  Poisson horse kicks in Picat.

  https://www.randomservices.org/random/data/HorseKicks.html
  """
  The data below give the number of soilders in the Prussian cavalry killed 
  by horse kicks, by corp membership and by year. The years are from 1875 to 1894, 
  and there are 14 different cavalry corps: the first column corresponds to the 
  guard corp and the other columns to corps 1 through 11, 14, and 15. The data are 
  from Distributome project and are derived from the book by Andrews and Herzberg. 
  The original source of the data is the classic book by von Bortkiewicz (references 
  are given below). The data are famous because they seem to fit the Poisson 
  model reasonably well.

  """
  First from Mathematica:
  """
  ...
  FindDistributionParameters(data, PoissonDistribution(lambda))
  -> 
  (lambda -> 0.7)

  """
  Note that 0.7 is the mean of the data (which is the mean of a poisson distribution).

  """
  Table(PDF(PoissonDistribution(0.7), k), (k, 0, 7))
  -> (0.496585, 0.34761, 0.121663, 0.0283881, 0.00496792, 0.000695509, 0.0000811427, 8.11427*10^-6)
  %*280
  (139.044, 97.3307, 34.0658, 7.94868, 1.39102, 0.194743, 0.02272, 0.002272)
  """


  Cf my Gamble model gamble_poisson_horse_kicks.rkt

  This program was created by Hakan Kjellerstrand, hakank@gmail.com
  See also my Picat page: http://www.hakank.org/picat/

*/

import ppl_distributions, ppl_utils.
import util.
% import ordset.

main => go.

/*
  Three different versions of observe_abc/2-4:

  * default observe_abc(Data,Y), i.e. [mean,stdev] with scale 1
    (1.6s)

    Num accepted samples: 1000  Total samples: 2468  (0.405%)

    var : lambda
    Probabilities (truncated):
    1.833608378578726: 0.0010000000000000
    1.726920987352226: 0.0010000000000000
    1.709727716497019: 0.0010000000000000
    1.682016427480623: 0.0010000000000000
    .........
    0.007017811763574: 0.0010000000000000
    0.002961847932526: 0.0010000000000000
    0.00068227201732: 0.0010000000000000
    0.00051585957432: 0.0010000000000000
    mean = 0.777815
    HPD intervals:
    HPD interval (0.94): 0.04006611371416..1.50569108198662

    var : post
    Probabilities:
    0: 0.5110000000000000
    1: 0.3180000000000000
    2: 0.1150000000000000
    3: 0.0380000000000000
    4: 0.0140000000000000
    6: 0.0020000000000000
    5: 0.0020000000000000
    mean = 0.74
    HPD intervals:
    HPD interval (0.94): 0.00000000000000..2.00000000000000

    est_lambda = 0.777815

  * observe_abc(Data,Y,1/10) : [mean,stdev] with scale 1/10 
   (12.5s)

   Num accepted samples: 1000  Total samples: 26900  (0.037%)

   var : lambda
   Probabilities (truncated):
   0.911947980947768: 0.0010000000000000
   0.910919679752979: 0.0010000000000000
   0.910816774196372: 0.0010000000000000
   0.89182568941816: 0.0010000000000000
   .........
   0.543652588754731: 0.0010000000000000
   0.538137870159996: 0.0010000000000000
   0.537278492253869: 0.0010000000000000
   0.5275369177235: 0.0010000000000000
   mean = 0.712347
   HPD intervals:
   HPD interval (0.94): 0.58201099586767..0.82999518738594

   var : post
   Probabilities:
   0: 0.5030000000000000
   1: 0.3290000000000000
   2: 0.1180000000000000
   3: 0.0420000000000000
   4: 0.0070000000000000
   5: 0.0010000000000000
   mean = 0.724
   HPD intervals:
   HPD interval (0.94): 0.00000000000000..2.00000000000000

   est_lambda = 0.711207


 * observe_abc(Data,Y,1/20) : [mean,stdev] with scale 1/20
   (38.1s)

   Num accepted samples: 1000  Total samples: 84663  (0.012%)
   var : lambda
   Probabilities (truncated):
   0.890923567531129: 0.0010000000000000
   0.886894466768435: 0.0010000000000000
   0.881238598786871: 0.0010000000000000
   0.878740769288848: 0.0010000000000000
   .........
   0.574180562316524: 0.0010000000000000
   0.572856543852415: 0.0010000000000000
   0.539565436793289: 0.0010000000000000
   0.537588440132136: 0.0010000000000000
   mean = 0.710267
   HPD intervals:
   HPD interval (0.94): 0.59893228327806..0.80393605530445

   var : post
   Probabilities:
   0: 0.5080000000000000
   1: 0.3350000000000000
   2: 0.1220000000000000
   3: 0.0270000000000000
   4: 0.0050000000000000
   5: 0.0030000000000000
   mean = 0.695
   HPD intervals:
   HPD interval (0.94): 0.00000000000000..2.00000000000000

   est_lambda = 0.710267


  * Using the methods from from recommend_abc_stats_explained:
    methods = [median,iqr,mad,skewness]
    "Data is clearly skewed. Using robust statistics (median, IQR, MAD, skewness)."
    (33.2s)

    Note: This is clearly not good...

    Num accepted samples: 1000  Total samples: 8984  (0.111%)

    var : lambda
    Probabilities (truncated):
    0.817242566876692: 0.0010000000000000
    0.796272166444115: 0.0010000000000000
    0.787020471313512: 0.0010000000000000
    0.780158032095133: 0.0010000000000000
    .........
    0.238064494094841: 0.0010000000000000
    0.236494707053758: 0.0010000000000000
    0.227912832157646: 0.0010000000000000
    0.214331636305122: 0.0010000000000000
    mean = 0.501337
    HPD intervals:
    HPD interval (0.94): 0.28958434438779..0.72164790924715

    var : post
    Probabilities:
    0: 0.6280000000000000
    1: 0.2830000000000000
    2: 0.0760000000000000
    3: 0.0130000000000000
    mean = 0.474
    HPD intervals:
    HPD interval (0.94): 0.00000000000000..2.00000000000000

    est_lambda = 0.474
  
  * Mathematica:  FindDistribution[data, 3]:

    {PoissonDistribution[0.70922], 
     BinomialDistribution[15, 0.0472813], 
     NegativeBinomialDistribution[4, 0.849398]
     }

    As above:
    FindDistributionParameters[data, PoissonDistribution[lambda]]
    -> {lambda -> 0.7}

*/
go ?=>
  % Total number of death by horse kicks per year
  %(Data = [3,5,7,9,10,18,6,14,11,9,5,11,15,6,11,17,12,15,8,4],

  % Death per cavalry corp per year
  Data = [
         % A B C D E F G H I J K L M N   
         0,0,0,0,0,0,0,1,1,0,0,0,1,0, % year 1875
         2,0,0,0,1,0,0,0,0,0,0,0,1,1,
         2,0,0,0,0,0,1,1,0,0,1,0,2,0,
         1,2,2,1,1,0,0,0,0,0,1,0,1,0,
         0,0,0,1,1,2,2,0,1,0,0,2,1,0,
         0,3,2,1,1,1,0,0,0,2,1,4,3,0,
         1,0,0,2,1,0,0,1,0,1,0,0,0,0,
         1,2,0,0,0,0,1,0,1,1,2,1,4,1,
         0,0,1,2,0,1,2,1,0,1,0,3,0,0,
         3,0,1,0,0,0,0,1,0,0,2,0,1,1,
         0,0,0,0,0,0,1,0,0,2,0,1,0,1,
         2,1,0,0,1,1,1,0,0,1,0,1,3,0,
         1,1,2,1,0,0,3,2,1,1,0,1,2,0,
         0,1,1,0,0,1,1,0,0,0,0,1,1,0,
         0,0,1,1,0,1,1,0,0,1,2,2,0,2,
         1,2,0,2,0,1,1,2,0,2,1,1,2,2,
         0,0,0,1,1,1,0,1,1,0,3,3,1,0,
         1,3,2,0,1,1,3,0,1,1,0,1,1,0,
         0,1,0,0,0,1,0,2,0,0,1,3,0,0,
         1,0,0,0,0,0,0,0,1,0,1,1,0,0 % year 1894
   ],
  [Methods,Explain] = recommend_abc_stats_explained(Data),
  println(methods=Methods),
  println(Explain),
  reset_store,
  run_model(10_000,$model(Data),[show_probs_trunc,mean,
                           % show_percentiles,
                           show_hpd_intervals,hpd_intervals=[0.94],
                           % show_histogram,
                           min_accepted_samples=1000,show_accepted_samples=true
                          ]),
  nl,
  println(est_lambda=get_store().get("lambda").mean),
  % show_store_lengths,nl,
  % fail,
  nl.
go => true.
  
  
model(Data) =>
  Lambda = uniform(Data.min,Data.max),

  Y = poisson_dist_n(Lambda,Data.len),
  
  % observe_abc(Data,Y), % fast but wrong
  observe_abc(Data,Y,1/10), % fairly correct and fast
  % observe_abc(Data,Y,1/20), % not much better than using scale 1/10
  % observe_abc(Data,Y,1,[median,iqr,mad,skewness]), % slow and not good

  Post = poisson_dist(Lambda),

  if observed_ok then
    add("lambda",Lambda),
    add("post",Post)
  end.


/*
  Number of deaths
  https://mindyourdecisions.com/blog/2013/06/21/what-do-deaths-from-horse-kicks-have-to-do-with-statistics/
  From the book Bulmer "Principles of Statistics":
  https://www.google.se/books/edition/Principles_of_Statistics/BZi8AQAAQBAJ?hl=sv&gbpv=1&dq=prussian+soldier+horse+deaths+poisson&pg=PT125&printsec=frontcover&bshm=rimc/1

  Num deaths  Occurrences   Expected frequence (from the book)
  ------------------------------------------------------------
   0           144            139
   1            91             97 
   2            32             34
   3            11             8
   4             2             1
  >5             0             0


  This model:

  Frequencies of data:
  (map)[0 = 144,1 = 91,2 = 32,3 = 11,4 = 2]
  min_accepted_samples = 1000

  est_lambda = 0.713157

PDF using poisson_dist(0.71315711387860):
 0 0.490094468364676 
 1 0.349514356586817 
 2 0.124629324901294 
 3 0.029626763217082 
 4 0.005282134237365 
 5 0.000753398321568 
 6 0.000089548562102 
 7 0.000009123170586 

PDF using Mathematica value poisson_dist(0.7):
 0 0.49658530379141 
 1 0.347609712653987 
 2 0.121663399428895 
 3 0.028388126533409 
 4 0.004967922143347 
 5 0.000695509100069 
 6 0.000081142728341 
 7 0.000008114272834 

280*PDF using poisson_dist(0.71315711387860):
 0 137.22645114210917 
 1 97.8640198443087 
 2 34.896210972362411 
 3 8.295493700782869 
 4 1.4789975864621 
 5 0.210951530038943 

280*PDF using Mathematica value poisson_dist(0.7):
 0 139.043885061594779 
 1 97.33071954311626 
 2 34.065751840090648 
 3 7.948675429354496 
 4 1.391018200137038 
 5 0.194742548019185 

Distribution using poisson_dist(0.71315711387860):
(map)[0 = 134,1 = 105,2 = 31,3 = 8,4 = 1,5 = 1]

Distribution using Mathematica value poisson_dist(0.7):
(map)[0 = 134,1 = 105,2 = 31,3 = 8,4 = 1,5 = 1]

Comparison with data, Est lambda, Mathematica's 0.7:
0:  data: 144  est: 134 MMa: 126
1:  data:  91  est: 105 MMa: 109
2:  data:  32  est:  31 MMa:  35
3:  data:  11  est:   8 MMa:   8
4:  data:   2  est:   1 MMa:   1


*/
go2 ?=> 
  % Total number of death by horse kicks per year
  %(Data = [3,5,7,9,10,18,6,14,11,9,5,11,15,6,11,17,12,15,8,4],

  % Death per cavalry corp per year
  Data = [
         % A B C D E F G H I J K L M N   
         0,0,0,0,0,0,0,1,1,0,0,0,1,0, % year 1875
         2,0,0,0,1,0,0,0,0,0,0,0,1,1,
         2,0,0,0,0,0,1,1,0,0,1,0,2,0,
         1,2,2,1,1,0,0,0,0,0,1,0,1,0,
         0,0,0,1,1,2,2,0,1,0,0,2,1,0,
         0,3,2,1,1,1,0,0,0,2,1,4,3,0,
         1,0,0,2,1,0,0,1,0,1,0,0,0,0,
         1,2,0,0,0,0,1,0,1,1,2,1,4,1,
         0,0,1,2,0,1,2,1,0,1,0,3,0,0,
         3,0,1,0,0,0,0,1,0,0,2,0,1,1,
         0,0,0,0,0,0,1,0,0,2,0,1,0,1,
         2,1,0,0,1,1,1,0,0,1,0,1,3,0,
         1,1,2,1,0,0,3,2,1,1,0,1,2,0,
         0,1,1,0,0,1,1,0,0,0,0,1,1,0,
         0,0,1,1,0,1,1,0,0,1,2,2,0,2,
         1,2,0,2,0,1,1,2,0,2,1,1,2,2,
         0,0,0,1,1,1,0,1,1,0,3,3,1,0,
         1,3,2,0,1,1,3,0,1,1,0,1,1,0,
         0,1,0,0,0,1,0,2,0,0,1,3,0,0,
         1,0,0,0,0,0,0,0,1,0,1,1,0,0 % year 1894
  ],
  println("Frequencies of data:"),
  DataFreq = collect(Data),
  println(DataFreq),
  reset_store,
  run_model(10_000,$model(Data),[presentation = [],
                                 min_accepted_samples=1000,show_accepted_samples=false
                          ]),
  nl,
  LambdaList = get_store().get("lambda"),
  EstLambda = LambdaList.mean,
  println(est_lambda=EstLambda),
  nl,
  printf("PDF using poisson_dist(%0.14f):\n",EstLambda),
  pdf_all($poisson_dist(EstLambda),0.01,0.999999).printf_list,
  nl,
  printf("PDF using Mathematica value poisson_dist(0.7):\n"),
  pdf_all($poisson_dist(0.7),0.01,0.999999).printf_list,
  nl,
  printf("280*PDF using poisson_dist(%0.14f):\n",EstLambda),
  [V=280*poisson_dist_pdf(EstLambda,V) : V in 0..5].printf_list,
  nl,
  printf("280*PDF using Mathematica value poisson_dist(0.7):\n"),
  [V=280*poisson_dist_pdf(0.7,V) : V in 0..5].printf_list,  

  nl,
  printf("Distribution using poisson_dist(%0.14f):\n",EstLambda),
  EstFreq=poisson_dist_n(EstLambda,Data.len).collect,
  println(EstFreq),
  nl,
  printf("Distribution using Mathematica value poisson_dist(0.7):\n"),
  MmaFreq=poisson_dist_n(0.7,Data.len).collect,
  println(EstFreq),
  nl,
  println("Comparison with data, Est lambda, Mathematica's 0.7:"),
  foreach(V in Data.min..Data.max)
    printf("%d:  data: %3w  est: %3w MMa: %3w\n", V, DataFreq.get(V,0),EstFreq.get(V,0),MmaFreq.get(V,0))
  end,

  % fail,
  nl.