/* 

  Discrete Markov Process - Marsian messages in Picat.

  From Mathematica DiscreteMarkovProcess
  """
  Suppose radio messages from Mars are written in a language that uses only one 
  vowel A and four consonants BCDR, and interspersed in the ordinary communication 
  are some long messages produced by a Markov chain with the following estimated 
  transition matrix:
  sm = {{0, 1/2, 1/2, 0, 0}, 
        {0, 1/3, 0, 1/3, 1/3}, 
        {0, 0, 1/3, 1/3, 1/3}, 
        {1/2, 0, 0, 0, 1/2}, 
        {1/2, 0, 0, 0, 1/2}};
  proc = DiscreteMarkovProcess[1, sm];

  Find the mean distance between consecutive vowels:
  1/PDF[StationaryDistribution[proc], 1]
  -> 9/2
  -> 4.5

  Find the mean distance between consecutive consonants:
  1/(1 - PDF[StationaryDistribution[proc], 1])
  -> 9/7
  -> 1.28571

  Find the mean and variance of the length of consecutive occurrences of B:
  ...
  -> {1.50367, 0.751308}

  {Mean[#], Variance[#]} & [TransformedDistribution[x + 1, x in GeometricDistribution[1 - sm[[2, 2]]]]]
  -> 
  {1.5, 0.75}
  """

  Cf my Gamble model gamble_discrete_markov_process_marsian_messages.rkt

  This program was created by Hakan Kjellerstrand, hakank@gmail.com
  See also my Picat page: http://www.hakank.org/picat/

*/

import ppl_distributions, ppl_utils, ppl_common_utils.
import util.
% import ordset.

main => go.

/*
  stationary_dist = [0.222222,0.166667,0.166667,0.111111,0.333333]
  stationary_dist = [2 / 9,1 / 6,1 / 6,1 / 9,1 / 3]
  first_stationary = 0.222222
  Mean distance between consecutive vowels = 4.5
  Mean distance between consecutive consonants = 1.28571
  Mean length of B runs = 1.5

  var : mean length of b runs
  Probabilities (truncated):
  1.5: 0.0360000000000000
  1.4: 0.0140000000000000
  1.428571428571429: 0.0130000000000000
  1.333333333333333: 0.0110000000000000
  .........
  1.173913043478261: 0.0010000000000000
  1.170212765957447: 0.0010000000000000
  1.136363636363636: 0.0010000000000000
  1.128205128205128: 0.0010000000000000
  mean = 1.50359


  Theoretical PDF for B mean runs len:
  q99 = 17
   1 0.666666666666667 
   2 0.222222222222222 
   3 0.074074074074074 
   4 0.024691358024691 
   5 0.008230452674897 
   6 0.002743484224966 
   7 0.000914494741655 
   8 0.000304831580552 
   9 0.000101610526851 
  10 0.000033870175617 
  11 0.000011290058539 
  12 0.000003763352846 
  13 0.000001254450949 
  14 0.000000418150316 
  15 0.000000139383439 
  16 0.000000046461146 
  17 0.000000015487049 
  theoretical_mean = 1.5


*/
go ?=>
  TransitionMatrix = [[0,1/2,1/2,0,0],
                      [0,1/3,0,1/3,1/3],
                      [0,0,1/3,1/3,1/3],
                      [1/2,0,0,0,1/2],
                      [1/2,0,0,0,1/2]],
  Stationary = stationary_dist(TransitionMatrix),
  println(stationary_dist=Stationary),
  println(stationary_dist=Stationary.map(to_rat)),
  FirstStat = Stationary[1],
  println(first_stationary=FirstStat),
  println("Mean distance between consecutive vowels"=(1 / FirstStat)),
  println("Mean distance between consecutive consonants"=(1 / (1-FirstStat))),
  B = 2,
  println("Mean length of B runs"=shifted_geometric_dist_mean(1-TransitionMatrix[B,B])),
  nl,
  reset_store,
  run_model(1000,$model(TransitionMatrix),[show_probs_trunc,mean]),
  nl,
  println("Theoretical PDF for B mean runs len:"),
  Q99 = shifted_geometric_dist_quantile(1-TransitionMatrix[2,2],0.99999999),
  println(q99=Q99),
  [V=shifted_geometric_dist_pdf(1-TransitionMatrix[2,2],V) : V in 1..Q99].sort_down(2).printf_list,
  println(theoretical_mean=shifted_geometric_dist_mean(1-TransitionMatrix[2,2])),
  nl,
  % show_store_lengths,nl,
  % fail,
  nl.
go => true.

positions_of(Val,A) = [ I : I in 1..A.len, A[I] == Val].


model(TransitionMatrix) =>
  [A,B,C,D,R] = [1,2,3,4,5],

  N = 400,

  % Always start with A
  X = markov_chain(TransitionMatrix,one_hot(TransitionMatrix.len,A),N),
  XDiffMean = X.differences.map(abs).mean,
  
  % Positions of Bs
  BPos = positions_of(B,X), 
  Diffs = BPos.differences,
  if Diffs.len > 0 then   
    MeanDiff = Diffs.mean
  else
    MeanDiff = N
  end,

  % Average lengths of consecutive B's
  Runs = get_runs(X),
  BRunsLens = [Run.len : Run in Runs, Run[1] == B],
  BRunsLensMean = BRunsLens.mean,

  % 
  % BCons = 1 + geometric_dist(1-TransitionMatrix[B,B]), 
  BCons = shifted_geometric_dist(1-TransitionMatrix[B,B]), 

  SecondChar = X[2],

  [CountA,CountB,CountC,CountD,CountR] = count_occurrences_list([A,B,C,D,R],X),
  [FirstPosA,FirstPosB,FirstPosC,FirstPosD,FirstPosR] = [find_first_of(X,I) : I in 1..5],

  ProbBra = count_occurrences_sublist([B,R,A],X) / N,
  ProbAbra = count_occurrences_sublist([A,B,R,A],X) / N,
  ProbBarbara = count_occurrences_sublist([B,A,R,B,A,R,A],X) / N,  
  ProbAbracadabra = count_occurrences_sublist([A,B,R,A,C,A,D,A,B,R,A],X) / N,  
  
  add_all([% ["x",X],
           % ["b pos",BPos],
           % ["diffs",Diffs],
           % ["mean diff",MeanDiff],
           % ["x diff mean",XDiffMean],
           % ["b runs",BRuns],
           % ["b runs len",BRunsLens],
           ["mean length of b runs",BRunsLensMean] % ,
           % ["prob bra",ProbBra],
           % ["prob abra",ProbAbra],
           % ["prob barbara",ProbBarbara] % ,
           % ["prob abracadabra",ProbAbracadabra],
           % ["second char",SecondChar],
           % ["count a",CountA],
           % ["count b",CountB],
           % ["count c",CountC],
           % ["count d",CountD],
           % ["count r",CountR],           
           % ["first pos a",FirstPosA],
           % ["first pos b",FirstPosB],
           % ["first pos c",FirstPosC],
           % ["first pos d",FirstPosD],
           % ["first pos r",FirstPosR]
           ]).