/* Discrete Markov Process - Marsian messages in Picat. From Mathematica DiscreteMarkovProcess """ Suppose radio messages from Mars are written in a language that uses only one vowel A and four consonants BCDR, and interspersed in the ordinary communication are some long messages produced by a Markov chain with the following estimated transition matrix: sm = {{0, 1/2, 1/2, 0, 0}, {0, 1/3, 0, 1/3, 1/3}, {0, 0, 1/3, 1/3, 1/3}, {1/2, 0, 0, 0, 1/2}, {1/2, 0, 0, 0, 1/2}}; proc = DiscreteMarkovProcess[1, sm]; Find the mean distance between consecutive vowels: 1/PDF[StationaryDistribution[proc], 1] -> 9/2 -> 4.5 Find the mean distance between consecutive consonants: 1/(1 - PDF[StationaryDistribution[proc], 1]) -> 9/7 -> 1.28571 Find the mean and variance of the length of consecutive occurrences of B: ... -> {1.50367, 0.751308} {Mean[#], Variance[#]} & [TransformedDistribution[x + 1, x in GeometricDistribution[1 - sm[[2, 2]]]]] -> {1.5, 0.75} """ Cf my Gamble model gamble_discrete_markov_process_marsian_messages.rkt This program was created by Hakan Kjellerstrand, hakank@gmail.com See also my Picat page: http://www.hakank.org/picat/ */ import ppl_distributions, ppl_utils, ppl_common_utils. import util. % import ordset. main => go. /* stationary_dist = [0.222222,0.166667,0.166667,0.111111,0.333333] stationary_dist = [2 / 9,1 / 6,1 / 6,1 / 9,1 / 3] first_stationary = 0.222222 Mean distance between consecutive vowels = 4.5 Mean distance between consecutive consonants = 1.28571 Mean length of B runs = 1.5 var : mean length of b runs Probabilities (truncated): 1.5: 0.0360000000000000 1.4: 0.0140000000000000 1.428571428571429: 0.0130000000000000 1.333333333333333: 0.0110000000000000 ......... 1.173913043478261: 0.0010000000000000 1.170212765957447: 0.0010000000000000 1.136363636363636: 0.0010000000000000 1.128205128205128: 0.0010000000000000 mean = 1.50359 Theoretical PDF for B mean runs len: q99 = 17 1 0.666666666666667 2 0.222222222222222 3 0.074074074074074 4 0.024691358024691 5 0.008230452674897 6 0.002743484224966 7 0.000914494741655 8 0.000304831580552 9 0.000101610526851 10 0.000033870175617 11 0.000011290058539 12 0.000003763352846 13 0.000001254450949 14 0.000000418150316 15 0.000000139383439 16 0.000000046461146 17 0.000000015487049 theoretical_mean = 1.5 */ go ?=> TransitionMatrix = [[0,1/2,1/2,0,0], [0,1/3,0,1/3,1/3], [0,0,1/3,1/3,1/3], [1/2,0,0,0,1/2], [1/2,0,0,0,1/2]], Stationary = stationary_dist(TransitionMatrix), println(stationary_dist=Stationary), println(stationary_dist=Stationary.map(to_rat)), FirstStat = Stationary[1], println(first_stationary=FirstStat), println("Mean distance between consecutive vowels"=(1 / FirstStat)), println("Mean distance between consecutive consonants"=(1 / (1-FirstStat))), B = 2, println("Mean length of B runs"=shifted_geometric_dist_mean(1-TransitionMatrix[B,B])), nl, reset_store, run_model(1000,$model(TransitionMatrix),[show_probs_trunc,mean]), nl, println("Theoretical PDF for B mean runs len:"), Q99 = shifted_geometric_dist_quantile(1-TransitionMatrix[2,2],0.99999999), println(q99=Q99), [V=shifted_geometric_dist_pdf(1-TransitionMatrix[2,2],V) : V in 1..Q99].sort_down(2).printf_list, println(theoretical_mean=shifted_geometric_dist_mean(1-TransitionMatrix[2,2])), nl, % show_store_lengths,nl, % fail, nl. go => true. positions_of(Val,A) = [ I : I in 1..A.len, A[I] == Val]. model(TransitionMatrix) => [A,B,C,D,R] = [1,2,3,4,5], N = 400, % Always start with A X = markov_chain(TransitionMatrix,one_hot(TransitionMatrix.len,A),N), XDiffMean = X.differences.map(abs).mean, % Positions of Bs BPos = positions_of(B,X), Diffs = BPos.differences, if Diffs.len > 0 then MeanDiff = Diffs.mean else MeanDiff = N end, % Average lengths of consecutive B's Runs = get_runs(X), BRunsLens = [Run.len : Run in Runs, Run[1] == B], BRunsLensMean = BRunsLens.mean, % % BCons = 1 + geometric_dist(1-TransitionMatrix[B,B]), BCons = shifted_geometric_dist(1-TransitionMatrix[B,B]), SecondChar = X[2], [CountA,CountB,CountC,CountD,CountR] = count_occurrences_list([A,B,C,D,R],X), [FirstPosA,FirstPosB,FirstPosC,FirstPosD,FirstPosR] = [find_first_of(X,I) : I in 1..5], ProbBra = count_occurrences_sublist([B,R,A],X) / N, ProbAbra = count_occurrences_sublist([A,B,R,A],X) / N, ProbBarbara = count_occurrences_sublist([B,A,R,B,A,R,A],X) / N, ProbAbracadabra = count_occurrences_sublist([A,B,R,A,C,A,D,A,B,R,A],X) / N, add_all([% ["x",X], % ["b pos",BPos], % ["diffs",Diffs], % ["mean diff",MeanDiff], % ["x diff mean",XDiffMean], % ["b runs",BRuns], % ["b runs len",BRunsLens], ["mean length of b runs",BRunsLensMean] % , % ["prob bra",ProbBra], % ["prob abra",ProbAbra], % ["prob barbara",ProbBarbara] % , % ["prob abracadabra",ProbAbracadabra], % ["second char",SecondChar], % ["count a",CountA], % ["count b",CountB], % ["count c",CountC], % ["count d",CountD], % ["count r",CountR], % ["first pos a",FirstPosA], % ["first pos b",FirstPosB], % ["first pos c",FirstPosC], % ["first pos d",FirstPosD], % ["first pos r",FirstPosR] ]).