/* 

  Probabilistic propgramming (PPL) utils in Picat.

  Utility functions for probabilistic programming (ppl_*).


  Configurations:

  * Setting a configuration parameter:
   set_config(Parameter,Values)

  * Getting a configuration parameter:
   get_config(Parameter,Default)

  In addition explicitly using set_config/2 (which are used internally), 
  it's recommended to add configuration value instead in the paramteter list 
  of run_model/3. For example:
     run_model(10_000,$model(Obs),[mean,show_probs_rat_trunc,
                                   truncate_size=5,
                                   presentation=["c","b","a"],
                                   hpd_intervals=[0.84,0.9999],
                                   random_seed=42])

  Supported configurations:
  - truncate_size (for show_probs/1 and show_probs_trunc)
    default 4 

  - presentation:  The presentation order of random variables.
    default: sort by the order in which they are presented in the model

  - hpd_intervals: The HPD intervals for show_hpd_intervals
    default: [0.5,0.84,0.90,0.95,0.99,0.999,0.9999,0.99999]

  - percentiles: The percentiles for show_percentiles
    default: 
    [0.001,0.01,0.025,0.05,0.10,0.25,0.50,0.75,0.84,0.90,0.95,0.975,0.99,0.999,0.9999,0.99999]

  - histogram_limit: Sets the upper limit of unique values for integer/discrete. Above that
                     it will be seen as continuous values.
    default: 40

  - random_seed: Sets the random seed
    default: none, i.e. random2() is used instead

  - min_accepted_samples=MinAcceptedSamples: 
       This is an alternative to the number of runs given by run_model/2-3.
       If set then continue to run the model until the number of 
       accepted samples >= MinAcceptedSamples
    Default: no restriction (i.e. run the model the given number of runs)
  - show_accepted_samples=<true|false>:
    If set to true and min_accepted_samples=MinAcceptedSamples is set, then show the
    number of accepted samples.

  - use_local_tabling=[false|true]  (experimental)
    Default: false
    If true then, all calls to the model clears the table area.

    This is for use for memoising recursive problems, especially
    for multi-function recursion (one function is calling another
    function, which in turn calls the first function).

    See ppl_hurricane.pi and ppl_italian_murder.pi (go3/0) for an example.

    Note: This should be considered experimental since it have
          effects in other parts of PPL which is using tabling.

  - garbage_collect_when: N 
    Default: 1000
    Runs garbage_collect() each N'th iteration.
    This might be useful when there are much data that are handles/transferred,
    and/or Picat report that memory is exhaused.

    
  - scatter_plot_size: [Width,Height]
    Size of show_scatter_plot,
    Default: [40,20]    


$ perl -nle 'print $1 if m!^([a-z][^\s]+?) =!' ppl_utils.pi | sort
add_all(L)
add(Name,Value)
add_post(Data,Y)
all_different(L)
analyse_post(Data)
analyse_post(Data,Post)
analyse_post_simple(Data)
analyse_post_simple(Data,Post)
analyze_post(Data)
analyze_post(Data,Post)
analyze_post_simple(Data)
analyze_post_simple(Data,Post)
and_ratf(X)
and_rat(X)
argmax(L)
argmax_random_ties(L)
argmin(L)
argmin_random_ties(L)
argsort(L)
b2i(V)
but_last(L)
cases(L)
case(Val,List)
cdf_from_data(Data)
cdf_from_pdf(PDF)
check1(Cond)
check(Cond)
check_duplicates(L)
check_format(V)
check_parameter(Parameter=Var,Pres)
collect(List)
comb_sum(Len,N,Sum)
condt(Cond,Then,Else)
count_occurrences_list(What,L)
count_occurrences_sublist(Sub,L)
count_occurrences(What,L)
credible_intervals_exact(Dist,HPD)
data_iqr(Data)
data_mad(Data)
data_percentiles(Data,Pcts)
data_range(Data)
debug(S)
debug(Text,S)
draw_until_one_pattern(Lst,Patterns,Choices)
draw_until_pattern(Lst,Pattern,Choices)
draw_without_replacement(A)
draw_without_replacement(N,A)
draw_without_replacement(N,A,Ns)
equivalence(S1,S2)
filter_runs(Runs,Val)
find_first(L,Val,Default)
flip_until_n_successes(P,N)
flip_until_pattern(Patterns,Lst)
float_to_rational(X)
generate_01_runs(N,P)
get_config_map()
get_config(Parameter,Default)
get_freq(List)
get_percentiles(Data)
get_probs(L)
get_records(A)
get_records(A,MaxVal,Aux,Op)
get_records(A,Op)
get_records_geq(A)
get_records_ix(A)
get_records_ix(A,Op)
get_records_ix_geq(A)
get_records_ix(Ix,A,MaxVal,Aux,Op)
get_runs1(As,Aux)
get_runs(As)
get_runs_lens(Xs)
get_runs_op(As,Aux,Op)
get_runs_op(Xs,Op)
get_store()
implication(S1,S2)
index_of_sub(List,Sub)
interpret_fit_score(S)
kurtosis(Data)
kurtosis_excess(Data)
markov_chain_loop(Transitions,N,A)
markov_chain_n(Transitions,InitState,N,Num)
markov_chain(Transitions,InitState,N)
mean(Data)
median(Data)
median_sorted(Sorted)
mod_replace(Val,Mod)
normalized_rmse(Data,Post)
normalized_rmse_explained(NRMSE)
num_runs([])
num_runs(A)
num_runs(T,A,Acc)
observe_abc_adj(Data,Ys)
observe_abc_adj(Data,Ys,Scale)
observe_abc(Data,Ys)
observe_abc(Data,Ys,Scale)
observe_abc(Data,Ys,Scale,Methods)
observe_abc_discrete(Data,Ys)
observe_abc_discrete(Data,Ys,Scale)
observed_ok()
observe(Obs)
observe(Obs1,Obs2)
observe(Obs1,Obs2,Obs3)
observe(Obs1,Obs2,Obs3,Obs4)
one_hot(N,I)
ones(N,Val)
online_moments(Data)
pdf_from_data(Data)
plot_ascii_points([],Grid,_,_,_,_,_,_)
plot_ascii_points([[X,Y]|Rest],Grid0,Width,Height,MinX,MinY,DX,DY)
plot_ascii_scatter(Data)
post_summary(Data)
printf_list(L)
print_list(L)
print_matrix(M)
print_matrix(M,Name)
q1(Data)
q3(Data)
quantile_from_cdf(CDF,Q)
quantile_from_data(Data,Q)
random_transition_matrix(N)
random_transition_matrix(N,V)
recommend_abc_stats(Data)
recommend_abc_stats_explained(Data)
repeat_string(S,N)
replace_nth([H|Rest],N,Val)
replace_nth([_|Rest],1,Val)
rep(N,V)
resample_all(Data)
resample(Data)
resample(N,Data)
reset_store()
rest(L)
rmse_percentiles(Data,Post)
roundf(V,Prec)
run_model(Runs,Model)
run_model(Runs,Model,Pres)
set_config(Parameter,Value)
show_all(Map)
show_all(Map,Ps)
show_credible_intervals_exact(Dist,Qs)
show(Data)
show(Data,Ps)
show_freq(List)
show_freq_prob(List)
show_histogram(Data)
show_hpd_intervals(Data)
show_matrix(M)
show_matrix(M,Name)
show_percentiles(Data)
show_probs(L)
show_probs_rat(L)
show_probs_rat_trunc(L)
show_probs_trunc(L)
show_scatter_plot(Data)
show_scatter_plot(Data,Width,Height)
show_simple_stats(Data)
show_stats(Data)
show_store_lengths()
show_summary_stats(Data)
shuffle1(List)
shuffle(L)
simplex(L)
skewness(Data)
stdev(Xs)
std_population(Data)
sublists(L,N)
swap(L,I,J)
take_last(A,N)
to_probabilities(L)
to_rat(X)
update_all(Map,L)
update_list_all(Map,L)
update_list(Map,Key,Value)
update(Map,Key)
variance_population(Data)
variance_sample(Data)
variance(Xs)
xor(A,B)


  This program was created by Hakan Kjellerstrand, hakank@gmail.com
  See also my Picat page: http://www.hakank.org/picat/

*/

module ppl_utils.

import ppl_distributions.
import ppl_common_utils.
import util.
import cp.

/*
  Updating a map.

*/
% Update the count of Key
update(Map,Key) =>
  Map.put(Key,Map.get(Key,0)+1).

% Add the Value in the list of Key
update_list(Map,Key,Value) =>
  % Map.put(Key,Map.get(Key,[])++[Value]).
  Map.put(Key,[Value|Map.get(Key,[])]).
 

update_all(Map,L) =>
  foreach(V in L)
    Map.update(V)
  end.

update_list_all(Map,L) =>
  foreach([V,K] in L)
    Map.update_list(V,K)
  end.


/*
  Handling the data / samples / trace store

  - reset_store: 
    Resets the store.
    Should be the called before a new call to run_model/1-2.
    Note: this is only needed if there are more than one call to
          run_model/1-2, but it's a good habit.

  - add(Name,Value)
    add_all(List)

    Add the element(s) to the data store

  - get store: returns the data store 

*/
reset_store() =>
  get_global_map(store).clear().

get_store() = get_global_map(store).

show_store_lengths() =>
  println("Variable lengths:"),
  Store = get_store(),
  foreach(K in Store.keys.sort)
    println(K=Store.get(K).len)
  end.

% Add the value Value to the store list with the name Name
add(Name,Value) =>
  AddOrder = get_global_map(add_order),
  Order = AddOrder.get(add_order),
  if not membchk(Name,Order) then
    AddOrder.put(add_order,AddOrder.get(add_order)++[Name]),
  end,
  Map = get_global_map(store),
  % Map.put(Name,Map.get(Name,[])++[Value]).
  Map.put(Name,[Value|Map.get(Name,[])]). % A little faster, but reversed order
  

% Add all pairs of [Name1,Value1,Name2,Value2]
% to the store
add_all(L) =>
  foreach([Name,Value] in L)
    add(Name,Value)
  end.


/*
  add_post(Data,Y)

  This is a convenience function when using BDA for adding 
  the posterior data set (Y). 
  It adds - foreach element in Data and Y -
  - "post <I>": The value of Y[I]
  - "post <I> diff:", The difference between Y[I] and Data[I].

  This can then be used for together with the (posterior) summary function
  post_summary(Data), see below.

*/
add_post(Data,Y) =>
  Len = min(Data.len,Y.len),
  foreach(I in 1..Len)
     IS = I.to_string,
     add("post " ++ IS,Y[I]),
     add("post "++ IS ++ " diff",Y[I]-Data[I]),
  end.


/*
  Configuration

  The configuration contains some parameters that are
  globally to the model. 

*/

% Return the configuration map
get_config_map() = get_global_map(config).

% Get the value of configuration parameter Parameter,
% with the default value Value
get_config(Parameter,Default) = get_global_map(config).get(Parameter,Default).

% Set the configuration parameter Parameter to the value of L
set_config(Parameter,Value) =>
  Config = get_config_map(),
  Config.put(Parameter,Value).


%
% Round the number V with a precision of Prec.
% Eg. roundf(100.1234567,0.001) ->  100.123
%
roundf(V,Prec) = round(V/Prec)*Prec.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% float_to_rational(X) -> (Num,Den)               % default: MaxDen=1e9, Eps=1e-12
% float_to_rational(X, MaxDen, Eps) -> (Num,Den)  % customize bounds/precision
%
% Uses continued fractions with a final truncation step to respect MaxDen.
% Always returns lowest terms (gcd-reduced). Handles negatives.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
to_rat(X) = float_to_rational(X).
and_rat(X) = to_fstring("%w (%w)", X, to_rat(X)).
and_ratf(X) = to_fstring("%.17f (%w)", X, to_rat(X)).

% float_to_rational(X) = float_to_rational(X, 1000000000, 1.0e-17).
% float_to_rational(X) = float_to_rational(X, 10**17, 1.0e-17).
float_to_rational(X) = float_to_rational(X, 10**10, 1.0e-10).

float_to_rational(X, MaxDen, Eps) = $(Num'/'Den) =>
    % basic checks
    Sign = cond(X < 0.0, -1, 1),
    Xabs = abs(X),
    if Xabs =:= 0.0 then
        Num = 0, Den = 1
    else
        % Continued-fraction convergents:
        % p[-2]=0,q[-2]=1 ; p[-1]=1,q[-1]=0
        Pm2 = 0, Qm2 = 1,
        Pm1 = 1, Qm1 = 0,
        Xcur = Xabs,
        Num = 0, Den = 1,
        Done = false,
        while (Done == false)
            A = floor(Xcur).to_int(),
            P = A*Pm1 + Pm2,
            Q = A*Qm1 + Qm2,

            if Q > MaxDen then
                % Truncate last partial quotient to fit MaxDen
                K = (MaxDen - Qm2) // Qm1,
                if K < 0 then K := 0 end,
                P := K*Pm1 + Pm2,
                Q := K*Qm1 + Qm2,
                Num := P, Den := Q,
                Done := true
            else
                % Accept this convergent if within tolerance or exact
                Err = abs(P / Q - Xabs),
                Frac = Xcur - A,
                if (Err =< Eps) ; (Frac =:= 0.0) then
                    Num := P, Den := Q,
                    Done := true
                else
                    % advance
                    Pm2 := Pm1, Qm2 := Qm1,
                    Pm1 := P,   Qm1 := Q,
                    Xcur := 1.0 / Frac
                end
            end
        end,
        % apply sign and reduce to lowest terms
        Num := Sign * Num,
        G = gcd(abs(Num), Den),
        if G > 1 then
            Num := Num // G,
            Den := Den // G
        end
    end.


% Repeat a string S N times.
repeat_string(S,N) = [S : _ in 1..N].flatten.


/*
  Wrapper for showing different procedures and functions.

  Example:
   show(Data,[show_histogram,show_hpd_intervals],[mean])
*/
show(Data) =>
  show(Data,[show_probs_trunc,mean,variance,show_percentiles,show_hpd_intervals,show_histogram,show_summary_stats]).
show(Data,Ps) =>
  foreach(P in Ps,not (P = (_ = _)))
    if once(P.to_string.find("show",_,_)) then
      % This is a procedure show_*
      catch(call(P,Data),_,_)
    else
      % This is a function
      catch(T=apply(P,Data),E,_),
      if var(E) then
        println(P=T)
      end
    end
  end,
  nl.

show_all(Map) =>
  AddOrder = get_global_map(add_order).get(add_order,[]),
  % Presentation = get_config(presentation,Map.keys.sort),
  Presentation = get_config(presentation,AddOrder),
  foreach(K in Presentation)
    if not Map.has_key(K) then
      Error=to_fstring("Key %w is not a variable name. Check the presentation list.",K).to_atom,
      throw($error(Error))
    end,
    printf("var : %w\n", K),
    show(Map.get(K))
  end.

show_all(Map,Ps) =>
  AddOrder = get_global_map(add_order).get(add_order,[]),
  % Presentation = get_config(presentation,Map.keys.sort),
  Presentation = get_config(presentation,AddOrder),
  foreach(K in Presentation)  
    if not Map.has_key(K) then
      Error=to_fstring("Key %w is not a variable name. Check the presentation list.",K).to_atom,
      throw($error(Error))
    end,  
    printf("var : %w\n", K),    
    show(Map.get(K),Ps)
  end.


/*
  Run a model:
  run_model(Runs,Model)
  run_model(Runs,Model,Pres)

  Example: 
  The model can have parameters, here ObservedHeight (which must be instantiated)
  - run_model(20_000,$model(ObservedHeight),[show_probs,presentation=["z","a","t"]])

  - run_model(20_000,$model(ObservedHeight))
    Showing the default parameters for show_all/0-1.
  
  Note: The model name must be $ escaped if there's a parameter.
  Without parameters, no $ escape is needed (and it should not end with '()' since 
  it will then be evaluated directly as a function).

  - run_model(20_000,model)

*/
run_model(Runs,Model) =>
  run_model(Runs,Model,default).
run_model(Runs,Model,Pres) =>
  garbage_collect(400_000_000),
  Observations = get_global_map(observations),
  Store = get_global_map(store),
  Store.clear,
  Config = get_config_map(),
  Config.clear,
  VariableOrder = get_global_map(variable_order),
  VariableOrder.clear,
  VariableOrder.put(order,[]),
  AddOrder = get_global_map(add_order),
  AddOrder.put(add_order,[]),
  if check_parameter(random_seed=RandomSeed,Pres) then
    println(randomSeed=RandomSeed),
    _ = random(RandomSeed)
  else 
    _ = random2()
  end,
  InitializeTable = false,
  if check_parameter(use_local_tabling=true,Pres) then
    % Using "local" table, i.e. for each run.
    println(use_local_tabling=true),
    InitializeTable := true
  end,
  GarbageCollectWhen = 1000,
  if check_parameter(garbage_collect_when=When,Pres) then
    println(garbage_collect_when=When),
    GarbageCollectWhen := When
  end,
  if check_parameter(min_accepted_samples=MinAcceptedSamples,Pres) then
    % println(min_accepted_samples=MinAcceptedSamples),
    NumAcceptedSamples = get_global_map(num_accepted_samples),
    NumAcceptedSamples.put(num_accepted_samples,0),
    NumAcceptedSamples.put(count_accepted_samples,true),
    NumAcceptedSamples.put(total_num_samples,0),
    if check_parameter(show_accepted_samples=true,Pres) then
      NumAcceptedSamples.put(show_accepted_samples,true)
    else
      NumAcceptedSamples.put(show_accepted_samples,false)    
    end,
    OK = false,
    NumSamples = 0,
    while(OK == false)
      if InitializeTable then
        initialize_table
      end,
      NumSamples := NumSamples + 1,
      if NumSamples mod GarbageCollectWhen = 0 then
        garbage_collect()
      end,
      Observations.clear(),
      call(Model),
      if NumAcceptedSamples.get(num_accepted_samples) >= MinAcceptedSamples then
        OK := true
      end
    end,
  else
    foreach(C in 1..Runs)
      if InitializeTable then
        initialize_table
      end,
      % Clear the observations for this new run
      Observations.clear(),
      if C mod GarbageCollectWhen == 0 then
        garbage_collect()
      end,
      call(Model),
    end  
  end,
  if Pres == default then
    show_all(Store)
  else
    Pres2 = [],
    foreach(P in Pres),
      % This is a configuration parameter: Parameter=Value
      if P = (Parameter=Value) then
        set_config(Parameter,Value)
      else
        Pres2 := Pres2 ++ [P]
      end
    end,
    show_all(Store,Pres2)
  end.

check_parameter(Parameter=Var,Pres) =>
  once(member(Parameter=Var,Pres)).

/*
  Observe something.
  
  Note: If there are multiple observations, they must be
  collected into one single expression using ()'s.
*/
observe(Obs) =>
  Map = get_global_map(observations),
  if cond(Obs,true,false) == true then
    Map.update_list("observations",true),
  else
    Map.update_list("observations",false)  
  end.

% Some wrapper that might be usefule.
observe(L), list(L) =>
  foreach(Obs in L)
    observe(Obs)
  end.

observe(Obs1,Obs2) =>
  observe(Obs1),
  observe(Obs2).

observe(Obs1,Obs2,Obs3) =>
  observe(Obs1,Obs2),
  observe(Obs3).

observe(Obs1,Obs2,Obs3,Obs4) =>
  observe(Obs1,Obs2,Obs3),
  observe(Obs4).


/*
  observe_abc(Data,Ys)
  observe_abc(Data,Ys,Scale)
  observe_abc(Data,Ys,Scale,Methods)

  This is a convenience procedure for observing that a
  generated dataset (Ys) is "the same" as the original 
  dataset Data. It uses a simple ABC (Approximate Bayesian 
  Computation) inspired method for this.

  The Scale is for scaling the limit of the accepted 
  interval (e.g. 1/4). Default Scale is 1.

  The Metods is a list of the statistics to use.
  Default is to use both mean and standard deviation (stdev).
  
*/
observe_abc(Data,Ys) =>
  observe_abc(Data,Ys,1.0,[mean,stdev]).
observe_abc(Data,Ys,Scale) =>
  observe_abc(Data,Ys,Scale,[mean,stdev]).
observe_abc(Data,Ys,Scale,Methods) =>
  Mean = Data.mean,
  Stdev = Data.stdev,
  % Fix for very small standard deviation and discrete Data
  % if Stdev <= 0.01, integer(Data[1]) then
  %   observe_abc_discrete(Data,Ys)
  % else  
  if Stdev <= 0.01 then
    WarningMap = get_global_map(observe_abs_warning),
    HasSeenWarning = WarningMap.get(has_seen_warning,false),
    if HasSeenWarning == false then
      printf("Stdev is very small: %.5f and might give spurious results. Please adjust the scale parameter\n", Stdev),
      WarningMap.put(has_seen_warning,true)
    end
  end,
  YsMean = Ys.mean,      
  YsStdev = Ys.stdev,
  if membchk(mean,Methods) then
    observe(abs(Mean-YsMean)<=Stdev*Scale)
  end,
  if membchk(stdev,Methods) then
    observe(abs(Stdev-YsStdev)<=Stdev*Scale)
  end,
  if membchk(variance,Methods) then
    Variance = Data.variance,
    YsVariance = Ys.variance,
    observe(abs(Variance-YsVariance)<=Variance*Scale)
  end,
  if membchk(skewness,Methods) then
    Skewness = Data.skewness,
    YsSkewness = Ys.skewness,
    observe(abs(Skewness-YsSkewness)<=Stdev*Scale)
  end,
  if membchk(kurtosis,Methods) then
    Kurtosis = Data.kurtosis,
    YsKurtosis = Ys.kurtosis,
    observe(abs(Kurtosis-YsKurtosis)<=Stdev*Scale)
  end,
  if membchk(median,Methods) then
    Median = Data.median,
    YsMedian = Ys.median,
    observe(abs(Median-YsMedian)<=Stdev*Scale)
  end,
  % General version of quantiles:
  % Note these must be $-escaped in the Methods list:
  % E.g. observe_abc(Data,Y,1,[mean,stdev,$q(01),$q(90),$q(99)])
  %
  Qs = [ Q : V in Methods, V=$q(Q)],
  foreach(Q in Qs)
    DataQ = data_quantile(Data,Q/100),
    YsQ   = data_quantile(Ys,Q/100),
    observe(abs(DataQ - YsQ) =< Stdev*Scale)
  end,
  % And some standard versions
  if membchk(q10,Methods) then
    Q10 = data_quantile(Data,0.10),
    YsQ10 = data_quantile(Ys,0.10),
    observe(abs(Q10 - YsQ10) =< Stdev*Scale)
  end,  
  if membchk(q25,Methods) then
    Q25 = data_quantile(Data,0.25),
    YsQ25 = data_quantile(Ys,0.25),
    observe(abs(Q25 - YsQ25) =< Stdev*Scale)
  end,
  if membchk(q50,Methods) then
    Q50 = data_quantile(Data,0.50),
    YsQ50 = data_quantile(Ys,0.50),
    observe(abs(Q50 - YsQ50) =< Stdev*Scale)
  end,
  if membchk(q75,Methods) then
    Q75 = data_quantile(Data,0.75),
    YsQ75 = data_quantile(Ys,0.75),
    observe(abs(Q75 - YsQ75) =< Stdev*Scale)
  end,
  if membchk(q90,Methods) then
    Q90 = data_quantile(Data,0.90),
    YsQ90 = data_quantile(Ys,0.90),
    observe(abs(Q90 - YsQ90) =< Stdev*Scale)
  end,
  % Moments
  Moments = [ K : V in Methods, V=$moment(K)],
  foreach(K in Moments)
    MData = data_moment(Data,K),
    MYs = data_moment(Ys,K),
    observe(abs(MData - MYs) =< Stdev*Scale)
  end,
  CentralMoments = [ K : V in Methods, V=$central_moment(K)],
  foreach(K in CentralMoments)
    MData = data_central_moment(Data,K),
    MYs = data_central_moment(Ys,K),
    observe(abs(MData - MYs) =< Stdev*Scale)
  end,
  StandardizedMoments = [ K : V in Methods, V=$standardized_moment(K)],
  foreach(K in StandardizedMoments)
    MData = data_standardized_moment(Data,K),
    MYs = data_standardized_moment(Ys,K),
    % dimensionless
    observe(abs(MData - MYs) =< Scale)
  end,
  if membchk(range,Methods) then
    Range = data_range(Data),
    YRange = data_range(Ys),
    observe(abs(Range - YRange) =< Stdev*Scale)
  end,
  if membchk(iqr,Methods) then
    IQR = data_iqr(Data),
    YIQR = data_iqr(Ys),
    observe(abs(IQR - YIQR) =< Stdev*Scale)
  end,
  if membchk(mad,Methods) then
    MAD = data_mad(Data),
    YMAD = data_mad(Ys),
    observe(abs(MAD - YMAD) =< Stdev*Scale)
  end,
  if membchk(sum,Methods) then
    Sum = Data.sum,
    YSum = Ys.sum,
    observe(abs(Sum - YSum) =< Stdev*Scale)
  end.


observe_abc_discrete(Data,Ys) =>
  observe_abc_discrete(Data,1).
observe_abc_discrete(Data,Ys,Scale) =>
  Mean = Data.mean,
  IsDiscrete = cond(integer(Data[1]),true,false),
  if IsDiscrete then
     Stdev = max(Data.stdev,1)
  else
     Stdev = Data.stdev
  end,
  if Stdev <= 0.01 then
    WarningMap = get_global_map(observe_abs_warning),
    HasSeenWarning = WarningMap.get(has_seen_warning,false),
    if HasSeenWarning == false then
      printf("Stdev is very small: %.5f and might give spurious results. Please adjust the scale parameter\n", Stdev),
      WarningMap.put(has_seen_warning,true)
    end
  end,  
  YsMean = Ys.mean,
  YsStdev = Ys.stdev,
  observe(abs(Mean-YsMean)<=Stdev*Scale),
  observe(abs(Stdev-YsStdev)<=Stdev*Scale).

/*
------------------------------------------------------------
recommend_abc_stats(Data)
------------------------------------------------------------
Analyzes the dataset Data and recommends a list of ABC
summary statistics to use in observe_abc/4.

The goal is to automatically choose statistics that are
informative and robust for the given data distribution.

Logic:
  - Checks skewness, kurtosis, and basic spread.
  - Uses heuristic thresholds to classify the data as:
      * roughly normal
      * mildly skewed
      * strongly skewed or heavy-tailed
  - Adds robust or shape-sensitive statistics accordingly.

Returns:
  Methods : list of atoms and/or $q(N) terms
            suitable for observe_abc/4

Example:
  Picat> Data = normal_dist_n(100,15,1000000),
  Picat> recommend_abc_stats(Data)
  [mean,stdev]

  Picat> Data = pareto_dist_n(1,3,1000000),
  Picat> recommend_abc_stats(Data)
  [median,iqr,mad,q10,q90,kurtosis]
------------------------------------------------------------
*/
recommend_abc_stats(Data) = Methods =>
    if not list(Data) then
        throw($error('domain_error(list,Data),recommend_abc_stats/1'))
    elseif length(Data) < 10 then
        % Too small sample: keep it simple
        Methods = [mean, stdev]
    else
        Skew = skewness(Data),
        Kurt = kurtosis(Data),
        MinVal = min(Data),
        MaxVal = max(Data),
        Range = MaxVal - MinVal,
        SD = stdev(Data),
        Ratio = cond(SD =:= 0.0, 0.0, Range / SD),

        if abs(Skew) < 0.1, abs(Kurt) < 0.5 then
            % Roughly normal-like
            Methods = [mean, stdev]
        elseif abs(Skew) < 0.5, abs(Kurt) < 2.0 then
            % Mildly skewed, moderate tails
            Methods = [mean, stdev, q25, q75]
        elseif abs(Skew) >= 0.5, abs(Kurt) < 5.0 then
            % Clearly skewed
            Methods = [median, iqr, mad, skewness]
        elseif abs(Skew) >= 0.5, abs(Kurt) >= 5.0 then
            % Strongly skewed and heavy-tailed
            Methods = [median, iqr, mad, q10, q90, kurtosis]
        else
            % Catch-all fallback
            Methods = [mean, stdev, median]
        end,

        % Optional: add range for data with extreme spread
        if Ratio > 10.0 then
            Methods := Methods ++ [range]
        end
    end.


/*
------------------------------------------------------------
recommend_abc_stats_explained(Data)
------------------------------------------------------------
Analyzes a dataset and recommends a set of ABC summary
statistics to use in observe_abc/4, together with a short
text explanation.

Returns:
  [Methods, Comment]
    Methods : list of recommended statistics
    Comment : short plain-text explanation of reasoning

Example:
  Picat> Data = normal_dist_n(100,15,1000000),
  Picat> [Methods,Comment] = recommend_abc_stats_explained(Data),
  Picat> println(Methods),
  Picat> println(Comment).

Output:
  [mean,stdev]
  "Data appears roughly symmetric with light tails. Using mean and stdev."

------------------------------------------------------------
*/

recommend_abc_stats_explained(Data) = [Methods, Comment] =>
    if not list(Data) then
        throw($error('domain_error(list,Data),recommend_abc_stats_explained/1'))
    elseif length(Data) < 10 then
        Methods = [mean, stdev],
        Comment = "Sample size too small for reliable shape analysis. Using mean and stdev only."
    else
        Skew = skewness(Data),
        Kurt = kurtosis(Data),
        MinVal = min(Data),
        MaxVal = max(Data),
        Range = MaxVal - MinVal,
        SD = stdev(Data),
        Ratio = cond(SD =:= 0.0, 0.0, Range / SD),

        if abs(Skew) < 0.1, abs(Kurt) < 0.5 then
            Methods = [mean, stdev],
            Comment = "Data appears roughly symmetric with light tails. Using mean and stdev."
        elseif abs(Skew) < 0.5, abs(Kurt) < 2.0 then
            Methods = [mean, stdev, q25, q75],
            Comment = "Data shows mild skewness or moderate tails. Adding quantiles for shape."
        elseif abs(Skew) >= 0.5, abs(Kurt) < 5.0 then
            Methods = [median, iqr, mad, skewness],
            Comment = "Data is clearly skewed. Using robust statistics (median, IQR, MAD, skewness)."
        elseif abs(Skew) >= 0.5, abs(Kurt) >= 5.0 then
            Methods = [median, iqr, mad, q10, q90, kurtosis],
            Comment = "Data appears strongly skewed and heavy-tailed. Using robust shape statistics."
        else
            Methods = [mean, stdev, median],
            Comment = "Data shows irregular structure. Using combined central and robust summaries."
        end,

        % Optional: detect extreme range vs. spread
        if Ratio > 10.0 then
            Methods := Methods ++ [range],
            Comment := Comment ++ " Large range detected; added range statistic."
        end
    end.

/*
  Adjustable observe_abc. Very experimental!
*/
observe_abc_adj(Data,Ys) =>
  observe_abc_adj(Data,Ys,1.0).
observe_abc_adj(Data,Ys,Scale) =>
  AdjScaleMap = get_global_map(abc_scale),
  AdjScale = AdjScaleMap.get(abc_scale,Scale),
  % println(adjScale=AdjScale),
  Mean = Data.mean,
  Stdev = Data.stdev,
  % Fix for very small standard deviation and discrete Data
  % if Stdev <= 0.01, integer(Data[1]) then
  %   observe_abc_discrete(Data,Ys)
  % else  
  if Stdev <= 0.01 then
    WarningMap = get_global_map(observe_abs_warning),
    HasSeenWarning = WarningMap.get(has_seen_warning,false),
    if HasSeenWarning == false then
      printf("Stdev is very small: %.5f and might give spurious results. Please adjust the scale parameter\n", Stdev),
      WarningMap.put(has_seen_warning,true)
    end
  end,
  YsMean = Ys.mean,
  YsStdev = Ys.stdev,
  Len = min(Data.len,Ys.len),
  DiffSum = [Ys[I]-Data[I] : I in 1..Len].sum,
  StdevScaled = Stdev*AdjScale,
  if DiffSum > 10 then
    NewAdjScale := AdjScale / 1.01,
    NewStdevScaled = Stdev*NewAdjScale,
    if NewAdjScale > 0.1, NewStdevScaled > 0.5 then
      println([data_mean=Mean,data_stdev=Stdev,ys_mean=YsMean,ys_stdev=YsStdev,diff_sum=DiffSum,diff_sum_div_n=(DiffSum/Len)]),    
      AdjScale := NewAdjScale,
      AdjScaleMap.put(abc_scale,AdjScale),
      StdevScaled := NewStdevScaled,
      println("Adjusting adj scale. New"=AdjScale=(1/AdjScale)=StdevScaled)
    end
  end,
  observe(abs(Mean-YsMean)<=StdevScaled),
  observe(abs(Stdev-YsStdev)<=StdevScaled).


/*
  Check if all observations evaluates to true
*/
observed_ok() =>
  Map = get_global_map(observations),
  if not Map.has_key("observations") then
    throw($error('observed_ok: There are no observations. Please check the model.'))
  else
    % If the user has num_accepted_sample=...
    NumAcceptedSamples = get_global_map(num_accepted_samples),
    if NumAcceptedSamples.get(count_accepted_samples,false) == true then
      DoAcceptedSamples = true,
      NumAcceptedSamples.put(total_num_samples,NumAcceptedSamples.get(total_num_samples)+1),
    else
      DoAcceptedSamples = false
    end,
    if Map.get("observations").remove_dups == [true] then
      if DoAcceptedSamples then
      NumAcceptedSamples.put(num_accepted_samples,NumAcceptedSamples.get(num_accepted_samples,0)+1),
      if NumAcceptedSamples.get(show_accepted_samples) == true then
        AcceptedSamples = NumAcceptedSamples.get(num_accepted_samples),
        TotalSamples    = NumAcceptedSamples.get(total_num_samples),
        printf("Num accepted samples: %d  Total samples: %d  (%.7f%%)\n", AcceptedSamples, TotalSamples, (AcceptedSamples/TotalSamples))
      end
    end,    
      true
    else
      false
    end
  end.

data_range(Data) = max(Data) - min(Data).
data_iqr(Data) = data_quantile(Data,0.75) - data_quantile(Data,0.25).
data_mad(Data) = data_quantile([abs(X - data_quantile(Data,0.5)) : X in Data], 0.5).


/*
------------------------------------------------------------
Moment Functions for Datasets
------------------------------------------------------------
These functions compute the k-th raw, central, and standardized
moments of a numeric dataset Data = [X1, X2, ..., Xn].

------------------------------------------------------------
1. data_moment(Data, K)
------------------------------------------------------------
Computes the raw (uncentered) k-th moment:

    m_k = (1/n) * sum( X_i^k )

Meaning:
    Measures overall magnitude. Sensitive to both scale
    and location (shift). For example:
      m1 = mean
      m2 = mean of squares

Parameters:
    Data : list of numeric values (length >= 1)
    K    : integer >= 1

Returns:
    The raw moment value (float)

Throws:
    domain_error if Data is not a list or is empty
    domain_error if K is not a positive integer

Units:
    Same as (data unit)^K

Example:
    data_moment([1,2,3], 3) -> 12.0

------------------------------------------------------------
2. data_central_moment(Data, K)
------------------------------------------------------------
Computes the central (about the mean) k-th moment:

    μ_k = (1/n) * sum( (X_i - mean)^k )

Meaning:
    Measures shape around the mean, independent of shift.
    Examples:
      μ2 = variance
      μ3 = third central moment (raw skewness numerator)
      μ4 = fourth central moment (raw kurtosis numerator)

Parameters:
    Data : list of numeric values (length >= 1)
    K    : integer >= 1

Returns:
    The k-th central moment (float)

Throws:
    domain_error if Data is not a list or is empty
    domain_error if K is not a positive integer

Units:
    Same as (data unit)^K

Example:
    data_central_moment([1,2,3,4,5], 2) -> 2.0

------------------------------------------------------------
3. data_standardized_moment(Data, K)
------------------------------------------------------------
Computes the standardized (dimensionless) k-th moment:

    γ_k = μ_k / (σ^k)
    where μ_k is the central moment and σ is the standard deviation.

Meaning:
    Provides a scale-free measure of shape. Useful for
    comparing distributions with different units or scales.
    Examples:
      γ3 = skewness
      γ4 = kurtosis

Parameters:
    Data : list of numeric values (length >= 2)
    K    : integer >= 1

Returns:
    The standardized moment (float)

Throws:
    domain_error if Data length < 2
    domain_error if variance is zero
    domain_error if K is not a positive integer

Units:
    Dimensionless

Example:
    data_standardized_moment([1,2,3,4,5], 4) -> 1.7

------------------------------------------------------------
Implementation notes:
------------------------------------------------------------
* Uses float-first arithmetic.
* Throws explicit $error(domain_error(...)) on invalid inputs.
* For variance or higher moments, division by N (population) is used.
  Replace with (N-1) if you prefer sample moments.
* When variance = 0.0, standardized moments are undefined and
  a domain_error is thrown.
* All computations use pow/2; replace with safe_pow/2 for
  additional underflow protection if desired.

------------------------------------------------------------
Integration in ABC framework:
------------------------------------------------------------
These functions can be used as ABC summary statistics, via
parameterized functors such as:

    $moment(K)
    $central_moment(K)
    $standardized_moment(K)

Example usage in observe_abc:

    elseif M = $standardized_moment(K) then
        MData = data_standardized_moment(Data,K),
        MYs   = data_standardized_moment(Ys,K),
        observe(abs(MData - MYs) =< Scale)

------------------------------------------------------------

------------------------------------------------------------
Named and commonly used moments
------------------------------------------------------------

Order (k) | Moment type   | Common name | Symbol / Relation           | Interpretation
-----------+---------------+-------------+-----------------------------+-----------------------------
1          | Raw / Central | Mean        | μ = E[X]                   | Location (center of mass)
2          | Central       | Variance    | σ² = E[(X - μ)²]           | Spread or dispersion
3          | Standardized  | Skewness    | γ₁ = μ₃ / σ³               | Asymmetry (left/right tail)
4          | Standardized  | Kurtosis    | γ₂ = μ₄ / σ⁴               | Tail heaviness / peakedness

Notes:
  * The zeroth moment (k = 0) is always 1 for normalized distributions.
  * Moments of order higher than 4 rarely have names or practical use.
  * Higher moments (k ≥ 5) mainly emphasize rare outliers and are
    numerically unstable for finite datasets.
------------------------------------------------------------

------------------------------------------------------------
Interpretation summary of statistical moments
------------------------------------------------------------

  k | What it measures        | Reliability from data
  --+-------------------------+-----------------------------
  1 | Location (mean)         | Very stable
  2 | Scale / variability     | Stable
  3 | Asymmetry (skewness)    | Needs moderate sample size
  4 | Tail weight (kurtosis)  | Needs large sample size
 ≥5 | Finer shape details     | Highly unstable / rarely useful

 Notes:
  * The first two moments (mean and variance) are almost always reliable.
  * The 3rd and 4th moments (skewness, kurtosis) require larger samples
    to estimate accurately.
  * Moments beyond 4 are seldom useful in practice; they are dominated
    by outliers and numerical noise.
------------------------------------------------------------

*/


/* Raw k-th moment */
data_moment(Data, K) = M =>
    if not list(Data) then
        throw($error('domain_error(list,Data),data_moment/2'))
    elseif length(Data) == 0 then
        throw($error('domain_error(length,Data),data_moment/2'))
    elseif not integer(K) ; K < 1 then
        throw($error('domain_error(number,K),data_moment/2'))
    else
        N = length(Data),
        M = sum([pow(X, K) : X in Data]) / N
    end.

/* Central k-th moment */
data_central_moment(Data, K) = M =>
    if not list(Data) then
        throw($error('domain_error(list,Data),data_central_moment/2'))
    elseif length(Data) == 0 then
        throw($error('domain_error(length,Data),data_central_moment/2'))
    elseif not integer(K) ; K < 1 then
        throw($error('domain_error(number,K),data_central_moment/2'))
    else
        N = length(Data),
        Mean = sum(Data) / N,
        M = sum([safe_pow(X - Mean, K) : X in Data]) / N
    end.

/* Standardized k-th moment */
data_standardized_moment(Data, K) = M =>
    N = length(Data),
    if N < 2 then
        throw($error('domain_error(length,Data),data_standardized_moment/2'))
    else
        Mean = sum(Data) / N,
        Devs = [X - Mean : X in Data],
        Var = sum([D * D : D in Devs]) / N,
        SD = sqrt(Var),
        if SD =:= 0.0 then
            throw($error('domain_error(number,SD),data_standardized_moment/2'))
        else
            M = sum([pow(D / SD, K) : D in Devs]) / N
        end
    end.


/*
  post_summary(Data)

  prints a summary of the difference with Data and the posterior samples that
  was generated with add_post(Data,Y).

*/ 
post_summary(Data) =>
  println("Post summary:"),
  Store = get_store(), 
  Post = [],
  Diffs = [],
  Vs = [],
  foreach(I in 1..Data.len)
    IS = I.to_string,
    P     = Store.get("post "++IS).mean,
    Post  := Post  ++ [P],
    D     = Store.get("post "++IS++" diff").mean,
    Diffs := Diffs  ++ [D],
    Vs    := Vs ++ [[Data[I],P,D]]
  end,
  foreach([V,P,D] in Vs)
    println([orig=V,post=P,diff=D])
  end,
  println([sum_diffs=Diffs.sum,sum_abs_diffs=Diffs.map(abs).sum]),
  
  nl.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% show_histogram(Data)
% ------------------------------------------------------------
% Prints an ASCII histogram for numeric (continuous/discrete)
% or symbolic data.  Format:
%   value: count ### (probability / cumulative)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
show_histogram(Data) =>
    if Data == [] then
        println("Empty dataset."),
        fail
    end,
    Limit = get_config(histogram_limit,40),
    N = length(Data),
    Unique = sort(remove_dups(Data)),
    UniqueLen = Unique.len,
    % Determine data type    
    if number(Data[1]), UniqueLen > 1 then
        Min = min(Data), Max = max(Data),
        % Heuristic: if all are integers and count of unique <= 40 (Limit), discrete
        if integer(Data[1]), length(Unique) =< Limit  then
            show_histogram_discrete(Unique, Data, N)
        else
            show_histogram_continuous(Min, Max, Data, N, min(UniqueLen,Limit))
        end
    else
        show_histogram_symbolic(Data, N)
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Continuous numeric histogram
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
show_histogram_continuous(Min, Max, Data, N, Bins) =>
    Step = (Max - Min) / Bins,
    Counts = new_list(Bins, 0),
    foreach(X in Data)
        I = min(Bins, max(1, round((X - Min) / Step) + 1)),
        Counts[I] := Counts[I] + 1
    end,
    MaxCount = max(Counts),
    Cum = 0.0,
    printf("Histogram (total %d)\n", Data.len),
    foreach(I in 1..Bins)
        X0 = Min + (I-1)*Step,
        C = Counts[I],
        P = C / N,
        Cum := Cum + P,
        BarLen = round(60 * C / MaxCount),
        Bar = repeat_string("#", BarLen),
        printf("%7.3f: %5d %-60s (%.3f / %.3f)\n", X0, C, Bar, P, Cum)
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Discrete numeric histogram (integers)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
show_histogram_discrete(Unique, Data, N) =>
    printf("Histogram (total %d)\n", Data.len),
    MaxCount = max([count_eq(Data, X) : X in Unique]),
    Cum = 0.0,
    foreach(X in Unique)
        C = count_eq(Data, X),
        P = C / N,
        Cum := Cum + P,
        BarLen = round(60 * C / MaxCount),
        Bar = repeat_string("#", BarLen),
        printf("%7d: %5d %-60s (%.3f / %.3f)\n", X, C, Bar, P, Cum)

    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Symbolic histogram
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
show_histogram_symbolic(Data, N) =>
    Symbols = sort(remove_dups(Data)),
    printf("Histogram (total %d)\n", Data.len),    
    MaxCount = max([count_eq(Data, S) : S in Symbols]),
    Cum = 0.0,
    foreach(S in Symbols)
        C = count_eq(Data, S),
        P = C / N,
        Cum := Cum + P,
        BarLen = round(60 * C / MaxCount),
        Bar = repeat_string("#", BarLen),
        printf("%-10w: %5d %-60s (%.3f / %.3f)\n", S, C, Bar, P, Cum)
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Helper: count occurrences of X in list
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
count_eq(List, X) = length([Y : Y in List, Y == X]).


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% show_percentiles(Data)                         % default grid
% show_percentiles(Data, Ps :: list(float))      % custom percentiles in (0,1)
%
% Output format:
% Percentiles:
% (0.01 64.50...)
% ...
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
show_percentiles(Data) =>
    DefaultPs = [0.001,0.01,0.025,0.05,0.10,0.25,0.50,0.75,0.84,0.90,0.95,0.975,0.99,0.999,0.9999,0.99999],
    show_percentiles(Data, DefaultPs).

show_percentiles(Data, Ps1) =>
    if Data == [] then
        println("Empty dataset."),
        fail
    end,
    Ps = get_config(percentiles,Ps1),
    println("Percentiles:"),
    if number(Data[1]) then
        % ---- Numeric: R's Type-7 interpolation ----
        Sorted = sort(Data),
        foreach(P in Ps)
            Q = percentile_numeric_type7(Sorted, P),
            % Use maximum double precision (up to 17 significant digits)
            printf("(%.15g %.17g)\n", P, Q)
        end
    else
        % ---- Discrete (integers or symbols): step CDF ----
        Syms = sort(remove_dups(Data)),
        Counts = [count_eq(Data, V) : V in Syms],
        N = length(Data),
        Cum = 0.0,
        CDFPairs = [],
        foreach(I in 1..length(Syms))
            Cum := Cum + Counts[I]/N,
            CDFPairs := CDFPairs ++ [(Syms[I], Cum)]
        end,
        foreach(P in Ps)
            V = quantile_step(CDFPairs, P),
            % P at high precision; V printed exactly (int/symbol)
            printf("(%.15g %w)\n", P, V)
        end
    end.

%
% Returns the percentiles
%
get_percentiles(Data) = get_percentiles(Data, DefaultPs) =>
    DefaultPs = [0.001,0.01,0.025,0.05,0.10,0.25,0.50,0.75,0.84,0.90,0.95,0.975,0.99,0.999,0.9999,0.99999].

get_percentiles(Data, Ps1) = Percentiles =>
    if Data == [] then
        println("Empty dataset."),
        fail
    end,
    Ps = get_config(percentiles,Ps1),
    Percentiles = [],
    if number(Data[1]) then
        % ---- Numeric: R's Type-7 interpolation ----
        Sorted = sort(Data),
        foreach(P in Ps)
            Q = percentile_numeric_type7(Sorted, P),
            % Use maximum double precision (up to 17 significant digits)
            % printf("(%.15g %.17g)\n", P, Q)
            Percentiles := Percentiles ++ [[P,Q]]
        end
    else
        % ---- Discrete (integers or symbols): step CDF ----
        Syms = sort(remove_dups(Data)),
        Counts = [count_eq(Data, V) : V in Syms],
        N = length(Data),
        Cum = 0.0,
        CDFPairs = [],
        foreach(I in 1..length(Syms))
            Cum := Cum + Counts[I]/N,
            CDFPairs := CDFPairs ++ [(Syms[I], Cum)]
        end,
        foreach(P in Ps)
            V = quantile_step(CDFPairs, P),
            % P at high precision; V printed exactly (int/symbol)
            % printf("(%.15g %w)\n", P, V)
            Percentiles := Percentiles ++ [[P,Q]]
        end
    end.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Numeric percentile — R's Type-7
% h = 1 + (n-1)p, q = x_[floor h] + (h-floor h)(x_[ceil h] - x_[floor h])
% Assumes Sorted is ascending and non-empty, p in (0,1)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
percentile_numeric_type7(Sorted, P) = Q =>
    N = length(Sorted),
    if P =< 0.0 then
        Q = Sorted[1]
    elseif P >= 1.0 then
        Q = Sorted[N]
    else
        H = 1.0 + (N - 1.0) * P,
        L = floor(H).to_int(),
        U = ceiling(H).to_int(),
        Gamma = H - L,
        Xl = Sorted[L],
        Xu = Sorted[U],
        Q = Xl + Gamma * (Xu - Xl)
    end.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% quantile_step(CDFPairs, P)
% ------------------------------------------------------------
% Return the smallest Value such that cumulative >= P.
% CDFPairs = [(Value1,Cum1), (Value2,Cum2), ...]
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
quantile_step(CDFPairs, P) = V =>
    V = find_quantile_step(CDFPairs, P).

find_quantile_step([], _P) = none.  % should not happen if P<=1

find_quantile_step([(Val, Cum) | Rest], P) = cond(P =< Cum, Val,find_quantile_step(Rest, P)).


/*
  pdf_from_data(Data) 

  Returns a list of Val=Prob for the values in Data
  which corresponds to the PDF.

*/
pdf_from_data(Data) = Data.get_probs().sort.

/*
  cdf_from_data(Data) 

  Returns a list of cumulative probabiltiies 
  Val=CumProb for the values in Data

*/
cdf_from_data(Data) = CDF =>
  PDF = pdf_from_data(Data),
  CDF = [],
  Cum = 0,
  foreach(V=F in PDF)
    Cum := Cum + F, 
    CDF := CDF ++ [V=Cum]
  end.


/*
  cdf_from_pdf(PDF) 

  Returns a list of cumulative probabiltiies 
  Val=CumProb for the values in Data

*/

cdf_from_pdf(PDF) = CDF =>
  CDF = [],
  Cum = 0,
  foreach(V=F in PDF)
    Cum := Cum + F, 
    CDF := CDF ++ [V=Cum]
  end.

/*
  quantile_from_cdf(Data,Q) 

  Returns the value that corresponds to the
  Q'th quantile in the CDF (generated by cdf_from_data/1).

*/
quantile_from_cdf(CDF,Q) = Res =>
  OK = false,
  I = 1,
  while(OK == false)
    C=Val = CDF[I],
    if Val >= Q then
      OK := C
    else
      I := I + 1
    end
  end,
  Res = cond(OK==false,0,OK).

/*
  quantile_from_data(Data,Q) 

  Returns the value that corresponds to the
  Q'th quantile in data (via cdf_from_data/1 
  cdf_from_pdf/1).

*/
quantile_from_data(Data,Q) = Res =>
  CDF = cdf_from_data(Data),
  Res = quantile_from_cdf(CDF,Q).


/*

  data_sigma(Data, Sigma)

  Given a dataset Data = [X1,X2,...,Xn] and a multiple Sigma,
  compute:

    Mean = mean(Data)
    Stdev = stdev(Data)
    Val = Mean + Sigma * Stdev
    CDF = fraction of data <= Val
    UpperTail = 1.0 - CDF

  Returns [Val, UpperTail].

  Parameter constraints:
    * Data must be a list of numbers (length >= 2)
    * Sigma must be numeric

  Throws explicit domain_error on invalid input.


  Cf dist_sigma/2 (in ppl_distributions)

  Picat> D=data_sigma(normal_dist_n(100,15,1000000),4)
  D = [160.05431345860336,0.000027]

  Picat> D=dist_sigma($normal_dist(100,15),4)  
  D = [160.0,0.000031671241833]

*/

data_sigma(Data, Sigma) = [Val, UpperTail] =>
    if not list(Data) then
        throw($error('domain_error(list,Data),data_sigma/2'))
    elseif length(Data) < 2 then
        throw($error('domain_error(length,Data),data_sigma/2'))
    elseif not number(Sigma) then
        throw($error('domain_error(number,Sigma),data_sigma/2'))
    else
        N = length(Data),
        Mean = Data.mean, % sum(Data) / N,
        % Deviations = [X - Mean : X in Data],
        % Variance = sum([D * D : D in Deviations]) / N,
        Stdev = Data.stdev, % sqrt(Variance),
        Val = Mean + Sigma * Stdev,

        % Empirical CDF: fraction of points <= Val
        CountLe = length([X : X in Data, X =< Val]),
        CDF = CountLe / N,
        UpperTail = 1.0 - CDF
    end.


/*
  ------------------------------------------------------------
  data_quantile(Data, P)
  ------------------------------------------------------------
  Computes the empirical quantile of a dataset.

  Given:
    Data : list of numeric values
    P    : quantile probability in [0.0, 1.0]

  Returns:
    The empirical quantile value q such that
        fraction of Data <= q ≈ P

  Method:
    * Sorts Data in ascending order.
    * Uses linear interpolation between order statistics:
        position = (N - 1) * P + 1
        lower index = floor(position)
        upper index = ceiling(position)
        interpolates if needed.

  Parameter constraints:
    * Data must be a list of numbers, length >= 1
    * P must be between 0.0 and 1.0

  Throws explicit domain_error on invalid input.

  Note: This is a variant of get_percentiles/2 but
        returns only the value.

*/
data_quantile(Data, P) = Q =>
    if not list(Data) then
        throw($error('domain_error(list,Data),data_quantile/2'))
    elseif length(Data) == 0 then
        throw($error('domain_error(length,Data),data_quantile/2'))
    elseif P < 0.0 ; P > 1.0 then
        throw($error('domain_error(number,P),data_quantile/2'))
    else
        Sorted = sort(Data),
        N = length(Sorted),
        if N == 1 then
            Q = Sorted[1]
        else
            Pos = (N - 1) * P + 1.0,
            I = floor(Pos),
            F = Pos - I,
            if I < 1 then
                Q = Sorted[1]
            elseif I >= N then
                Q = Sorted[N]
            else
                Lower = Sorted[I],
                Upper = Sorted[I + 1],
                Q = Lower + F * (Upper - Lower)
            end
        end
    end.


/*
  A version of data_quantile/2
  data_percentiles(Data,1..99) 
  I.e. the percentiles are written as 1..99 instead of 0.01 and 0.99

*/
data_percentiles(Data,Pcts) = [data_quantile(Data,Q/100) : Q in Pcts].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% show_hpd_intervals(Data)                          % default masses
% show_hpd_intervals(Data, Masses :: list(float))   % e.g. [0.84,0.9,0.95,0.99]
% Prints: HPD interval (Mass): Lower..Upper  for each Mass in list
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
show_hpd_intervals(Data) =>
    Default = [0.5,0.84,0.90,0.95,0.99,0.999,0.9999,0.99999],
    show_hpd_intervals(Data, Default).

show_hpd_intervals(Data, Masses1) =>
    println("HPD intervals:"),
    Masses = get_config(hpd_intervals,Masses1),
    if Data == [] then
      throw("show_hpd_intervals: empty dataset")
    elseif not is_all_numbers(Data) then
        % throw("show_hpd_intervals: data must be numeric")
        % println("show_hpd_intervals: data is not numeric")
        println("")
    else 
      foreach(M in Masses)
        (M =< 0.0 ; M > 1.0) ->
            throw("show_hpd_intervals: each Mass must be in (0,1]")
        ;
        (L,U) = hpd_interval(Data, M),
        printf("HPD interval (%w): %.14f..%.14f\n", M, L, U)
      end
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% hpd_interval(Data, Mass) = (Lower, Upper)
% Narrowest interval covering 'Mass' fraction of samples (numeric only).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
hpd_interval(Data, Mass) = (Lower, Upper) =>
    (Data == []) -> throw("hpd_interval: empty dataset")
    ;
    (not is_all_numbers(Data)) -> throw("hpd_interval: data must be numeric")
    ;
    (Mass =< 0.0 ; Mass > 1.0) -> throw("hpd_interval: Mass must be in (0,1]")
    ;
    Sorted = sort(Data),
    N = length(Sorted),
    K0 = round(ceiling(Mass * N)),
    K  = cond(K0 < 1, 1, cond(K0 > N, N, K0)),
    BestI = 1, BestW = 1.0e300,
    MaxI = N - K + 1,
    foreach(I in 1..MaxI)
        J = I + K - 1,
        W = Sorted[J] - Sorted[I],
        if W < BestW then
            BestW := W, BestI := I
        end
    end,
    Lower = Sorted[BestI],
    Upper = Sorted[BestI + K - 1].


/*

  (Equally tailed) credible intervals for a probability distribution.

  credible_intervals_exact(Dist,Q)
  Returns the Low,High value of the credible interval Q

  Note: 
  * This is NOT HPD intervals, but are simpler to
    calculate exact for a probability distribution.
    In general credible intervals are wider than HPDs.
  * I.e. this cannot be used for random generated data
    (which show_hpd_intervals do).

*/
credible_intervals_exact(Dist,HPD) = [V1,V2] =>
  T = HPD / 2,
  Low = 0.5 - T,
  High = 0.5 + T,
  V1 = quantile(Dist,Low),
  V2 = quantile(Dist,High).

show_credible_intervals_exact(Dist,Qs) =>
  println("Credible intervals (exact):"),
  foreach(Q in Qs)
    [V1,V2] = credible_intervals_exact(Dist,Q),
    if integer(V1), integer(V2) then
      printf("%w: %d..%d\n",Q,V1,V2),
    else
      printf("%w: %.15f .. %0.15f\n",Q,V1,V2)
    end
  end,
  nl.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Helper: ensure list is all numeric
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
is_all_numbers([]).
is_all_numbers([X|Xs]) :- number(X), is_all_numbers(Xs).


%
% Variance of Xs
%
variance(Xs) = Variance =>
    if number(Xs[1]) then
      Mu = avg(Xs),
      N  = Xs.len,
      % Variance = sum([ (X-Mu)**2 : X in Xs ]) / N
      Variance = sum([ safe_pow(X-Mu,2) : X in Xs ]) / N
    else
      if Xs[1] == false ; Xs[1] == true then
        Nums = [cond(V==true,1,0) : V in Xs],
        Variance = variance(Nums)
      else
        Variance = "data is not numeric"
      end
    end.

% Standard deviation
stdev(Xs) = sqrt(variance(Xs)).


/*
  Return the freqencies of the elements in List
*/
get_freq(List) = Map =>
  Map = new_map(),
  foreach(E in List)
    Map.put(E,Map.get(E,0)+1)
  end.

% alias for get_freq/1
collect(List) = get_freq(List).

show_freq(List) =>
  println("Show freqs:"),
  Freqs = get_freq(List),
  foreach(K in Freqs.keys.sort)
    println(K=Freqs.get(K))
  end.

show_freq_prob(List) =>
  println("Show freqs:"),
  Freqs = get_freq(List),
  Total = Freqs.values.sum,
  foreach(K in Freqs.keys.sort)
    F = Freqs.get(K),
    FT = F/Total,
    printf("%w: %d (%.16f)\n",K,F,FT)
  end.

%
% Print the values in list L and their normalized probabilities
% in decreasing order.
%
show_probs(L) =>
  println("Probabilities:"),
  foreach(K=V in get_probs(L))
    printf("%w: %.16f\n",K,V)
  end.

% Truncated version
% Configurate the size by setting global_map truncate_size
show_probs_trunc(L) =>
  Size = get_config(truncate_size,4),
  Probs = get_probs(L),
  Len = Probs.len,
  if Len > Size * 2 + 2 then
    println("Probabilities (truncated):"),
    Probs1 = Probs[1..Size],
    Probs2 = Probs[Len-Size+1..Len],
    S = "",
    foreach(K=V in Probs1)
      S := S ++ to_fstring("%w: %.16f\n",K,V)
    end,
    S := S ++ ".........\n",
    foreach(K=V in Probs2)
      S := S ++ to_fstring("%w: %.16f\n",K,V)
    end,    
    print(S)
    
  else
    % println("Probabilities:"),
    % foreach(K=V in Probs)
    %  printf("%w: %.16f\n",K,V)
    % end
    show_probs(L)
  end.

show_probs_rat(L) =>
  println("Probabilities:"),
  foreach(K=V in get_probs(L))
    printf("%w: %w\n",K,V.and_ratf)
  end.

% Truncated version
show_probs_rat_trunc(L) =>
  Size = get_config(truncate_size,4),  
  Probs = get_probs(L),
  Len = Probs.len,
  if Len > Size*2 + 2 then
    println("Probabilities (truncated):"),
    Probs1 = Probs[1..Size],
    Probs2 = Probs[Len-Size+1..Len],
    S = "",
    foreach(K=V in Probs1)
      S := S ++ to_fstring("%w: %w\n",K,V.and_ratf)
    end,
    S := S ++ ".........\n",
    foreach(K=V in Probs2)
      S := S ++ to_fstring("%w: %w\n",K,V.and_ratf)
    end,    
    print(S)
  else
    % println("Probabilities:"),
    show_probs_rat(L)
  end.


% Return the values of list L and their normalized probabilities
% in decreasing order.
get_probs(L) = Probs =>
  Len = L.len,
  Freq = L.get_freq.to_list.sort_down(2),
  Probs=[],
  foreach(K=V in Freq)
    Probs := Probs ++ [K=V/Len]
  end.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% PPAST: Summary statistics for numeric datasets
% ----------------------------------------------
% show_summary_stats(Data) -> prints a stats report
% Also exposes individual functions if you want programmatic access.
%
% Includes:
%  - Count, Min, Max, Sum, Mean
%  - Population/Sample Variance & Std Dev
%  - Skewness (Fisher’s unbiased), Kurtosis Excess (unbiased)
%  - Median, Q1, Q3 (R Type-7 percentiles)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

show_stats(Data) =>
  show_summary_stats(Data).
show_summary_stats(Data) =>
    (Data == []) -> throw("show_summary_stats: empty dataset")
    ;
    (not is_all_numbers(Data)) -> throw("show_summary_stats: data must be numeric")
    ;
    N = length(Data),
    Sorted = sort(Data),
    (Mean,M2,M3,M4) = online_moments(Data),

    VarPop = cond(N > 0, M2/N, 0.0),
    VarSmp = cond(N > 1, M2/(N-1), 0.0),
    StdPop = sqrt(VarPop),
    StdSmp = sqrt(VarSmp),

    Skew  = skewness_unbiased(N, M2, M3),          % NaN if N<3
    KurtE = kurtosis_excess_unbiased(N, M2, M4),   % NaN if N<4

    Med = median_sorted(Sorted),
    Q1  = percentile_numeric_type7(Sorted, 0.25),
    Q3  = percentile_numeric_type7(Sorted, 0.75),

    println("Summary statistics"),
    printf("Count:               %d\n", N),
    printf("Min:                 %.17g\n", Sorted[1]),
    printf("Max:                 %.17g\n", Sorted[N]),
    printf("Sum:                 %.17g\n", sum(Data)),
    printf("Mean:                %.17g\n", Mean),
    printf("Variance (pop):      %.17g\n", VarPop),
    printf("Std dev (pop):       %.17g\n", StdPop),
    printf("Variance (sample):   %.17g\n", VarSmp),
    printf("Std dev (sample):    %.17g\n", StdSmp),
    printf("Skewness (unbiased): %.17g\n", Skew),
    printf("Kurtosis excess:     %.17g\n", KurtE),
    printf("Median:              %.17g\n", Med),
    printf("Q1 (25%%):            %.17g\n", Q1),
    printf("Q3 (75%%):            %.17g\n", Q3).


show_simple_stats(Data) =>
  if not number(Data[1]) then
    println("show_simple_stats: no numeric data")
  else
    println([len=Data.len,min=Data.min,mean=Data.mean,median=Data.median,max=Data.max,variance=Data.variance,stdev=Data.stdev])
  end.


% Mean value of data
mean(Data) = Res =>
  % Convert boolean to 0/1
  if Data[1] == true ; Data[1] == false then
    Data := [cond(D==true,1,0) : D in Data]
  end,
  if number(Data[1]) then
    Res = cond(number(Data[1]), sum(Data) / length(Data), not_numeric)
  else
    Res = ""
    % Res = get_probs(Data),
    % if Res.len > 20 then
    %   println("Mean (truncated)"),
    %   Res := Res[1..20]
    % end
  end.


% Variance
variance_population(Data) = (M2/length(Data)) =>
    (_,M2,_,_) = online_moments(Data).

variance_sample(Data) = (M2/(length(Data)-1)) =>
    (length(Data) > 1) -> true ; throw("variance_sample: need N>1"),
    (_,M2,_,_) = online_moments(Data).

std_population(Data) = sqrt(variance_population(Data)).
std_sample(Data)     = sqrt(variance_sample(Data)).

% skewness(Data) = skewness_unbiased(N,M2,M3) =>
%    (N, M2, M3, _) = online_moments(Data).

kurtosis_excess(Data) = kurtosis_excess_unbiased(N,M2,M4) =>
    (N, M2, _, M4) = online_moments(Data).
% kurtosis(Data) = kurtosis_excess(Data).

/*
------------------------------------------------------------
skewness(Data)
------------------------------------------------------------
Computes the skewness of a numeric dataset, returning 0.0
if the variance is zero (i.e., all values are identical).

Formula:
    skewness = E[(X - mean)^3] / (stdev^3)

Safe behavior:
  * Returns 0.0 when variance = 0.0
  * Throws explicit domain_error for non-list or empty Data

Parameters:
  Data : list of numeric values (length >= 1)

Returns:
  Skewness : float value (0.0 for constant data)

Example:
  skewness([1,2,3,4,5]) -> 0.0
  skewness([1,1,1,1])   -> 0.0
------------------------------------------------------------
*/
skewness(Data) = S =>
    if not list(Data) then
        throw($error('domain_error(list,Data),safe_skewness/1'))
    elseif length(Data) == 0 then
        throw($error('domain_error(length,Data),safe_skewness/1'))
    else
        N = length(Data),
        Mean = sum(Data) / N,
        Devs = [X - Mean : X in Data],
        Var = sum([D * D : D in Devs]) / N,
        if Var =:= 0.0 then
            S = 0.0
        else
            SD = sqrt(Var),
            % Num = sum([pow(D,3) : D in Devs]) / N,
            Num = sum([safe_pow(D,3) : D in Devs]) / N,            
            S = Num / pow(SD, 3)
        end
    end.

/*
------------------------------------------------------------
kurtosis(Data)
------------------------------------------------------------
Computes the excess kurtosis of a numeric dataset,
returning 0.0 if the variance is zero (i.e., all values are identical).

Formula:
    kurtosis = E[(X - mean)^4] / (stdev^4) - 3

Safe behavior:
  * Returns 0.0 when variance = 0.0
  * Throws explicit domain_error for non-list or empty Data

Parameters:
  Data : list of numeric values (length >= 1)

Returns:
  Kurtosis : float value (0.0 for constant data)

Example:
  kurtosis([1,2,3,4,5]) -> -1.3
  kurtosis([1,1,1,1])   -> 0.0
  kurtosis(normal_dist_n(0,1,1000000)) -> approx 0.0
------------------------------------------------------------
*/
kurtosis(Data) = K =>
    if not list(Data) then
        throw($error('domain_error(list,Data),safe_kurtosis/1'))
    elseif length(Data) == 0 then
        throw($error('domain_error(length,Data),safe_kurtosis/1'))
    else
        N = length(Data),
        Mean = sum(Data) / N,
        Devs = [X - Mean : X in Data],
        Var = sum([D * D : D in Devs]) / N,
        if Var =:= 0.0 then
            K = 0.0
        else
            SD = sqrt(Var),
            Num = sum([pow(D,4) : D in Devs]) / N,
            K = Num / pow(SD, 4) - 3.0
        end
    end.


median(Data) = median_sorted(sort(Data)).

q1(Data) = percentile_numeric_type7(sort(Data), 0.25).
q3(Data) = percentile_numeric_type7(sort(Data), 0.75).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Online one-pass moments (mean, M2, M3, M4) — Pébay/West/Chan update
% Returns (Mean, M2, M3, M4); also returns N via length(Data) when needed.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
online_moments(Data) = (Mean, M2, M3, M4) =>
    N = 0, Mean = 0.0, M2 = 0.0, M3 = 0.0, M4 = 0.0,
    foreach(X in Data)
        N1 = N + 1,
        Delta = X - Mean,
        DeltaN = Delta / N1,
        Term1 = Delta * DeltaN * N,
        M4 := M4 + Term1*DeltaN*DeltaN*(N1*N1 - 3*N1 + 3)
                    + 6.0*DeltaN*DeltaN*M2
                    + 4.0*DeltaN*M3,
        M3 := M3 + Term1*DeltaN*(N1 - 2) - 3.0*DeltaN*M2,
        M2 := M2 + Term1,
        Mean := Mean + DeltaN,
        N := N1
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Unbiased skewness (Fisher-Pearson) and unbiased kurtosis excess
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
skewness_unbiased(N, M2, M3) = S =>
    if N < 3 ; M2 =:= 0.0 then
        throw($error('skewness_unbiased: N >= 3, M2 != 0.0'))
        % S = 0.0/0.0   % NaN
    else
        G = M3 / pow(M2, 1.5),
        S = sqrt(N*(N-1)) / (N-2) * G
    end.

kurtosis_excess_unbiased(N, M2, M4) = K =>
    println($kurtosis_excess_unbiased(N, M2, M4)),
    if N < 4 ; M2 =:= 0.0 then
        throw($error('skewness_unbiased: N >= 4, M2 != 0.0'))
        % K = 0.0/0.0   % NaN
    else
        G2 = M4 / (M2*M2) - 3.0, % excess kurtosis (biased)
        % Unbiased adjustment:
        K = ((N-1.0)/((N-2.0)*(N-3.0))) * ((N+1.0)*G2 + 6.0)
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Median for a sorted vector
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
median_sorted(Sorted) = Med =>
    N = length(Sorted),
    Mid = N // 2,
    Med = cond(N mod 2 == 1,
               Sorted[Mid+1],
               0.5*(Sorted[Mid] + Sorted[Mid+1])).


/*
  draw_without_replacement(N,A)

  Return N elements from the list A, drawn without replacement

*/
draw_without_replacement(N,A) = draw_without_replacement(N,A,[]).
draw_without_replacement(N,A,Ns) = Res =>
  if N == 0 ; A == [] then
    Res = Ns
  else
    D = uniform_draw(A),
    NewA = delete(A,D),
    Res = draw_without_replacement(N-1,NewA,Ns++[D])
  end.

% Draw all 
draw_without_replacement(A) = draw_without_replacement(A.len,A).

/*
  resample(N,Data)

  Draw N elements from the list Data, with replacement.
*/
resample(N,Data) = L =>
  L = [],
  foreach(_ in 1..N)
    L := L ++ [uniform_draw(Data)]
  end.

% Draw Data.len elements from Data
resample(Data) = resample(Data.len,Data).
resample_all(Data) = resample(Data.len,Data).


/*
  draw_until_pattern(List,Pattern,Choices)

  Draw from Choices until the Pattern occurs.
  Return the list of the drawn elements

*/ 
draw_until_pattern(Lst,Pattern,Choices) = Res =>
  if append(_,Pattern,Lst) then
    Res = Lst
  else
    Res = draw_until_pattern(Lst ++ [uniform_draw(Choices)],Pattern,Choices)
  end.


/*
  flip_until_pattern(Patterns,Lst)

  Toss a toss until one of the patterns in Patterns
  occurs. Return the generated list and the successful pattern.
*/
flip_until_pattern(Patterns,Lst) = Res =>
  MinPatternLen = [Pt.len : Pt in Patterns].min,
  LstLen = Lst.len,
  New = bern(1/2),
  if LstLen >= MinPatternLen then
    PCheck = [],
    foreach(P in Patterns,break(PCheck != []))
      if append(_,P,Lst) then
        PCheck := P
      end
    end,
    if PCheck != [] then
      Res = [Lst,PCheck]
    else
      Res = flip_until_pattern(Patterns,Lst ++ [New])
    end
  else
    Res = flip_until_pattern(Patterns,Lst ++ [New])
  end.


/*
  draw_until_one_pattern(Lst,Patterns,Choices) 

  Draw from Choices until one of the pattern in Pattern occurs.
  Return the generated list and the winning pattern.

  (This is a generalization of flip_until_pattern/3)
*/
draw_until_one_pattern(Lst,Patterns,Choices) = Res =>
  MinPatternLen = [Pt.len : Pt in Patterns].min,
  LstLen = Lst.len,
  New = uniform_draw(Choices),
  if LstLen >= MinPatternLen then
    PCheck = [],
    foreach(P in Patterns,break(PCheck != []))
      if once(append(_,P,Lst)) then
        PCheck := P
      end
    end,
    if PCheck != [] then
      Res = [Lst,PCheck]
    else
      Res = draw_until_one_pattern(Lst++[New],Patterns,Choices)
    end
  else
     Res = draw_until_one_pattern(Lst++[New],Patterns,Choices)  
  end.

/*

  flip_until_n_success(P,N)

  Returns the number of draws until N Successes. 
  
  P is the probability of success.

*/
flip_until_n_successes(P,N) = Res =>
  C = 0,
  OK = false,
  NumSuccesses = 0,
  while (OK == false)
    C := C + 1,
    B = bern(P),
    if B == 1 then
      NumSuccesses := NumSuccesses + 1
    end,
    if NumSuccesses == N then
      OK := true
    end
  end,
  Res = C.

/*
  count_occurrences(What,L)

  Returns the number of occurrences of elements What in the list L.

  Note: For many calls to count_occurrences/2 it might be better
        to use get_freq(L) which returns a map of the counts in L.

*/
count_occurrences(What,L) = [1 : T in L, T == What].sum.

/*
  count_occurrences_list(What,L)

  Return the number of occurrences for each element in What.
*/
count_occurrences_list(What,L) = [count_occurrences(W,L) : W in What].

%
% Count the number of matching sublists sub in the list lst
%
count_occurrences_sublist(Sub,L) = [S : S in sublists(L,Sub.len), S == Sub].len.

%
% simplex(L)
% to_probabilities(L)
%
% Normalize the values in L to sum to 1.
% 
simplex(L) = [ V / Sum : V in L] =>
  Sum = L.sum.
to_probabilities(L) = simplex(L).


/*
  case(Val, List)

  Given a list List of different alternative [V,R] results,
  case/2 returns the value R for this Val == V.

  Example:
  Toss = case(Coin,[["double-head","head"],
                    ["double-tail","tail"],
                    [default,categorical([1/2,1/2],["head","tail"])]
                    ])

*/
case(Val,List) = R =>
  if member([Val,R],List) then
    true
  else
    member([default,R],List)
  end.

/*

  cases(L)
  The list L contains pair of [condition,return value],
  and returns the first return value that has a matching 
  condition.

  Example:
  FlipCoin = cases([[Coin=="double-head","head"],
                    [Coin=="double-tail","tail"],
                    [Toss=="tail","head"],
                    [Toss=="head","tail"]])

*/
cases(L) = Ret =>
   Ret = _,
   Found = false,
   foreach([V,T] in L, break(Found != false))
     if cond(V,true,false) == true then
       Ret = T,       
       Found := true
     end
   end.

/*
  Wrapper for cond(..=true,...,...)
  i.e. skipping the == part
*/
condt(Cond,Then,Else) = cond(Cond==true,Then,Else).

/*
  check(Cond) 
  Return true or false depending on the truthfullness of the condition Cond.

  (An even shorter variant of cond/3.)

*/
check(Cond) = cond(Cond, true, false).
check1(Cond) = cond(Cond, 1, 0).

/*
  mod_replace(Val,Mod)

  Return Mod if Val mod Mod = 0, else Val mod Mod.

*/
mod_replace(Val,Mod) = cond(Res==0,Mod,Res) =>
  Res = Val mod Mod.


/*
  xor(A,B) 
  Exactly one of A or B is true, but not both.
*/
xor(A,B) = cond( ((A,not B) ; (not A, B)),true,false).


% Boolean -> Integer
b2i(V) = cond(V==true,1,0).


%
% Swap position I <=> J in list L
%
swap(L,I,J) = L2, list(L) =>
  L2 = L,
  T = L2[I],
  L2[I] := L2[J],
  L2[J] := T.

% Returns a shuffled list
shuffle(L) = shuffle1(L).shuffle1. % One loop does not seems to be enough...
shuffle1(List) = List2 => 
  List2 = List,
  Len = List.length,
  foreach(I in 1..Len) 
    R2 = random(1,Len),
    List2 := swap(List2,I,R2)
  end.


/*
  all_different(L)

  True if all values in L are different.

*/
all_different(L) = L.remove_dups.len == L.len.


/*
  debug(S) 
  Simple debugging (in function context):
  Prints S and return the value S.

*/
debug(S) = S => println(S).
debug(Text,S) = S => println(Text=S).


/*
  rep(N,V)

  Return N copies of the value V
*/
rep(N,V) = [V : _ in 1..N].


/*
  print_list(L)
  print each values in the list at a separate line
*/
print_list(L) =>
  foreach(E in L)
    println(E)
  end.

/*
  printf_list(L) 
  (Trying to) print a little more intelligent output of the list L (than print_list/1)

*/

check_format(V) = F =>
    if integer(E) then
      F = "%2d "
    elseif float(E) then
      F ="%.17f "
    else
      F = "%2w "
    end.

printf_list(L) =>
  Format = "",
  First = L[1],
  if list(First) ; compound(First) then
    foreach(E in L.first)
      Format := Format ++ check_format(E)
    end
  else
    if number(First) then
      Format := Format ++ check_format(First)
    else
      Format := Format ++ "%w "
    end
  end,
  Format := Format ++ "\n",
  foreach(E in L)  
    if E = $(V '=' P) then
      call(printf,Format,V,P)
    else
      call(printf,Format,E)    
    end
  end.


/*
  index_of_sub(List,Sub)

  Return the first position that sub list Sub occurs in list A
  If there is no such occurrence, return the length of a
*/
index_of_sub(List,Sub) = Res =>
  if once(append(B,Sub,_,List)) then
    % Res = B.len+1, % return position of the start of the sub
    % This is what my Gamble returned:
    Res = B.len+Sub.len % return position of the end of the sub
  else
    Res = List.len
  end.


/*
  sublists(L,N)

  Returns all N sized sub lists of L.

*/
sublists(L,N) = [  L[I..I+N-1] : I in 1..Len-N+1] => Len = L.len.


/*
  ones(N,Val) 

  Return a list of N Val
*/
ones(N,Val) = [Val : _ in 1..N].


/*
  num_runs(A) 

  returns the number of runs in the list A.
*/
num_runs([]) = 0.
num_runs(A) = num_runs(A.head,A.tail,0).
num_runs(T,A,Acc) = Res =>
  if A.len == 0 then
    Res = cond(T == A, Acc,Acc+1)     
  else
    FirstA = A.head,
    RestA  = A.tail,
    Res = num_runs(FirstA,RestA,Acc + cond(T == FirstA,0,1))
  end.


%
% argmin/argmax
% find the index/indices for the min/max value(s) of L
% 
% Picat> argmin([1,2,3,1000,4,5])=X
% X = [1]
% 
argmin(L) = MinIxs =>
  Min = min(L),
  MinIxs = [I : I in 1..L.length, L[I] == Min].


argmax(L) = MaxIxs =>
  Max = max(L),
  MaxIxs = [I : I in 1..L.length, L[I] == Max].

%
% argmin_random_ties/argmax_random_ties
%
% find the index/indices for the min/max value(s) of L.
% For ties, pick any of the candicates
%
argmin_random_ties(L) = Res =>
  M = argmin(L),
  Res = uniform_draw(M).

argmax_random_ties(L) = Res =>
  M = argmin(L),
  Res = uniform_draw(M).

/*
  argsort(L) = P
  P contains the indices to permute L to a sorted list.
   L[P[1]] represents the smallest value in L,
   L[P[2]] represents the second smallest value in L, etc.

  Picat> L=[2,3,1,5,4], L.argsort=P
  L = [2,3,1,5,4]
  P = [3,1,2,5,4]

*/
argsort(L) = zip(L,1..L.len).sort.map(second).


/*
  check_duplicates(L)

  Returns true if there are any duplicates in list L, else false.
*/
check_duplicates(L) = cond(L.remove_dups.len != L.len,true,false).


%
% (equivalence s1 s2)
% Equivalence of boolean statements: s1 <=> s2
% i.e. if s1 is true -> s2 is true
%      if s2 is true -> s1 is true
%
equivalence(S1,S2) =>
  if cond(S1,true,false) == cond(S2,true,false) then
    observe(true)
  else
    observe(false)
  end.

%
% (implication s1 s2)
% Implication of boolean statements: s1 => s2
% i.e. if s1 is true -> s2 is true
% Supports both "plain" conditions as well as lambda based conditions
%
implication(S1,S2) => 
  % Convert lambda based condition to plain conditions
  if cond(S1 == true,true,false) == true then observe( cond(S2==true,true,false) == true) end. 


% Pearson correlation coefficient
% correlation(Xs, Ys)
%   Xs, Ys : lists of equal length with numeric values
%
correlation_coefficient(Xs, Ys) = R =>
    LenX = length(Xs),
    LenY = length(Ys),
    if LenX =\= LenY then
        throw($error('correlation: lists must have same length'))
    elseif LenX =:= 0 then
        throw($error('correlation: empty lists'))
    else
        MeanX = sum(Xs) / LenX,
        MeanY = sum(Ys) / LenY,

        % deviations
        DXs = [X - MeanX : X in Xs],
        DYs = [Y - MeanY : Y in Ys],

        % numerator: sum((X - MeanX)*(Y - MeanY))
        Num = sum([DXs[I] * DYs[I] : I in 1..LenX]),

        % denominator: sqrt(sum(dx^2) * sum(dy^2))
        DenX = sqrt(sum([DX * DX : DX in DXs])),
        DenY = sqrt(sum([DY * DY : DY in DYs])),

        if DenX =:= 0.0 ; DenY =:= 0.0 then
            throw($error('correlation: zero variance in one list'))
        else
            R = Num / (DenX * DenY)
        end
    end.


/*
% CHECK THIS
% Sample covariance (population form uses /N; sample uses /(N-1))
covariance(Xs, Ys) = Cov =>
    Len = length(Xs),
    if Len =\= length(Ys) then
        throw($error('covariance: lists must have same length'))
    elseif Len =:= 0 then
        throw($error('covariance: empty lists'))
    else
        MeanX = sum(Xs)/Len, MeanY = sum(Ys)/Len,
        Cov = sum([(Xs[I]-MeanX)*(Ys[I]-MeanY) : I in 1..Len]) / Len
    end.
*/

/*
% CHECK THIS
% (Optional) sample correlation uses sample std devs; algebraically,
% the / (N-1) factors cancel, so it equals the same r as before.
sample_correlation(Xs, Ys) = R =>
    Len = length(Xs),
    if Len =\= length(Ys) then
        throw($error('sample_correlation: lists must have same length'))
    elseif Len < 2 then
        throw($error('sample_correlation: need at least 2 points'))
    else
        MeanX = sum(Xs)/Len, MeanY = sum(Ys)/Len,
        DX = [X-MeanX : X in Xs], DY = [Y-MeanY : Y in Ys],
        Num = sum([DX[I]*DY[I] : I in 1..Len]),
        Sxx = sum([DX[I]*DX[I] : I in 1..Len]),
        Syy = sum([DY[I]*DY[I] : I in 1..Len]),
        if Sxx =:= 0.0 ; Syy =:= 0.0 then
            throw($error('sample_correlation: zero variance in one list'))
        else
            R = Num / sqrt(Sxx*Syy)   % same as correlation/2
        end
    end.
*/


/*
  but_last(L) 
  Return all but the last element in L
*/
but_last(L) = L[1..L.length-1].

/*
  rest(L) 
  Returns the rest/tail of the list L

*/
rest(L) = tail(L).


/*
  take_last(A,N)

  Returns the last N elements of the list A.
*/
take_last(A,N) = Res =>
  if A.len < N then
    Res = []
  else
    Len = A.len,
    Res = A[Len-N+1..Len]
  end.


/*
  get_runs(As)
  Return the runs of the list As

*/
get_runs(As) = get_runs1(As.rest, [[As.first]]).
get_runs1(As,Aux) = Res =>
  if As == [] then
    Res = Aux
  else
    ButLastAux = but_last(Aux),
    LastAux    = last(Aux),
    AuxVal     = LastAux.first,
    A          = As.first,
    if AuxVal == A then
      Res = get_runs1(As.rest,ButLastAux ++ [[A]++LastAux])
    else
      Res = get_runs1(As.rest,Aux ++ [[A]])
    end
  end.

% Return the lengths of the runs in Xs
get_runs_lens(Xs) = get_runs(Xs).map(len).

/*
  get_runs_op(Xs,Op)

  This is a generalized version of get_runs/1, which is the same as get_runs_op(Xs,$=).
  The Op can be about anything, but it must adhere to the type of the elements in the list Xs.

*/
get_runs_op(Xs,Op) = get_runs_op(Xs.rest,[[Xs.first]],Op).
get_runs_op(As,Aux,Op) = Res =>
  if As == [] then
    Res = Aux
  else
    ButLastAux = but_last(Aux),
    LastAux    = last(Aux),
    AuxVal     = first(LastAux),
    A          = first(As),
    F =.. [Op,AuxVal,A],
    if cond(F.call,true,false) == true then
      Res = get_runs_op(As.rest,Aux ++ [[A]],Op)
    else
      Res = get_runs_op(As.rest,ButLastAux ++ [LastAux++[A]],Op)
    end
  end.


% Generate N 0/1 runs with probability P of success
generate_01_runs(N,P) = get_runs(bern_n(P,N)).

% Return the runs with value vals
filter_runs(Runs,Val) = [Run : Run in Runs, Run[1] == Val].


/*
  Records

*/


/*
  get_records(A) 
  get_records(A,Op) 

  Return all records in the list A
  * Op: The comparison operator, default $(>)

  Note: This supports the operations: <, <=, !=, >, >= 
  (and '=' but that only compare against the first element)

  And also user defined operators.

*/
get_records(A) =  get_records(A.rest,A.first,[A.first],$(>)).
get_records(A,Op) = get_records(A.rest,A.first,[A.first],Op).
get_records(A,MaxVal,Aux,Op) = Res =>
  if A == [] then
    Res = Aux
  else
    Val = A.first,
    F =.. [Op,Val,MaxVal],
    if cond(F.call,true,false) == true then
      Res = get_records(A.rest,Val,Aux++[Val],Op)
    else
      Res = get_records(A.rest,MaxVal,Aux,Op)
    end
  end.
  
/*
  get_records_geq(A) 

  Weak records, i.e. a record is defined as >= instead of >
*/
get_records_geq(A) = get_records(A,($>=)).
  
/*
  get_records_ix(A) 
  get_records_ix(A,Op) 

  Return all indices of the records in the list A
  * Op: The comparison parameter, default $(>)

  Note: This supports the operations: <, <=, !=, >, >= 
  (and '=' but that only compare against the first element)

  And also user defined operators.

*/
get_records_ix(A) =  get_records_ix(2,A.rest,A.first,[1],$(>)).
get_records_ix(A,Op) =  get_records_ix(2,A.rest,A.first,[1],Op).
get_records_ix(Ix,A,MaxVal,Aux,Op) = Res =>
  if A == [] then
    Res = Aux
  else
    Val = A.first,
    F =.. [Op,Val,MaxVal],
    if cond(F.call,true,false) == true then
      Res = get_records_ix(Ix+1,A.rest,Val,Aux++[Ix],Op)
    else
      Res = get_records_ix(Ix+1,A.rest,MaxVal,Aux,Op)
    end
  end.

% >= instead of >
get_records_ix_geq(A) = get_records_ix(A,$(>=)).


/*
  comb_sum(Len,N,Sum)

  Return a list of all valid combinations of length len with 
  the values 0..N that sums to Sum

*/
comb_sum(Len,N,Sum) = solve_all(X).sort =>
  X = new_list(Len),
  X :: 0..N,
  sum(X) #= Sum.


/*
   Show the plot_ascii_scatter plot.

   Config: scatter_plot_size

*/
show_scatter_plot(Data) =>
  [Width,Height] = get_config(scatter_plot_size,[60,20]),  
  show_scatter_plot(Data,Width,Height).
show_scatter_plot(Data,Width,Height) =>
  println("Scatter plot:"),
  nl,
  if (list(Data[1]),not number(Data[1,1]))
     ;
     (not list(Data[1]), not number(Data[1]))
     then
     println("show_scatter_plot: Data is not numeric")
  else  
    [Plot,MinX,MaxX,_MinY,_MaxY] = plot_ascii_scatter(Data,Width,Height),
    foreach(P in Plot)
      println(P)
    end,
    println(x=[MinX,MaxX]),
    nl
  end.


/*
------------------------------------------------------------
plot_ascii_scatter(Data, Width, Height)
------------------------------------------------------------
Creates an ASCII scatter plot from a list of (X,Y) points.

Parameters:
  Data   : list of [X,Y] pairs, numeric
  Width  : number of columns in the plot  (e.g., 60)
  Height : number of rows in the plot     (e.g., 20)

Features:
  * Automatic scaling of X and Y axes
  * Uses '.' for empty space and '*' for plotted points
  * Points outside the bounding box are ignored
  * Returns a list of strings (each row)
  * To print, use: foreach(Row in Plot) println(Row) end

Example:
  Data = [[1,2],[2,3],[3,5],[4,4]],
  Plot = plot_ascii_scatter(Data,60,20),
  foreach(R in Plot) println(R) end.
------------------------------------------------------------
*/

plot_ascii_scatter(Data) = plot_ascii_scatter(Data, 60, 20).
plot_ascii_scatter(Data, Width, Height) = [Plot,MinX,MaxX,MinY,MaxY] =>
    if not list(Data) then
        throw($error('domain_error(list,Data),plot_ascii_scatter/3'))
    elseif Width =< 1 ; Height =< 1 then
        throw($error('domain_error(size,[Width,Height]),plot_ascii_scatter/3'))
    else
        if number(Data[1]) then
          Data := [ [I,Data[I]] : I in 1..Data.len]
        end,
        
        % Extract X and Y
        % Xs = [X : [X,_] in Data],
        % Ys = [Y : [_,Y] in Data],
        [Xs,Ys] = Data.transpose,

        MinX = min(Xs),
        MaxX = max(Xs),
        MinY = min(Ys),
        MaxY = max(Ys),

        DX = MaxX - MinX,
        DY = MaxY - MinY,

        % Avoid division by zero by expanding degenerate ranges
        if DX =:= 0.0 then DX := 1.0 end,
        if DY =:= 0.0 then DY := 1.0 end,

        % Initialize blank grid as list of lists
        Grid0 = [ [" " : _ in 1..Width] : _ in 1..Height ],

        % Plot points
        Grid = plot_ascii_points(Data,Grid0,Width,Height,MinX,MinY,DX,DY),

        % Convert rows to strings
        Plot = [ymax=MaxY],
        Plot := Plot ++ [ "|" ++ Row.join('') : Row in Grid ],
        Plot := Plot ++ [ymin=MinY]
    end.


/*
------------------------------------------------------------
plot_ascii_points(Data,Grid,Width,Height,MinX,MinY,DX,DY)
------------------------------------------------------------
Internal helper: places '*' into the grid.
------------------------------------------------------------
*/
plot_ascii_points([],Grid,_,_,_,_,_,_) = Grid.
plot_ascii_points([[X,Y]|Rest],Grid0,Width,Height,MinX,MinY,DX,DY) = Grid =>
    % Convert numeric coordinates to grid coordinates
    GXfloat = (X - MinX) * (Width - 1) / DX,
    GYfloat = (Y - MinY) * (Height - 1) / DY,

    GX = round(GXfloat),
    GY = Height - 1 - round(GYfloat),   % Flip Y-axis (top=0)

    if GX >= 0, GX < Width, GY >= 0, GY < Height then
        % Replace '.' with '*' at (GY,GX)
        Row = Grid0[GY+1],
        NewRow = replace_nth(Row,GX+1,"*"),
        Grid1 = replace_nth(Grid0,GY+1,NewRow),
        Grid = plot_ascii_points(Rest,Grid1,Width,Height,MinX,MinY,DX,DY)
    else
        % Point outside plot → ignore
        Grid = plot_ascii_points(Rest,Grid0,Width,Height,MinX,MinY,DX,DY)
    end.


/*
------------------------------------------------------------
replace_nth(List,N,Val)
------------------------------------------------------------
Replaces the Nth element (1-based) of a list.
------------------------------------------------------------
*/
replace_nth([_|Rest],1,Val) = [Val|Rest] => true.
replace_nth([H|Rest],N,Val) = [H|replace_nth(Rest,N-1,Val)] =>
    N > 1.


/*

Notes on the effect of dataset size in ABC distance metrics


The ABC distance measures used here (Wasserstein, normalized RMSE,
KS, and the combined abc_fit_score) behave differently depending on
the sample sizes of the observed data and the posterior predictive
data. The following summarizes their sensitivity.

1. Wasserstein distance (mean absolute percentile difference)
   - Very robust to differences in sample size.
   - Depends mainly on the underlying distribution, not on how many
     samples were drawn.
   - Works well even when the two datasets have different lengths.
   - Becomes noisy only when a dataset is very small (< 50 samples).

2. Normalized RMSE over percentiles
   - Behaves similarly to the Wasserstein distance.
   - Mostly insensitive to unequal dataset sizes.
   - Good global measure of mismatch.

3. Kolmogorov-Smirnov (KS) statistic
   - Moderately sensitive to sample-size differences.
   - KS uses the maximum difference between empirical CDFs; the CDF
     of a smaller dataset is more "steppy", which increases noise.
   - Still reliable unless the smaller dataset is very small (< 30).

4. Combined abc_fit_score (Wnorm + KS)
   - Dominated by the normalized Wasserstein part (Wnorm).
   - Only slightly affected by KS noise when dataset sizes differ.
   - Typical variation due to sample-size differences is small
     (often around 0.02 to 0.05 in the score).
   - Remains stable for practical ABC usage.

Rule of thumb:
   * Data size >= 50    gives reasonable stability.
   * Data size >= 100   gives very good stability.
   * Posterior predictive size >= 1000 is ideal.

In summary:
   Wasserstein and normalized RMSE are highly robust to unequal
   sample sizes, KS is moderately sensitive, and the abc_fit_score
   remains stable because the Wasserstein component dominates.

*/


/*
  The RMSE over percentiles measures the typical difference between
  the empirical percentiles of the observed data and those of the
  posterior predictive sample. The value is always expressed in the
  same units as the data.

  Small RMSE means the two distributions are similar. As a rule of
  thumb:
  - RMSE < 0.5 * stdev(data): very good agreement
  - RMSE ≈ stdev(data): moderate mismatch
  - RMSE > stdev(data): poor match
  - RMSE << IQR/10: excellent match across the entire distribution

  To compare across different scales, use RMSE / stdev(data), which
  behaves like an effect size.

*/
rmse_percentiles(Data,Post) = RMSE =>
  Qs = [Q : Q in 0.01..0.01..0.99],
  PData = [data_quantile(Data,Q) : Q in Qs],
  PPost = [data_quantile(Post,Q) : Q in Qs],  
  RMSE = sqrt( sum([ (Pi - Qi)**2 : {Pi,Qi} in zip(PData,PPost)]) / length(PData) ).

/*
 Relationship to “effect size”

 RMSE can be interpreted similarly to Cohen’s d or effect size measures:

 Normalized_RMSE = RMSE / stdev(data)

 Meaning:
 < 0.2 -> tiny difference
 0.2–0.5 -> small
 0.5–0.8 -> moderate
 0.8 -> large
 1.2 - very large
 2.0 -> distributions do not match at all

  This helps compare RMSE across different scales.
*/
normalized_rmse(Data,Post) = rmse_percentiles(Data,Post) / stdev(Data).

/*
--------------------------------------------------------------------
normalized_rmse_explained(NRMSE)
--------------------------------------------------------------------
Given a normalized RMSE (RMSE / stdev(data)),
returns a textual interpretation.

Thresholds (interpreting effect size):
  NRMSE < 0.20  -> very good match
  0.20–0.50     -> good
  0.50–0.80     -> moderate mismatch
  0.80–1.20     -> strong mismatch
  >1.20         -> poor match / distributions differ a lot

Returns a string with a brief explanation.

Example:
  normalized_rmse_explained(0.09)
  "Excellent match (very small difference between distributions)."

  normalized_rmse_explained(0.75)
  "Moderate mismatch; distributions differ noticeably."
--------------------------------------------------------------------
*/

normalized_rmse_explained(NRMSE) = Msg =>
    if NRMSE < 0.0 then
        throw($error('domain_error(non_negative,NRMSE),normalized_rmse_explained/1'))
    elseif NRMSE < 0.20 then
        Msg = "Excellent match (very small difference between distributions)."
    elseif NRMSE < 0.50 then
        Msg = "Good match; only small differences across the distribution."
    elseif NRMSE < 0.80 then
        Msg = "Moderate mismatch; distributions differ noticeably."
    elseif NRMSE < 1.20 then
        Msg = "Strong mismatch; posterior predictive is not close to the data."
    else
        Msg = "Poor match; distributions are substantially different."
    end.


/*
--------------------------------------------------------------------
abc_fit_score(Data, Ys)
--------------------------------------------------------------------
Computes a single scalar measuring how well Ys (posterior predictive)
matches Data (observed data).

Components:
  W = Wasserstein distance over percentiles (global mismatch)
  KS = Kolmogorov--Smirnov statistic (max CDF difference)
  Wnorm = W / stdev(Data)   (scale-free)

Score = Wnorm + KS

Interpretation:
  0.00–0.10  excellent
  0.10–0.30  good
  0.30–0.60  moderate
  0.60–1.00  poor
  >1.00      very poor
--------------------------------------------------------------------
*/

abc_fit_score(Data, Post) = Score =>
    Qs = [Q : Q in 0.01..0.01..0.99],
    P = [data_quantile(Data,Q) : Q in Qs],
    Q = [data_quantile(Post,Q) : Q in Qs],  

    % Wasserstein distance
    W = sum([abs(Pi - Qi) : {Pi,Qi} in zip(P,Q)]) / length(P),

    % Normalize by stdev(Data)
    SD = stdev(Data),
    Wnorm = cond(SD =:= 0.0, 0.0, W / SD),

    % KS statistic (max CDF difference)
    F  = [I/100.0 : I in 1..99],
    KS = max([abs(Fi - Fj) : {Fi,Fj} in zip(F,F)]),  % Actually needs both CDFs

    % Real KS between empirical CDFs:
    KS := max([abs((I/100.0) - (J/100.0)) : {I,J} in zip(P,Q)]),

    Score = Wnorm + KS.


/*
====================================================================
abc_fit_score_explained(Data, Ys)
====================================================================
Computes a distribution-agnostic ABC goodness-of-fit score
based on:

  Wnorm : normalized Wasserstein distance over percentiles
  KS    : Kolmogorov-Smirnov statistic

Score = Wnorm + KS

Returns:
  [Score, Explanation]

Interpretation:
  Score < 0.10  : excellent fit
  Score < 0.30  : good fit
  Score < 0.60  : moderate mismatch
  Score < 1.00  : strong mismatch
  Score >= 1.00 : poor fit

Notes:
  - Wasserstein distance measures global mismatch
  - KS distance measures local maximum mismatch
  - Using percentiles makes it distribution-free
====================================================================
*/

abc_fit_score_explained(Data, Ys) = [Score, Msg] =>
    if not list(Data) ; not list(Ys) then
        throw($error('domain_error(list,[Data,Ys]),abc_fit_score_explained/2'))
    end,

    % Percentile grid
    Ps = data_percentiles(Data,1..99),
    Qs = data_percentiles(Ys,  1..99),

    % --- Wasserstein distance (global difference)
    W = sum([abs(Pi - Qi) : {Pi,Qi} in zip(Ps,Qs)]) / length(Ps),

    % --- Normalize by stdev(Data)
    SD = stdev(Data),
    Wnorm = cond(SD =:= 0.0, 0.0, W / SD),
    % --- KS statistic (max CDF difference)
    %     Using empirical CDFs based on sorted data
    KS = ks_distance(Data,Ys),
    Score = Wnorm + KS,
    Msg = interpret_fit_score(Score).


/*
====================================================================
ks_distance(Data, Ys)
====================================================================
Computes the empirical Kolmogorov-Smirnov distance.

This uses a linear-time merge walk over sorted Data and Ys.
====================================================================
*/
ks_distance(Data, Ys) = KS =>
    SortedD = sort(Data),
    SortedY = sort(Ys),

    ND = length(SortedD),
    NY = length(SortedY),

    I = 1,
    J = 1,
    KS0 = 0.0,

    while (I =< ND ; J =< NY)
        Xd = cond(I =< ND, SortedD[I], 1.0e100),
        Xy = cond(J =< NY, SortedY[J], 1.0e100),

        if Xd =< Xy then
            Cd = I / ND,
            Cy = (J-1) / NY,
            Diff = abs(Cd - Cy),
            if Diff > KS0 then KS0 := Diff end,
            I := I + 1
        else
            Cd = (I-1) / ND,
            Cy = J / NY,
            Diff = abs(Cd - Cy),
            if Diff > KS0 then KS0 := Diff end,
            J := J + 1
        end
    end,
    KS = KS0.


/*
====================================================================
interpret_fit_score(Score)
====================================================================
Human-readable interpretation of ABC fit score.
====================================================================
*/
interpret_fit_score(S) = Msg =>
    if S < 0.10 then
        Msg = "Excellent fit; posterior predictive matches the data very closely."
    elseif S < 0.30 then
        Msg = "Good fit; only small distributional differences."
    elseif S < 0.60 then
        Msg = "Moderate mismatch; distributions differ in shape."
    elseif S < 1.00 then
        Msg = "Strong mismatch; posterior predictive is noticeably different."
    else
        Msg = "Poor fit; distributions differ substantially."
    end.


/*
  Analyse posterior and compare with data.
*/
analyse_post(Data) =>
  analyse_post(Data,get_store().get("post")).
analyse_post(Data,Post) =>
  println("Post:"),
  % println(Post),
  show_simple_stats(Post),
  show_histogram(Post),
  show_scatter_plot(Post),  
  % println(rmse=rmse_percentiles(Data,Post)),
  RMSENormalized = normalized_rmse(Data,Post),
  println(normalized_rmse=RMSENormalized),
  println(normalized_rmse_explained(RMSENormalized)),
  % println(abc_fit_score=abc_fit_score(Data, Post)),
  println(abc_fit_score_explained=abc_fit_score_explained(Data, Post)),
  nl.


analyse_post_simple(Data) =>
  analyse_post_simple(Data,get_store().get("post")).
analyse_post_simple(Data,Post) =>
  println("Post:"),
  RMSENormalized = normalized_rmse(Data,Post),
  println(normalized_rmse=RMSENormalized),
  println(normalized_rmse_explained(RMSENormalized)),
  println(abc_fit_score_explained=abc_fit_score_explained(Data, Post)),
  nl.

analyze_post(Data) => analyse_post(Data).
analyze_post(Data,Post) => analyse_post(Data,Post).

analyze_post_simple(Data) => analyse_post_simple(Data).
analyze_post_simple(Data,Post) => analyse_post_simple(Data,Post).


% Return a random markov chain with transitions transitions and init-states init-state
markov_chain_loop(Transitions,N,A) = Res =>
  if N == 0 then
    Res = A
  else
    State = A.last,
    Len = Transitions.first.len,
    Next = categorical(Transitions[State],1..Len),
    Res = markov_chain_loop(Transitions,N-1,A ++ [Next])
  end.
markov_chain(Transitions,InitState,N) = Res =>
  Init = categorical(InitState,1..Transitions.len),
  Res = markov_chain_loop(Transitions,N-1,[Init]).

markov_chain_n(Transitions,InitState,N,Num) = [markov_chain(Transitions,InitState,N) : _ in 1..Num].


/* 
  one_hot(N,I)

  Create a one hot list of length n and the single 1 in position i (1-based)
  Example
  Picat> one_hot(10,5)
  [0,0,0,0,1,0,0,0,0,0]
*/
one_hot(N,I) = Res =>
  A = ones(N,0),
  A[I] := 1,
  Res = A.


/*
  Generate a random transition matrix.
*/
random_transition_matrix(N) = random_transition_matrix(N,1/N).
random_transition_matrix(N,V) = M =>
  Alpha = ones(N,V),
  M = [ dirichlet_dist(Alpha) : _ in 1..N].


/*
  Print a matrix
*/
show_matrix(M) => show_matrix(M,"Matrix").
show_matrix(M,Name) =>
  println(Name),
  foreach(Row in M)
    println(Row)
  end,
  nl.

% Alias
print_matrix(M) => show_matrix(M).
print_matrix(M,Name) => show_matrix(M,Name).


/* 
  find_first(L,Val,Default)

  Returns the first occurrence (position) of val in list a.
  If no occurrence, return default value.
*/
find_first(L,Val,Default) = cond(Pos > 0, Pos, Default) =>
  Pos = find_first_of(L,Val).

/*
  overlap(A1,A2,B1,B2)

  Return true if there is an overlap between [Start1,End1] and [Start2,End2].
*/
overlap(Start1,End1,Start2,End2) = cond(max(Start1,Start2) <= min(End1,End2),true,false).