Expandindo a excelente resposta de @ juho-kokkala e usando R aqui estão os resultados. Para uma distribuição prévia da população, a média mu utilizou uma mistura igual de duas normais com média zero, uma delas muito cética em relação às médias grandes.
## Posterior density for a normal data distribution and for
## a mixture of two normal priors with mixing proportions wt and 1-wt
## and means mu1 mu2 and variances v1 an
## Adapted for LearnBayes package normal.normal.mix function
## Produces a list of 3 functions. The posterior density and cum. prob.
## function can be called with a vector of posterior means and variances
## if the first argument x is a scalar
mixpost <- function(stat, vstat, mu1=0, mu2=0, v1, v2, wt) {
if(length(stat) + length(vstat) != 2) stop('improper arguments')
probs <- c(wt, 1. - wt)
prior.mean <- c(mu1, mu2)
prior.var <- c(v1, v2)
post.precision <- 1. / prior.var + 1. / vstat
post.var <- 1. / post.precision
post.mean <- (stat / vstat + prior.mean / prior.var) / post.precision
pwt <- dnorm(stat, prior.mean, sqrt(vstat + prior.var))
pwt <- probs * pwt / sum(probs * pwt)
dMix <- function(x, pwt, post.mean, post.var)
pwt[1] * dnorm(x, mean=post.mean[1], sd=sqrt(post.var[1])) +
pwt[2] * dnorm(x, mean=post.mean[2], sd=sqrt(post.var[2]))
formals(dMix) <- z <-
list(x=NULL, pwt=pwt, post.mean=post.mean, post.var=post.var)
pMix <- function(x, pwt, post.mean, post.var)
pwt[1] * pnorm(x, mean=post.mean[1], sd=sqrt(post.var[1])) +
pwt[2] * pnorm(x, mean=post.mean[2], sd=sqrt(post.var[2]))
formals(pMix) <- z
priorMix <- function(x, mu1, mu2, v1, v2, wt)
wt * dnorm(x, mean=mu1, sd=sqrt(v1)) +
(1. - wt) * dnorm(x, mean=mu2, sd=sqrt(v2))
formals(priorMix) <- list(x=NULL, mu1=mu1, mu2=mu2, v1=v1, v2=v2, wt=wt)
list(priorMix=priorMix, dMix=dMix, pMix=pMix)
}
## mixposts handles the case where the posterior distribution function
## is to be evaluated at a scalar x for a vector of point estimates and
## variances of the statistic of interest
## If generates a single function
mixposts <- function(stat, vstat, mu1=0, mu2=0, v1, v2, wt) {
post.precision1 <- 1. / v1 + 1. / vstat
post.var1 <- 1. / post.precision1
post.mean1 <- (stat / vstat + mu1 / v1) / post.precision1
post.precision2 <- 1. / v2 + 1. / vstat
post.var2 <- 1. / post.precision2
post.mean2 <- (stat / vstat + mu2 / v2) / post.precision2
pwt1 <- dnorm(stat, mean=mu1, sd=sqrt(vstat + v1))
pwt2 <- dnorm(stat, mean=mu2, sd=sqrt(vstat + v2))
pwt <- wt * pwt1 / (wt * pwt1 + (1. - wt) * pwt2)
pMix <- function(x, post.mean1, post.mean2, post.var1, post.var2, pwt)
pwt * pnorm(x, mean=post.mean1, sd=sqrt(post.var1)) +
(1. - pwt) * pnorm(x, mean=post.mean2, sd=sqrt(post.var2))
formals(pMix) <-
list(x=NULL, post.mean1=post.mean1, post.mean2=post.mean2,
post.var1=post.var1, post.var2=post.var2, pwt=pwt)
pMix
}
## Compute proportion mu > 0 in trials for
## which posterior prob(mu > 0) > 0.95, and also use a loess smoother
## to estimate prob(mu > 0) as a function of the final post prob
## In sequential analyses of observations 1, 2, ..., N, the final
## posterior prob is the post prob at the final sample size if the
## prob never exceeds 0.95, otherwise it is the post prob the first
## time it exceeds 0.95
sim <- function(N, prior.mu=0, prior.sd, wt, mucut=0, postcut=0.95,
nsim=1000, plprior=TRUE) {
prior.mu <- rep(prior.mu, length=2)
prior.sd <- rep(prior.sd, length=2)
sd1 <- prior.sd[1]; sd2 <- prior.sd[2]
v1 <- sd1 ^ 2
v2 <- sd2 ^ 2
if(plprior) {
pdensity <- mixpost(1, 1, mu1=prior.mu[1], mu2=prior.mu[2],
v1=v1, v2=v2, wt=wt)$priorMix
x <- seq(-3, 3, length=200)
plot(x, pdensity(x), type='l', xlab=expression(mu), ylab='Prior Density')
title(paste(wt, 1 - wt, 'Mixture of Zero Mean Normals\nWith SD=',
round(sd1, 3), 'and', round(sd2, 3)))
}
j <- 1 : N
Mu <- Post <- numeric(nsim)
stopped <- integer(nsim)
for(i in 1 : nsim) {
# See http://stats.stackexchange.com/questions/70855
component <- sample(1 : 2, size=1, prob=c(wt, 1. - wt))
mu <- prior.mu[component] + rnorm(1) * prior.sd[component]
# mu <- rnorm(1, mean=prior.mu, sd=prior.sd) if only 1 component
Mu[i] <- mu
y <- rnorm(N, mean=mu, sd=1)
ybar <- cumsum(y) / j
pcdf <- mixposts(ybar, 1. / j, mu1=prior.mu[1], mu2=prior.mu[2],
v1=v1, v2=v2, wt=wt)
if(i==1) print(body(pcdf))
post <- 1. - pcdf(mucut)
Post[i] <- if(max(post) < postcut) post[N]
else post[min(which(post >= postcut))]
stopped[i] <- if(max(post) < postcut) N else min(which(post >= postcut))
}
list(mu=Mu, post=Post, stopped=stopped)
}
# Take prior on mu to be a mixture of two normal densities both with mean zero
# One has SD so that Prob(mu > 1) = 0.1
# The second has SD so that Prob(mu > 0.25) = 0.05
prior.sd <- c(1 / qnorm(1 - 0.1), 0.25 / qnorm(1 - 0.05))
prior.sd
set.seed(2)
z <- sim(500, prior.mu=0, prior.sd=prior.sd, wt=0.5, postcut=0.95, nsim=10000)
mu <- z$mu
post <- z$post
st <- z$stopped
plot(mu, post)
abline(v=0, col=gray(.8)); abline(h=0.95, col=gray(.8))
hist(mu[post >= 0.95], nclass=25)
k <- post >= 0.95
mean(k) # 0.44 of trials stopped with post >= 0.95
mean(st) # 313 average sample size
mean(mu[k] > 0) # 0.963 of trials with post >= 0.95 actually had mu > 0
mean(post[k]) # 0.961 mean posterior prob. when stopped early
w <- lowess(post, mu > 0, iter=0)
# perfect calibration of post probs
plot(w, type='n', # even if stopped early
xlab=expression(paste('Posterior Probability ', mu > 0, ' Upon Stopping')),
ylab=expression(paste('Proportion of Trials with ', mu > 0)))
abline(a=0, b=1, lwd=6, col=gray(.85))
lines(w)