"""
Part 2, third activity in Parameter Fitting Tutorial
Modified by Katie Eckert from ASTR502 activity written by Sheila Kannappan
June 24, 2015
"""
import numpy as np
import matplotlib.pyplot as plt
import numpy.random as npr
plt.ion()  # interactive mode so plots display without blocking
# Generating fake data set (same as in paramfit1.py) to start with:
alphatrue=2. # slope
betatrue=5. # intercept
errs=2.5 # sigma (amplitude of errors)
narr=50 # number of data points (integer, so it can be used as an array size)
xvals = np.arange(narr) + 1.
yvals = alphatrue*xvals + betatrue + npr.normal(0,errs,narr)
plt.figure(1) # plot of fake data
plt.clf()
plt.plot(xvals,yvals,'b*',markersize=10)
plt.xlabel('xvalues')
plt.ylabel('yvalues')
# Bayesian numerical solution finding the full
# posterior probability distribution of a grid of models
# Setup the grids over parameter space
gridsize1=1000
gridsize2=100
alphaposs=np.arange(gridsize1) / 100. # candidate slopes: 0 to 9.99 in steps of 0.01
betaposs=np.arange(gridsize2) / 10. # candidate y-intercepts: 0 to 9.9 in steps of 0.1
print("min slope is %f and max slope is %f" % (np.min(alphaposs), np.max(alphaposs)))
print("min y-intercept is %f and max y-intercept is %f" % (np.min(betaposs), np.max(betaposs)))
# What are our implicit priors?
# Implicit priors are flat priors on slope and y-intercept
# Check to see that the model space from our choice of grid spacing
# is uniformly spaced by plotting lines with the different values of
# the y-intercept and slope for a line with x values ranging from (0,1)
xx=np.arange(0,1,0.1) # set up array of dummy values
# Test y-intercept spacing at fixed slope (choosing example slope=1)
plt.figure(2)
plt.clf()
plt.subplot(121)
for i in range(len(betaposs)): # loop over all y-intercept values
    plt.plot(xx,xx+betaposs[i],'b-') # plot lines with different y-intercept values
plt.xlim(0,1) # limit to small range
plt.ylim(0,1) # limit to small range
plt.xlabel("x values")
plt.ylabel("y values for several values of y-intercept (y=x+beta)")
plt.title("test y-intercept prior")
# yes - evenly spaced uniform input distribution
# Test slope at fixed y-intercept of zero
plt.subplot(122)
for i in range(len(alphaposs)): # loop over all slope values
    plt.plot(xx,alphaposs[i]*xx) # plot lines with different slope values
plt.xlim(0,1)
plt.ylim(0,0.2) # will need to zoom in to distinguish lines of different slope values
plt.xlabel("x values")
plt.ylabel("y values for several values of slope (y=alpha*x)")
plt.title("test slope prior")
# A flat prior in slope amounts to a non-flat prior on the angle theta = arctan(slope), weighting our fit more heavily toward steeper slopes
# We can then determine a prior that compensates for this unequal spacing in angle
# Read through http://jakevdp.github.io/blog/2014/06/14/frequentism-and-bayesianism-4-bayesian-in-python/ for more details on obtaining this prior
# Note that they have reversed the notation for the slope and y-intercept from our convention. The prior is written as (1+slope**2)**(-3./2.)
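# A quick numerical check (an addition to the activity, not from the original):
# the evenly spaced slope grid is far from evenly spaced in angle. With slopes
# from 0 to 9.99 in steps of 0.01, roughly 90% of the grid points correspond to
# lines steeper than 45 degrees, which is why a flat prior on slope implicitly
# favors steep fits.
import numpy as np
angles = np.degrees(np.arctan(np.arange(1000)/100.)) # angle of each grid slope
frac_steep = np.mean(angles > 45.) # fraction of the grid steeper than 45 degrees
print("fraction of slope grid with angle > 45 deg: %.2f" % frac_steep)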
# remember Bayes's theorem: P(M|D)=P(D|M)*P(M)/P(D)
# P(M|D) is the posterior probability distribution
# P(D|M) is the likelihood of the data given the model
# P(M) is the prior
# P(D) is the normalization
# For computational convenience, we'll want to compute the log of the posterior probability distribution:
# postprob=exp(-1*chisq/2.)*prior
# ln(postprob)=-1*chisq/2. + ln(prior)
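# A side note (an addition, not part of the original activity): if chisq is
# large for every model, exp(-chisq/2.) can underflow to zero across the whole
# grid. Subtracting the maximum log-posterior before exponentiating preserves
# the relative probabilities (the normalization divides out later) while
# keeping the largest term at exp(0)=1.
import numpy as np
def stable_exp(lnp):
    """Exponentiate an array of log-probabilities without underflow."""
    lnp = np.asarray(lnp)
    return np.exp(lnp - np.max(lnp))
# e.g. stable_exp could be applied to the lnpostprob arrays before marginalizing.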
# Compute the posterior probability for all possible models with two different priors
lnpostprob_flat=np.zeros((gridsize1,gridsize2)) # array to hold ln(posterior) for the flat prior
lnpostprob_comp=np.zeros((gridsize1,gridsize2)) # array to hold ln(posterior) for the compensating prior
for i in range(gridsize1): # loop over all possible values of alpha
    for j in range(gridsize2): # loop over all possible values of beta
        modelvals = alphaposs[i]*xvals+betaposs[j] # compute yfit for given model
        resids = (yvals - modelvals) # compute residuals for given grid model
        chisq = np.sum(resids**2 / errs**2) # compute chisq likelihood
        priorval_flat=1. # uniform prior
        priorval_comp=(1.+alphaposs[i]**2)**(-3./2.) # prior to compensate for unequal spacing of slope
        lnpostprob_flat[i,j] = (-1./2.)*chisq + np.log(priorval_flat)
        lnpostprob_comp[i,j] = (-1./2.)*chisq + np.log(priorval_comp)
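# A vectorized alternative (a sketch added to the activity, not the original
# method): NumPy broadcasting can replace the double loop. Below,
# alphaposs[:,None,None] has shape (gridsize1,1,1) and betaposs[None,:,None]
# has shape (1,gridsize2,1), so the model grid broadcasts against xvals to
# shape (gridsize1,gridsize2,narr), and summing over the last axis gives the
# full chisq grid in one pass.
import numpy as np
def grid_lnpost(xvals, yvals, errs, alphaposs, betaposs):
    """Return (lnpost_flat, lnpost_comp) grids of shape (len(alphaposs), len(betaposs))."""
    model = alphaposs[:, None, None]*xvals + betaposs[None, :, None]
    chisq = np.sum((yvals - model)**2 / errs**2, axis=2)
    lnprior_comp = np.log((1. + alphaposs[:, None]**2)**(-3./2.))
    return -chisq/2., -chisq/2. + lnprior_comp
# e.g. grid_lnpost(xvals, yvals, errs, alphaposs, betaposs) should reproduce
# the looped lnpostprob_flat and lnpostprob_comp to machine precision.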
# Now we have the full posterior probability distribution computed for
# each possible model using both priors.
# What if we want to know the posterior distribution for the slope?
# We can find out by "marginalizing" over the intercept, i.e., summing (integrating) the posterior over all intercept values.
# First, we take exp(lnpostprob)
postprob_flat=np.exp(lnpostprob_flat)
postprob_comp=np.exp(lnpostprob_comp)
# Second, we sum over the y-intercept parameter and normalize
marginalizedpprob_flat_slope = np.sum(postprob_flat,axis=1) / np.sum(postprob_flat)
marginalizedpprob_comp_slope = np.sum(postprob_comp,axis=1) / np.sum(postprob_comp)
# summing over axis 1 sums the probability in the y-intercept direction
# summing over the entire array gives the normalization
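# A numerical error estimate (an addition, not part of the original activity):
# instead of eyeballing the width of the distribution, we can read an
# approximate central 68% credible interval off the discrete cumulative
# distribution of a marginalized posterior.
import numpy as np
def credible_interval(paramvals, margprob, lo=0.16, hi=0.84):
    """Approximate [lo,hi] quantiles of a 1-D marginalized posterior on a grid."""
    cdf = np.cumsum(margprob) / np.sum(margprob) # discrete cumulative distribution
    return (paramvals[np.searchsorted(cdf, lo)],
            paramvals[np.searchsorted(cdf, hi)])
# e.g. credible_interval(alphaposs, marginalizedpprob_flat_slope) gives a rough
# 68% interval on the slope to compare with the eyeballed estimate.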
# Plot the marginalized posterior distribution of slope values
plt.figure(3)
plt.clf()
plt.plot(alphaposs,marginalizedpprob_flat_slope,'g.',markersize=10)
plt.plot(alphaposs,marginalizedpprob_comp_slope,'r.',markersize=10)
plt.xlabel("alpha")
plt.ylabel("marginalized posterior distribution of slope")
# zoom in on the region of significant probability
# and estimate the error from the graph
# Compare your error estimate with the error from paramfit1.py - are they similar?
# now marginalize over the slope to see the posterior distribution in y-intercept
marginalizedpprob_flat_yint = np.sum(postprob_flat,axis=0) / np.sum(postprob_flat)
marginalizedpprob_comp_yint = np.sum(postprob_comp,axis=0) / np.sum(postprob_comp)
plt.figure(4)
plt.clf()
plt.plot(betaposs,marginalizedpprob_flat_yint,'g.',markersize=10)
plt.plot(betaposs,marginalizedpprob_comp_yint,'r.',markersize=10)
plt.xlabel("beta")
plt.ylabel("marginalized posterior distribution of y-intercept")
# How do the MLE values of the slope & y-intercept compare with the marginalized posterior distributions?
#   The MLE values fall within the posterior distributions.
# How does the error on the slope & y-intercept compare with the value from the covariance matrix from paramfit1.py?
#   The uncertainties eyeballed here are comparable to the MLE uncertainties.
# What happens to the values and uncertainties of the slope and y-intercept if you change the number of
# points in your data (try N=100, N=10)?
#   Decreasing to N=10, the marginalized posterior distributions from the two priors diverge; increasing
#   to N=100, they become very similar and the posterior distributions narrow.
# What happens if you change the grid spacing (try slope ranges from 1-10 in steps of 0.1, y-intercept
# ranges from 1-10 in steps of 1)?
#   A coarser grid gives less well defined distributions; a finer grid resolves the solution better but
#   takes longer to compute.