critter twitter ( PRML) PRML PRML PRML PRML 1. 2. 3. PRML PRML 110 PRML 700 1
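PRML pdf PRML (http://critter.sakura.ne.jp)
1
1.1
$N$, $\mathsf{x}$, $\mathsf{t}$
\[ y(x, \mathbf{w}) = w_0 + w_1 x + w_2 x^2 + \cdots + w_M x^M = \sum_{j=0}^{M} w_j x^j \tag{1.1} \]
\[ E(\mathbf{w}) = \frac{1}{2} \sum_{n=1}^{N} \{ y(x_n, \mathbf{w}) - t_n \}^2 \tag{1.2} \]
$\mathbf{w}$, $\mathbf{w}^\star$
\[ E_{RMS} = \sqrt{2 E(\mathbf{w}^\star) / N} \tag{1.3} \]
\[ \tilde{E}(\mathbf{w}) = \frac{1}{2} \sum_{n=1}^{N} \{ y(x_n, \mathbf{w}) - t_n \}^2 + \frac{\lambda}{2} \|\mathbf{w}\|^2 \tag{1.4} \]
1.2
1.2.1
2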
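1.2.2
$f(x)$, $p(x)$
\[ \mathbb{E}[f] \equiv \sum_{x} p(x) f(x) \tag{1.5} \]
\[ \mathbb{E}[f] \equiv \int p(x) f(x)\,dx \tag{1.6} \]
$N$
\[ \mathbb{E}[f] \simeq \frac{1}{N} \sum_{n=1}^{N} f(x_n) \tag{1.7} \]
\[ \mathbb{E}_x[f(x, y)] \equiv \sum_{x} p(x, y) f(x, y) \tag{1.8} \]
$y$
\[ \mathbb{E}_x[f(x, y) \mid y] \equiv \sum_{x} p(x \mid y) f(x, y) \tag{1.9} \]
$f(x)$
\[ \mathrm{var}[f] \equiv \mathbb{E}\big[ (f(x) - \mathbb{E}[f(x)])^2 \big] = \mathbb{E}[f(x)^2] - \mathbb{E}[f(x)]^2 \tag{1.10} \]
$x$
\[ \mathrm{var}[x] = \mathbb{E}[x^2] - \mathbb{E}[x]^2 \tag{1.11} \]
2 $x$, $y$
\[ \mathrm{cov}[x, y] \equiv \mathbb{E}\big[ \{x - \mathbb{E}[x]\}\{y - \mathbb{E}[y]\} \big] = \mathbb{E}[xy] - \mathbb{E}[x]\,\mathbb{E}[y] \tag{1.12} \]
2 $\mathbf{x}$, $\mathbf{y}$
\[ \mathrm{cov}[\mathbf{x}, \mathbf{y}] \equiv \mathbb{E}\big[ \{\mathbf{x} - \mathbb{E}[\mathbf{x}]\}\{\mathbf{y}^T - \mathbb{E}[\mathbf{y}^T]\} \big] = \mathbb{E}[\mathbf{x}\mathbf{y}^T] - \mathbb{E}[\mathbf{x}]\,\mathbb{E}[\mathbf{y}^T] \tag{1.13} \]
$\mathbf{x}$, $\mathbf{x}$
\[ \mathrm{cov}[\mathbf{x}, \mathbf{x}] \tag{1.14} \]
3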
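1.2.3
$\mathbf{w}$, $\mathbf{w}$, $p(\mathbf{w})$, $\mathcal{D}$
\[ p(\mathbf{w} \mid \mathcal{D}) = \frac{p(\mathcal{D} \mid \mathbf{w})\, p(\mathbf{w})}{p(\mathcal{D})} \tag{1.15} \]
$p(\mathcal{D} \mid \mathbf{w})$
\[ p(\mathcal{D}) = \int p(\mathcal{D} \mid \mathbf{w})\, p(\mathbf{w})\, d\mathbf{w} \tag{1.16} \]
1.2.4
\[ \mathcal{N}(x \mid \mu, \sigma^2) \equiv \frac{1}{(2\pi\sigma^2)^{1/2}} \exp\left\{ -\frac{1}{2\sigma^2} (x - \mu)^2 \right\} \tag{1.17} \]
\[ \mathbb{E}[x] = \mu, \qquad \mathbb{E}[x^2] = \mu^2 + \sigma^2, \qquad \mathrm{var}[x] = \sigma^2 \tag{1.18} \]
\[ \mathcal{N}(\mathbf{x} \mid \boldsymbol{\mu}, \boldsymbol{\Sigma}) \equiv \frac{1}{(2\pi)^{D/2} |\boldsymbol{\Sigma}|^{1/2}} \exp\left\{ -\frac{1}{2} (\mathbf{x} - \boldsymbol{\mu})^T \boldsymbol{\Sigma}^{-1} (\mathbf{x} - \boldsymbol{\mu}) \right\} \tag{1.19} \]
$D$, $\boldsymbol{\Sigma}$, $|\boldsymbol{\Sigma}|$
$N$, $\mathsf{x} = (x_1, \dots, x_N)$, $\mu$, $\sigma^2$
\[ p(\mathsf{x} \mid \mu, \sigma^2) = \prod_{n=1}^{N} \mathcal{N}(x_n \mid \mu, \sigma^2) \tag{1.20} \]
\[ \ln p(\mathsf{x} \mid \mu, \sigma^2) = -\frac{1}{2\sigma^2} \sum_{n=1}^{N} (x_n - \mu)^2 - \frac{N}{2} \ln \sigma^2 - \frac{N}{2} \ln(2\pi) \tag{1.21} \]
\[ \mu_{ML} = \frac{1}{N} \sum_{n=1}^{N} x_n, \qquad \sigma^2_{ML} = \frac{1}{N} \sum_{n=1}^{N} (x_n - \mu_{ML})^2 \tag{1.22} \]
4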
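$\mu$, $\sigma^2$
\[ \mathbb{E}[\mu_{ML}] = \mu, \qquad \mathbb{E}[\sigma^2_{ML}] = \left( \frac{N-1}{N} \right) \sigma^2 \tag{1.23} \]
\[ \tilde{\sigma}^2 = \frac{N}{N-1}\, \sigma^2_{ML} \tag{1.24} \]
1.2.5
$\mathsf{x} = (x_1, \dots, x_N)^T$, $\mathsf{t} = (t_1, \dots, t_N)^T$, $x$, $t$, $x$, $t$, $y(x, \mathbf{w})$
\[ p(t \mid x, \mathbf{w}, \beta) = \mathcal{N}(t \mid y(x, \mathbf{w}), \beta^{-1}) \tag{1.25} \]
\[ p(\mathsf{t} \mid \mathsf{x}, \mathbf{w}, \beta) = \prod_{n=1}^{N} \mathcal{N}(t_n \mid y(x_n, \mathbf{w}), \beta^{-1}) \tag{1.26} \]
\[ \ln p(\mathsf{t} \mid \mathsf{x}, \mathbf{w}, \beta) = -\frac{\beta}{2} \sum_{n=1}^{N} \{ y(x_n, \mathbf{w}) - t_n \}^2 + \frac{N}{2} \ln \beta - \frac{N}{2} \ln(2\pi) \tag{1.27} \]
$\mathbf{w}$, $\beta$
\[ \frac{1}{\beta_{ML}} = \frac{1}{N} \sum_{n=1}^{N} \{ y(x_n, \mathbf{w}_{ML}) - t_n \}^2 \tag{1.28} \]
$\mathbf{w}$
\[ p(\mathbf{w} \mid \alpha) = \mathcal{N}(\mathbf{w} \mid \mathbf{0}, \alpha^{-1} \mathbf{I}) = \left( \frac{\alpha}{2\pi} \right)^{(M+1)/2} \exp\left\{ -\frac{\alpha}{2} \mathbf{w}^T \mathbf{w} \right\} \tag{1.29} \]
$M$, $\mathbf{w}$, $M + 1$, $\alpha$, $\mathbf{w}$
\[ p(\mathbf{w} \mid \mathsf{x}, \mathsf{t}, \alpha, \beta) \propto p(\mathsf{t} \mid \mathsf{x}, \mathbf{w}, \beta)\, p(\mathbf{w} \mid \alpha) \tag{1.30} \]
\[ \frac{\beta}{2} \sum_{n=1}^{N} \{ y(x_n, \mathbf{w}) - t_n \}^2 + \frac{\alpha}{2} \mathbf{w}^T \mathbf{w} \tag{1.31} \]
$\mathbf{w}$
5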
1.2.6 3.3 1.3 1.4 1.5 x t x t x X C 1 C 2 x p(c k x) p(c k x) = p(x C k)p(c k ) p(x) (1.32) 1.5.1 x R k C k p() = p(x, C 2 )dx + p(x, C 1 )dx (1.33) R 1 R 2 K p() = K k=1 R k p(x, C k )dx (1.34) x p(c k x) 6
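1.5.2
\[ \mathbb{E}[L] = \sum_{k} \sum_{j} \int_{\mathcal{R}_j} L_{kj}\, p(\mathbf{x}, \mathcal{C}_k)\, d\mathbf{x} \tag{1.35} \]
$\mathbf{x}$
\[ \sum_{k} L_{kj}\, p(\mathbf{x}, \mathcal{C}_k) \tag{1.36} \]
$j$
1.5.3
1.5.4
1.5.5
$\mathbf{x}$, $t$, $y(\mathbf{x})$, $L(t, y(\mathbf{x}))$
\[ \mathbb{E}[L] = \iint L(t, y(\mathbf{x}))\, p(\mathbf{x}, t)\, d\mathbf{x}\, dt \tag{1.37} \]
\[ \mathbb{E}[L] = \iint \{ y(\mathbf{x}) - t \}^2\, p(\mathbf{x}, t)\, d\mathbf{x}\, dt \tag{1.38} \]
\[ \frac{\delta \mathbb{E}[L]}{\delta y(\mathbf{x})} = 2 \int \{ y(\mathbf{x}) - t \}\, p(\mathbf{x}, t)\, dt = 0 \tag{1.39} \]
$y(\mathbf{x})$
\[ y(\mathbf{x}) = \frac{\int t\, p(\mathbf{x}, t)\, dt}{p(\mathbf{x})} = \int t\, p(t \mid \mathbf{x})\, dt = \mathbb{E}_t[t \mid \mathbf{x}] \tag{1.40} \]
\[ \begin{aligned} \{ y(\mathbf{x}) - t \}^2 &= \{ y(\mathbf{x}) - \mathbb{E}_t[t \mid \mathbf{x}] + \mathbb{E}_t[t \mid \mathbf{x}] - t \}^2 \\ &= \{ y(\mathbf{x}) - \mathbb{E}_t[t \mid \mathbf{x}] \}^2 + 2 \{ y(\mathbf{x}) - \mathbb{E}_t[t \mid \mathbf{x}] \}\{ \mathbb{E}_t[t \mid \mathbf{x}] - t \} + \{ \mathbb{E}_t[t \mid \mathbf{x}] - t \}^2 \end{aligned} \tag{1.41} \]
7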
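\[ \int \{ \mathbb{E}_t[t \mid \mathbf{x}] - t \}\, p(\mathbf{x}, t)\, dt = 0 \tag{1.42} \]
\[ \mathbb{E}[L] = \int \{ y(\mathbf{x}) - \mathbb{E}_t[t \mid \mathbf{x}] \}^2\, p(\mathbf{x})\, d\mathbf{x} + \int \mathrm{var}[t \mid \mathbf{x}]\, p(\mathbf{x})\, d\mathbf{x} \tag{1.43} \]
\[ \mathrm{var}[t \mid \mathbf{x}] = \int \{ t - \mathbb{E}_t[t \mid \mathbf{x}] \}^2\, p(t \mid \mathbf{x})\, dt \tag{1.44} \]
\[ \mathbb{E}[L_q] = \iint | y(\mathbf{x}) - t |^q\, p(\mathbf{x}, t)\, d\mathbf{x}\, dt \tag{1.45} \]
1.6
\[ \mathrm{H}[x] = -\sum_{x} p(x) \log_2 p(x) \tag{1.46} \]
\[ \mathrm{H}[\mathbf{x}] = -\int p(\mathbf{x}) \ln p(\mathbf{x})\, d\mathbf{x} \tag{1.47} \]
$\mathbf{x}$, $\mathbf{y}$
\[ \mathrm{H}[\mathbf{y} \mid \mathbf{x}] = -\iint p(\mathbf{y}, \mathbf{x}) \ln p(\mathbf{y} \mid \mathbf{x})\, d\mathbf{y}\, d\mathbf{x} \tag{1.48} \]
$\mathbf{x}$, $\mathbf{y}$
\[ \mathrm{H}[\mathbf{x}, \mathbf{y}] = -\iint p(\mathbf{y}, \mathbf{x}) \ln p(\mathbf{y}, \mathbf{x})\, d\mathbf{y}\, d\mathbf{x} = \mathrm{H}[\mathbf{y} \mid \mathbf{x}] + \mathrm{H}[\mathbf{x}] \tag{1.49} \]
8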
1.6.1 p(x)t q(x) KL(p q) = = ( p(x) ln q(x)dx p(x) ln ) p(x) ln p(x)dx { } q(x) dx (1.50) p(x) p(x)t q(x) p(x) q(x) KL(p q) 0 p(x) = q(x) p(x) > 0 p(x)dx = 1 f ( ) f (g(x))p(x)dx f g(x)p(x)dx (1.51) f (b) f (a) + f (a)(b a) (1.52) b = a b g(x) a g(x)p(x)dx p(x) g(x) f ln g(x) q(x)/p(x) x y I[x, y] KL(p(x, y) p(x)p(y)) = p(x, y) ln ( p(x)p(y) p(x, y) I[x, y] 0 ) dxdy (1.53) I[x, y] = H[x] H[x y] = H[y] H[y x] (1.54) 2 2.1 x {0, 1} Bern(x µ) = µ x (1 µ) 1 x (2.1) 9
x = 0, 1 1 µ, µ E[x] = µ var[x] = µ(1 µ) (2.2) D = (x 1,, x n ) p(d µ) = ln p(d µ) = N p(x n µ) = N µ x n (1 µ) 1 x n ln p(x n µ) = µ ML = 1 N {x n ln µ + (1 x n ) ln(1 µ)} (2.3) x n (2.4) N x = 1 Bin(m N, µ) = N m E[x] = Nµ N m µm (1 µ) N m N! (N m)!m! (2.5) var[x] = Nµ(1 µ) (2.6) 2.1.1 (0, 1) Beta(µ a, b) Γ(x) E[µ] = var[µ] = Γ(a + b) Γ(a)Γ(b) µa 1 (1 µ) b 1 0 u x 1 e u du (2.7) a a + b ab (a + b) 2 (a + b + 1) 10 (2.8)
x = 1 m x = 0 l µ µ p(µ m, l, a, b) = Γ(m + a + l + b) Γ(m + a)γ(l + b) µm+a 1 (1 µ) l+b 1 (2.9) p(x = 1 m, l, a, b) = = = 1 0 1 0 p(x = 1 µ)p(µ m, l, a, b)dµ µp(µ m, l, a, b)dµ m + a m + a + l + b (2.10) 2.2 K 1 K K µ k p(x µ) = K k=1 µ x k k (2.11) E[x µ] = p(x µ)x = µ (2.12) x N x 1,, x N D p(d µ) = m k = N K k=1 µ x nk k = K k=1 µ m k k x nk (2.13) n µ k µ k = 1 K K m k ln µ k + λ µ k 1 (2.14) k=1 k=1 0 µ ML k = m k N (2.15) 11
µ N m 1,, m K Mult(m 1,, m K µ, N) = N = m 1 m K N K m 1 m K N! m 1! m K! k=1 µ m k k (2.16) 2.2.1 α Dir(µ α) = α 0 = Γ(a 0 ) Γ(a 1 ) Γ(a K ) K k=1 µ α k 1 k K α k (2.17) k=1 µ K k=1 µ k = 1 K 1 p K (µ 1,, µ K 1 ) = Γ(a 0 ) K 1 Γ(a 1 ) Γ(a K ) k=1 µ α k 1 k K 1 1 µ k 0 < µ k, K 1 k=1 µ k < 1 k=1 α K 1 (2.18) c 0 µ a 1 (c µ) b 1 a+b 1 Γ(a)Γ(b) dµ = c Γ(a + b) (2.19) µ cµ µ K 1 1 K 2 k=1 µ k 0 µ α K 1 1 M 1 K 2 1 µ k µ K 1 k=1 α K 1 dµ K 1 = Γ(α K 1)Γ(α K 1 ) Γ(α K 1 + α K ) K 2 1 µ k k=1 α K 1 +α K 1 (2.20) p(µ D, α) p(d µ)p(µ α) p(µ D, α) = Dir(µ α + m) = Γ(a 0 + N) Γ(a 1 + m 1 ) Γ(a K + m k ) K k=1 µ α k+m k 1 k (2.21) 12
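2.3
1 $x$
\[ \mathcal{N}(x \mid \mu, \sigma^2) = \frac{1}{(2\pi\sigma^2)^{1/2}} \exp\left\{ -\frac{1}{2\sigma^2} (x - \mu)^2 \right\} \tag{2.22} \]
$\mu$, $\sigma^2$, $D$
\[ \mathcal{N}(\mathbf{x} \mid \boldsymbol{\mu}, \boldsymbol{\Sigma}) = \frac{1}{(2\pi)^{D/2} |\boldsymbol{\Sigma}|^{1/2}} \exp\left\{ -\frac{1}{2} (\mathbf{x} - \boldsymbol{\mu})^T \boldsymbol{\Sigma}^{-1} (\mathbf{x} - \boldsymbol{\mu}) \right\} \tag{2.23} \]
\[ \mathbb{E}[\mathbf{x}] = \boldsymbol{\mu}, \qquad \mathrm{cov}[\mathbf{x}] = \boldsymbol{\Sigma} \tag{2.24} \]
2.3.1
$\mathbf{x}$, $\mathcal{N}(\mathbf{x} \mid \boldsymbol{\mu}, \boldsymbol{\Sigma})$, $D$, 2, $\mathbf{x}_a$, $\mathbf{x}_b$, $\boldsymbol{\mu}$, $\boldsymbol{\Sigma}$
\[ \mathbf{x} = \begin{pmatrix} \mathbf{x}_a \\ \mathbf{x}_b \end{pmatrix}, \qquad \boldsymbol{\mu} = \begin{pmatrix} \boldsymbol{\mu}_a \\ \boldsymbol{\mu}_b \end{pmatrix}, \qquad \boldsymbol{\Sigma} = \begin{pmatrix} \boldsymbol{\Sigma}_{aa} & \boldsymbol{\Sigma}_{ab} \\ \boldsymbol{\Sigma}_{ba} & \boldsymbol{\Sigma}_{bb} \end{pmatrix} \tag{2.25} \]
\[ \boldsymbol{\Lambda} = \begin{pmatrix} \boldsymbol{\Lambda}_{aa} & \boldsymbol{\Lambda}_{ab} \\ \boldsymbol{\Lambda}_{ba} & \boldsymbol{\Lambda}_{bb} \end{pmatrix} \tag{2.26} \]
$\mathbf{x}_b$, $\mathbf{x}_a$
\[ p(\mathbf{x}_a \mid \mathbf{x}_b) = \frac{p(\mathbf{x}_a, \mathbf{x}_b)}{\int p(\mathbf{x}_a, \mathbf{x}_b)\, d\mathbf{x}_a} = \mathcal{N}(\mathbf{x}_a \mid \boldsymbol{\mu}_{a|b}, \boldsymbol{\Lambda}_{aa}^{-1}), \qquad \boldsymbol{\mu}_{a|b} = \boldsymbol{\mu}_a - \boldsymbol{\Lambda}_{aa}^{-1} \boldsymbol{\Lambda}_{ab} (\mathbf{x}_b - \boldsymbol{\mu}_b) \tag{2.27} \]
2.3.2
\[ p(\mathbf{x}_a) = \int p(\mathbf{x}_a, \mathbf{x}_b)\, d\mathbf{x}_b = \mathcal{N}(\mathbf{x}_a \mid \boldsymbol{\mu}_a, \boldsymbol{\Sigma}_{aa}) \tag{2.28} \]
13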
2.3.3 p(x) = N(x µ, Λ 1 ) p(y x) = N(y Ax + b, L 1 ) (2.29) z T = (x T, y T ) µ E[z] = cov[z] = R 1 = Λ + AT LA Aµ + b LA A T L (2.30) L p(y) = p(y x)p(x)dx = N(y Aµ + b, L 1 + AΛ 1 A T ) p(x y) = N(x Σ{A T L(y b) + Λµ}, Σ) Σ = (Λ + A T LA) 1 (2.31) 2.3.4 X = (x 1,, x N ) T ln p(x µ, Σ) = ND 2 ln(2π) N 2 ln Σ 1 2 (x n µ) T Σ 1 (x n µ) (2.32) µ ML = 1 N Σ ML = 1 N x n (x n µ ML )(x n µ ML ) T (2.33) Σ = 1 N 1 E[µ ML ] = µ E[Σ ML ] = N 1 N Σ (2.34) (x n µ ML )(x n µ ML ) T (2.35) 14
2.3.5 2.3.6 1 N x = {x 1,, x N } σ 2 N 1 p(x µ) = p(x n µ) = exp (2πσ 2 ) N/2 1 2σ 2 µ N = 1 σ 2 N = (x n µ) 2 (2.36) p(µ) = N(µ µ 0, σ 2 0 ) (2.37) p(µ x) = 1 C p(x µ)p(µ) = N(µ µ N, σ 2 N ) (2.38) σ 2 Nσ 2 0 + µ Nσ 2 0 σ2 0 + Nσ 2 0 + µ σ2 ML 1 + N σ 2 σ 2 0 µ ML = 1 N x n (2.39) λ 1/σ 2 N p(x λ) = N(x n µ, λ 1 ) λ N/2 exp λ 2 (x n µ) 2 (2.40) Gam(λ a 0, b 0 ) 1 Γ(a) ba 0 0 λa 0 1 exp( b 0 λ) (2.41) E[λ] = a b a var[λ] = (2.42) b 2 15
p(λ x) = 1 C p(x λ)p(λ) = Gam(λ a N, b N ) (2.43) a N = a 0 + N 2 b N = b 0 + 1 2 (x n µ) 2 = b 0 + N 2 σ2 ML (2.44) p(µ, λ) = N(µ µ 0, (β 0 λ) 1 )Gam(λ a 0, b 0 ) (2.45) a 0 = (1 + β 0 )/2 µ N, β N, b N 2.3.7 t 2.3.8 [0, 2π) p(θ θ 0, m) = I 0 (m) = 1 2πI 0 (m) exp{m cos(θ θ 0)} 1 2π 2π {θ 1,, θ N } ln p(d θ 0, m) = N ln(2π) N ln I 0 (m) + m 0 exp{m cos θ}dθ (2.46) cos(θ n θ 0 ) (2.47) θ 0 0 sin(θ n θ 0 ) = 0 (2.48) θ ML { } 0 = tan 1 n sin θ n n cos θ n (2.49) {(cos θ i, sin θ i )} m I 0 (m ML) I 0 (m ML) = 1 cos(θ n θ0 ML ) (2.50) N 16
2.3.9 K π k = 1 0 π k 1 (2.51) k=1 π k p(x) = X = {x 1,, x N } K ln p(x π, µ, Σ) = ln π k N(x n µ k, Σ k ) K π k N(x µ k, Σ k ) (2.52) k=1 k=1 (2.53) 2.4 η p(x η) = h(x)g(η) exp{η T u(x)} g(η) = 1 h(x) exp{ηt u(x)}dx (2.54) 2.4.1 ln g(η) = E[u(x)] (2.55) X = {x 1,, x N } N p(x η) = h(x) g(η)n exp ηt u(x n ) 0 (2.56) ln g(η ML ) = 1 u(x n ) (2.57) N n u(x n ) 17
2.4.2 p(η χ, ν) = f (χ, ν)g(η) ν exp{νη T χ} (2.58) f (χ, ν) p(η X, χ, ν) g(η) ν+n exp ηt u(x n ) + νχ (2.59) 2.4.3 2.5 x i i x n i i p i = n i (2.60) N i N 2.5.1 k(u) 0 k(u)du = 1 (2.61) p(x) = 1 N 1 ( x h D k xn ) h (2.62) k p(x) = 1 1 exp { x x n 2 } N (2πh 2 ) D/2 2h 2 (2.63) 18
2.5.2 3 N {x n } t n x t y(x) p(t x) 3.1 M w i ϕ i (x) M 1 y(x, w) = w j ϕ j (x) = w T ϕ(x) (3.1) j=0 ϕ 0 = 1 M 1 w i 3.1.1 y(x, w) p(t x, w, β) = N(t y(x, w), β 1 ) (3.2) N p(t X, w, β) = N N(t n w T ϕ(x n ), β 1 ) (3.3) ln p(t X, w, β) = ln N(t n w T ϕ(x), β 1 ) = N 2 ln β N 2 (2π) βe D(w) E D (w) = 1 {t n w T ϕ(x n )} 2 (3.4) 2 19
E D (w) w w i E D (w) = M 1 t n w j ϕ j (x) ϕ i(x n ) (3.5) ϕ i (x n ) = Φ ni 0 j=0 Φ ni t n = M 1 Φ n j Φ ni w j (3.6) j=0 β w ML = ( Φ T Φ ) 1 Φ T t (3.7) 1 β ML = 1 N {t n wml T ϕ(x n)} 2 (3.8) 3.1.2 3.1.3 3.1.4 3.1.5 K y(x, w) = W T ϕ(x) (3.9) p(t x, W, β) = N(t W T ϕ(x), β 1 I) (3.10) 20
n t T n T ln p(t, X, W, β) = ln N(t n W T ϕ(x n ), β 1 T ) = NK ( β ) 2 ln β 2π 2 t n W T ϕ(x n ) 2 (3.11) W W ML = ( Φ T Φ ) 1 Φ T T (3.12) 3.2 x t 1.5.5 E[L] = {y(x) t} 2 p(x, t)dxdt (3.13) h(x) = E[t x] = tp(t x)dt (3.14) 1.5.5 y(x) E[L] = {y(x) h(x)} 2 dx + {h(x) t} 2 p(x, t)dxdt (3.15) {y(x; D) h(x)} 2 = {y(x; D) E D [y(x; D)] + E D [y(x; D)] h(x)} 2 = {y(x; D) E D [y(x; D)]} 2 + {E D [y(x; D)] h(x)} 2 + 2{y(x; D) E D [y(x; D)]}{E D [y(x; D)] h(x)} (3.16) D E D [{y(x; D) h(x)} 2 ] = {E D [y(x; D)] h(x)} 2 + E D [{y(x; D) E D [y(x; D)]} 2 ] (3.17) 21
E D [E[L]] = () 2 + + () 2 = {E D [y(x; D)] h(x)} 2 p(x)dx = E D [{y(x; D) E D [y(x; D)]} 2 ]p(x)dx = {h(x) t} 2 p(x, t)dxdt (3.18) {(x i, t i )} N p(x i, t i )dx i dt i (3.19) i=1 3.3 3.3.1 p(w) = N(w m 0, S 0 ) (3.20) X = (x 1,, x N ), t = (t 1,, t N ) p(w t, X) p(w t, X)p(t, X) = p(t X, w)p(x w)p(w) (3.21) X w p(x w) w 3.1.1 p(w t, X) p(t X, w)p(w) (3.22) p(t X, w, β) = ϕ i (x n ) = Φ ni N N(t n w T ϕ(x n ), β 1 ) (3.23) p(w t, X) = N(w m N, S N ) ( m N = S N S 1 0 m 0 + βφ T t ) S 1 N = S 1 0 + βφ T Φ (3.24) 22
3.3.2 w x t p(t x, t, X) = p(t x, w)p(w t, X)dw (3.25) p(t x, w, β) = N(t w T ϕ(x), β 1 ) p(w t, X, β) = N(w m N, S N ) (3.26) p(t x, t, X) = N(t m T N ϕ(x), σ2 N (x)) σ 2 N (x) = 1 β + ϕ(x)t S N ϕ(x) (3.27) 3.3.3 w 0 y(x, m N ) = m T N ϕ(x) = βϕ(x)t S N Φ T t = m N = βs N Φ T t (3.28) βϕ(x) T S N ϕ(x n )t n (3.29) k(x, x ) = βϕ(x) T S N ϕ(x ) (3.30) y(x, m N ) = k(x, x n )t n (3.31) S N x n w S 1 N = βφt Φ (3.32) k(x, x n ) = 1 (3.33) 23
x 3.14 w k(x, x n ) = β ϕ i (x)s Ni j ϕ j (x n ) n i j = β ϕ i (x)s Ni j ϕ j (x n )ϕ 0 (x n ) = n i j ϕ i (x)i i0 i = 1 (3.34) 3.4 3.5 x t y(x, w) w p(t w, β) = N(t y(x, w), β 1 ) (3.35) p(w α) = N(w 0, α 1 I) (3.36) p(w t, α, β) = N(w m N, S N ) m N = βs N Φ T t S 1 N = αi + βφ T Φ (3.37) α, β p(t t) = p(t w, β)p(w t, α, β)p(α, β t)dwdαdβ (3.38) p(α, β t) p(t α, β)p(α, β) (3.39) 24
3.5.1 p(t α, β) p(t α, β) = p(t w, β)p(w α)dw (3.40) p(t α, β) = ( β N/2 ( α ) M/2 2π) 2π exp{ E(w)}dw E(w) = βe D (w) + αe W (w) = β 2 t Φw 2 + α 2 wt w (3.41) exp{ E(w)}dw = exp{ E(m N )}(2π) M/2 S 1 N 1/2 (3.42) ln p(t α, β) = M 2 ln α + N 2 ln β E(m N) 1 2 ln S 1 N N ln(2π) (3.43) 2 3.5.2 (3.86) (3.89) m N α 3.5.3 3.6 4 x K C k 25
4.1 4.1.1 2 K = 2 y(x) = w T x + w 0 (4.1) x y(x) 0 C 1 y(x) < 0 C 2 4.1.2 K y k (x) = wk T x + w k0 (4.2) j k y k (x) > y j (x) x C k 2 x A, x B R k 2 x C R k y k (x C ) = y k (λx A + (1 λ)x B ) = λy k (x A ) + (1 λ)y k (x B ) λy j (x A ) + (1 λ)y j (x B ) = y j (x C ) (4.3) 4.1.3 3.1 t 1-of-K y k (x) = D w k j ϕ j (x) (4.4) j=0 ϕ 0 (x) = 1 ϕ j (x) = x j ( j 1) x = (1, x T ) T K y(x) = W T x (4.5) ϕ i (x n ) = Φ ni X ni = x ni W = ( X T X ) 1 X T T (4.6) 4.2 26
4.1.4 2 D 1 C 1 C 2 m 1 = 1 N 1 y = w T x (4.7) x n, m 2 = 1 x n, (4.8) N n C 2 1 n C 2 m k = w T m k (4.9) m 2 m 1 = w T (m 2 m 1 ) (4.10) 2 s 2 k = n C k (w T x n m k ) 2 (4.11) J(w) = (m 2 m 1 ) 2 s 2 1 + s2 2 J(w) = wt S B w w T S W w S B = (m 2 m 1 )(m 2 m 1 ) T S W = (x n m 1 )(x n m 1 ) T + n C 1 (4.12) n C 2 (x n m 2 )(x n m 2 ) T (4.13) w ( (4.22) w ) (w T S B w)s W w = (w T S W w)s B w (4.14) S B w (m 2 m 1 ) w w S 1 W (m 2 m 1 ) (4.15) 27
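4.1.5
4.1.6
4.1.7
4.2
$p(\mathbf{x} \mid \mathcal{C}_k)$, $p(\mathcal{C}_k)$
2
\[ p(\mathcal{C}_1 \mid \mathbf{x}) = \frac{p(\mathbf{x} \mid \mathcal{C}_1)\, p(\mathcal{C}_1)}{p(\mathbf{x} \mid \mathcal{C}_1)\, p(\mathcal{C}_1) + p(\mathbf{x} \mid \mathcal{C}_2)\, p(\mathcal{C}_2)} = \frac{1}{1 + \exp(-a)} = \sigma(a) \tag{4.16} \]
\[ a = \ln \frac{p(\mathbf{x} \mid \mathcal{C}_1)\, p(\mathcal{C}_1)}{p(\mathbf{x} \mid \mathcal{C}_2)\, p(\mathcal{C}_2)} \tag{4.17} \]
$\sigma(a)$
$K > 2$
\[ p(\mathcal{C}_k \mid \mathbf{x}) = \frac{p(\mathbf{x} \mid \mathcal{C}_k)\, p(\mathcal{C}_k)}{\sum_j p(\mathbf{x} \mid \mathcal{C}_j)\, p(\mathcal{C}_j)} = \frac{\exp(a_k)}{\sum_j \exp(a_j)} \tag{4.18} \]
\[ a_k = \ln\big( p(\mathbf{x} \mid \mathcal{C}_k)\, p(\mathcal{C}_k) \big) \tag{4.19} \]
28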
4.2.1 C k p(x C k ) = { 1 exp 1 } (2π) D/2 Σ 1/2 2 (x µ k) T Σ 1 (x µ k ) (4.20) 2 p(c 1 x) = σ(w T x + w 0 ) w = Σ 1 (µ 1 µ 2 ) w 0 = 1 2 µt 1 Σ 1 µ 1 + 1 2 µt 2 Σ 1 µ 2 + ln p(c 1) p(c 2 ) (4.21) p(c k x) = exp(a k (x)) j exp(a j (x)) a k (x) = w T k x + w k0 w k = Σ 1 µ k w k0 = 1 2 µt k Σ 1 µ k + ln p(c k ) (4.22) 4.2.2 2 p(c 1 ) = π, p(c 2 ) = 1 π p(x n, C 1 ) = p(c 1 )p(x n C 1 ) = πn(x n µ 1, Σ) p(x n, C 2 ) = p(c 2 )p(x n C 2 ) = (1 π)n(x n µ 2, Σ) (4.23) {x n, t n } t n = 1 C 1 t n = 0 C 2 N p(t, X π, µ 1, µ 2, Σ) = [πn(x n µ 1, Σ)] t n [(1 π)n(x n µ 2, Σ)] 1 t n (4.24) 29
0 π = µ 1 = µ 2 = N 1 N 1 + N 2 1 t n x n N 1 1 N 2 n=2 (1 t n )x n N i C i Σ = N 1 N S 1 + N 2 N S 2 1 S i = (x u i )(x u i ) T (4.25) N i n C 1 4.2.3 4.2.4 4.3 4.3.1 4.3.2 2 ϕ C 1 p(c 1 ϕ) = y(ϕ) = σ(w T ϕ) (4.26) ϕ ϕ x p(t w) = N y t n n (1 y n ) 1 t n (4.27) 30
y n = p(c 1 ϕ n ) E(w) = ln p(t w) = {t n ln y n + (1 t n ) ln(1 y n )} (4.28) y n = σ(w T ϕ n ) w E(w) = (y n t n )ϕ n (4.29) 4.3.3 E(w) w (new) = w (old) H 1 E(w) H = E(w) (4.30) E D (w) = 1 {t n w T ϕ(x n )} 2 2 (4.31) E(w) = (w T ϕ n t n )ϕ n = Φ T Φw Φ T t H = E(w) = Φ T Φ (4.32) w (new) = w (old) (Φ T Φ) 1 { Φ T Φw Φ T t } = (Φ T Φ) 1 Φ T t (4.33) w E(w) = H = R (y n t n )ϕ n = Φ T (y t) y n (1 y n )ϕ n ϕ T n = Φ T RΦ (4.34) R nn = y n (1 y n ) (4.35) 31
4.3.4 p(c k ϕ) = y k (ϕ) = exp(a k) j exp(a j ) (4.36) a k = w T k ϕ (4.37) w k 1-of-K y k (ϕ n ) = y nk p(t w 1,, w K ) = N k=1 K p(c k ϕ n ) t nk = E(w 1,, w K ) = ln p(t w 1,, w K ) = w j E(w 1,, w K ) = M M N K k=1 k=1 y t nk nk (4.38) K t nk ln y nk (4.39) (y n j t n j )ϕ n (4.40) wk w j E(w 1,, w K ) = y nk (I k j y n j )ϕ n ϕ T n (4.41) H v = (v T 1,, vt K )T v T Hv = K y nk (I k j y n j )vk T ϕ nϕ T n v j = k, j=1 = K y nk (I k j y n j )vk T ϕ nϕ T n v j k, j=1 y nk (v T ϕ k ) 2 y nk vk T ϕ k k k y nk a k y n j a j k j 2 2 0 (4.42) a k = v T k ϕ n 32
4.3.5 ϕ t p(t η = ψ( f (w T ϕ)), s) = 1 ( t ) { ηt } s h g(η) exp s s g y t (4.43) 1 g(η) = 1 s h ( ) { t s exp ηt } (4.44) s dt y [t η] = s d ln g(η) (4.45) dη y η η = ψ(y) f w {ϕ n, t n } ln p(t η, s) = = ln p(t n η n, s) { w ln g(η n ) + η nt n s } + w (4.46) w ln p(t η, s) = = { d ln g(η n ) + t } n dηn dy n a n dη n s dy n da n 1 s {t n y n }ψ (y n ) f (a n )ϕ n (4.47) a n = w T ϕ f 1 (y) = ψ(y) (4.48) f f (ψ(y)) = y f (ψ)ψ (y) = 1 E(w) = 1 s {y n t n }ϕ n (4.49) (4.124) p(t w T ϕ, s) = 1 ( t ) { w s h g(w T T } ϕt ϕ) exp s s (4.50) 33
4.4 p(z) = 1 f (z) (4.51) Z z 0 d f (z) dz = 0 (4.52) z=z0 A = d2 ln f (z) dz2 (4.53) z=z0 ( A ) 1/2 q(z) = exp { A } 2π 2 (z z 0) 2 (4.54) 4.4.1 BIC 4.5 4.5.1 2 w t p(t w) = N y t n n {1 y n } 1 t n w y n = σ(w T ϕ n ) (4.55) p(w) = N(w m 0, S 0 ) (4.56) 34
p(w t) p(w)p(t w) (4.57) ln p(w t) = 1 2 (w m 0) T S 1 0 (w m 0) + {t n ln y n + (1 t n ) ln(1 y n )} + (4.58) 2 S 1 N = ln p(w t) = S 1 0 + y n (1 y n )ϕ n ϕ T n (4.59) q(w) = N(w w MAP, S N ) (4.60) w MAP 4.5.2 ϕ p(c 1 ϕ, t) = p(c 1 ϕ, w)p(w t)dw σ(w T ϕ)q(w)dw (4.61) σ(w T ϕ) = δ(a w T ϕ)σ(a)da (4.62) σ(w T ϕ)q(w)dw = p(a) = σ(a)p(a)da δ(a w T σ)q(w)dw (4.63) 2.3.2 p(a) (w a ) µ a = E[a] = p(a)ada = q(w)w T ϕdw = wmap T ϕ σ 2 a = var[a] = p(a){a 2 E[a] 2 }da = q(w){(w T ϕ) 2 (wmap T ϕ)2 }dw = ϕ T S N ϕ (4.64) 35
p(c 1 ϕ, t) = σ(a)p(a)da = σ(a)n(a µ a, σ 2 a)da (4.65) 5 5.1 w {x i } {y k } M D y k (w, w) = σ w (2) k j h w (1) ji x i + w (1) j0 + w(2) k0 j=1 M = σ j=0 w (2) k j h i=1 D i=0 w (1) ji x i (5.1) h 5.2 z k = h w k j z j (5.2) j 5.1.1 5.2 5.2.1 36
5.2.2 5.2.3 5.2.4 5.3 5.3.1 E(w) = E n (w) (5.3) a j = w ji z i i z j = h(a j ) (5.4) E n = E n a j w ji a j w ji = δ j z i (5.5) δ j E n a j (5.6) δ j E n E n a k = a j a k k a j = h (a j ) w k j δ k (5.7) j k 37
5.3.2 5.3.3 5.3.4 J ki y k x i (5.8) J ki = y k x i = = = = y k a j a j j x i y k w ji a j j y k a l w ji a j l l a j w ji h y k (a j ) w l j (5.9) a l j l 5.15 J ki = y k y k = w l j h (a j )w ji (5.10) x i a l l j 5.4 2 2 E w ji w lk (5.11) 38
5.4.1 2 E w 2 ji 2 E a 2 j = 2 E z 2 a 2 i j = h (a j ) 2 2 E n w k j w k j a kk k a k + h (a j ) k w k j E n a k (5.12) 2 2 E a 2 j h (a j ) 2 k w 2 2 E n k j + h (a a 2 j ) k k w k j E n a k (5.13) 5.4.2 E = 1 2 (y n t n ) 2 (5.14) H = E = y n ( y n ) T + (y n t n ) y n (5.15) 5.4.3 5.4.4 5.4.5 39
5.4.6 H H v v T H v T H = v T ( E) (5.16) v T R{ } R{ f } = i j v i j f (5.17) w i j 2 a j = w ji x i i z j = h(a j ) y k = w k j z j (5.18) R{a j } = v ji x i i j R{z j } = h (a j )R{a j } R{y k } = w k j R{z j } + v k j z j (5.19) j j δ k E y k = y k t k δ j E a j = h (a j ) w k j δ k (5.20) k R{δ k } = R{y k } R{δ j } = h (a j )R{a j } w k j δ k + h (a j ) v k j δ k + h (a j ) w k j R{δ k } k k k (5.21) 1 E w k j = δ k z j E w ji = δ j x i (5.22) 40
v T H { } E R w k j { } E R w ji = R{δ k }z j + δ k R{z j } = x i R{δ j } (5.23) 5.5 Ẽ(w) = E(w) + λ 2 wt w (5.24) w 5.5.1 x y z j = h w ji x i + w j0 i y k = w k j z j + w k0 (5.25) j {(x n, t n )} w {(ax n +b, t n )} w w ji = 1 a w ji w j0 = w j0 b w ji (5.26) a λ λ 1 2 i w 2 + λ 2 w 2 (5.27) 2 w W 1 w W 1 λ 1 a 2 λ 1 W 1, W 2 1,2 p(w α 1, α 2 ) exp α 1 2 w 2 α 2 2 w W 1 w W 2 w 2 (5.28) 41
5.5.2 5.5.3 5.5.1 5.5.4 x n 1 ξ x n s(x n, ξ) s(x, 0) = x τ n = s(x n, ξ) ξ (5.29) ξ=0 k ξ y k D y k x i ξ = ξ=0 x i ξ = ξ=0 i=1 D J ki τ i (5.30) i=1 Ẽ = E + λω Ω = 1 2 n k ( ynk ξ ) 2 = 1 ξ=0 2 n k D J nki τ ni i 2 (5.31) 5.5.5 E = 1 2 {y(x) t} 2 p(t x)p(x)dxdt (5.32) 1 ξ Ẽ = 1 {y(s(x, ξ)) t} 2 p(t x)p(x)dxp(ξ)dtdξ (5.33) 2 42
p(ξ) ξ s s(x, ξ) = x + ξτ + 1 2 ξ2 τ + O(ξ 3 ) (5.34) τ ξ = 0 s(x, ξ) ξ 2 y(s(x, ξ)) = y(x) + ξτ T y(x) + ξ2 2 [ (τ ) T y(x) + τ T y(x)τ ] + O(ξ 3 ) (5.35) Ẽ = 1 {y(x) t} 2 p(t x)p(x)dxdt 2 + E[ξ] {y(x) t}τ T y(x)p(t x)p(x)dxdt + E[ξ 2 ] 1 [y(x) { t (τ ) T y(x) + τ T y(x)τ } 2 + (τ T y(x)) 2] p(t x)p(x)dxdt + O(ξ 3 ) (5.36) 0 E[ξ] = 0 E[ξ 2 ] = λ Ẽ = E + λω Ω = 1 [{y(x) { E[t x]} (τ ) T y(x) + τ T y(x)τ } 2 + (τ T y(x)) 2] p(x)dx (5.37) 1.5.5 E[t x] O(ξ 2 ) y(x) = E[t x) + O(ξ 2 ) (5.38) Ω Ω = 1 (τ T y(x)) 2 p(x)dx (5.39) 2 5.5.6 43
5.5.7 w p(w) = p(w i ) p(w i ) = i M π j N(w i µ j, σ 2 j ) (5.40) j=1 Ẽ(w) = E(w) + Ω(w) M Ω(w) = ln π j N(w i µ j, σ 2 j ) (5.41) i j=1 γ j (w) = π jn(w µ j, σ 2 j ) k π k N(w µ k, σ 2 k ) (5.42) Ẽ w i + j γ j (w i ) (w i µ j ) σ 2 j (5.43) Ẽ µ j = Ẽ σ j = γ j (w i ) (µ j w i ) i σ 2 j γ j (w i ) 1 (w i µ j ) 2 σ (5.44) j i π j j π j = 1 π 0 {η j } π j = exp(η j ) Mk=1 exp(η k ) σ 3 j (5.45) Ẽ η j = {π j γ j (w i )} (5.46) i 5.6 p(t x, w) = K k=1 π k (x, w)n(t, µ k (x, w), σ 2 k (x, w)i) (5.47) 44
{(x n, t n )} w π k (x, w), µ k (x, w), σ 2 k (x, w) π k = exp(a π k ) Kl=1 exp(a π l ) σ k = exp(a σ k ) µ k j = a µ k j (5.48) w x a K E(w) = ln π k (x n, w)n(t n µ k (x n, w), σ 2 k (x n, w)i) k=1 (5.49) γ nk (t n x n ) = π k N(t n µ k (x n, w), σ 2 k (x n, w)i) Kl=1 π l N(t n µ k (x n, w), σ 2 k (x n, w)i) (5.50) E n a π k E n a µ kl E n a σ k = π k γ nk µ kl t nl = γ nk σ 2 k = γ nk L t n µ k 2 (5.51) σ 2 k L t 5.7 5.7.1 t x y(x, w) p(t x, w, β) = N(t y(x, w), β 1 ) (5.52) w p(w α) = N(w 0, α 1 I) (5.53) N p(d w, β) = N(t n y(x, w), β 1 ) (5.54) 45
p(w D, α, β) p(w α)p(d w, β) (5.55) y(x, w) w () ln p(w D) = α 2 wt w β 2 {y(x n, w) t n } 2 + (5.56) w MAP A = ln p(w D, α, β) = αi + βh (5.57) p(w D) q(w D) = N(w w MAP, A 1 ) (5.58) x t p(t x, D) = p(t x, w)q(w D)dw (5.59) y(x, w) y(x, w MAP ) + g T (w w MAP ) g = w y(x, w) w=wmap (5.60) p(t x, w, β) = N(t y(x, w MAP ) + g T (w w MAP ), β 1 ) (5.61) p(t x, D, α, β) = N(t y(x, w MAP ), σ 2 (x)) σ 2 (x) = β 1 + g T A 1 g (5.62) 5.7.2 46
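5.7.3
6
$\mathcal{D} = \{(\mathbf{x}_n, t_n)\}$, $\mathbf{x}$, $y(\mathbf{x})$, $k(\mathbf{x}, \mathbf{x}') = k(\mathbf{x}', \mathbf{x})$
\[ y(\mathbf{x}) = \sum_{n} k(\mathbf{x}, \mathbf{x}_n)\, f_n(\mathcal{D}) \tag{6.1} \]
$k(\mathbf{x}, \mathbf{x}')$
6.1
$\mathbf{w}$
\[ J(\mathbf{w}) = \frac{1}{2} \sum_{n=1}^{N} \{ \mathbf{w}^T \boldsymbol{\phi}(\mathbf{x}_n) - t_n \}^2 + \frac{\lambda}{2} \mathbf{w}^T \mathbf{w} \tag{6.2} \]
\[ y(\mathbf{x}) = \mathbf{w}^T \boldsymbol{\phi}(\mathbf{x}) = \mathbf{k}(\mathbf{x})^T (\mathbf{K} + \lambda \mathbf{I}_N)^{-1} \mathsf{t} \tag{6.3} \]
$\mathbf{w}$, $\mathbf{w}$
\[ K_{nm} = \boldsymbol{\phi}(\mathbf{x}_n)^T \boldsymbol{\phi}(\mathbf{x}_m) = k(\mathbf{x}_n, \mathbf{x}_m) \tag{6.4} \]
6.2
47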
6.3 RBF (radial basis function) ν(ξ) E = 1 2 {y(x n + ξ) t n } 2 ν(ξ)dξ (6.5) y(x) y(x) = h(x x n ) = t n h(x x n ) ν(x x n ) N ν(x x n) (6.6) E = 1 {y(x) t n } 2 ν(x x n )dx (6.7) 2 N y(x) = t nν(x x n ) N ν(x x (6.8) n) h(x x n ) x x n Nadaraya-Watson 6.3.1 Nadaraya-Watson {x n, t n } p(x, t) Parzen y(x) = E[t x] = = tp(x, t)dt p(x, t)dt p(x, t) = 1 f (x x n, t t n ) (6.9) N tp(t x)dt = t f (x xn, t t n )dt n f (x x m, t t m )dt (6.10) f (x, t)tdt = 0 (6.11) 48
g(x) = f (x, t)dt (6.12) y(x) = = n g(x x n )t n m g(x x m ) k(x, x n )t n (6.13) n k(x, x n ) = g(x x n) m g(x x m ) (6.14) 6.4 6.4.1 x y(x) = w T ϕ(x) (6.15) w p(w) = N(w 0, α 1 I) (6.16) x 1,, x N y(x 1 ),, y(x N ) y y = Φw (6.17) Φ nk = ϕ k (x n ) E[y] = ΦE[w] = 0 cov[y] = E[yy T ] = ΦE[ww T ]Φ T = 1 α ΦΦT = K (6.18) K K nm = k(x n, x m ) = 1 α ϕ(x n) T ϕ(x m ) (6.19) 49
6.4.2 y n t n = y n + ϵ n (6.20) p(t n y n ) = N(t n y n, β 1 ) (6.21) y = (y 1,, y N ) T t = (t 1,, t N ) T p(t y) = N(t y, β 1 I N ) (6.22) p(y) p(y) = N(y 0, K) (6.23) p(t) p(t) = p(t y)p(y)dy = N(t 0, C) C(x n, x m ) = k(x n, x m ) + β 1 δ nm (6.24) { k(x n, x m ) = θ 0 exp θ } 1 2 x n x m 2 + θ 2 + θ 3 x T n x m (6.25) x 1,, x N t 1,, t N x N+1 t N+1 p(t N+1 ) = N(t N+1 0, C N+1 ) (6.26) t N+1 (t 1,, t N, t N+1 ) T C N+1 = C N k T k (6.27) c p(t N+1 t) = N(t N+1 k T CN 1 t, c kt CN 1 k) (6.28) 50
6.4.3 θ ln p(t θ) = 1 2 ln C N 1 2 tt CN 1 t N ln(2π) (6.29) 2 ln p(t θ) = 1 ( θ i 2 Tr C 1 N ) C N θ i + 1 2 tt C 1 N C N CN 1 θ t (6.30) i 6.4.4 6.4.5 x 1,, x N t N = (t 1,, t N ) T t {0, 1} 2 a(x) y = σ(a) y (0, 1) a t p(t a) = σ(a) t (1 σ(a)) 1 t (6.31) a p(a N+1 ) = N(a N+1 0, C N+1 ) (6.32) C(x n, x m ) = k(x n, x m ) + νδ nm (6.33) N N + 1 p(t N+1 = 1 t N ) = p(t N+1 = 1 a N+1 )p(a N+1 t N )da N+1 (6.34) p(t N+1 = 1 a N+1 ) = σ(a N+1 ) (6.35) p(a N+1 t N ) = p(a N+1 a N )p(a N t N )da N p(a N+1 a N ) = N(a N+1 k T CN 1 a N, c k T CN 1 k) (6.36) 51
6.4.6 p(a N t N ) p(a N t N ) p(a N ) + p(t N a N ) N N p(t N a N ) = σ(a n ) t n (1 σ(a n )) 1 t n = e a nt n σ( a n ) (6.37) a N a N 1 Ψ(a N ) Ψ(a N ) = ln p(a N ) + ln p(t N a N ) = 1 2 at N C 1 N a N N 2 ln(2π) 1 2 ln C N + t T N a N ln(1 + e a n ) (6.38) Ψ(a N ) = t N σ N C 1 N a N Ψ(a N ) = W N C 1 N (6.39) σ N σa n W N σ(a n )(1 σ(a n )) a new N = a old N ( Ψ(a N)) 1 Ψ(a N ) = a old N + (W N + C 1 N ) 1 (t N σ N C 1 N a N) = C N (I + W N C N ) 1 (t N σ N C 1 N a N) (6.40) 206 p(a N t N ) q(a N t N ) = N(a N a N, (W N + C N ) 1 ) (6.41) a N Φ(a N) p(a N+1 t N ) p(a N+1 t N ) N(a N+1 k T (t σ N ), c k T (W 1 N + C N) 1 k) (6.42) θ p(t N θ) p(t N θ) = p(t N a N )p(a N θ)da N (6.43) 52
Ψ(a N ) (4.135) ln p(t N θ) Ψ(a N ) 1 2 ln W N + CN 1 + N ln(2π) (6.44) 2 C N θ a N θ C N ln p(t N θ) θ j = 1 2 a T N C 1 N C N CN 1 θ a 1 N j 1 2 Tr [ (I + C N W N ) 1 W N C N θ j ] (6.45) θ j ln W N + C 1 N = (W Tr N + CN 1 ( = Tr (W N + C 1 ( ln C N = Tr θ j C 1 N ) 1 θ j N ) 1 CN 1 ) C N CN 1 θ j ) ( = Tr CN 1 NW N + I) 1 C N θ j ) C 1 N C N θ j [ I (CN W N + I) 1] (C N W N + I) = C N W N I (C N W N + I) 1 = C N W N (C N W N + I) 1 (6.46) W N (I + C N W N ) 1 a N Ψ(a N ) a N 0 1 2 = 1 2 a n ln W N + C N 1 a n θ j [(I + C N W N ) 1 C N ] nn σ n(1 σ n)(1 2σ n) a n θ j (6.47) σ n = σ(a n) (6.84) θ j a N = C N a N (t N σ N ) C N W N θ j θ j θ j a N = (I + W N C N ) 1 C N (t N σ N ) (6.48) θ j θ j 53
6.4.7 7 7.1 y(x) = w T ϕ(x) + b (7.1) 2 N x 1,, x N t 1,, t N (t n { 1, 1}) x y(x) w b n t n y(x n ) > 0 x n t n y(x n ) w = t n(w T ϕ(x n ) + b) w (7.2) y = w T x + b (7.3) { } 1 max w,b w min[t n(w T ϕ(x n ) + b)] n (7.4) arg max max w, b t n (w T ϕ(x n ) + b) = 1 (7.5) t n (w T ϕ(x n ) + b) 1 (7.6) w, b 54
t n w 1 w 2 E a n L(w, b, a) = 1 2 w 2 a n {t n (w T ϕ(x n ) + b) 1} (7.7) a n 0 t n y(x n ) 1 0 a n {t n y(x n ) 1} = 0 (7.8) w b w = 0 = a n t n ϕ(x n ) a n t n (7.9) w, b L(a) = a n 1 2 m=1 a n a m t n t m k(x n, x m ) (7.10) k(x, x ) = ϕ(x) T ϕ(x ) a a 1, a 2 L(a 1 ) < L(a 2 ) a 1 w 2 a n y(x) y(x) = a n t n k(x, x n ) + b (7.11) a n = 0 t n y(x n ) = 1 a x n t n t n a m t m k(x n, x m ) + b = 1 (7.12) m S S t n b = 1 N S t n a m t m k(x n, x m ) (7.13) m S 55
g 1 (x) = g 2 (x) = 0 ϵ g 1 (x)ϵ = 0 and g 2 (x)ϵ = 0 f (x)ϵ = 0 (7.14) f (x) = λ 1 g 1 (x) + λ 2 g 2 (x) (7.15) 7.1.1 t n y(x n ) 1 (7.16) ξ n 0 t n y(x n ) 1 ξ n (7.17) C ξ n + 1 2 w 2 (7.18) C L(w, b, ξ, a, µ) = 1 2 w 2 + C ξ n a n {t n y(x n ) 1 + ξ n } µ n ξ n (7.19) a n 0 t n y(x n ) 1 + ξ n 0 a n (t n y(x n ) 1 + ξ n ) = 0 µ n 0 ξ n 0 µ n ξ n = 0 (7.20) 56
L w = 0 w = a n t n ϕ(x n ) L b = 0 a n t n = 0 L ξ n = 0 a n = C µ n (7.21) L(a) = a n 1 2 m=1 a n a m t n t m k(x n, x m ) (7.22) 0 a n C a n t n = 0 (7.23) a n > 0 t n y(x n ) = 1 ξ n (7.24) 0 < a n < C ξ n = 0 t n y(x n ) = 1 t n a m t m k(x n, x m ) + b = 1 (7.25) m S b b = 1 N M t n a m t m k(x n, x m ) (7.26) n M m S M 0 < a n < C 7.1.2 7.1.3 SVM 57
7.1.4 SVM SVM 1 2 {y n t n } 2 + λ 2 w 2 (7.27) 0 y(x t < ϵ E ϵ (y(x) t) = y(x t ϵ (7.28) C E ϵ (y(x n ) t n ) + 1 2 w 2 (7.29) C t n y(x n ) + ϵ + ξ n t n y(x n ) ϵ ˆξ n (7.30) (ξ n + ˆξ n ) + 1 2 w 2 (7.31) a n 0, â n 0, µ n 0, ˆµ n 0 L = C (ξ n + ˆξ n ) + 1 2 w 2 (µ n ξ n + ˆµ n ˆξ n ) a n (ϵ + ξ n + y n t n ) â n (ϵ + ˆξ n y n + t n ) (7.32) L w = 0 w = (a n â n )ϕ(x n ) L b = 0 (a n â n ) = 0 L ξ n = 0 a n + µ n = C L ˆξ n = 0 â n + ˆµ n = C (7.33) 58
L(a, â) = 1 (a n â n )(a m â m )k(x n, x m ) 2 m=1 (a n + â n ) + (a n â n )t n (7.34) a n â n µ n ˆµ n 0 a n C 0 â n C (7.35) a n (ϵ + ξ n + y n t n ) = 0 â n (ϵ + ˆξ n y n + t n ) = 0 (C a n )ξ n = 0 (C â n ) ˆ ξ n = 0 (7.36) 0 < a n < C ξ n = 0 ϵ + y n t n = 0 b = t n ϵ w T ϕ(x n ) = t n ϵ (a m â m )k(x n, x m ) (7.37) b m=1 7.1.5 7.2 7.2.1 RVM x t p(t x, w, β) = N(t y(x), β 1 ) M y(x) = w i ϕ i (x) = w T ϕ(x) y(x) = i=1 w n k(x, x n ) + b (7.38) 59
X t N p(t X, w, β) = p(t n x n, w, β) (7.39) w w i α i p(w α) = M i=1 N(w i 0, α 1 i ) (7.40) p(w t, X, α, β) = N(w m, Σ) m = βσφ T t Σ = ( A + βφ T Φ ) 1 (7.41) Φ ni = ϕ i (x n ) A = diag(α i ) α, β w p(t X, α, β) = p(t X, w, β)p(w α)dw ln p(t X, α, β) = ln N(t 0, C) = 1 2 {N ln(2π) + ln C + tt C 1 t} (7.42) C = β 1 I + ΦA 1 Φ T (7.43) (7.87) (7.89) α, β x t p(t x, X, t, α, β ) = p(t x, w, β )dw = N(t m T ϕ(x), σ 2 (x)) σ 2 (x) = (β ) 1 + ϕ(x) T Σϕ(x) (7.44) 7.2.2 (7.85) α i α i C α i C = β 1 I + α 1 j φ j φ T j + α i φ i φ T i j i = C i + α 1 i φ i φ T i (7.45) 60
φ i = (ϕ i (x 1 ),, ϕ i (x N )) T C = C 1 (a + α 1 i φ T i C 1 i φ i) C 1 = C 1 i φ iφ T i C 1 i α i + φ T i C 1 i C 1 i φ i (7.46) (7.85) L(α) = L(α i ) + λ(α i ) λ(α i ) = 1 ln α i ln(α + s i ) + q2 i 2 α i + s i s i = φ T i C 1 i φ i q i = φ T i C 1 i t dλ(α i ) dα i = α 1 i s 2 i (q 2 i s i ) 2(α i + s i ) 2 (7.47) α i 0 q 2 i < s i α i w i 0 w i 0 q 2 i > s i α i = s 2 i q 2 i s i (7.48) 7.2.3 RVM 8 8.1 8.1.1 61
8.1.2 8.1.3 8.1.4 8.2 3 a,b,c p(a, b c) = p(a c)p(b c) (8.1) a b c a b c (8.2) 8.2.1 3 3 3 p(a, b, c) = p(a)p(c a)p(b c) (8.3) c a, b tail-to-tail p(a, b, c) = p(a)p(b a)p(c b) (8.4) c a, b head-to-tail p(a, b, c) = p(a)p(b)p(c a, b) (8.5) c a, b head-to-head tail-to-tail head-to-head a b c (8.6) x y y x 62
8.2.2 D A, B, C A B C a, b c C head-to-tail tail-to-tail d C head-to-head d C a b C a A, b B a b C A B C 91 8.3 8.3.1 8.3.2 8.3.3 8.3.4 8.4 8.4.1 p(x) = 1 Z ψ 1,2(x 1, x 2 )ψ 2,3 (x 2, x 3 ) ψ N 1,N (x N 1, x N ) (8.7) 63
x n p(x n ) p(x n ) = p(x) x 1 x n 1 x n+1 x N = 1 Z ψ n 1,n (x n 1, x n ) ψ 2,3 (x 2, x 3 ) ψ 1,2 (x 1, x 2 ) x n 1 x 2 x 1 ψ n,n+1 (x n, x n+1 ) ψ N 1,N (x N 1, x N ) x n+1 x N 1 Z µ α(x n )µ β (x n ) (8.8) µ α (x 2 ) = ψ 1,2 (x 1, x 2 ) (8.9) x 1 µ α (x n ) = ψ n 1,n (x n 1, x n )µ α (x n 1 ) (8.10) x n 1 µ α µ β 8.4.2 8.4.3 x p(x) = f s (x s ) (8.11) s x i f s x i f s 2 8.4.4 x p(x) p(x) = F s (x, X s ) (8.12) s ne(x) 64
ne(x) x F s (x, X s ) = f s (x, x 1,, x M )G 1 (x 1, X s1 ) G M (x M, X sm ) (8.13) F s p(x) = F s (x, X s ) s ne(x) X s = µ fs x(x) (8.14) s ne(x) µ fs x(x) F s (x, X s ) (8.15) X s µ µ fs x(x) = = x 1 x M f s (x, x 1,, x M ) x 1 x M f s (x, x 1,, x M ) x m ne( f s )\x x m ne( f s )\x G m (x m, X sm ) X sm µ xm f s (x m ) (8.16) µ xm f s (x m ) G m (x m, X sm ) (8.17) X sm G m (x m, X sm ) = F l (x m, X lm ) (8.18) f l ne(x m )\ f s µ xm f s (x m ) = = f l ne(x m )\ f s F l (x m, X lm ) X lm f l ne(x m )\ f s µ fl x m (x m ) (8.19) µ fs x(x) F G 8.46-48 8.4.5 max-sum x max p(x max ) = max p(x) (8.20) x 65
max p(x) = 1 x Z max x 1 max[ψ 1,2 (x 1, x 2 ) ψ N 1,N (x N 1, x N )] x N = 1 [ [ [ ] ]] Z max max ψ 1,2 (x 1, x 2 ) max ψ N 1,N (x N 1, x N ) x 1 x 2 x N (8.21) max p(x) = max x x = f s ne(x) f s ne(x) µ fs x(x) µ fs x(x) max X s F s (x, X s ) [ ] max F s (x, X s ) X s = max x 1 x M f s (x, x 1,, x M ) = max x 1 x M f s (x, x 1,, x M ) x m ne( f s )\x x m ne( f s )\x [ ] max G m (x m, X sm ) X sm f l ne(x m )\ f s µ fl x m (x m ) (8.22) (8.4.4 G 8.98 8.4.6 8.4.7 9 EM 66
9.1 K-means {x 1,, x N } K K 2 r nk {0, 1}(k = 1,, K) µ k (k = 1,, K) J = k=1 K r nk x n µ k 2 (9.1) r nk n 1 k r nk µ k µ k r nk 1 k = arg min j x n µ j 2 r nk = 0 J µ k = 2 r nk (x n µ k ) = 0 µ k = (9.2) n r nk x n n r nk (9.3) K-means 9.1.1 9.2 0 π k 1 K π k = 1 (9.4) k=1 {π k } K p(x) = π k N(x µ k, Σ k ) (9.5) k=1 67
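1-of-K, $K$, 2, $\mathbf{z}$, $\mathbf{z}$, $\mathbf{x}$
\[ p(z_k = 1) = \pi_k, \qquad p(\mathbf{x} \mid z_k = 1) = \mathcal{N}(\mathbf{x} \mid \boldsymbol{\mu}_k, \boldsymbol{\Sigma}_k) \tag{9.6} \]
\[ p(\mathbf{x}) = \sum_{\mathbf{z}} p(\mathbf{z})\, p(\mathbf{x} \mid \mathbf{z}) = \sum_{k=1}^{K} \pi_k\, \mathcal{N}(\mathbf{x} \mid \boldsymbol{\mu}_k, \boldsymbol{\Sigma}_k) \tag{9.7} \]
$p(\mathbf{x}, \mathbf{z})$, $\mathbf{x}$, $\mathbf{z}$
\[ \gamma(z_k) \equiv p(z_k = 1 \mid \mathbf{x}) = \frac{p(z_k = 1)\, p(\mathbf{x} \mid z_k = 1)}{\sum_{j=1}^{K} p(z_j = 1)\, p(\mathbf{x} \mid z_j = 1)} = \frac{\pi_k\, \mathcal{N}(\mathbf{x} \mid \boldsymbol{\mu}_k, \boldsymbol{\Sigma}_k)}{\sum_{j=1}^{K} \pi_j\, \mathcal{N}(\mathbf{x} \mid \boldsymbol{\mu}_j, \boldsymbol{\Sigma}_j)} \tag{9.8} \]
9.2.1
$\{\mathbf{x}_1, \dots, \mathbf{x}_N\}$
\[ \ln p(\mathbf{X} \mid \boldsymbol{\pi}, \boldsymbol{\mu}, \boldsymbol{\Sigma}) = \sum_{n=1}^{N} \ln \left\{ \sum_{k=1}^{K} \pi_k\, \mathcal{N}(\mathbf{x}_n \mid \boldsymbol{\mu}_k, \boldsymbol{\Sigma}_k) \right\} \tag{9.9} \]
$\boldsymbol{\mu}_j = \mathbf{x}_n$
\[ \mathcal{N}(\mathbf{x}_n \mid \mathbf{x}_n, \sigma_j^2 \mathbf{I}) = \frac{1}{(2\pi)^{D/2}} \frac{1}{\sigma_j^{D}} \tag{9.10} \]
$\sigma_j \to 0$
9.2.2 EM
$\boldsymbol{\mu}_k$, 0
\[ 0 = \sum_{n=1}^{N} \gamma(z_{nk})\, \boldsymbol{\Sigma}_k^{-1} (\mathbf{x}_n - \boldsymbol{\mu}_k), \qquad \gamma(z_{nk}) = \frac{\pi_k\, \mathcal{N}(\mathbf{x}_n \mid \boldsymbol{\mu}_k, \boldsymbol{\Sigma}_k)}{\sum_{j=1}^{K} \pi_j\, \mathcal{N}(\mathbf{x}_n \mid \boldsymbol{\mu}_j, \boldsymbol{\Sigma}_j)} \tag{9.11} \]
68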
Σ k µ k = N k = 1 N k γ(z nk )x n γ(z nk ) (9.12) Σ k 0 Σ k = 1 N k γ(z nk )(x n µ k )(x n µ k ) T (9.13) π k K ln p(x π, µ, Σ) + λ π k 1 (9.14) 0 = k=1 N(x n µ k, Σ k ) Kj=1 π j N(x n µ j, Σ j ) + λ (9.15) π k = N k N EM 1. µ k M k π k 2. E (9.16) γ(z nk ) = π k N(x n µ k, Σ k ) Kj=1 π j N(x n µ j, Σ j ) (9.17) 3. M µ new k = Σ new k = 1 N k 1 N k γ(z nk )x n γ(z nk )(x n µ new k )(x n µ new π new k = N k N N k = γ(z nk ) (9.18) 69 k ) T
4. ln p(x π, µ, Σ) = K ln π k N(x µ k, Σ k ) 2 k=1 (9.19) 9.2.1 9.3 EM X Z θ ln p(x θ) = ln p(x, Z θ) Z (9.20) EM p(x, Z θ) EM X Z p(x, Z θ) p(x θ) θ 1. θ old 2. E p(z X, θ old ) 3. M θ new θ new = arg max Q(θ, θ old ) θ Q(θ, θ old ) = p(z X, θ old ) ln p(x, Z θ) (9.21) Z 4. θ old θ new 2 9.3.1 9.3.2 K-means 9.3.3 70
9.3.4 EM 9.4 EM X, Z θ p(x, Z θ) p(x θ) = p(x, Z θ) (9.22) Z EM p(x θ) p(x, Z θ) q(z) ln p(x θ) = L(q, θ) + KL(q p) { } p(x, Z θ) L(q, θ) = q(z) ln q(z) Z { } p(z X, θ) KL(q p) = q(z) ln q(z) Z (9.23) ln p(x, Z θ) = ln p(z X, θ) + ln p(x θ) (9.24) KL(q p) KL KL(q p) 0 q = p EM = ln p(x θ old ) { p(x, Z θ p(z X, θ old old } ) ) ln p(z X, θ old ) Z { p(x, Z θ p(z X, θ old new } ) ) ln p(z X, θ old ) Z { p(x, Z θ p(z X, θ old new } ) { p(z X, θ ) ln p(z X, θ old new } ) ) ln p(z X, θ old ) p(z X, θ old ) Z Z = ln p(x θ new ) (9.25) q(z) KL 2 θ new 3 KL 71
10 10.1 9 Z X p(x, Z) q(z) ln p(x) = L(q) + KL(q p) { p(x, Z) L(q) = q(z) ln q(z) KL(q p) = q(z) ln { p(z X) q(z) } dz } dz (10.1) 10.1.1 Z Z i (i = 1,, M) q(z) = M q i (Z i ) (10.2) i=1 L(q) q j (Z j ) q j (Z j ) q j L(q) = = = q i ln p(x, Z) ln q i dz i i q j ln p(x, Z) q i dz i i j dz j q j ln q j dz j q i ln q i dz i i j q j ln p(x, Z j )dz j q j ln q j dz j + const (10.3) ln p(x, Z j ) = E i j [ln p(x, Z)] + const E i j [ln p(x, Z)] = ln p(x, Z) q i dz i (10.4) const p(x, Z j ) L(q) q j (Z j ) L(q) q j (Z j ) p(x, Z j ) KL i j 72
q j (Z j ) = p(x, Z j ) q j (Z j) ln q j (Z j) = E i j [ln p(x, Z)] + const (10.5) q j (Z j) q j (Z j) = exp(e i j [ln p(x, Z)]) exp(ei j [ln p(x, Z)])dZ j (10.6) 10.1.2 KL KL(p q) ln p(x) 10.1.3 10.1.4 Z m p(m) q(z, m) = q(z m)q(m) { } p(z, m X) ln p(x) = L q(z m)q(m) ln q(z m)q(m) m Z { } p(z, X, m) L = q(z m)q(m) ln q(z m)q(m) L q(m) m Z (10.7) q(m) p(m) exp{l m } { } p(z, X m) L m = q(z m) ln q(z m) Z (10.8) { } p(z, X, m) f = q(z m) ln + λ q(z m)q(m) q(m) 1 (10.9) Z m 73
q(m) f q(m) { } p(z, X, m) = q(z m) ln q(z m)q(m) Z = ln p(m) ln q(m) + q(z m) ln Z q(z m) + λ Z { p(z, X m) q(z m) } 1 + λ = 0 (10.10) 10.2 X = {x 1,, x N } Z = {z 1,, z N } z n (k = 1,, K) 1 π Z p(z π) = N K k=1 π z nk k (10.11) p(x Z, µ, Λ) = N K k=1 N(x n µ k, Λ 1 k )z nk (10.12) µ, Λ, π π m 0 = 0 p(π) = Dir(π α 0 ) = C(α 0 ) K k=1 π α 0 1 k p(µ, Λ) = p(µ Λ)p(Λ) K = N(µ k m 0, (β 0 Λ) 1 )W(Λ k W 0, ν 0 ) (10.13) k=1 10.2.1 p(x, Z, π, µ, Λ) = p(x Z, µ, Λ)p(Z π)p(π)p(µ Λ)p(Λ) (10.14) 74
q(z, π, µ, Λ) = q(z)q(π, µ, Λ) (10.15) (10.9) ln q (Z) = E π,µ,λ [ln p(x, Z, π, µ, Λ)] + const = E π [ln p(z π)] + E µ,λ [ln p(x Z, µ, Λ)] + const (10.16) ln q (Z) = K z nk ln ρ nk + const k=1 ln ρ nk = E[ln π k ] + 1 2 E[ln Λ k ] D 2 ln(2π) 1 2 E µ k,λ k [(x n µ k ) T Λ k (x n µ k )] (10.17) q (Z) N K k=1 ρ z nk nk (10.18) q (Z) = r nk = N K k=1 r z nk nk ρ nk Kj=1 ρ n j (10.19) N K Z k=1 N K nk = ρ nk (10.20) ρ z nk k=1 q (Z) E[z nk ] = r nk (10.21) N k = x k = S k = 1 r nk N k 1 N k r nk x n r nk (x n x k )(x n x k ) T (10.22) 75
q(π, µ, Λ) (10.9) ln q (π, µ, Λ) = ln p(π) + + K k=1 K ln p(µ k, Λ k ) + E Z [ln p(z π)] k=1 E[z nk ] ln N(x n µ k, Λ 1 k ) + const (10.23) K q (π, µ, Λ) = q (π) q (µ k, Λ k ) (10.24) k=1 π K K ln q (π) = (α 0 1) ln π k + r nk ln π k + const (10.25) k=1 k=1 q (π) = Dir(π α) (10.26) α α k = α 0 + N k (10.27) q (µ k, Λ k ) q (µ k, Λ k ) = N(µ k m k, (β k Λ k ) 1 )W(Λ k W k, ν k ) (10.28) β k = β 0 + N k m k = 1 (β 0 m 0 + N k x k ) β k W 1 k = W 1 0 + N k S k + β 0N k β 0 + N k ( x k m 0 )( x k m 0 ) T ν k = ν 0 + N k (10.29) E[z nk ] = r nk E µk,λ k [(x n µ k ) T Λ k (x n µ k )] = Dβ 1 k + ν k (x n m k ) T W k (x n m k ) D ( ) νk + 1 i ln Λ k E[ln Γ k ] = ψ + D ln 2 + ln W k 2 i=1 ln π k E[ln π k ] = ψ(α k ) ψ α k (10.30) k 76
10.2.2 10.2.3 ˆx ẑ p( ˆx X) = p( ˆx ẑ, µ, Λ)p(ẑ π)p(π, µ, Λ X)dπdµdΛ (10.31) ẑ K p( ˆx X) = π k N( ˆx µ k, Λ 1 )p(π, µ, Λ X)dπdµdΛ (10.32) k=1 p(π, µ, Λ X) q(π)q(µ, Λ) p( ˆx X) K π k N( ˆx µ k, Λ 1 )q(π)q(µ k, Λ k )dπdµ k dλ k k=1 1ˆα K α k St( ˆx m k, L k, ν k + 1 D) (10.33) k=1 t L k = (ν k + 1 D)β k 1 + β k W k (10.34) 10.2.4 10.2.5 10.3 77
10.4 10.5 f (x) = exp( x) x x = ξ η = exp( ξ) y(x) = f (ξ) + f (ξ)(x ξ) (10.35) y(x) = exp( ξ) exp( ξ)(x ξ) (10.36) y(x, η) = ηx η + η ln( η) (10.37) η f (x) y(x, η) f (x) = max{ηx η + η ln( η)} (10.38) η y = ηx g(η) g(η) = max x {ηx f (x)} (10.39) x y f (x) = max{ηx g(η)} (10.40) η 10.6 10.6.1 t N p(t) = p(t w)p(w)dw = p(t n w) p(w)dw (10.41) 78
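$$p(t \mid w) = \sigma(a)^{t} \{1 - \sigma(a)\}^{1-t} = \left( \frac{1}{1 + e^{-a}} \right)^{t} \left( 1 - \frac{1}{1 + e^{-a}} \right)^{1-t} = e^{at} \frac{e^{-a}}{1 + e^{-a}} = e^{at} \sigma(-a), \qquad a = w^{T} \phi \tag{10.42}$$
$$\sigma(z) \geq \sigma(\xi) \exp\{ (z - \xi)/2 - \lambda(\xi)(z^{2} - \xi^{2}) \}, \qquad \lambda(\xi) = \frac{1}{2\xi} \left[ \sigma(\xi) - \frac{1}{2} \right] \tag{10.43}$$
$$p(t \mid w) = e^{at} \sigma(-a) \geq e^{at} \sigma(\xi) \exp\{ -(a + \xi)/2 - \lambda(\xi)(a^{2} - \xi^{2}) \} \tag{10.44}$$
$$p(\mathbf{t}, w) = p(\mathbf{t} \mid w) p(w) \geq h(w, \xi) p(w) \tag{10.45}$$
$\xi$ $\{\xi_n\}$
$$h(w, \xi) = \prod_{n=1}^{N} \sigma(\xi_n) \exp\{ w^{T} \phi_n t_n - (w^{T} \phi_n + \xi_n)/2 - \lambda(\xi_n)([w^{T} \phi_n]^{2} - \xi_n^{2}) \} \tag{10.46}$$
$$\ln\{ p(\mathbf{t} \mid w) p(w) \} \geq \ln p(w) + \sum_{n=1}^{N} \{ \ln \sigma(\xi_n) + w^{T} \phi_n t_n - (w^{T} \phi_n + \xi_n)/2 - \lambda(\xi_n)([w^{T} \phi_n]^{2} - \xi_n^{2}) \} \tag{10.47}$$
$p(w)$ $w$
$$-\frac{1}{2} (w - m_0)^{T} S_0^{-1} (w - m_0) + \sum_{n=1}^{N} \{ w^{T} \phi_n (t_n - 1/2) - \lambda(\xi_n) w^{T} (\phi_n \phi_n^{T}) w \} + \mathrm{const} \tag{10.48}$$
$w$
$$q(w) = N(w \mid m_N, S_N), \qquad m_N = S_N \left( S_0^{-1} m_0 + \sum_{n=1}^{N} (t_n - 1/2) \phi_n \right), \qquad S_N^{-1} = S_0^{-1} + 2 \sum_{n=1}^{N} \lambda(\xi_n) \phi_n \phi_n^{T} \tag{10.49}$$
$q(w)$ 79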
10.7 {ξ n } ln p(t) = ln p(t w)p(w) ln h(w, ξ)p(w)dw = L(ξ) (10.50) EM ξ old Q(ξ, ξ old ) = Q(ξ, ξ old ) = E[ln{h(w, ξ)p(w)}] (10.51) { ln σ(ξn ) ξ n /2 λ(ξ n )(ϕ T n E[ww T ]ϕ n ξn) } 2 + const (10.52) const ξ ξ n 0 λ(ξ) λ (ξ) 0 L(ξ) 0 = λ (ξ n )(ϕ T E[ww T ]ϕ n ξ 2 n) (10.53) (ξ new n ) 2 = ϕ T E[ww T ]ϕ n = ϕ T (S N + m N m T N )ϕ n (10.54) L(ξ) = 1 2 ln S N S 0 + + 1 2 mt N S 1 N m N 1 2 mt 0 S 1 0 m 0 { ln σ(ξ n ) 1 2 ξ n + λ(ξ n )ξ 2 n } (10.55) 10.7.1 w p(w α) = N(w 0, α 1 I) (10.56) α p(α) = Gam(α a 0, b 0 ) (10.57) 80
p(t) = q(w, α) (10.152) p(w, α, t)dwdα p(w, α, t) = p(t w)p(w α)p(α) (10.58) ln p(t) = L(q) + KL(q p) { p(w, α, t) L(q) = q(w, α) ln q(w, α) KL(q p) = q(w, α) ln { p(w, α t) q(w, α) } dwdα } dwdα (10.59) ln p(t) L(q) L(q, ξ) { } h(w, ξ)p(w α)p(α) = q(w, α) ln dwdα (10.60) q(w, α) (10.9) q(w) ln q(w) = E[α] 2 wt w + q(α) q(w, α) = q(w)q(α) (10.61) ln q(w) = E α [ln{h(w, ξ)p(w α)p(α)}] + const = ln h(w, ξ) + E α [ln p(w α)] + const (10.62) { (tn 1/2)w T ϕ n λ(ξ n )w T ϕ n ϕ T n w } + const (10.63) q(w) = N(w µ N, Σ N ) Σ 1 N µ N = (t n 1/2)ϕ n Σ 1 N = E[α]I + 2 λ(ξ n )ϕ n ϕ T n (10.64) ln q(α) = E w [ln p(w α)] + ln p(α) + const = M 2 ln α α 2 E[wT w] + (a 0 1) ln α b 0 α + const (10.65) 81
q(α) = Gam(α a N, b N ) a N = a 0 + M 2 b N = b 0 + 1 2 E w[w T w] (10.66) ξ n L(q, ξ) ξ α L(q, ξ) = q(w) ln h(w, ξ)dw + const (10.67) (ξ new n ) 2 = ϕ T n (Σ N + µ N µ T N )ϕ n (10.68) q(w), q(α), ξ E[α] = a N b N E[ww T ] = Σ N + µ N µ T N (10.69) 10.8 EP p(z) KL(p q) q(z) q(z) q(z) = h(z)g(η) exp{η T u(z)} (10.70) η KL KL(p q) = ln g(η) η T E p(z) [u(z)] + const (10.71) η 0 ln g(η) = E p(z) [u(z)] (10.72) (2.226) E q(z) [u(z)] = E p(z) [u(z)] (10.73) q(z) N(z µ, Σ) µ Σ p(z) KL 82
p(d, θ) = f i (θ) (10.74) ( f i (D, θ) ) i p(θ D) = 1 f i (θ) (10.75) p(d) i EP p(d) = f i (θ)dθ (10.76) q(θ) = 1 Z i i f i (θ) (10.77) 1. f j (θ) 2. q \ j (θ) = q(θ) f j (θ) Z j = q \ j (θ) f j (θ)dθ (10.78) q new (θ) ( f j (θ)q \ j (θ) ) KL qnew (θ) Z j (10.79) f j (θ) 3. f q new (θ) j (θ) = Z j q \ j (θ) (10.80) 10.8.1 83
10.8.2 EP 11 p(z) f (z) E[ f ] = f (z)p(z)dz (11.1) p(z) z (l) (l = 1,, L) ˆ f = 1 L L f (z (l) ) (11.2) l=1 ˆ f = 1 L L f (z (l) ) l=1 var[ ˆ f ] = 1 L E [ ( f E[ f ]) 2] (11.3) 11.1 11.1.1 (0, 1) z y = f (z) y y p(y) = p(z) dz dy (11.4) y z z = h(y) y p(ŷ)dŷ (11.5) p(y) y y = h 1 (z) 84
11.1.2 p(z) p(z) z p(z) Z p(z) = 1 p(z) (11.6) Z p p(z) Z p 1. q(z) 2. k z kq(z) p(z) 3. z 0 q(z) 4. u 0 [0, kq(z 0 )] 5. u 0 p(z 0 ) p(z) 11.1.3 11.1.4 z p(z) z E[ f ] 1 L L p(z (l) ) f (z (l) ) (11.7) l=1 z q(z) E[ f ] = f (z)p(z)dz = f (z) p(z) q(z) q(z)dz 1 L L l=1 p(z (l) ) q(z (l) ) f (z(l) ) (11.8) 85
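$p(z) = \tilde{p}(z)/Z_p$ $q(z) = \tilde{q}(z)/Z_q$
$$E[f] = \int f(z) p(z) \, dz = \frac{Z_q}{Z_p} \int f(z) \frac{\tilde{p}(z)}{\tilde{q}(z)} q(z) \, dz \simeq \frac{Z_q}{Z_p} \frac{1}{L} \sum_{l=1}^{L} \tilde{r}_l f(z^{(l)}) \tag{11.9}$$
11.1.5 SIR 11.1.6 EM EM M $\theta$
$$Q(\theta, \theta^{\mathrm{old}}) = \int p(Z \mid X, \theta^{\mathrm{old}}) \ln p(Z, X \mid \theta) \, dZ \tag{11.10}$$
$p(Z \mid X, \theta^{\mathrm{old}})$ $\{Z^{(l)}\}$
$$Q(\theta, \theta^{\mathrm{old}}) \simeq \frac{1}{L} \sum_{l=1}^{L} \ln p(Z^{(l)}, X \mid \theta) \tag{11.11}$$
EM $p(Z \mid X)$ $p(\theta \mid Z, X)$ IP $p(\theta)$ $\theta^{(l)}$ $p(Z \mid \theta^{(l)}, X)$ $Z^{(l)}$ (I) $\theta$
$$p(\theta \mid X) \simeq \frac{1}{L} \sum_{l=1}^{L} p(\theta \mid Z^{(l)}, X) \tag{11.12}$$
(P) 86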
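11.2 $p(z) = \tilde{p}(z)/Z_p$ Metropolis
1. $z^{(0)}$
2. $z^{(\tau)}$ $q(z \mid z^{(\tau)})$ $z^{(\tau+1)}$ $q(z_A \mid z_B) = q(z_B \mid z_A)$
3. $$A(z^{*}, z^{(\tau)}) = \min\left( 1, \frac{\tilde{p}(z^{*})}{\tilde{p}(z^{(\tau)})} \right) \tag{11.13}$$
$z^{*}$ $z^{(\tau+1)}$ $z^{(\tau+1)} = z^{(\tau)}$
11.2.1
$$p(z^{(m+1)} \mid z^{(1)}, \dots, z^{(m)}) = p(z^{(m+1)} \mid z^{(m)}) \tag{11.14}$$
$p(z^{(0)})$ $T_m(z^{(m)}, z^{(m+1)}) \equiv p(z^{(m+1)} \mid z^{(m)})$ $m$
$$p(z^{(m+1)}) = \sum_{z^{(m)}} p(z^{(m+1)} \mid z^{(m)}) p(z^{(m)}) \tag{11.15}$$
$$p^{*}(z) = \sum_{z'} T(z', z) p^{*}(z') \tag{11.16}$$
$p^{*}(z)$ $p^{*}(z)$
$$p^{*}(z) T(z, z') = p^{*}(z') T(z', z) \tag{11.17}$$
$p^{*}(z)$
$$\sum_{z'} p^{*}(z') T(z', z) = \sum_{z'} p^{*}(z) T(z, z') = p^{*}(z) \sum_{z'} p(z' \mid z) = p^{*}(z) \tag{11.18}$$
87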
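11.2.2 Metropolis-Hastings $p(z) = \tilde{p}(z)/Z_p$
1. $z^{(0)}$
2. $z^{(\tau)}$ $q(z \mid z^{(\tau)})$ $z^{(\tau+1)}$
3. $$A(z^{*}, z^{(\tau)}) = \min\left( 1, \frac{\tilde{p}(z^{*}) \, q(z^{(\tau)} \mid z^{*})}{\tilde{p}(z^{(\tau)}) \, q(z^{*} \mid z^{(\tau)})} \right) \tag{11.19}$$
$z^{*}$ $z^{(\tau+1)}$ $z^{(\tau+1)} = z^{(\tau)}$ Metropolis
$$T(z, z') = q(z' \mid z) A(z', z) \tag{11.20}$$
$$\begin{aligned}
p(z) q(z' \mid z) A(z', z) &= \min\left( p(z) q(z' \mid z), \; p(z') q(z \mid z') \right) \\
&= \min\left( p(z') q(z \mid z'), \; p(z) q(z' \mid z) \right) \\
&= p(z') q(z \mid z') A(z, z')
\end{aligned} \tag{11.21}$$
Metropolis-Hastings $p(z)$
11.3 $p(z) = p(z_1, \dots, z_M)$
1. $z^{(0)}$
2. $\tau = 1, \dots, T$
$$\begin{aligned}
z_1^{(\tau+1)} &\sim p(z_1 \mid z_2^{(\tau)}, \dots, z_M^{(\tau)}) \\
&\;\;\vdots \\
z_j^{(\tau+1)} &\sim p(z_j \mid z_1^{(\tau+1)}, \dots, z_{j-1}^{(\tau+1)}, z_{j+1}^{(\tau)}, \dots, z_M^{(\tau)}) \\
&\;\;\vdots \\
z_M^{(\tau+1)} &\sim p(z_M \mid z_1^{(\tau+1)}, \dots, z_{M-1}^{(\tau+1)})
\end{aligned}$$
88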
q(z z) = p(z k z \k) Metropolis-Hastings z \k z z k p(z )q(z z ) p(z)q(z z) = p(z k z \k )p(z \k )p(z k z \k ) p(z k z \k )p(z \k )p(z k z \k) = 1 (11.22) A(z, z) = 1 z \k = z \k 11.4 p(z) u 1/Z p 0 u p(z) ˆp(z, u) = (11.23) 0 0 u p(z) z 0 u p(z) u u {z : p(z) > u} z 11.5 11.5.1 11.5.2 11.6 89
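12 12.1 12.1.1 $D$ $\{x_n\}$ $M < D$ $D$ $u_1$ $u_1^{T} x_n$
$$\bar{x} = \frac{1}{N} \sum_{n=1}^{N} x_n \tag{12.1}$$
$u_1^{T} x_n$
$$\frac{1}{N} \sum_{n=1}^{N} \{ u_1^{T} x_n - u_1^{T} \bar{x} \}^{2} = u_1^{T} S u_1 \tag{12.2}$$
$$S = \frac{1}{N} \sum_{n=1}^{N} (x_n - \bar{x})(x_n - \bar{x})^{T} \tag{12.3}$$
$u$
$$u_1^{T} S u_1 + \lambda_1 (1 - u_1^{T} u_1) \tag{12.4}$$
$$S u_1 = \lambda_1 u_1 \tag{12.5}$$
$u_1^{T}$
$$u_1^{T} S u_1 = \lambda_1 \tag{12.6}$$
$u_1$ $S$
12.1.2 $D$
$$u_i^{T} u_j = \delta_{ij} \tag{12.7}$$
90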
x n = D α ni u i (12.8) i=1 x n = D (x T n u i )u i (12.9) i=1 M x n = M D z ni u i + b i u i (12.10) i=1 i=m+1 {z ni } {b i } J = 1 N x n x n 2 (12.11) {u i }, {z ni }, {b i } {z ni } b i J x n z n j = x T n u j b j = x T u j (12.12) D x n x n = {(x n x) T u i }u i (12.13) i=m+1 J = 1 N D D (x T n u i x T u i ) 2 = u T i Su i (12.14) i=m+1 i=m+1 u i (i > M) S D J = λ i (12.15) i=m+1 91
12.1.3 x n x n = x = D ( x T u i )u i (12.16) i=1 M (x T n u i )u i + i=1 = x + D ( x T n u i )u i i=m+1 M (x T n u i x T n u i )u i (12.17) D M i=1 ρ i j = 1 N (x ni x i ) ρ i (x n j x j ) ρ j (12.18) SU = UL U L y n = L 1/2 U T (x n x) (12.19) 0 1 N y n yn T = I (12.20) 12.1.4 D X n (x n x) T N D S = N 1 X T X 1 N XT Xu i = λ i u i (12.21) D D N N 1 D N + 1 0 0 X 1 N XXT (Xu i ) = λ(xu i ) (12.22) N 92
12.2 M z p(z) x p(x z) p(z) = N(z 0, I) p(x z) = N(x W z + µ, σ 2 I) (12.23) p(x) = p(x z)p(z)dz = N(x µ, C) C = W W T + σ 2 I (12.24) p(x) µ, W, σ 2 W W W T R W = W R p(z x) = N(z M 1 W T (x µ), σ 2 M 1 ) M = W T W + σ 2 I (12.25) 12.2.1 X = {x n } ln p(x µ, W, σ 2 ) = ln p(x n W, µ, σ 2 ) = ND 2 ln(2π) N 2 ln C 1 2 (x n µ) T C 1 (x n µ) (12.26) µ µ ML = 1 N x n (12.27) W W ML = U M (L M σ 2 I) 1/2 R (12.28) 93
U M D M S M M L M M M W σ 2 σ 2 ML = 1 D M D i=m+1 λ i (12.29) σ 2 0 (12.24) E[z x] = M 1 WML T (x x) (12.30) E[z x] (WML T W ML) 1 WML T (x x) = L M UM T (x x) (12.31) A {x Ax = 0} R M R D 12.2.2 EM EM EM ln p(x, Z µ, W, σ 2 ) = {ln p(x n z n ) + ln p(z n )} (12.32) Q(θ, θ old ) = E[ln p(x, Z µ, W, σ 2 )] { D = 2 ln(2πσ2 ) + 1 2 Tr(E[z nzn T ]) + + 1 2σ x 2 n µ 2 1 σ 2 E[z]T W T (x n µ) 1 2σ Tr(E[z nz T 2 n ]W T W ) + M } 2 ln(2π) (12.33) 94
µ E[z n ] = M 1 old W T old (x n x) E[z n z T n ] = σ 2 old M 1 old + E[z n]e[z n ] T (12.34) Q 1 W new = (x n x)e[z n ] T E[z n zn T ] σ new = 1 ND { xn x 2 2E[z n ] T Wnew(x T n x) + Tr(E[z n z T n ]W T neww new ) } (12.35) 12.2.3 12.2.4 D Ψ p(x z) = N(x W z + µ, Ψ) (12.36) p(x) = N(x µ, C) C = W W T + Ψ (12.37) µ x W, Ψ EM E E[z n ] = G old W T old Ψ 1 old (x n x) E[z n z T n ] = G old + E[z n ]E[z n ] T G = (I + W T Ψ 1 W ) 1 (12.38) W new = (x n x)e[z n ] T E[z n zn T ] Ψ new = diag S W 1 new N 1 E[z n ](x n x) T (12.39) 95
12.3 {x n } n x n = 0 M ϕ(x) M M n ϕ(x n ) = 0 C = 1 N ϕ(x n )ϕ(x n ) T (12.40) i = 1,, M C 1 N Cv i = λ i v i (12.41) ϕ(x n ){ϕ(x n ) T v i } = λ i v i (12.42) v i ϕ(x n ) v i = a in ϕ(x n ) (12.43) ({ϕ(x n )} N a in ) 1 N ϕ(x n )ϕ(x n ) T a im ϕ(x m ) = λ i a in ϕ(x n ) (12.44) ϕ(x l ) T ϕ(x n ) T ϕ(x n ) = k(x n, x m ) 1 N m=1 m=1 k(x l, x n ) a im k(x n, x m ) = λ i a in k(x l, x n ) (12.45) 0 1 = v T i v i = m=1 K 2 a i = λ i NKa i (12.46) Ka i = λ i Na i (12.47) a in a im ϕ(x n ) T ϕ(x m ) = a T i Ka i = λ i Na T i a i (12.48) 96
x i y i (x) = ϕ(x) T v i = a in ϕ(x) T ϕ(x) = a in k(x, x n ) (12.49) ϕ(x n ) 0 ϕ(x n ) = ϕ(x n ) 1 N ϕ(x l ) (12.50) l=1 K nm = ϕ(x n ) T ϕ(x m ) = (x n ) T ϕ(x m ) 1 N 1 N (x n ) T ϕ(x l ) l=1 (x l ) T ϕ(x m ) + 1 (x N 2 j ) T ϕ(x l ) l=1 j=1 l=1 = k(x n, x m ) 1 N 1 N k(x l, x m ) l=1 k(x n, x l ) + 1 N 2 l=1 k(x j, x l ) (12.51) j=1 l=1 K = K I N K K1 N + 1 N K1 N (12.52) 1 N 1/N 12.4 12.4.1 12.4.2 97
12.4.3 13 13.1 N p(x 1,, x N ) = p(x 1 ) p(x N x 1,, x n 1 ) (13.1) n=2 N p(x 1,, x N ) = p(x 1 ) p(x n x n 1 ) (13.2) n n 1 p(x n x 1,, x n 1 ) = p(x n x n 1 ) (13.3) n=2 N p(x 1,, x N ) = p(x 1 )p(x 2 x 1 ) p(x n x n 1, x n 2 ) (13.4) n=3 x n z n N N p(x 1,, x N, z 1,, z N ) = p(z 1 ) p(z n z n 1 ) p(x n z n ) (13.5) n=2 z n z n 1 z n+1 z n+1 z n 1 z n (13.6) 98
13.2 z n K A jk p(z nk = 1 z n 1, j = 1) p(z n z n 1, A) = π p(z 1 π) = K k=1 K k=1 K j=1 A z n 1, jz nk jk (13.7) π z 1k k (13.8) z n x n ϕ p(x n z n, ϕ) = K p(x n ϕ k ) z nk (13.9) k=1 A ϕ n N N p(x, Z θ) = p(z 1 π) p(z n z n 1, A) p(x m z m, ϕ) (13.10) n=2 θ = {π, A, ϕ} A k j A jk left-to-righthmm m=1 13.2.1 HMM X = {x 1,, x N } HMM EM Q(θ, θ old ) = p(z X, θ old ) ln p(x, Z θ) = Z p(z X, θ old ) ln p(z 1 π) + ln p(z n z n 1, A) + ln p(x n z n, ϕ) Z n=2 (13.11) γ(z n ) = p(z n X, θ old ) ξ(z n 1, z n ) = p(z n 1, z n X, θ old ) (13.12) γ(z nk ) = p(z nk = 1 X, θ old ) ξ(z n 1, j, z nk ) = p(z n 1, j = z nk = 1 X, θ old ) (13.13) 99
Q(θ, θ old ) = + K K K γ(z 1k ) ln π k + ξ(z n 1, j, z nk ) ln A jk k=1 n=2 j=1 k=1 K γ(z nk ) ln p(x n ϕ k ) (13.14) k=1 M γ(z n ) ξ(z n 1, z n ) θ = {π, A, ϕ} Q(θ, θ old ) π k = A jk = γ(z 1k ) Kj=1 γ(z 1 j ) N n=2 ξ(z n 1, jz nk ) Kl=1 N n=2 ξ(z n 1, jz nl ) (13.15) 13.2.2 EM γ(z nk ) ξ(z n 1, j, z nk ) p(x z n ) = p(x 1,, x n z n )p(x n+1,, x N z n ) p(x 1,, x n 1 x n, z n ) = p(x 1,, x n 1 z n ) p(x 1,, x n 1 z n 1, z n ) = p(x 1,, x n 1 z n 1 ) p(x n+1,, x N z n z n+1 ) = p(z n+1,, x N z n+1 ) p(x n+2,, x N z n+1 x n+1 ) = p(z n+2,, x N z n+1 ) p(x z n 1 x n ) = p(x 1,, x n 1 z n 1 )p(x n z n )p(x n+1,, x N z n ) p(x N+1 X, z N+1 = p(x N+1 z N+1 ) p(z N+1 z N, X) = p(z N+1 z N ) (13.16) γ(z n ) γ(z n ) = p(z n X) = p(x z n)p(z n ) p(x) = p(x 1,, x n, z n )p(x n+1,, x N ) = α(z n)β(z n ) p(x) p(x) (13.17) α(z n ) p(x 1,, x n, z n ) β(z n ) p(x n+1,, x N z n ) (13.18) 100
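$\alpha$, $\beta$ $\beta(z_n)$
$$\begin{aligned}
\alpha(z_n) &= p(x_1, \dots, x_n, z_n) \\
&= p(x_1, \dots, x_n \mid z_n) p(z_n) \\
&= p(x_n \mid z_n) p(x_1, \dots, x_{n-1} \mid z_n) p(z_n) \\
&= p(x_n \mid z_n) \sum_{z_{n-1}} p(x_1, \dots, x_{n-1}, z_{n-1}, z_n) \\
&= p(x_n \mid z_n) \sum_{z_{n-1}} p(x_1, \dots, x_{n-1}, z_n \mid z_{n-1}) p(z_{n-1}) \\
&= p(x_n \mid z_n) \sum_{z_{n-1}} p(x_1, \dots, x_{n-1} \mid z_{n-1}) p(z_n \mid z_{n-1}) p(z_{n-1}) \\
&= p(x_n \mid z_n) \sum_{z_{n-1}} p(x_1, \dots, x_{n-1}, z_{n-1}) p(z_n \mid z_{n-1}) \\
&= p(x_n \mid z_n) \sum_{z_{n-1}} \alpha(z_{n-1}) p(z_n \mid z_{n-1})
\end{aligned} \tag{13.19}$$
$$\alpha(z_1) = p(x_1, z_1) = p(z_1) p(x_1 \mid z_1) = \prod_{k=1}^{K} \{ \pi_k \, p(x_1 \mid \phi_k) \}^{z_{1k}} \tag{13.20}$$
$$\begin{aligned}
\beta(z_n) &= p(x_{n+1}, \dots, x_N \mid z_n) \\
&= \sum_{z_{n+1}} p(x_{n+1}, \dots, x_N, z_{n+1} \mid z_n) \\
&= \sum_{z_{n+1}} p(x_{n+1}, \dots, x_N \mid z_n, z_{n+1}) p(z_{n+1} \mid z_n) \\
&= \sum_{z_{n+1}} p(x_{n+1}, \dots, x_N \mid z_{n+1}) p(z_{n+1} \mid z_n) \\
&= \sum_{z_{n+1}} p(x_{n+2}, \dots, x_N \mid z_{n+1}) p(x_{n+1} \mid z_{n+1}) p(z_{n+1} \mid z_n) \\
&= \sum_{z_{n+1}} \beta(z_{n+1}) p(x_{n+1} \mid z_{n+1}) p(z_{n+1} \mid z_n)
\end{aligned} \tag{13.21}$$
(13.33) $n = N$ $\alpha$
$$p(z_N \mid X) = \frac{p(X, z_N) \beta(z_N)}{p(X)}$$
$\beta(z_N) = 1$ (13.33) $z_n$
$$p(X) = \sum_{z_n} \alpha(z_n) \beta(z_n) \tag{13.22}$$
$$\phantom{p(X)} = \sum_{z_N} \alpha(z_N) \tag{13.23}$$
101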
ξ(z n 1, z n ) ξ(z n 1, z n ) = p(z n 1, z X) = p(x z n 1, z n )p(z n 1, z n ) p(x) = p(x,, x n 1 z n 1 )p(x n z n )p(x n+1,, x N z n )p(z n z n 1 )p(z n 1 ) p(x) = α(z n 1 p(x n z n )p(z n z n 1 )β(z n ) p(x) p(x N+1 X) = = = = = = (13.24) p(x N+1, z N+1 X) z N+1 p(x N+1 z N+1 )p(z N+1 X) z N+1 p(x N+1 z N+1 ) p(z N+1, z N X) z N+1 z N p(x N+1 z N+1 ) p(z N+1 z N )p(z N X) z N+1 z N p(x N+1 z N+1 ) p(z N+1 z N ) p(z N, X) p(x) z N+1 z N 1 p(x N+1 z N+1 ) p(z N+1 z N )α(z N ) (13.25) p(x) z N+1 z N 13.2.3 HMM 13.2.4 α(z n ) ˆα(z n ) = p(z n x 1,, x n ) = α(z n ) p(x 1,, x n ) (13.26) c n = p(x n x 1,, x n 1 ) (13.27) n p(x 1,, x n ) = c m (13.28) m=1 102
n α(z n ) = p(z n x 1,, x n )p(x 1,, x n ) = c m ˆα(z n) (13.29) α c n ˆα(z n ) = p(x n z n ) ˆα(z n 1 )p(z n z n 1 ) (13.30) z n 1 β ˆβ(z n ) = β(z n ) N m=n+1 c = m m=1 p(x n+1,, x N z n ) p(x n+1,, x N x 1,, x n ) (13.31) c n+1 ˆβ(z n ) = ˆβ(z n+1 )p(x n+1 z n+1 )p(z n+1 z n ) (13.32) z n+1 p(x) = N c n γ(z n ) = ˆα(z n )ˆβ(z n ) ξ(z n 1, z n ) = (c n ) 1 ˆα(z n 1 )p(x n z n )p(z n z n 1 )ˆβ(z n ) (13.33) 13.2.5 Viterbi {x 1,, x N } z n w(z n ) = max z 1,,z n 1 ln p(x 1,, x n, z 1,, z n ) (13.34) w(z n+1 ) = max z 1,,z n ln p(x 1,, x n+1, z 1,, z n+1 ) = max z 1,,z n [ ln p(x1,, x n, z 1,, z n ) + ln p(z n+1 z n ) + ln p(x n+1 z n+1 ) ] = ln p(x n+1 z n+1 ) + max z n {ln p(z n+1 z n ) + w(z n )} (13.35) w(z 1 ) = ln p(z 1 ) + ln p(x 1 z 1 ) (13.36) n = 1 max z n w(z n ) = max p(x, Z) (13.37) Z 103
13.2.6 13.3 p(z n z n 1 ) = N(z n Az n 1, Γ) p(x n z n ) = N(x n Cz n, Σ) p(z 1 ) = N(z 1 µ 0, P 0 ) (13.38) θ = {A, Γ, C, Σ, µ 0, P 0 } 13.3.1 LDS LDS Linear Dynamical System HMM ˆα(z n ) = p(z n x 1,, x n ) (13.39) ˆα(z n ) = N(z n µ n, V n ) (13.40) HMM c n ˆα(z n ) = p(x n z n ) ˆα(z n 1 )p(z n z n 1 )dz n 1 (13.41) µ n = Aµ n 1 + K n (x n CAµ n 1 ) V n = (I K n C)P n 1 c n = N(x n CAµ n 1, CP n 1 C T + Σ) (13.42) P n 1 = AV n 1 A T + Γ K n = P n 1 C T (CP n 1 C T + Σ) 1 (13.43) c 1 ˆα(z 1 ) = p(z 1 )p(x 1 z 1 ) (13.44) 104
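$$\begin{aligned}
\mu_1 &= \mu_0 + K_1 (x_1 - C \mu_0) \\
V_1 &= (I - K_1 C) P_0 \\
c_1 &= N(x_1 \mid C \mu_0, \; C P_0 C^{T} + \Sigma) \\
K_1 &= P_0 C^{T} (C P_0 C^{T} + \Sigma)^{-1}
\end{aligned} \tag{13.45}$$
$$\gamma(z_n) = p(z_n \mid X) \tag{13.46}$$
$$\gamma(z_n) = \hat{\alpha}(z_n) \hat{\beta}(z_n) = N(z_n \mid \hat{\mu}_n, \hat{V}_n) \tag{13.47}$$
$\hat{\beta}(z_n)$ $\hat{\mu}_n$, $\hat{V}_n$
$$c_{n+1} \hat{\beta}(z_n) = \int \hat{\beta}(z_{n+1}) p(x_{n+1} \mid z_{n+1}) p(z_{n+1} \mid z_n) \, dz_{n+1} \tag{13.48}$$
$$\hat{\mu}_n = \mu_n + J_n (\hat{\mu}_{n+1} - A \mu_n), \qquad \hat{V}_n = V_n + J_n (\hat{V}_{n+1} - P_n) J_n^{T} \tag{13.49}$$
$$J_n = V_n A^{T} (P_n)^{-1} \tag{13.50}$$
$$\begin{aligned}
\xi(z_{n-1}, z_n) &= (c_n)^{-1} \hat{\alpha}(z_{n-1}) p(x_n \mid z_n) p(z_n \mid z_{n-1}) \hat{\beta}(z_n) \\
&= \frac{N(z_{n-1} \mid \mu_{n-1}, V_{n-1}) \, N(z_n \mid A z_{n-1}, \Gamma) \, N(x_n \mid C z_n, \Sigma) \, N(z_n \mid \hat{\mu}_n, \hat{V}_n)}{c_n \hat{\alpha}(z_n)}
\end{aligned} \tag{13.51}$$
$[\hat{\mu}_{n-1}, \hat{\mu}_n]^{T}$
$$\mathrm{cov}[z_{n-1}, z_n] = J_{n-1} \hat{V}_n \tag{13.52}$$
13.3.2 LDS EM $\theta = \{A, \Gamma, C, \Sigma, \mu_0, P_0\}$ $p(Z \mid X, \theta^{\mathrm{old}})$ 105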
E[z n ] = µ ˆ n E[z n z T n 1 ] = ˆV n J T n 1 + ˆµ n ˆµ T n 1 E[z n z T n ] = ˆV n + ˆµ n ˆµ T n (13.53) ln p(x, Z θ) = ln p(z 1 µ 0, P 0 ) + + ln p(z n z n 1, A, Γ) n=2 ln p(x n z n, C, Σ) (13.54) Q(θ, θ old ) = ln p(x, Z θ)p(z X, θ old )dz = E Z θ old[ln p(x, Z θ)] (13.55) µ 0 P 0 Q(θ, θ old ) = 1 [ ] 1 2 ln P 0 E Z θ old 2 (z 1 µ 0 ) T P0 1 (z 1 µ 0 ) + const (13.56) 2.3.4 µ new 0 = E[z 1 ] P new 0 = E[z 1 z T 1 ] E[z 1]E[z T 1 ] (13.57) A Γ Q(θ, θ old ) = N 1 2 ln Γ E Z θ old 1 2 (z n Az n 1 ) T Γ 1 (z n Az n 1 ) + const n=2 (13.58) A A new = E[z n zn 1 T ] E[z n 1 zn 1 T ] n=2 n=2 1 (13.59) 106
Q(θ, θ old ) = E Z θ old 1 [z(n) i z(n 1) h A ih ]Γ 1 i j 2 [z(n) j A jk z(n 1) k ] + const n=2 Q(θ, θ old ) = 1 A αβ 2 E Z θ old z(n 1) β Γ 1 α j [z(n) j A jk z(n 1) k ] n=2 + [z(n) i z(n 1) h A ih ]Γ 1 iα z(n 1) ] β = E Z θ old z(n 1) β Γ 1 α j [z(n) j A jk z(n 1) k ] A Q(θ, θ old) = E Z θ old n=2 z n 1 [Γ 1 (z n Az n 1 )] T n=2 = E Z θ old z n 1 (z n Az n 1 ) T (Γ 1 ) T = 0 (13.60) E Z θ old n=2 (z n Az n 1 )zn 1 T = 0 (13.61) n=2 z n z(n) Γ Γ new = 1 N 1 { E[zn zn T ] A new E[z n 1 zn T ] n=2 E[z n z T n 1 ](Anew ) T + A new E[z n 1 z T n 1 ](Anew ) T } (13.62) Γ 1 i j = Γ 1 iα Γ Γ 1 β j αβ Γ αβ ln Γ = Γ 1 βα (13.63) Q(θ, θ old ) = N 1 Γ 1 βα Γ αβ 2 + 1 2 E Z θ old [z(n) i z(n 1) h A T hi ]Γ 1 iα Γ 1 β j [z(n) j A jk z(n 1) k ] n=2 Γ Q(θ, θ old) = N 1 Γ 1 + E 2 Z θ old Γ 1 (z n Az n 1 )(zn T zn 1 T AT )Γ 1 = 0 (13.64) Γ = n=2 1 N 1 E Z θ old [ (zn Az n 1 )(z T n z T n 1 AT ) ] (13.65) C Σ Q(θ, θ old ) = N 2 ln Σ E Z θ 1 old (x n Cz n ) T Σ 1 (x n Cz n ) 2 + const n=2 (13.66) 107
C new = Σ new = 1 N x n E[zn T ] E[z n zn T ] 1 { E[xn x T n ] C new E[z n ]x T n x n E[z T n ](C new ) T + C new E[z n z T n ](C new ) T } (13.67) 13.3.3 LDS 13.3.4 X n p(z n X n ) L z n E[ f (z n )] = f (z n )p(z n X n )dz n = f (z n )p(z n x n, X n 1 )dz n = f (z n ) p(x n, z n X n 1 ) dz n p(x n X n 1 ) = f (zn )p(x z n )p(z n X n 1 )dz n p(xn z n )p(z n X n 1 )dz n = l l=1 f (z(l) n )p(x n z n (l) ) l l=1 p(x n z n (l) ) l w n (l) f (z n (l) ) (13.68) l=1 w n (l) p(x n z n (l) ) = Lm=1 p(x n z n (m) ) (13.69) 14 108
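14.1
$$p(x) = \sum_{z} p(x, z) = \sum_{k=1}^{K} \pi_k N(x \mid \mu_k, \Sigma_k) \tag{14.1}$$
$X = \{x_1, \dots, x_N\}$
$$p(X) = \prod_{n=1}^{N} p(x_n) = \prod_{n=1}^{N} \left[ \sum_{z_n} p(x_n, z_n) \right] \tag{14.2}$$
$x_n$ $z_n$ $h = 1, \dots, H$ $p(h)$
$$p(X) = \sum_{h=1}^{H} p(X \mid h) p(h) \tag{14.3}$$
$p(h \mid X)$
14.2 $X = \{x_1, \dots, x_N\}$ $M$ $N$ $N$ $M$ $y_m(x)$
$$y_{\mathrm{COM}}(x) = \frac{1}{M} \sum_{m=1}^{M} y_m(x) \tag{14.4}$$
$h(x)$
$$y_m(x) = h(x) + \epsilon_m(x) \tag{14.5}$$
$$E_x\left[ \{ y_m(x) - h(x) \}^{2} \right] = E_x[\epsilon_m(x)^{2}] \tag{14.6}$$
$$E_{\mathrm{AV}} = \frac{1}{M} \sum_{m=1}^{M} E_x[\epsilon_m(x)^{2}] \tag{14.7}$$
109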
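$$E_{\mathrm{COM}} = E_x\left[ \left\{ \frac{1}{M} \sum_{m=1}^{M} y_m(x) - h(x) \right\}^{2} \right] = E_x\left[ \left\{ \frac{1}{M} \sum_{m=1}^{M} \epsilon_m(x) \right\}^{2} \right] \tag{14.8}$$
0
$$E_x[\epsilon_m(x)] = 0, \qquad E_x[\epsilon_m(x) \epsilon_l(x)] = 0 \quad (m \neq l) \tag{14.9}$$
$$E_{\mathrm{COM}} = \frac{1}{M} E_{\mathrm{AV}} \tag{14.10}$$
14.3 $t_1, \dots, t_N$ $x_1, \dots, x_N$ $y(x)$ $\{-1, 1\}$ AdaBoost
1. $n = 1, \dots, N$ $\{w_n\}$ $w_n^{(1)} = 1/N$
2. $m = 1, \dots, M$
(a) $y_m(x)$
$$J_m = \sum_{n=1}^{N} w_n^{(m)} I(y_m(x_n) \neq t_n) \tag{14.11}$$
$I(y_m(x_n) \neq t_n)$ $y_m(x_n) \neq t_n$ 1 0
(b)
$$\epsilon_m = \frac{\sum_{n=1}^{N} w_n^{(m)} I(y_m(x_n) \neq t_n)}{\sum_{n=1}^{N} w_n^{(m)}}, \qquad \alpha_m = \ln\left\{ \frac{1 - \epsilon_m}{\epsilon_m} \right\} \tag{14.12}$$
(c)
$$w_n^{(m+1)} = w_n^{(m)} \exp\{ \alpha_m I(y_m(x_n) \neq t_n) \} \tag{14.13}$$
3.
$$Y_M(x) = \mathrm{sign}\left( \sum_{m=1}^{M} \alpha_m y_m(x) \right) \tag{14.14}$$
110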