diff --git a/fluents/lib/engines.py b/fluents/lib/engines.py index 821dc2b..508027d 100644 --- a/fluents/lib/engines.py +++ b/fluents/lib/engines.py @@ -19,14 +19,10 @@ def pca(a, aopt, scale='scores', mode='normal'): m, n = a.shape if m*10.>n: - v, s, ut = dot(a.T, a) - s = sqrt(s) - eigvals = s - u = u.T - vt = v.T + u, s, vt = esvd(a) else: u, s, vt = svd(a, full_matrices=0) - eigvals = (1./m)*s + eigvals = (1./m)*s T = u*s T = T[:,:aopt] P = vt[:aopt,:].T @@ -231,3 +227,29 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0): def m_shape(array): return matrix(array).shape + +def esvd(data,economy=1): + """SVD with the option of economy sized calculation + Calculate subspaces of X'X or XX' depending on the shape + of the matrix. + + Good for extreme fat or thin matrices + + Numpy supports this by setting full_matrices=0 + """ + m, n = data.shape + if m>=n: + u, s, vt = svd(dot(data.T, data)) + u = dot(data, vt.T) + v = vt.T + for i in xrange(n): + s[i] = norm(u[:,i]) + u[:,i] = u[:,i]/s[i] + else: + u, s, vt = svd(data, data.T) + v = dot(u.T, data) + for i in xrange(m): + s[i] = norm(v[i,:]) + v[i,:] = v[i,:]/s[i] + + return u, s, v