Dimensionality Reduction with Principal Component Analysis : Quiz 1

Edited by: 박상은 (CheezEun), 장성준 (junnei)

Required libraries: numpy, matplotlib.pyplot, scipy, sklearn
from sklearn.datasets import fetch_olivetti_faces

# Load the Olivetti faces dataset through scikit-learn's loader;
# `faces_all.images` is indexed further down to pick the faces to display.
faces_all = fetch_olivetti_faces()

# Load Data and show face images

import matplotlib.pyplot as plt
import numpy as np

# Keep one image per label for labels 0-9: a boolean mask over
# `faces_all.target` selects all images of that label, and `[0]` takes the
# first of them. The result is stacked into a single (10, rows, cols) array.
face_images = np.array(
    [faces_all.images[faces_all.target == label][0] for label in range(10)]
)

def face_plot(faces, H, W, cmap='gray', title=None):
    """Plot the first ``H * W`` face images on an ``H`` x ``W`` grid of subplots.

    Args:
        faces (numpy.ndarray): The face images with shape (None, rows, cols).
        H (int): Number of subplot rows.
        W (int): Number of subplot columns.
        cmap: Matplotlib colormap forwarded to ``imshow``. Defaults to
            ``'gray'``.
        title (list): Titles for the subplots, in plotting order. ``None`` or
            an empty list leaves every subplot untitled; a list shorter than
            ``H * W`` is repeated cyclically to cover all subplots.

    Raises:
        AssertionError: If fewer than ``H * W`` images are supplied.
    """
    n_plots = H * W
    assert faces.shape[0] >= n_plots, 'Please check H or W, H*W should be smaller than the number of face images'
    if not title:
        title = [''] * n_plots
    elif len(title) < n_plots:
        # Cycle through the given titles so that every subplot has one,
        # however short the list is.
        title = [title[i % len(title)] for i in range(n_plots)]
    fig = plt.figure(figsize=(10, 5))
    plt.subplots_adjust(top=1, bottom=0, hspace=0, wspace=0.05)
    for n in range(n_plots):
        # Subplot positions are 1-based.
        ax = fig.add_subplot(H, W, n + 1)
        ax.imshow(faces[n], cmap=cmap)
        ax.set_title(title[n])

# Display the selected original faces on a 2 x 5 grid.
face_plot(face_images, 2, 5)

from sklearn.decomposition import PCA

# Compare how much information is lost depending on the number of PCA components

def face_pca_recon(face_images, n_components):
    """Encode the faces with PCA and reconstruct them from the reduced code.

    Args:
        face_images (numpy.ndarray): Images with shape (None, rows, cols).
        n_components: Forwarded to ``PCA(n_components=...)``.

    Returns:
        numpy.ndarray: Reconstructed images, same shape as ``face_images``.
    """
    original_shape = face_images.shape
    # Flatten each image into a row: (None, rows, cols) -> (None, rows*cols).
    flat = face_images.reshape(original_shape[0], -1)
    model = PCA(n_components=n_components)
    # Fit and encode in one step, then decode back to pixel space.
    restored = model.inverse_transform(model.fit_transform(flat))
    # Restore the image layout: (None, rows*cols) -> (None, rows, cols).
    return restored.reshape(original_shape)

# Reconstruct the faces with 2, 5 and 10 principal components in turn and
# show each reconstruction on a 2 x 5 grid.
for num_components in (2, 5, 10):
    face_images_inv = face_pca_recon(face_images, num_components)
    face_plot(face_images_inv, 2, 5)

# What feature does each component bring out?
#   The higher the component number (i.e., the smaller the eigenvalue its
#   eigenvector corresponds to), the less of the data's variance it captures.

# Fit a 5-component PCA on the flattened faces (one row per image). Only the
# fitted model is needed here, so `fit` is used and no projection is computed.
pca = PCA(n_components=5)
pca.fit(face_images.reshape(face_images.shape[0], -1))

_, h, w = face_images.shape

# Reshape the fitted mean vector and the first two component vectors back
# into (h, w) images so they can be displayed like faces.
face_mean = pca.mean_.reshape(h, w)
face_cp1 = pca.components_[0].reshape(h, w)
face_cp2 = pca.components_[1].reshape(h, w)

faces = np.array([face_mean, face_cp1, face_cp2])
face_plot(faces, 1, 3, title=['Mean', 'Component 1', 'Component 2'])

def pca_feature(pca, component):
    """Show how one principal component changes the mean face.

    Plots ``face_mean + weight * component_image`` for the integer weights
    -5 through 6 on a 2 x 6 grid, titling each subplot with its weight.

    Args:
        pca: Fitted PCA object; a row of its ``components_`` is read.
        component (int): 1-based index of the component to visualise.

    Raises:
        IndexError: If ``component`` is not in ``1..len(pca.components_)``.

    Note:
        Relies on the module-level ``face_mean`` image (also used for the
        image size) and on the ``face_plot`` helper.
    """
    n_available = len(pca.components_)
    if not 1 <= component <= n_available:
        # Without this guard, component=0 or a negative value would silently
        # wrap around to a different component via negative indexing.
        raise IndexError(
            'component must be between 1 and ' + str(n_available)
            + ', got ' + str(component)
        )

    weights = np.array(range(-5, 7))
    # Take the image size from the mean face instead of hard-coding it.
    face_cp = pca.components_[component - 1].reshape(face_mean.shape)

    faces = np.array([face_mean + weight * face_cp for weight in weights])
    title = ['Weight : ' + str(weight) for weight in weights]
    face_plot(faces, 2, 6, title=title)

# Visualise the effect of the first principal component (1-based index).
pca_feature(pca, 1)





