Fuzzy C Means

Algoritma FCM (Fuzzy C-Means) Clustering adalah salah satu algoritma yang digunakan dalam pengolahan citra. Algoritma ini merupakan penggabungan dari Fuzzy Logic dan K-Means Clustering. K-Means Clustering adalah salah satu algoritma pengelompokan (clustering) data yang cukup banyak dipakai untuk memecahkan masalah. Hanya saja, metode tersebut tidak mengembalikan nilai pembanding untuk masing-masing cluster, sehingga digunakan logika Fuzzy untuk menghitung skor (derajat keanggotaan) setiap data terhadap masing-masing cluster.

Contoh Penggunaan FCM pada data:

1. Persiapkan Environment

import pandas as pd
from pandas import DataFrame
import random
import numpy as np
from IPython.display import HTML, display
from tabulate import tabulate
from math import log
from sklearn.feature_selection import mutual_info_classif

def table(df):
    """Show ``df`` as an HTML table in the notebook output.

    The argument is passed to ``tabulate`` with ``tablefmt='html'``,
    ``headers='keys'`` and the row index hidden; the generated markup is then
    wrapped in ``HTML`` and handed to ``display``.
    """
    html_markup = tabulate(df, tablefmt='html', headers='keys', showindex=False)
    display(HTML(html_markup))
# Read the CSV, keep the five feature columns and draw a reproducible 6-row
# sample (random_state=42).  D holds the sampled rows as a NumPy array.
Data = pd.read_csv('data1.csv', sep=',')
Data = Data.loc[:, ['O-Ring', 'Thermal', 'Temperature', 'Leak', 'Temporal']].sample(n=6, random_state=42)
D = Data.values
print("Table (D) >>")
table(D)
Table (D) >>
0 1 2 3 4
6 0 75 200 16
6 1 63 200 10
6 0 66 50 1
6 1 57 200 9
6 0 81 200 18
6 0 67 200 13
# Settings shared by the cells below.
n, m = D.shape  # number of rows (samples) and columns (features) of D
c = 3           # number of clusters
w = 2           # exponent applied to the membership degrees
T = 10          # maximum number of iterations
e = 0.1         # stop when the objective changes by no more than this
P0 = 0          # initial value of the objective function
t = 1           # initial iteration counter
print("Variables >>")
print(" n = %d\n m = %d\n c = %d\n w = %d\n T = %d\n e = %f\n P0 = %d\n t = %d" % (n, m, c, w, T, e, P0, t))

Dimana n = Jumlah Sampel

m = Jumlah Fitur

c = Jumlah Cluster

w = Tingkat blur/fuzzy (pangkat pembobot derajat keanggotaan)

T = Batas maks Iterasi

e = Akurasi (batas selisih fungsi objektif antar-iterasi untuk berhenti)

P0 = Fungsi Objektif awal (Pt = Fungsi Objektif pada iterasi ke-t)

t = Iterasi ke-t

Variables >>
 n = 6
 m = 5
 c = 3
 w = 2
 T = 10
 e = 0.100000
 P0 = 0
 t = 1

2. Membuat Matriks derajat cluster

# Seed the generator so the same start matrix is produced on every run, then
# draw n*c independent values from [0, 1] and lay them out row by row as an
# (n, c) matrix.  NOTE: the rows are not rescaled to sum to 1 here.
random.seed(42)
U = np.array([random.uniform(0, 1) for _ in range(n * c)]).reshape(n, c)
print("U >>\n")
print(U)
U >>

[[0.6394268  0.02501076 0.27502932]
 [0.22321074 0.73647121 0.67669949]
 [0.89217957 0.08693883 0.42192182]
 [0.02979722 0.21863797 0.50535529]
 [0.02653597 0.19883765 0.64988444]
 [0.54494148 0.22044062 0.58926568]]

3. Menghitung Pusat Cluster

# NOTE: the arrays are indexed [row, column]; ``x`` selects a column of ``D``
# and ``y`` selects a column of ``U``.
def cluster(U, D, x, y, fuzziness=None):
    """Return coordinate ``x`` of the centre of cluster ``y``.

    The result is the weighted mean of column ``x`` of ``D``, where row ``i``
    is weighted by ``U[i, y] ** fuzziness``.

    Args:
        U: 2-D membership array indexed as ``U[sample, cluster]``.
        D: 2-D data array indexed as ``D[sample, feature]``.
        x: Column (feature) index into ``D``.
        y: Column (cluster) index into ``U``.
        fuzziness: Exponent applied to the memberships.  When omitted, the
            module-level ``w`` is used, which keeps existing four-argument
            calls working unchanged.

    Returns:
        The weighted mean as a scalar.
    """
    exponent = w if fuzziness is None else fuzziness
    # The sample count is taken from U itself instead of the global ``n`` so
    # the function also works on arrays of a different size.
    weights = [U[i, y] ** exponent for i in range(len(U))]
    weighted_total = sum(weight * D[i, x] for i, weight in enumerate(weights))
    return weighted_total / sum(weights)
# Centre matrix: one row per cluster, one column per feature.
V = np.array([
    [cluster(U, D, feature, group) for feature in range(m)]
    for group in range(c)
])
print("V >>\n")
print(V)
V >>

[[6.00000000e+00 3.26487360e-02 6.84657455e+01 1.23129308e+02
  7.54380404e+00]
 [6.00000000e+00 8.59703678e-01 6.39459452e+01 1.98348517e+02
  1.05098259e+01]
 [6.00000000e+00 4.10760713e-01 6.81254962e+01 1.84623123e+02
  1.17372391e+01]]

4. Hitung Fungsi Objektif pada t

def objective(V, U, D, fuzziness=None):
    """Return the value of the fuzzy c-means objective function.

    For every sample ``i`` and cluster ``k`` the squared Euclidean distance
    between ``D[i]`` and ``V[k]`` is multiplied by ``U[i, k] ** fuzziness``;
    the products are summed over all clusters and all samples.

    Args:
        V: 2-D centre array indexed as ``V[cluster, feature]``.
        U: 2-D membership array indexed as ``U[sample, cluster]``.
        D: 2-D data array indexed as ``D[sample, feature]``.
        fuzziness: Exponent applied to the memberships.  When omitted, the
            module-level ``w`` is used, which keeps existing three-argument
            calls working unchanged.

    Returns:
        The objective value as a scalar (``0`` when ``D`` has no rows).
    """
    exponent = w if fuzziness is None else fuzziness
    # Sizes come from the arguments rather than the globals n, m and c, so the
    # function works for any consistently shaped input.
    sample_count, feature_count = D.shape
    cluster_count = len(V)

    def squared_distance(i, k):
        return sum((D[i, j] - V[k, j]) ** 2 for j in range(feature_count))

    return sum(
        sum(
            squared_distance(i, k) * U[i, k] ** exponent
            for k in range(cluster_count)
        )
        for i in range(sample_count)
    )
# Objective value for the current centres and memberships.
Pt = objective(V=V, U=U, D=D)
print("Pt >>\n")
print(Pt)
Pt >>

12771.30605980444

5. Hitung Ulang Matriks Derajat Cluster

def converge(V, D, i, k, fuzziness=None):
    """Return the updated membership degree of sample ``i`` in cluster ``k``.

    The squared Euclidean distance from ``D[i]`` to every centre in ``V`` is
    raised to ``-1 / (fuzziness - 1)``; the value for cluster ``k`` is then
    divided by the sum of the values for all clusters.

    Args:
        V: 2-D centre array indexed as ``V[cluster, feature]``.
        D: 2-D data array indexed as ``D[sample, feature]``.
        i: Row (sample) index into ``D``.
        k: Row (cluster) index into ``V``.
        fuzziness: Fuzzifier exponent.  When omitted, the module-level ``w``
            is used, which keeps existing four-argument calls working
            unchanged.

    Returns:
        The membership degree as a scalar in ``[0, 1]``.

    Raises:
        ZeroDivisionError: If the fuzzifier equals 1.
    """
    exponent = -1 / ((w if fuzziness is None else fuzziness) - 1)
    # Sizes come from the arguments rather than the globals m and c.
    squared = [
        sum((D[i, j] - V[g, j]) ** 2 for j in range(D.shape[1]))
        for g in range(len(V))
    ]

    # A sample that sits exactly on a centre has zero distance, and raising
    # zero to a negative power would yield inf/nan (or raise).  Give such a
    # sample full membership in the coincident centre(s), shared equally when
    # several centres coincide, and no membership anywhere else.
    on_centre = [bool(distance == 0) for distance in squared]
    if any(on_centre):
        return 1.0 / sum(on_centre) if on_centre[k] else 0.0

    inverse = [distance ** exponent for distance in squared]
    return inverse[k] / sum(inverse)
# Show the recomputed membership matrix.  The array is only the value of the
# final expression (displayed by the notebook); it is not assigned back to U.
print("U >>\n")
np.array([
    [converge(V, D, sample, group) for group in range(c)]
    for sample in range(n)
])
U >>


array([[1.67779651e-02, 6.48642159e-01, 3.34579876e-01],
       [6.46312425e-04, 9.84911124e-01, 1.44425634e-02],
       [6.49354017e-01, 1.58552395e-01, 1.92093588e-01],
       [7.64136468e-03, 8.66887418e-01, 1.25471217e-01],
       [3.06668583e-02, 5.40465577e-01, 4.28867564e-01],
       [2.95350488e-03, 9.23775417e-01, 7.32710785e-02]])

6. Cek apakah sudah berhenti atau loop kembali

def iterate(U):
    """Run one update step starting from the membership matrix ``U``.

    The cluster centres are recomputed from ``U`` and the global data ``D``;
    from those centres a new membership matrix is derived.

    Returns:
        A pair ``(new_memberships, objective_value)`` where the objective is
        evaluated with the new centres and the incoming ``U``.
    """
    centres = np.array([
        [cluster(U, D, feature, group) for feature in range(m)]
        for group in range(c)
    ])
    memberships = np.array([
        [converge(centres, D, sample, group) for group in range(c)]
        for sample in range(n)
    ])
    cost = objective(centres, U, D)
    return memberships, cost

def fuzzyCM(U):
    """Repeat ``iterate`` from the start matrix ``U`` until it settles.

    One step is always performed.  Further steps follow while the objective
    differs from its previous value by more than the global ``e`` and the
    step count is still below the global ``T``.  The "previous" objective
    starts at 0.

    Returns:
        A pair ``(memberships, steps)``: the final membership matrix and the
        number of steps performed.
    """
    U, current = iterate(U)
    previous = 0
    t = 1
    while abs(current - previous) > e and t < T:
        previous = current
        U, current = iterate(U)
        t += 1
    return U, t

# Run the full loop from the random start matrix and report the outcome.
FuzzyResult, FuzzyIters = fuzzyCM(U=U)
print("Iterating %d times, fuzzy result >> \n" % (FuzzyIters,))
print(FuzzyResult)
Iterating 7 times, fuzzy result >>

[[3.65865187e-04 4.10626890e-02 9.58571446e-01]
 [6.31837603e-05 9.94611632e-01 5.32518423e-03]
 [1.00000000e+00 8.69545397e-12 8.58646829e-12]
 [1.09520345e-03 9.48952481e-01 4.99523153e-02]
 [4.81513773e-04 2.62180565e-02 9.73300430e-01]
 [1.16981231e-03 7.98268525e-01 2.00561663e-01]]

7. Ambil kolom dengan nilai terbesar pada setiap baris sebagai cluster

# Label each sample with the index of its largest membership value (argmax
# over its row of FuzzyResult) and show it next to the original features.
table(DataFrame(
    [
        D[row].tolist() + [np.argmax(FuzzyResult[row].tolist())]
        for row in range(n)
    ],
    columns=Data.columns.tolist() + ["Cluster Index"],
))
O-Ring Thermal Temperature Leak Temporal Cluster Index
6 0 75 200 16 2
6 1 63 200 10 1
6 0 66 50 1 0
6 1 57 200 9 1
6 0 81 200 18 2
6 0 67 200 13 1