import pandas as pd
import numpy as np


class MultipleLinearRegression:
    """Ordinary least squares fitted with the normal equation."""

    def __init__(self):
        # The intercept is folded into the weight vector (first entry) via the bias column.
        self.weights = None

    def fit(self, X, y):
        # Prepend a column of ones so the intercept is learned with the coefficients.
        X_with_bias = np.hstack((np.ones((X.shape[0], 1)), X))
        # Normal equation: w = (X^T X)^-1 X^T y
        self.weights = np.linalg.inv(X_with_bias.T @ X_with_bias) @ X_with_bias.T @ y

    def predict(self, X):
        X_with_bias = np.hstack((np.ones((X.shape[0], 1)), X))
        return X_with_bias @ self.weights

    def score(self, X, y):
        # Coefficient of determination: R^2 = 1 - SS_res / SS_tot
        predictions = self.predict(X)
        ss_res = np.sum((y - predictions) ** 2)
        ss_tot = np.sum((y - np.mean(y)) ** 2)
        return 1 - ss_res / ss_tot
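# Minimal usage sketch (added for illustration; names like X_demo/y_demo are
# assumptions, not from the original listing). Data is generated from
# y = 3 + 2*x1 - x2, so the recovered weights should be close to [3, 2, -1].
rng = np.random.default_rng(0)
X_demo = rng.normal(size=(100, 2))
y_demo = 3 + 2 * X_demo[:, 0] - X_demo[:, 1] + rng.normal(scale=0.1, size=100)
reg = MultipleLinearRegression()
reg.fit(X_demo, y_demo)
print(reg.weights)                # ~[3, 2, -1]; the first entry is the intercept
print(reg.score(X_demo, y_demo))  # ~1.0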
import numpy as np
from collections import Counter


class KNNClassifier:
    def __init__(self, k=5):
        self.k = k

    def fit(self, X, y):
        # KNN is a lazy learner: fitting just memorises the training data.
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        return [self._predict_single(x) for x in X]

    def _predict_single(self, x):
        # Euclidean distance from x to every training point.
        distances = [np.sqrt(np.sum((x_train - x) ** 2)) for x_train in self.X_train]
        # Indices of the k closest training points.
        k_indices = np.argsort(distances)[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]
        # Majority vote among the k neighbours.
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]
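# Minimal usage sketch (added for illustration; names like X_demo/y_demo are
# assumptions, not from the original listing). Two well-separated blobs labelled
# 0 and 1; points near each blob should get the matching label.
rng = np.random.default_rng(0)
X_demo = np.vstack([rng.normal(0, 1, size=(50, 2)), rng.normal(5, 1, size=(50, 2))])
y_demo = np.array([0] * 50 + [1] * 50)
knn = KNNClassifier(k=3)
knn.fit(X_demo, y_demo)
print(knn.predict(np.array([[0.2, -0.1], [5.3, 4.8]])))  # expected: [0, 1]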
import numpy as np
from collections import Counter


class Node:
    def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
        self.feature = feature
        self.threshold = threshold
        self.left = left
        self.right = right
        self.value = value  # set only for leaf nodes


class DecisionTreeClassifier:
    def __init__(self, max_depth=100, min_samples_split=2):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.root = None

    def fit(self, X, y):
        self.root = self._grow_tree(X, y)

    def _grow_tree(self, X, y, depth=0):
        n_samples, n_features = X.shape
        n_labels = len(np.unique(y))

        # Stopping criteria: maximum depth reached, pure node, or too few samples.
        if depth >= self.max_depth or n_labels == 1 or n_samples < self.min_samples_split:
            leaf_value = self._most_common_label(y)
            return Node(value=leaf_value)

        # Consider every feature when searching for the best split.
        feat_idxs = np.arange(n_features)
        best_feat, best_thresh = self._best_criteria(X, y, feat_idxs)

        left_idxs, right_idxs = self._split(X[:, best_feat], best_thresh)
        # Guard against degenerate splits that would leave one child empty.
        if len(left_idxs) == 0 or len(right_idxs) == 0:
            return Node(value=self._most_common_label(y))

        left = self._grow_tree(X[left_idxs, :], y[left_idxs], depth + 1)
        right = self._grow_tree(X[right_idxs, :], y[right_idxs], depth + 1)
        return Node(best_feat, best_thresh, left, right)

    def _best_criteria(self, X, y, feat_idxs):
        # Exhaustive search over feature/threshold pairs for the largest information gain.
        best_gain = -1
        split_idx, split_thresh = None, None
        for feat_idx in feat_idxs:
            X_column = X[:, feat_idx]
            thresholds = np.unique(X_column)
            for threshold in thresholds:
                gain = self._information_gain(y, X_column, threshold)
                if gain > best_gain:
                    best_gain = gain
                    split_idx = feat_idx
                    split_thresh = threshold
        return split_idx, split_thresh

    def _information_gain(self, y, X_column, split_thresh):
        # Information gain = parent entropy - weighted entropy of the children.
        parent_entropy = self._entropy(y)
        left_idxs, right_idxs = self._split(X_column, split_thresh)
        if len(left_idxs) == 0 or len(right_idxs) == 0:
            return 0
        n = len(y)
        n_l, n_r = len(left_idxs), len(right_idxs)
        e_l, e_r = self._entropy(y[left_idxs]), self._entropy(y[right_idxs])
        child_entropy = (n_l / n) * e_l + (n_r / n) * e_r
        return parent_entropy - child_entropy

    def _split(self, X_column, split_thresh):
        left_idxs = np.argwhere(X_column <= split_thresh).flatten()
        right_idxs = np.argwhere(X_column > split_thresh).flatten()
        return left_idxs, right_idxs

    def _entropy(self, y):
        # Shannon entropy; assumes labels are non-negative integers.
        hist = np.bincount(y)
        ps = hist / len(y)
        return -np.sum([p * np.log2(p) for p in ps if p > 0])

    def _most_common_label(self, y):
        counter = Counter(y)
        return counter.most_common(1)[0][0]

    def predict(self, X):
        return np.array([self._traverse_tree(x, self.root) for x in X])

    def _traverse_tree(self, x, node):
        # Walk from the root to a leaf, comparing one feature per internal node.
        if node.value is not None:
            return node.value
        if x[node.feature] <= node.threshold:
            return self._traverse_tree(x, node.left)
        return self._traverse_tree(x, node.right)
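# Minimal usage sketch (added for illustration; names like X_demo/y_demo are
# assumptions, not from the original listing). The label is fully determined by
# a threshold on the first feature, so a shallow tree separates the classes.
X_demo = np.array([[1.0, 0.0], [2.0, 1.0], [3.0, 0.5], [7.0, 0.2], [8.0, 1.5], [9.0, 0.7]])
y_demo = np.array([0, 0, 0, 1, 1, 1])
tree = DecisionTreeClassifier(max_depth=3)
tree.fit(X_demo, y_demo)
print(tree.predict(np.array([[2.5, 0.3], [8.5, 1.0]])))  # expected: [0 1]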
import numpy as np


class LogisticRegression:
    def __init__(self, lr=0.01, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        # Batch gradient descent on the binary cross-entropy loss.
        for _ in range(self.n_iters):
            linear_model = np.dot(X, self.weights) + self.bias
            y_predicted = self._sigmoid(linear_model)
            # Gradients of the loss with respect to the weights and bias.
            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted = self._sigmoid(linear_model)
        # Threshold the probabilities at 0.5 to obtain class labels.
        y_predicted_cls = [1 if i > 0.5 else 0 for i in y_predicted]
        return np.array(y_predicted_cls)

    def _sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
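# Minimal usage sketch (added for illustration; names like X_demo/y_demo are
# assumptions, not from the original listing). Two linearly separable blobs;
# after gradient descent the model should classify points near each blob correctly.
rng = np.random.default_rng(0)
X_demo = np.vstack([rng.normal(-2, 1, size=(50, 2)), rng.normal(2, 1, size=(50, 2))])
y_demo = np.array([0] * 50 + [1] * 50)
clf = LogisticRegression(lr=0.1, n_iters=1000)
clf.fit(X_demo, y_demo)
print(clf.predict(np.array([[-2.0, -1.5], [2.5, 1.8]])))  # expected: [0 1]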
import numpy as np


class LogisticRegression:
    # Same as the binary logistic regression class above, trimmed down and
    # extended with predict_proba so the one-vs-all wrapper can reuse it.
    def __init__(self, lr=0.01, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0
        for _ in range(self.n_iters):
            linear = np.dot(X, self.weights) + self.bias
            y_pred = 1 / (1 + np.exp(-linear))
            dw = (1 / n_samples) * np.dot(X.T, (y_pred - y))
            db = (1 / n_samples) * np.sum(y_pred - y)
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict_proba(self, X):
        return 1 / (1 + np.exp(-(np.dot(X, self.weights) + self.bias)))


class OneVsAll:
    def __init__(self, n_classes, lr=0.01, n_iters=1000):
        self.n_classes = n_classes
        self.lr = lr
        self.n_iters = n_iters
        self.classifiers = []

    def fit(self, X, y):
        # Train one binary classifier per class: class i versus everything else.
        for i in range(self.n_classes):
            y_i = np.where(y == i, 1, 0)
            classifier = LogisticRegression(self.lr, self.n_iters)
            classifier.fit(X, y_i)
            self.classifiers.append(classifier)

    def predict(self, X):
        # Shape (n_classes, n_samples): each row holds one classifier's probabilities.
        probabilities = np.array([clf.predict_proba(X) for clf in self.classifiers])
        # Pick, for each sample, the class whose classifier is most confident.
        return np.argmax(probabilities, axis=0)
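# Minimal usage sketch (added for illustration; names like X_demo/y_demo are
# assumptions, not from the original listing). Three Gaussian blobs labelled
# 0, 1 and 2; the wrapper trains one binary model per class and predicts by argmax.
rng = np.random.default_rng(0)
X_demo = np.vstack([
    rng.normal((-4, 0), 1, size=(40, 2)),
    rng.normal((0, 4), 1, size=(40, 2)),
    rng.normal((4, 0), 1, size=(40, 2)),
])
y_demo = np.array([0] * 40 + [1] * 40 + [2] * 40)
ova = OneVsAll(n_classes=3, lr=0.1, n_iters=2000)
ova.fit(X_demo, y_demo)
print(ova.predict(np.array([[-4.0, 0.5], [0.2, 4.1], [3.8, -0.3]])))  # expected: [0 1 2]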