From 3c5e41078677253e41926140f0a38dda65eb45b8 Mon Sep 17 00:00:00 2001
From: shivasubrahmanya
Date: Wed, 31 Dec 2025 22:36:32 +0530
Subject: [PATCH 1/3] Add Gaussian Naive Bayes classifier

---
 machine_learning/naive_bayes.py | 110 ++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 machine_learning/naive_bayes.py

diff --git a/machine_learning/naive_bayes.py b/machine_learning/naive_bayes.py
new file mode 100644
index 000000000000..1e6855d39b75
--- /dev/null
+++ b/machine_learning/naive_bayes.py
@@ -0,0 +1,110 @@
+"""
+Naive Bayes Classifier implementation.
+
+This module implements Gaussian Naive Bayes from scratch without using
+external machine learning libraries.
+
+References:
+https://en.wikipedia.org/wiki/Naive_Bayes_classifier
+"""
+
+from typing import List, Dict
+import math
+
+
+def gaussian_probability(x: float, mean: float, variance: float) -> float:
+    """
+    Calculate Gaussian probability density.
+
+    >>> round(gaussian_probability(1.0, 1.0, 1.0), 3)
+    0.399
+    >>> gaussian_probability(1.0, 1.0, 0.0)
+    0.0
+    """
+    if variance == 0:
+        return 0.0
+
+    exponent = math.exp(-((x - mean) ** 2) / (2 * variance))
+    return (1 / math.sqrt(2 * math.pi * variance)) * exponent
+
+
+class GaussianNaiveBayes:
+    """
+    Gaussian Naive Bayes classifier.
+    """
+
+    def __init__(self) -> None:
+        self.class_priors: Dict[int, float] = {}
+        self.means: Dict[int, List[float]] = {}
+        self.variances: Dict[int, List[float]] = {}
+
+    def fit(self, features: List[List[float]], labels: List[int]) -> None:
+        """
+        Train the Gaussian Naive Bayes classifier.
+
+        :param features: Feature matrix
+        :param labels: Class labels
+        :raises ValueError: If input sizes mismatch
+
+        >>> model = GaussianNaiveBayes()
+        >>> model.fit([[1.0], [2.0], [3.0]], [0, 0, 1])
+        """
+        if len(features) != len(labels):
+            raise ValueError("Features and labels must have the same length")
+
+        separated: Dict[int, List[List[float]]] = {}
+        for feature_vector, label in zip(features, labels):
+            separated.setdefault(label, []).append(feature_vector)
+
+        total_samples = len(labels)
+
+        for label, rows in separated.items():
+            self.class_priors[label] = len(rows) / total_samples
+
+            transposed = list(zip(*rows))
+            self.means[label] = [sum(col) / len(col) for col in transposed]
+
+            self.variances[label] = [
+                sum((x - mean) ** 2 for x in col) / len(col)
+                for col, mean in zip(transposed, self.means[label])
+            ]
+
+    def predict(self, features: List[List[float]]) -> List[int]:
+        """
+        Predict class labels for input features.
+
+        :param features: Feature matrix
+        :return: Predicted labels
+
+        >>> model = GaussianNaiveBayes()
+        >>> X = [[1.0], [2.0], [3.0], [4.0]]
+        >>> y = [0, 0, 1, 1]
+        >>> model.fit(X, y)
+        >>> model.predict([[1.5], [3.5]])
+        [0, 1]
+        """
+        predictions: List[int] = []
+
+        for row in features:
+            class_scores: Dict[int, float] = {}
+
+            for label in self.class_priors:
+                score = math.log(self.class_priors[label])
+
+                for index, value in enumerate(row):
+                    mean = self.means[label][index]
+                    variance = self.variances[label][index]
+                    probability = gaussian_probability(value, mean, variance)
+
+                    if probability > 0:
+                        score += math.log(probability)
+
+                class_scores[label] = score
+
+            predicted_label = max(
+                class_scores.items(),
+                key=lambda item: item[1],
+            )[0]
+            predictions.append(predicted_label)
+
+        return predictions

From 6dd885cecf6396b9fa33c57b6d8652a642f0538d Mon Sep 17 00:00:00 2001
From: shivasubrahmanya
Date: Wed, 31 Dec 2025 22:41:18 +0530
Subject: [PATCH 2/3] Refactor Gaussian Naive Bayes classifier

---
 machine_learning/naive_bayes.py | 41 +++++++++++++++------------------
 1 file changed, 19 insertions(+), 22 deletions(-)

diff --git a/machine_learning/naive_bayes.py b/machine_learning/naive_bayes.py
index 1e6855d39b75..9b48587c12f0 100644
--- a/machine_learning/naive_bayes.py
+++ b/machine_learning/naive_bayes.py
@@ -8,7 +8,7 @@
 https://en.wikipedia.org/wiki/Naive_Bayes_classifier
 """
 
-from typing import List, Dict
+from typing import Dict, List, Tuple
 import math
 
 
@@ -21,11 +21,12 @@ def gaussian_probability(x: float, mean: float, variance: float) -> float:
     >>> gaussian_probability(1.0, 1.0, 0.0)
     0.0
     """
-    if variance == 0:
+    if variance == 0.0:
         return 0.0
 
-    exponent = math.exp(-((x - mean) ** 2) / (2 * variance))
-    return (1 / math.sqrt(2 * math.pi * variance)) * exponent
+    exponent = math.exp(-((x - mean) ** 2) / (2.0 * variance))
+    coefficient = 1.0 / math.sqrt(2.0 * math.pi * variance)
+    return coefficient * exponent
 
 
 class GaussianNaiveBayes:
@@ -61,12 +62,11 @@ def fit(self, features: List[List[float]], labels: List[int]) -> None:
         for label, rows in separated.items():
             self.class_priors[label] = len(rows) / total_samples
 
-            transposed = list(zip(*rows))
-            self.means[label] = [sum(col) / len(col) for col in transposed]
-
+            columns = list(zip(*rows))
+            self.means[label] = [sum(col) / len(col) for col in columns]
             self.variances[label] = [
                 sum((x - mean) ** 2 for x in col) / len(col)
-                for col, mean in zip(transposed, self.means[label])
+                for col, mean in zip(columns, self.means[label])
             ]
 
     def predict(self, features: List[List[float]]) -> List[int]:
@@ -86,25 +86,22 @@ def predict(self, features: List[List[float]]) -> List[int]:
         predictions: List[int] = []
 
         for row in features:
-            class_scores: Dict[int, float] = {}
+            scores: List[Tuple[int, float]] = []
 
             for label in self.class_priors:
-                score = math.log(self.class_priors[label])
+                log_likelihood = math.log(self.class_priors[label])
 
                 for index, value in enumerate(row):
-                    mean = self.means[label][index]
-                    variance = self.variances[label][index]
-                    probability = gaussian_probability(value, mean, variance)
-
-                    if probability > 0:
-                        score += math.log(probability)
+                    probability = gaussian_probability(
+                        value,
+                        self.means[label][index],
+                        self.variances[label][index],
+                    )
+                    if probability > 0.0:
+                        log_likelihood += math.log(probability)
 
-                class_scores[label] = score
+                scores.append((label, log_likelihood))
 
-            predicted_label = max(
-                class_scores.items(),
-                key=lambda item: item[1],
-            )[0]
-            predictions.append(predicted_label)
+            predictions.append(max(scores, key=lambda pair: pair[1])[0])
 
         return predictions

From 5d3907f0886fe92d8aff2328bede9162dfddb7a1 Mon Sep 17 00:00:00 2001
From: shivasubrahmanya
Date: Thu, 1 Jan 2026 21:48:34 +0530
Subject: [PATCH 3/3] Add Multinomial Naive Bayes classifier

---
 machine_learning/multinomial_naive_bayes.py | 113 ++++++++++++++++++++
 1 file changed, 113 insertions(+)
 create mode 100644 machine_learning/multinomial_naive_bayes.py

diff --git a/machine_learning/multinomial_naive_bayes.py b/machine_learning/multinomial_naive_bayes.py
new file mode 100644
index 000000000000..7912d15bc71e
--- /dev/null
+++ b/machine_learning/multinomial_naive_bayes.py
@@ -0,0 +1,113 @@
+"""
+Multinomial Naive Bayes Classifier implementation.
+
+This module implements Multinomial Naive Bayes from scratch without using
+external machine learning libraries. It is commonly used for text
+classification tasks such as spam detection.
+
+References:
+https://en.wikipedia.org/wiki/Naive_Bayes_classifier#Multinomial_naive_bayes
+"""
+
+import math
+
+
+class MultinomialNaiveBayes:
+    """
+    Multinomial Naive Bayes classifier.
+    """
+
+    def __init__(self, alpha: float = 1.0) -> None:
+        """
+        Initialize the classifier.
+
+        :param alpha: Laplace smoothing parameter
+        """
+        if alpha <= 0:
+            raise ValueError("Alpha must be greater than 0")
+
+        self.alpha = alpha
+        self.class_priors: dict[int, float] = {}
+        self.feature_log_prob: dict[int, list[float]] = {}
+        self.num_features: int = 0
+
+    def fit(self, features: list[list[int]], labels: list[int]) -> None:
+        """
+        Train the Multinomial Naive Bayes classifier.
+
+        :param features: Feature matrix (counts of features)
+        :param labels: Class labels
+        :raises ValueError: If input sizes mismatch
+
+        >>> model = MultinomialNaiveBayes()
+        >>> X = [[2, 1], [1, 1], [0, 2]]
+        >>> y = [0, 0, 1]
+        >>> model.fit(X, y)
+        """
+        if len(features) != len(labels):
+            raise ValueError("Features and labels must have the same length")
+
+        if not features:
+            raise ValueError("Feature matrix must not be empty")
+
+        self.num_features = len(features[0])
+
+        separated: dict[int, list[list[int]]] = {}
+        for row, label in zip(features, labels):
+            separated.setdefault(label, []).append(row)
+
+        total_samples = len(labels)
+
+        for label, rows in separated.items():
+            self.class_priors[label] = math.log(len(rows) / total_samples)
+
+            feature_counts = [0] * self.num_features
+            total_count = 0
+
+            for row in rows:
+                for index, value in enumerate(row):
+                    feature_counts[index] += value
+                    total_count += value
+
+            self.feature_log_prob[label] = [
+                math.log(
+                    (count + self.alpha)
+                    / (total_count + self.alpha * self.num_features)
+                )
+                for count in feature_counts
+            ]
+
+    def predict(self, features: list[list[int]]) -> list[int]:
+        """
+        Predict class labels for input features.
+
+        :param features: Feature matrix
+        :return: Predicted labels
+
+        >>> model = MultinomialNaiveBayes()
+        >>> X = [[2, 1], [1, 1], [0, 2]]
+        >>> y = [0, 0, 1]
+        >>> model.fit(X, y)
+        >>> model.predict([[1, 0], [0, 2]])
+        [0, 1]
+        """
+        predictions: list[int] = []
+
+        for row in features:
+            class_scores: dict[int, float] = {}
+
+            for label in self.class_priors:
+                score = self.class_priors[label]
+
+                for index, value in enumerate(row):
+                    score += value * self.feature_log_prob[label][index]
+
+                class_scores[label] = score
+
+            predicted_label = max(
+                class_scores.items(),
+                key=lambda item: item[1],
+            )[0]
+            predictions.append(predicted_label)
+
+        return predictions
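Note for reviewers: beyond the embedded doctests (runnable with python -m doctest machine_learning/naive_bayes.py machine_learning/multinomial_naive_bayes.py), the minimal sketch below exercises both new classifiers end to end. It assumes all three patches are applied and that the two modules are importable, e.g. when run from the repository root; the toy data and variable names are illustrative, not taken from the patches.

    from machine_learning.naive_bayes import GaussianNaiveBayes
    from machine_learning.multinomial_naive_bayes import MultinomialNaiveBayes

    # Gaussian NB on continuous features: two well-separated 1-D clusters.
    gaussian_model = GaussianNaiveBayes()
    gaussian_model.fit([[1.0], [1.2], [3.8], [4.0]], [0, 0, 1, 1])
    print(gaussian_model.predict([[1.1], [3.9]]))  # expected: [0, 1]

    # Multinomial NB on non-negative count features (e.g. word counts per document).
    multinomial_model = MultinomialNaiveBayes(alpha=1.0)
    multinomial_model.fit([[2, 1], [1, 1], [0, 2]], [0, 0, 1])
    print(multinomial_model.predict([[1, 0], [0, 2]]))  # expected: [0, 1]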