From 3c5e41078677253e41926140f0a38dda65eb45b8 Mon Sep 17 00:00:00 2001
From: shivasubrahmanya
Date: Wed, 31 Dec 2025 22:36:32 +0530
Subject: [PATCH 1/3] Add Gaussian Naive Bayes classifier

---
 machine_learning/naive_bayes.py | 110 ++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 machine_learning/naive_bayes.py

diff --git a/machine_learning/naive_bayes.py b/machine_learning/naive_bayes.py
new file mode 100644
index 000000000000..1e6855d39b75
--- /dev/null
+++ b/machine_learning/naive_bayes.py
@@ -0,0 +1,110 @@
+"""
+Naive Bayes Classifier implementation.
+
+This module implements Gaussian Naive Bayes from scratch without using
+external machine learning libraries.
+
+References:
+https://en.wikipedia.org/wiki/Naive_Bayes_classifier
+"""
+
+from typing import List, Dict
+import math
+
+
+def gaussian_probability(x: float, mean: float, variance: float) -> float:
+    """
+    Calculate Gaussian probability density.
+
+    >>> round(gaussian_probability(1.0, 1.0, 1.0), 3)
+    0.399
+    >>> gaussian_probability(1.0, 1.0, 0.0)
+    0.0
+    """
+    if variance == 0:
+        return 0.0
+
+    exponent = math.exp(-((x - mean) ** 2) / (2 * variance))
+    return (1 / math.sqrt(2 * math.pi * variance)) * exponent
+
+
+class GaussianNaiveBayes:
+    """
+    Gaussian Naive Bayes classifier.
+    """
+
+    def __init__(self) -> None:
+        self.class_priors: Dict[int, float] = {}
+        self.means: Dict[int, List[float]] = {}
+        self.variances: Dict[int, List[float]] = {}
+
+    def fit(self, features: List[List[float]], labels: List[int]) -> None:
+        """
+        Train the Gaussian Naive Bayes classifier.
+
+        :param features: Feature matrix
+        :param labels: Class labels
+        :raises ValueError: If input sizes mismatch
+
+        >>> model = GaussianNaiveBayes()
+        >>> model.fit([[1.0], [2.0], [3.0]], [0, 0, 1])
+        """
+        if len(features) != len(labels):
+            raise ValueError("Features and labels must have the same length")
+
+        separated: Dict[int, List[List[float]]] = {}
+        for feature_vector, label in zip(features, labels):
+            separated.setdefault(label, []).append(feature_vector)
+
+        total_samples = len(labels)
+
+        for label, rows in separated.items():
+            self.class_priors[label] = len(rows) / total_samples
+
+            transposed = list(zip(*rows))
+            self.means[label] = [sum(col) / len(col) for col in transposed]
+
+            self.variances[label] = [
+                sum((x - mean) ** 2 for x in col) / len(col)
+                for col, mean in zip(transposed, self.means[label])
+            ]
+
+    def predict(self, features: List[List[float]]) -> List[int]:
+        """
+        Predict class labels for input features.
+
+        :param features: Feature matrix
+        :return: Predicted labels
+
+        >>> model = GaussianNaiveBayes()
+        >>> X = [[1.0], [2.0], [3.0], [4.0]]
+        >>> y = [0, 0, 1, 1]
+        >>> model.fit(X, y)
+        >>> model.predict([[1.5], [3.5]])
+        [0, 1]
+        """
+        predictions: List[int] = []
+
+        for row in features:
+            class_scores: Dict[int, float] = {}
+
+            for label in self.class_priors:
+                score = math.log(self.class_priors[label])
+
+                for index, value in enumerate(row):
+                    mean = self.means[label][index]
+                    variance = self.variances[label][index]
+                    probability = gaussian_probability(value, mean, variance)
+
+                    if probability > 0:
+                        score += math.log(probability)
+
+                class_scores[label] = score
+
+            predicted_label = max(
+                class_scores.items(),
+                key=lambda item: item[1],
+            )[0]
+            predictions.append(predicted_label)
+
+        return predictions

From 6dd885cecf6396b9fa33c57b6d8652a642f0538d Mon Sep 17 00:00:00 2001
From: shivasubrahmanya
Date: Wed, 31 Dec 2025 22:41:18 +0530
Subject: [PATCH 2/3] Refactor Gaussian Naive Bayes classifier

---
 machine_learning/naive_bayes.py | 41 +++++++++++++++------------------
 1 file changed, 19 insertions(+), 22 deletions(-)

diff --git a/machine_learning/naive_bayes.py b/machine_learning/naive_bayes.py
index 1e6855d39b75..9b48587c12f0 100644
--- a/machine_learning/naive_bayes.py
+++ b/machine_learning/naive_bayes.py
@@ -8,7 +8,7 @@
 https://en.wikipedia.org/wiki/Naive_Bayes_classifier
 """
 
-from typing import List, Dict
+from typing import Dict, List, Tuple
 import math
 
 
@@ -21,11 +21,12 @@ def gaussian_probability(x: float, mean: float, variance: float) -> float:
     >>> gaussian_probability(1.0, 1.0, 0.0)
     0.0
     """
-    if variance == 0:
+    if variance == 0.0:
         return 0.0
 
-    exponent = math.exp(-((x - mean) ** 2) / (2 * variance))
-    return (1 / math.sqrt(2 * math.pi * variance)) * exponent
+    exponent = math.exp(-((x - mean) ** 2) / (2.0 * variance))
+    coefficient = 1.0 / math.sqrt(2.0 * math.pi * variance)
+    return coefficient * exponent
 
 
 class GaussianNaiveBayes:
@@ -61,12 +62,11 @@ def fit(self, features: List[List[float]], labels: List[int]) -> None:
         for label, rows in separated.items():
             self.class_priors[label] = len(rows) / total_samples
 
-            transposed = list(zip(*rows))
-            self.means[label] = [sum(col) / len(col) for col in transposed]
-
+            columns = list(zip(*rows))
+            self.means[label] = [sum(col) / len(col) for col in columns]
             self.variances[label] = [
                 sum((x - mean) ** 2 for x in col) / len(col)
-                for col, mean in zip(transposed, self.means[label])
+                for col, mean in zip(columns, self.means[label])
             ]
 
     def predict(self, features: List[List[float]]) -> List[int]:
@@ -86,25 +86,22 @@ def predict(self, features: List[List[float]]) -> List[int]:
         predictions: List[int] = []
 
         for row in features:
-            class_scores: Dict[int, float] = {}
+            scores: List[Tuple[int, float]] = []
 
             for label in self.class_priors:
-                score = math.log(self.class_priors[label])
+                log_likelihood = math.log(self.class_priors[label])
 
                 for index, value in enumerate(row):
-                    mean = self.means[label][index]
-                    variance = self.variances[label][index]
-                    probability = gaussian_probability(value, mean, variance)
-
-                    if probability > 0:
-                        score += math.log(probability)
+                    probability = gaussian_probability(
+                        value,
+                        self.means[label][index],
+                        self.variances[label][index],
+                    )
+                    if probability > 0.0:
+                        log_likelihood += math.log(probability)
 
-                class_scores[label] = score
+                scores.append((label, log_likelihood))
 
-            predicted_label = max(
-                class_scores.items(),
-                key=lambda item: item[1],
-            )[0]
-            predictions.append(predicted_label)
+            predictions.append(max(scores, key=lambda pair: pair[1])[0])
 
         return predictions

From 5d3907f0886fe92d8aff2328bede9162dfddb7a1 Mon Sep 17 00:00:00 2001
From: shivasubrahmanya
Date: Thu, 1 Jan 2026 21:48:34 +0530
Subject: [PATCH 3/3] Add Multinomial Naive Bayes classifier

---
 machine_learning/multinomial_naive_bayes.py | 113 ++++++++++++++++++++
 1 file changed, 113 insertions(+)
 create mode 100644 machine_learning/multinomial_naive_bayes.py

diff --git a/machine_learning/multinomial_naive_bayes.py b/machine_learning/multinomial_naive_bayes.py
new file mode 100644
index 000000000000..7912d15bc71e
--- /dev/null
+++ b/machine_learning/multinomial_naive_bayes.py
@@ -0,0 +1,113 @@
+"""
+Multinomial Naive Bayes Classifier implementation.
+
+This module implements Multinomial Naive Bayes from scratch without using
+external machine learning libraries. It is commonly used for text
+classification tasks such as spam detection.
+
+References:
+https://en.wikipedia.org/wiki/Naive_Bayes_classifier#Multinomial_naive_bayes
+"""
+
+import math
+
+
+class MultinomialNaiveBayes:
+    """
+    Multinomial Naive Bayes classifier.
+    """
+
+    def __init__(self, alpha: float = 1.0) -> None:
+        """
+        Initialize the classifier.
+
+        :param alpha: Laplace smoothing parameter
+        """
+        if alpha <= 0:
+            raise ValueError("Alpha must be greater than 0")
+
+        self.alpha = alpha
+        self.class_priors: dict[int, float] = {}
+        self.feature_log_prob: dict[int, list[float]] = {}
+        self.num_features: int = 0
+
+    def fit(self, features: list[list[int]], labels: list[int]) -> None:
+        """
+        Train the Multinomial Naive Bayes classifier.
+
+        :param features: Feature matrix (counts of features)
+        :param labels: Class labels
+        :raises ValueError: If input sizes mismatch
+
+        >>> model = MultinomialNaiveBayes()
+        >>> X = [[2, 1], [1, 1], [0, 2]]
+        >>> y = [0, 0, 1]
+        >>> model.fit(X, y)
+        """
+        if len(features) != len(labels):
+            raise ValueError("Features and labels must have the same length")
+
+        if not features:
+            raise ValueError("Feature matrix must not be empty")
+
+        self.num_features = len(features[0])
+
+        separated: dict[int, list[list[int]]] = {}
+        for row, label in zip(features, labels):
+            separated.setdefault(label, []).append(row)
+
+        total_samples = len(labels)
+
+        for label, rows in separated.items():
+            self.class_priors[label] = math.log(len(rows) / total_samples)
+
+            feature_counts = [0] * self.num_features
+            total_count = 0
+
+            for row in rows:
+                for index, value in enumerate(row):
+                    feature_counts[index] += value
+                    total_count += value
+
+            self.feature_log_prob[label] = [
+                math.log(
+                    (count + self.alpha)
+                    / (total_count + self.alpha * self.num_features)
+                )
+                for count in feature_counts
+            ]
+
+    def predict(self, features: list[list[int]]) -> list[int]:
+        """
+        Predict class labels for input features.
+
+        :param features: Feature matrix
+        :return: Predicted labels
+
+        >>> model = MultinomialNaiveBayes()
+        >>> X = [[2, 1], [1, 1], [0, 2]]
+        >>> y = [0, 0, 1]
+        >>> model.fit(X, y)
+        >>> model.predict([[1, 0], [0, 2]])
+        [0, 1]
+        """
+        predictions: list[int] = []
+
+        for row in features:
+            class_scores: dict[int, float] = {}
+
+            for label in self.class_priors:
+                score = self.class_priors[label]
+
+                for index, value in enumerate(row):
+                    score += value * self.feature_log_prob[label][index]
+
+                class_scores[label] = score
+
+            predicted_label = max(
+                class_scores.items(),
+                key=lambda item: item[1],
+            )[0]
+            predictions.append(predicted_label)
+
+        return predictions
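Note for reviewers: beyond the embedded doctests (runnable with python -m doctest machine_learning/naive_bayes.py machine_learning/multinomial_naive_bayes.py), the minimal sketch below exercises both new classifiers end to end. It assumes all three patches are applied and that the two modules are importable, e.g. when run from the repository root; the toy data and variable names are illustrative, not taken from the patches.

    from machine_learning.naive_bayes import GaussianNaiveBayes
    from machine_learning.multinomial_naive_bayes import MultinomialNaiveBayes

    # Gaussian NB on continuous features: two well-separated 1-D clusters.
    gaussian_model = GaussianNaiveBayes()
    gaussian_model.fit([[1.0], [1.2], [3.8], [4.0]], [0, 0, 1, 1])
    print(gaussian_model.predict([[1.1], [3.9]]))  # expected: [0, 1]

    # Multinomial NB on non-negative count features (e.g. word counts per document).
    multinomial_model = MultinomialNaiveBayes(alpha=1.0)
    multinomial_model.fit([[2, 1], [1, 1], [0, 2]], [0, 0, 1])
    print(multinomial_model.predict([[1, 0], [0, 2]]))  # expected: [0, 1]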