fix comment counting

ManfredHair · ManfredHair · commit 2af6b5bbb288 · 2025-03-31T19:59:06.000-03:00
diff --git a/spice/analyze.py b/spice/analyze.py
@@ -2,7 +2,7 @@
 
 # gustavo testando alguma coisa 
 from spice.analyzers.identation import detect_indentation
-
+from spice.utils.get_langague import detect_language
 
 
 # this is the analyze function
@@ -26,6 +26,8 @@ def analyze_file(file_path: str, selected_stats=None):
         "file_name": os.path.basename(file_path)
     }
     
+    LANG = detect_language(file_path)
+    
     # read the code file only once and load it into memory
     with open(file_path, "r", encoding="utf-8") as file:
         code = file.read()
@@ -38,7 +40,7 @@ def analyze_file(file_path: str, selected_stats=None):
     # comment line count if requested
     if "comment_line_count" in selected_stats:
         from spice.analyzers.count_comment_lines import count_comment_lines
-        results["comment_line_count"] = count_comment_lines(code)
+        results["comment_line_count"] = count_comment_lines(code, LANG)
 
     # @gtins botei sua funcao aqui pq ela usa o codigo raw e nao o tokenizado, ai so tirei ela ali de baixo pra nao ficar chamando o parser sem precisar
     # edit: ok i see whats going on, instead of appending the results to the resuls, this will itself print the results to the terminal
diff --git a/spice/analyzers/count_comment_lines.py b/spice/analyzers/count_comment_lines.py
@@ -1,18 +1,76 @@
-# this will count comment lines, since our AST/Parser doesn't include comment lines, this needs to be done in the tokenized output of the lexer
-# not sure about that first line, im pretty sure like about 200% sure this is analyzing the raw code and not the tokenized code but ok
+# this will count comment lines for Python, JavaScript, Ruby, and Go
 # COMMENT LINE IS A LINE THAT EXCLUSIVELY HAS A COMMENT
-# so like: y = 5 #sets y to 5 IS NOT A COMMENT LINE!!!!!!!!
-def count_comment_lines(code):
-    """Count lines that are exclusively comments (no code on the same line)"""
+def count_comment_lines(code, lang):
+    """Count the lines of ``code`` that contain nothing but a comment.
+
+    Blank lines are ignored. Every line of a block comment is counted,
+    including its opening and closing lines: triple-quoted strings for
+    Python, ``/* ... */`` for JavaScript and Go, ``=begin``/``=end`` for Ruby.
+    A line such as ``y = 5  # sets y to 5`` is NOT a comment line.
+
+    Args:
+        code: source text to scan.
+        lang: "python", "javascript", "go" or "ruby", in any letter case.
+
+    Returns:
+        The number of comment-only lines.
+
+    Raises:
+        ValueError: if ``lang`` is not a supported language.
+    """
     # split the code into lines
     lines = code.splitlines()
     comment_count = 0
     
+    # Normalize once so marker selection and block detection agree on case
+    language = lang.lower()
+    if language == "python":
+        single_comment = "#"
+        block_pairs = {'"""': '"""', "'''": "'''"}
+    elif language in ("javascript", "go"):
+        single_comment = "//"
+        block_pairs = {"/*": "*/"}
+    elif language == "ruby":
+        single_comment = "#"
+        block_pairs = {"=begin": "=end"}
+    else:
+        raise ValueError(f"Unsupported language: {lang}")
+    # Ruby block markers must be the only text on their line
+    whole_line = language == "ruby"
+
+    # Closing delimiter of the block comment currently open, or None
+    block_end = None
+
     for line in lines:
         # Remove leading whitespace
         stripped = line.strip()
-        # Check if this line consists only of a comment
-        if stripped and stripped.startswith('#'):
+
+        # Skip empty lines
+        if not stripped:
+            continue
+
+        # Inside a block comment every line counts, including the closing one
+        if block_end is not None:
+            comment_count += 1
+            closed = (stripped == block_end) if whole_line else (block_end in stripped)
+            if closed:
+                block_end = None
+            continue
+
+        # Check for start of multi-line comment
+        if whole_line:
+            opener = stripped if stripped in block_pairs else None
+        else:
+            opener = next((o for o in block_pairs if stripped.startswith(o)), None)
+        if opener is not None:
+            comment_count += 1
+            # A block closed on its opening line must not swallow later lines
+            if whole_line or block_pairs[opener] not in stripped[len(opener):]:
+                block_end = block_pairs[opener]
+            continue
+
+        # Check for single-line comments
+        if stripped.startswith(single_comment):
             comment_count += 1
     
     return comment_count
diff --git a/spice/utils/get_langague.py b/spice/utils/get_langague.py
@@ -0,0 +1,20 @@
+import os
+
+def detect_language(file_path):
+    """Map a file path to a language name using its extension.
+
+    The extension is matched case-insensitively; anything other than
+    .rb, .py, .js or .go raises ValueError.
+    """
+    languages = {".rb": "ruby", ".py": "python",
+                 ".js": "javascript", ".go": "go"}
+    _, ext = os.path.splitext(file_path)
+    language = languages.get(ext.lower())
+    if language is None:
+        raise ValueError(f"Unsupported file extension: {ext}")
+    return language
+
+# Demo: print the language detected for one sample path per supported extension.
+if __name__ == "__main__":
+    for sample in ("example.py", "example.js", "example.rb", "example.go"):
+        print(f"{sample}: {detect_language(sample)}")