From 79b97411af586acdb36dfc4a8003a48779901547 Mon Sep 17 00:00:00 2001
From: yanfeng
Date: Mon, 9 Feb 2026 13:58:10 +0800
Subject: [PATCH 1/5] feat(auto_cl): add error rate threshold for punishment
 attenuation

Add new GFlag `auto_cl_error_rate_punish_threshold` to enable
error-rate-based punishment attenuation in AutoConcurrencyLimiter.

Problem: Low error rates (e.g., 1.3% sporadic timeouts) cause
disproportionate avg_latency inflation (+31%), leading the limiter to
mistakenly shrink max_concurrency and trigger ELIMIT rejections.

Solution: Inspired by Alibaba Sentinel's threshold-based approach:
- threshold=0 (default): Original behavior preserved (backward compat)
- threshold>0 (e.g., 0.1): Error rates below threshold produce zero
  punishment; above it, punishment scales linearly from 0 to full

Example: With threshold=0.1, a 5% error rate produces no punishment,
while a 50% error rate produces 44% of the original punishment.

Co-Authored-By: Claude Opus 4.5
---
 src/brpc/policy/auto_concurrency_limiter.cpp  |  30 ++-
 ...brpc_auto_concurrency_limiter_unittest.cpp | 208 ++++++++++++++++++
 2 files changed, 237 insertions(+), 1 deletion(-)
 create mode 100644 test/brpc_auto_concurrency_limiter_unittest.cpp

diff --git a/src/brpc/policy/auto_concurrency_limiter.cpp b/src/brpc/policy/auto_concurrency_limiter.cpp
index dd5a02ec99..220d8a1693 100644
--- a/src/brpc/policy/auto_concurrency_limiter.cpp
+++ b/src/brpc/policy/auto_concurrency_limiter.cpp
@@ -77,6 +77,14 @@ DEFINE_int32(auto_cl_latency_fluctuation_correction_factor, 1,
             "the value, the higher the tolerance for the fluctuation of the "
             "latency. If the value is too large, the latency will be higher "
             "when the server is overloaded.");
+DEFINE_double(auto_cl_error_rate_punish_threshold, 0,
+             "Threshold for error-rate-based punishment attenuation. "
+             "0 (default): no effect, original punishment logic is used. "
+             "> 0 (e.g. 0.1): error rates below this threshold produce zero "
+             "punishment; above it the punishment scales linearly from 0 to "
+             "full strength. Only effective when auto_cl_enable_error_punish "
+             "is true. Example: 0.1 means error rates below 10%% are not "
+             "punished.");
 
 AutoConcurrencyLimiter::AutoConcurrencyLimiter()
     : _max_concurrency(FLAGS_auto_cl_initial_max_concurrency)
@@ -236,7 +244,27 @@ void AutoConcurrencyLimiter::AdjustMaxConcurrency(int next_max_concurrency) {
 void AutoConcurrencyLimiter::UpdateMaxConcurrency(int64_t sampling_time_us) {
     int32_t total_succ_req = _total_succ_req.load(butil::memory_order_relaxed);
     double failed_punish = _sw.total_failed_us * FLAGS_auto_cl_fail_punish_ratio;
-    int64_t avg_latency =
+
+    // Threshold-based attenuation: when auto_cl_error_rate_punish_threshold > 0,
+    // attenuate punishment based on error rate. Inspired by Sentinel's threshold-
+    // based circuit breaker: low error rates should not inflate avg_latency.
+    // Above threshold, punishment scales linearly from 0 to full strength.
+    // When threshold is 0 (default), this block is skipped entirely.
+    if (FLAGS_auto_cl_error_rate_punish_threshold > 0 && _sw.failed_count > 0) {
+        double threshold = FLAGS_auto_cl_error_rate_punish_threshold;
+        double error_rate = static_cast<double>(_sw.failed_count) /
+                            (_sw.succ_count + _sw.failed_count);
+        if (error_rate <= threshold) {
+            // Error rate within dead zone, cancel punishment.
+            failed_punish = 0;
+        } else {
+            // Linear ramp: 0 at threshold, 1.0 at 100% error rate.
+            double punish_factor = (error_rate - threshold) / (1.0 - threshold);
+            failed_punish *= punish_factor;
+        }
+    }
+
+    int64_t avg_latency =
         std::ceil((failed_punish + _sw.total_succ_us) / _sw.succ_count);
     double qps = 1000000.0 * total_succ_req / (sampling_time_us - _sw.start_time_us);
     UpdateMinLatency(avg_latency);
diff --git a/test/brpc_auto_concurrency_limiter_unittest.cpp b/test/brpc_auto_concurrency_limiter_unittest.cpp
new file mode 100644
index 0000000000..7bece930c9
--- /dev/null
+++ b/test/brpc_auto_concurrency_limiter_unittest.cpp
@@ -0,0 +1,208 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "brpc/policy/auto_concurrency_limiter.h"
+#include "butil/time.h"
+#include "bthread/bthread.h"
+#include <gtest/gtest.h>
+
+namespace brpc {
+namespace policy {
+
+DECLARE_int32(auto_cl_sample_window_size_ms);
+DECLARE_int32(auto_cl_min_sample_count);
+DECLARE_int32(auto_cl_max_sample_count);
+DECLARE_bool(auto_cl_enable_error_punish);
+DECLARE_double(auto_cl_fail_punish_ratio);
+DECLARE_double(auto_cl_error_rate_punish_threshold);
+
+} // namespace policy
+} // namespace brpc
+
+class AutoConcurrencyLimiterTest : public ::testing::Test {
+protected:
+    void SetUp() override {
+        // Save original values
+        orig_sample_window_size_ms_ = brpc::policy::FLAGS_auto_cl_sample_window_size_ms;
+        orig_min_sample_count_ = brpc::policy::FLAGS_auto_cl_min_sample_count;
+        orig_max_sample_count_ = brpc::policy::FLAGS_auto_cl_max_sample_count;
+        orig_enable_error_punish_ = brpc::policy::FLAGS_auto_cl_enable_error_punish;
+        orig_fail_punish_ratio_ = brpc::policy::FLAGS_auto_cl_fail_punish_ratio;
+        orig_error_rate_threshold_ = brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold;
+
+        // Set test-friendly values
+        brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 1000;
+        brpc::policy::FLAGS_auto_cl_min_sample_count = 5;
+        brpc::policy::FLAGS_auto_cl_max_sample_count = 200;
+        brpc::policy::FLAGS_auto_cl_enable_error_punish = true;
+        brpc::policy::FLAGS_auto_cl_fail_punish_ratio = 1.0;
+    }
+
+    void TearDown() override {
+        // Restore original values
+        brpc::policy::FLAGS_auto_cl_sample_window_size_ms = orig_sample_window_size_ms_;
+        brpc::policy::FLAGS_auto_cl_min_sample_count = orig_min_sample_count_;
+        brpc::policy::FLAGS_auto_cl_max_sample_count = orig_max_sample_count_;
+        brpc::policy::FLAGS_auto_cl_enable_error_punish = orig_enable_error_punish_;
+        brpc::policy::FLAGS_auto_cl_fail_punish_ratio = orig_fail_punish_ratio_;
+        brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = orig_error_rate_threshold_;
+    }
+
+private:
+    int32_t orig_sample_window_size_ms_;
+    int32_t orig_min_sample_count_;
+    int32_t orig_max_sample_count_;
+    bool orig_enable_error_punish_;
+    double orig_fail_punish_ratio_;
+    double orig_error_rate_threshold_;
+};
+
+// Helper function to
add samples and trigger window completion +void AddSamplesAndTriggerWindow(brpc::policy::AutoConcurrencyLimiter& limiter, + int succ_count, int64_t succ_latency, + int fail_count, int64_t fail_latency) { + int64_t now = butil::gettimeofday_us(); + + // Add successful samples + for (int i = 0; i < succ_count; ++i) { + limiter.AddSample(0, succ_latency, now); + } + // Add failed samples + for (int i = 0; i < fail_count; ++i) { + limiter.AddSample(1, fail_latency, now); + } + + // Wait for window to expire and trigger update + bthread_usleep(brpc::policy::FLAGS_auto_cl_sample_window_size_ms * 1000 + 1000); + + // Add one more sample to trigger window submission + limiter.AddSample(0, succ_latency, butil::gettimeofday_us()); +} + +// Test: When threshold is 0 (default), behavior is unchanged - punishment is applied +TEST_F(AutoConcurrencyLimiterTest, ThresholdZeroPreservesOriginalBehavior) { + brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0; + brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; // Short window for testing + + brpc::policy::AutoConcurrencyLimiter limiter; + + AddSamplesAndTriggerWindow(limiter, 90, 100, 10, 1000); + + // With threshold=0, failed_punish should NOT be attenuated + // avg_latency = (10*1000 + 90*100) / 90 = (10000 + 9000) / 90 = 211us + // This is significantly inflated from the actual success latency of 100us + // _min_latency_us should reflect this inflation + ASSERT_GT(limiter._min_latency_us, 150); // Should be inflated +} + +// Test: When error rate is below threshold, punishment is zero +TEST_F(AutoConcurrencyLimiterTest, BelowThresholdZeroPunishment) { + brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0.2; // 20% threshold + brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; + + brpc::policy::AutoConcurrencyLimiter limiter; + + AddSamplesAndTriggerWindow(limiter, 90, 100, 10, 1000); + + // With 10% error rate < 20% threshold, punishment should be zero + // avg_latency should be close to actual success latency of 100us + ASSERT_LT(limiter._min_latency_us, 150); // Should NOT be inflated + ASSERT_GT(limiter._min_latency_us, 50); // Should be valid (around 100us) +} + +// Test: When error rate is above threshold, punishment scales linearly +TEST_F(AutoConcurrencyLimiterTest, AboveThresholdLinearScaling) { + brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0.1; // 10% threshold + brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; + + brpc::policy::AutoConcurrencyLimiter limiter; + + AddSamplesAndTriggerWindow(limiter, 50, 100, 50, 1000); + + // With 50% error rate > 10% threshold: + // punish_factor = (0.5 - 0.1) / (1.0 - 0.1) = 0.4 / 0.9 = 0.444 + // failed_punish = 50 * 1000 * 1.0 * 0.444 = 22222us + // avg_latency = (22222 + 50*100) / 50 = (22222 + 5000) / 50 = 544us + // This should be inflated, but less than threshold=0 case + ASSERT_GT(limiter._min_latency_us, 200); // Should be somewhat inflated +} + +// Test: Edge case - error rate exactly at threshold +TEST_F(AutoConcurrencyLimiterTest, ExactlyAtThresholdZeroPunishment) { + brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0.1; // 10% threshold + brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; + + brpc::policy::AutoConcurrencyLimiter limiter; + + AddSamplesAndTriggerWindow(limiter, 90, 100, 10, 1000); + + // At exactly threshold, punishment should be zero (boundary case) + // avg_latency should be close to actual success latency of 100us + ASSERT_LT(limiter._min_latency_us, 150); +} + +// Test: No failed requests - threshold has no effect 
+TEST_F(AutoConcurrencyLimiterTest, NoFailedRequestsThresholdNoEffect) { + brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0.1; + brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; + + brpc::policy::AutoConcurrencyLimiter limiter; + + AddSamplesAndTriggerWindow(limiter, 100, 100, 0, 0); + + // No failed requests, so threshold logic shouldn't trigger + ASSERT_GT(limiter._min_latency_us, 0); // Should have valid latency + ASSERT_LT(limiter._min_latency_us, 150); // Should be close to 100us +} + +// Test: Compare punishment at different thresholds for same error rate +TEST_F(AutoConcurrencyLimiterTest, DifferentThresholdsDifferentPunishment) { + brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; + + // Test with threshold = 0 (original behavior) + brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0; + brpc::policy::AutoConcurrencyLimiter limiter1; + AddSamplesAndTriggerWindow(limiter1, 95, 100, 5, 1000); // 5% error rate + int64_t latency_threshold_0 = limiter1._min_latency_us; + + // Test with threshold = 0.1 (5% < 10%, in dead zone) + brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0.1; + brpc::policy::AutoConcurrencyLimiter limiter2; + AddSamplesAndTriggerWindow(limiter2, 95, 100, 5, 1000); // 5% error rate + int64_t latency_threshold_10 = limiter2._min_latency_us; + + // With threshold=0, latency should be inflated + // With threshold=0.1 and 5% error rate (below threshold), latency should not be inflated + ASSERT_GT(latency_threshold_0, latency_threshold_10); +} + +// Test: Verify linear scaling formula +TEST_F(AutoConcurrencyLimiterTest, LinearScalingFormula) { + // At 90% error rate, punishment factor should be 0.889 + brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0.1; + brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; + + brpc::policy::AutoConcurrencyLimiter limiter; + + AddSamplesAndTriggerWindow(limiter, 10, 100, 90, 1000); + + // With 90% error rate > 10% threshold: + // punish_factor = (0.9 - 0.1) / (1.0 - 0.1) = 0.8 / 0.9 = 0.889 + // High punishment factor, latency should be significantly inflated + ASSERT_GT(limiter._min_latency_us, 500); +} + From 5464889a142bfaf6cbd0a15c3181f08a9150f94b Mon Sep 17 00:00:00 2001 From: yanfeng Date: Mon, 9 Feb 2026 18:18:35 +0800 Subject: [PATCH 2/5] test(auto_cl): improve unit tests based on code review - Use synthetic timestamps instead of sleeping for deterministic tests - Fix trigger sample counting to preserve exact error rates - Consolidate 7 tests to 4 core tests with two-sided assertions - Add expected value range validation in assertions Co-Authored-By: Claude Opus 4.5 --- ...brpc_auto_concurrency_limiter_unittest.cpp | 137 +++++++----------- 1 file changed, 49 insertions(+), 88 deletions(-) diff --git a/test/brpc_auto_concurrency_limiter_unittest.cpp b/test/brpc_auto_concurrency_limiter_unittest.cpp index 7bece930c9..b5e2f7a35c 100644 --- a/test/brpc_auto_concurrency_limiter_unittest.cpp +++ b/test/brpc_auto_concurrency_limiter_unittest.cpp @@ -72,13 +72,17 @@ class AutoConcurrencyLimiterTest : public ::testing::Test { }; // Helper function to add samples and trigger window completion +// Uses synthetic timestamps instead of sleeping for faster, deterministic tests. +// The final successful sample is used as the trigger, so actual counts match +// succ_count/fail_count exactly (preserving intended error rates). 
void AddSamplesAndTriggerWindow(brpc::policy::AutoConcurrencyLimiter& limiter, int succ_count, int64_t succ_latency, int fail_count, int64_t fail_latency) { + ASSERT_GT(succ_count, 0) << "Need at least 1 success to trigger window"; int64_t now = butil::gettimeofday_us(); - // Add successful samples - for (int i = 0; i < succ_count; ++i) { + // Add successful samples (reserve one for the trigger) + for (int i = 0; i < succ_count - 1; ++i) { limiter.AddSample(0, succ_latency, now); } // Add failed samples @@ -86,123 +90,80 @@ void AddSamplesAndTriggerWindow(brpc::policy::AutoConcurrencyLimiter& limiter, limiter.AddSample(1, fail_latency, now); } - // Wait for window to expire and trigger update - bthread_usleep(brpc::policy::FLAGS_auto_cl_sample_window_size_ms * 1000 + 1000); + // Advance timestamp past window expiry instead of sleeping + int64_t after_window = now + brpc::policy::FLAGS_auto_cl_sample_window_size_ms * 1000 + 1000; - // Add one more sample to trigger window submission - limiter.AddSample(0, succ_latency, butil::gettimeofday_us()); + // Use the final success sample to trigger window submission + limiter.AddSample(0, succ_latency, after_window); } -// Test: When threshold is 0 (default), behavior is unchanged - punishment is applied +// Test 1: Backward compatibility - threshold=0 preserves original punishment behavior TEST_F(AutoConcurrencyLimiterTest, ThresholdZeroPreservesOriginalBehavior) { brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0; - brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; // Short window for testing + brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; brpc::policy::AutoConcurrencyLimiter limiter; - AddSamplesAndTriggerWindow(limiter, 90, 100, 10, 1000); - // With threshold=0, failed_punish should NOT be attenuated - // avg_latency = (10*1000 + 90*100) / 90 = (10000 + 9000) / 90 = 211us - // This is significantly inflated from the actual success latency of 100us - // _min_latency_us should reflect this inflation - ASSERT_GT(limiter._min_latency_us, 150); // Should be inflated + // 10% error rate, threshold=0 means full punishment applied + // avg_latency = (10*1000 + 90*100) / 90 = 211us + ASSERT_GT(limiter._min_latency_us, 180); + ASSERT_LT(limiter._min_latency_us, 250); } -// Test: When error rate is below threshold, punishment is zero +// Test 2: Dead zone - error rate below threshold produces zero punishment TEST_F(AutoConcurrencyLimiterTest, BelowThresholdZeroPunishment) { brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0.2; // 20% threshold brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; brpc::policy::AutoConcurrencyLimiter limiter; - AddSamplesAndTriggerWindow(limiter, 90, 100, 10, 1000); - // With 10% error rate < 20% threshold, punishment should be zero - // avg_latency should be close to actual success latency of 100us - ASSERT_LT(limiter._min_latency_us, 150); // Should NOT be inflated - ASSERT_GT(limiter._min_latency_us, 50); // Should be valid (around 100us) -} - -// Test: When error rate is above threshold, punishment scales linearly -TEST_F(AutoConcurrencyLimiterTest, AboveThresholdLinearScaling) { - brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0.1; // 10% threshold - brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; - - brpc::policy::AutoConcurrencyLimiter limiter; - - AddSamplesAndTriggerWindow(limiter, 50, 100, 50, 1000); - - // With 50% error rate > 10% threshold: - // punish_factor = (0.5 - 0.1) / (1.0 - 0.1) = 0.4 / 0.9 = 0.444 - // failed_punish = 50 * 1000 * 1.0 * 0.444 = 
22222us - // avg_latency = (22222 + 50*100) / 50 = (22222 + 5000) / 50 = 544us - // This should be inflated, but less than threshold=0 case - ASSERT_GT(limiter._min_latency_us, 200); // Should be somewhat inflated + // 10% error rate < 20% threshold, punishment should be zero + // avg_latency = 90*100 / 90 = 100us (no inflation) + ASSERT_GT(limiter._min_latency_us, 80); + ASSERT_LT(limiter._min_latency_us, 130); } -// Test: Edge case - error rate exactly at threshold +// Test 3: Boundary - error rate exactly at threshold produces zero punishment TEST_F(AutoConcurrencyLimiterTest, ExactlyAtThresholdZeroPunishment) { brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0.1; // 10% threshold brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; brpc::policy::AutoConcurrencyLimiter limiter; - AddSamplesAndTriggerWindow(limiter, 90, 100, 10, 1000); - // At exactly threshold, punishment should be zero (boundary case) - // avg_latency should be close to actual success latency of 100us - ASSERT_LT(limiter._min_latency_us, 150); + // 10% error rate == 10% threshold, punishment should be zero + // avg_latency = 90*100 / 90 = 100us + ASSERT_GT(limiter._min_latency_us, 80); + ASSERT_LT(limiter._min_latency_us, 130); } -// Test: No failed requests - threshold has no effect -TEST_F(AutoConcurrencyLimiterTest, NoFailedRequestsThresholdNoEffect) { - brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0.1; - brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; - - brpc::policy::AutoConcurrencyLimiter limiter; - - AddSamplesAndTriggerWindow(limiter, 100, 100, 0, 0); - - // No failed requests, so threshold logic shouldn't trigger - ASSERT_GT(limiter._min_latency_us, 0); // Should have valid latency - ASSERT_LT(limiter._min_latency_us, 150); // Should be close to 100us -} - -// Test: Compare punishment at different thresholds for same error rate -TEST_F(AutoConcurrencyLimiterTest, DifferentThresholdsDifferentPunishment) { - brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; - - // Test with threshold = 0 (original behavior) - brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0; - brpc::policy::AutoConcurrencyLimiter limiter1; - AddSamplesAndTriggerWindow(limiter1, 95, 100, 5, 1000); // 5% error rate - int64_t latency_threshold_0 = limiter1._min_latency_us; - - // Test with threshold = 0.1 (5% < 10%, in dead zone) - brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0.1; - brpc::policy::AutoConcurrencyLimiter limiter2; - AddSamplesAndTriggerWindow(limiter2, 95, 100, 5, 1000); // 5% error rate - int64_t latency_threshold_10 = limiter2._min_latency_us; - - // With threshold=0, latency should be inflated - // With threshold=0.1 and 5% error rate (below threshold), latency should not be inflated - ASSERT_GT(latency_threshold_0, latency_threshold_10); -} - -// Test: Verify linear scaling formula -TEST_F(AutoConcurrencyLimiterTest, LinearScalingFormula) { - // At 90% error rate, punishment factor should be 0.889 - brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0.1; +// Test 4: Linear scaling - above threshold, punishment scales proportionally +TEST_F(AutoConcurrencyLimiterTest, AboveThresholdLinearScaling) { + brpc::policy::FLAGS_auto_cl_error_rate_punish_threshold = 0.1; // 10% threshold brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; - brpc::policy::AutoConcurrencyLimiter limiter; - - AddSamplesAndTriggerWindow(limiter, 10, 100, 90, 1000); + // Case A: 50% error rate + // punish_factor = (0.5 - 0.1) / (1.0 - 0.1) = 0.444 + // failed_punish = 50 * 1000 * 0.444 
= 22222us + // avg_latency = (22222 + 50*100) / 50 = 544us + { + brpc::policy::AutoConcurrencyLimiter limiter; + AddSamplesAndTriggerWindow(limiter, 50, 100, 50, 1000); + ASSERT_GT(limiter._min_latency_us, 450); + ASSERT_LT(limiter._min_latency_us, 650); + } - // With 90% error rate > 10% threshold: - // punish_factor = (0.9 - 0.1) / (1.0 - 0.1) = 0.8 / 0.9 = 0.889 - // High punishment factor, latency should be significantly inflated - ASSERT_GT(limiter._min_latency_us, 500); + // Case B: 90% error rate (near full punishment) + // punish_factor = (0.9 - 0.1) / (1.0 - 0.1) = 0.889 + // failed_punish = 90 * 1000 * 0.889 = 80000us + // avg_latency = (80000 + 10*100) / 10 = 8100us + { + brpc::policy::AutoConcurrencyLimiter limiter; + AddSamplesAndTriggerWindow(limiter, 10, 100, 90, 1000); + ASSERT_GT(limiter._min_latency_us, 7000); + ASSERT_LT(limiter._min_latency_us, 9000); + } } From 76fdd95ef13e1ff10e36c1feab64dcf342587110 Mon Sep 17 00:00:00 2001 From: yanfeng Date: Mon, 9 Feb 2026 18:20:06 +0800 Subject: [PATCH 3/5] docs(auto_cl): add parameter configuration section - Document error punishment related GFlags - Add detailed explanation for auto_cl_error_rate_punish_threshold - Include table of all configurable parameters with defaults Co-Authored-By: Claude Opus 4.5 --- docs/cn/auto_concurrency_limiter.md | 44 +++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/docs/cn/auto_concurrency_limiter.md b/docs/cn/auto_concurrency_limiter.md index 17ef5d7ec3..8768541130 100644 --- a/docs/cn/auto_concurrency_limiter.md +++ b/docs/cn/auto_concurrency_limiter.md @@ -154,3 +154,47 @@ netflix中的gradient算法公式为:max_concurrency = min_latency / latency * * gradient算法中的max_concurrency / latency从概念上和qps有关联(根据little's law),但可能严重脱节。比如在重测 min_latency前,若所有latency都小于min_latency,那么max_concurrency会不断下降甚至到0;但按照本算法,max_qps和min_latency仍然是稳定的,它们计算出的max_concurrency也不会剧烈变动。究其本质,gradient算法在迭代max_concurrency时,latency并不能代表实际并发为max_concurrency时的延时,两者是脱节的,所以max_concurrency / latency的实际物理含义不明,与qps可能差异甚大,最后导致了很大的偏差。 * gradient算法的queue_size推荐为sqrt(max_concurrency),这是不合理的。netflix对queue_size的理解大概是代表各种不可控环节的缓存,比如socket里的,和max_concurrency存在一定的正向关系情有可原。但在我们的理解中,这部分queue_size作用微乎其微,没有或用常量即可。我们关注的queue_size是给concurrency上升留出的探索空间: max_concurrency的更新是有延迟的,在并发从低到高的增长过程中,queue_size的作用就是在max_concurrency更新前不限制qps上升。而当concurrency高时,服务可能已经过载了,queue_size就应该小一点,防止进一步恶化延时。这里的queue_size和并发是反向关系。 + +## 参数配置 + +### 错误请求惩罚 + +自适应限流在计算平均延时时,默认会将失败请求的延时也计入统计,以避免在下游服务异常时过度放大max_concurrency。相关参数如下: + +| GFlag | 默认值 | 说明 | +|-------|--------|------| +| auto_cl_enable_error_punish | true | 是否开启错误请求惩罚。关闭后失败请求不计入延时统计 | +| auto_cl_fail_punish_ratio | 1.0 | 惩罚系数。值越大惩罚越激进,失败请求对平均延时的影响越大 | +| auto_cl_error_rate_punish_threshold | 0 | 错误率惩罚阈值。见下文详细说明 | + +#### 错误率惩罚阈值 + +`auto_cl_error_rate_punish_threshold`用于设置错误率"死区",低于该阈值的错误率不会产生惩罚,避免少量错误请求对max_concurrency的过度影响。 + +- **默认值为0**:保持原有行为,所有失败请求都会产生惩罚 +- **设置为正值(如0.1)**: + - 错误率 ≤ 阈值时:惩罚为0,平均延时仅由成功请求决定 + - 错误率 > 阈值时:惩罚线性增长,从0逐步恢复到完整惩罚 + +线性衰减公式:`punish_factor = (error_rate - threshold) / (1.0 - threshold)` + +**使用场景**:当服务存在少量固有错误(如个别请求参数异常)时,这些错误不应影响对服务处理能力的判断。通过设置合理的阈值(如0.05或0.1),可以过滤掉这部分噪声。 + +**示例**: +``` +# 错误率低于10%时不惩罚,高于10%时线性增加惩罚 +--auto_cl_error_rate_punish_threshold=0.1 +``` + +### 其他参数 + +| GFlag | 默认值 | 说明 | +|-------|--------|------| +| auto_cl_sample_window_size_ms | 1000 | 采样窗口时长(毫秒) | +| auto_cl_min_sample_count | 100 | 采样窗口内的最小样本数,不足则丢弃该窗口 | +| auto_cl_max_sample_count | 200 | 采样窗口内的最大样本数,超过则提前提交窗口 | +| auto_cl_initial_max_concurrency | 40 | 初始最大并发数 | +| 
auto_cl_alpha_factor_for_ema | 0.1 | EMA平滑系数,值越小单次采样窗口对结果影响越小 | +| auto_cl_max_explore_ratio | 0.3 | 最大探索比例,值越大对延时波动的容忍度越高 | +| auto_cl_min_explore_ratio | 0.06 | 最小探索比例,用于判断服务负载情况 | +| auto_cl_noload_latency_remeasure_interval_ms | 50000 | 重测noload_latency的间隔(毫秒) | From 0fecd8a1e274f04d4d84d409230312fca60c7b3f Mon Sep 17 00:00:00 2001 From: yanfeng Date: Mon, 9 Feb 2026 19:50:53 +0800 Subject: [PATCH 4/5] test(auto_cl): fix comments and add Bazel target - Fix avg_latency comments to reflect std::ceil() rounding behavior - Add cc_test target in BUILD.bazel for Bazel CI coverage Co-Authored-By: Claude Opus 4.5 --- test/BUILD.bazel | 13 +++++++++++++ test/brpc_auto_concurrency_limiter_unittest.cpp | 8 ++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/test/BUILD.bazel b/test/BUILD.bazel index 05420ae310..66aef4259e 100644 --- a/test/BUILD.bazel +++ b/test/BUILD.bazel @@ -269,6 +269,19 @@ cc_test( ], ) +cc_test( + name = "brpc_auto_concurrency_limiter_test", + srcs = [ + "brpc_auto_concurrency_limiter_unittest.cpp", + ], + copts = COPTS, + deps = [ + "//:brpc", + "@com_google_googletest//:gtest", + "@com_google_googletest//:gtest_main", + ], +) + refresh_compile_commands( name = "brpc_test_compdb", # Specify the targets of interest. diff --git a/test/brpc_auto_concurrency_limiter_unittest.cpp b/test/brpc_auto_concurrency_limiter_unittest.cpp index b5e2f7a35c..87619b7e8f 100644 --- a/test/brpc_auto_concurrency_limiter_unittest.cpp +++ b/test/brpc_auto_concurrency_limiter_unittest.cpp @@ -106,7 +106,7 @@ TEST_F(AutoConcurrencyLimiterTest, ThresholdZeroPreservesOriginalBehavior) { AddSamplesAndTriggerWindow(limiter, 90, 100, 10, 1000); // 10% error rate, threshold=0 means full punishment applied - // avg_latency = (10*1000 + 90*100) / 90 = 211us + // avg_latency = ceil((10*1000 + 90*100) / 90) = ceil(211.1) = 212us ASSERT_GT(limiter._min_latency_us, 180); ASSERT_LT(limiter._min_latency_us, 250); } @@ -145,9 +145,9 @@ TEST_F(AutoConcurrencyLimiterTest, AboveThresholdLinearScaling) { brpc::policy::FLAGS_auto_cl_sample_window_size_ms = 10; // Case A: 50% error rate - // punish_factor = (0.5 - 0.1) / (1.0 - 0.1) = 0.444 - // failed_punish = 50 * 1000 * 0.444 = 22222us - // avg_latency = (22222 + 50*100) / 50 = 544us + // punish_factor = (0.5 - 0.1) / (1.0 - 0.1) = 4/9 ≈ 0.444 + // failed_punish = 50 * 1000 * (4/9) = 22222.2us + // avg_latency = ceil((22222.2 + 50*100) / 50) = ceil(544.4) = 545us { brpc::policy::AutoConcurrencyLimiter limiter; AddSamplesAndTriggerWindow(limiter, 50, 100, 50, 1000); From a18e2bf857a95252e7806f431d77dc2cd96bb70b Mon Sep 17 00:00:00 2001 From: yanfeng Date: Mon, 9 Feb 2026 19:58:16 +0800 Subject: [PATCH 5/5] feat(auto_cl): handle invalid threshold values gracefully - Skip attenuation logic when threshold <= 0 or >= 1 - Update GFlag description to document valid range (0, 1) - Add documentation for the new parameter Co-Authored-By: Claude Opus 4.5 --- docs/cn/auto_concurrency_limiter.md | 40 ++++--------------- src/brpc/policy/auto_concurrency_limiter.cpp | 25 ++++++------ ...brpc_auto_concurrency_limiter_unittest.cpp | 7 ++-- 3 files changed, 23 insertions(+), 49 deletions(-) diff --git a/docs/cn/auto_concurrency_limiter.md b/docs/cn/auto_concurrency_limiter.md index 8768541130..342e9ba641 100644 --- a/docs/cn/auto_concurrency_limiter.md +++ b/docs/cn/auto_concurrency_limiter.md @@ -155,46 +155,20 @@ netflix中的gradient算法公式为:max_concurrency = min_latency / latency * 
min_latency前,若所有latency都小于min_latency,那么max_concurrency会不断下降甚至到0;但按照本算法,max_qps和min_latency仍然是稳定的,它们计算出的max_concurrency也不会剧烈变动。究其本质,gradient算法在迭代max_concurrency时,latency并不能代表实际并发为max_concurrency时的延时,两者是脱节的,所以max_concurrency / latency的实际物理含义不明,与qps可能差异甚大,最后导致了很大的偏差。 * gradient算法的queue_size推荐为sqrt(max_concurrency),这是不合理的。netflix对queue_size的理解大概是代表各种不可控环节的缓存,比如socket里的,和max_concurrency存在一定的正向关系情有可原。但在我们的理解中,这部分queue_size作用微乎其微,没有或用常量即可。我们关注的queue_size是给concurrency上升留出的探索空间: max_concurrency的更新是有延迟的,在并发从低到高的增长过程中,queue_size的作用就是在max_concurrency更新前不限制qps上升。而当concurrency高时,服务可能已经过载了,queue_size就应该小一点,防止进一步恶化延时。这里的queue_size和并发是反向关系。 -## 参数配置 - -### 错误请求惩罚 - -自适应限流在计算平均延时时,默认会将失败请求的延时也计入统计,以避免在下游服务异常时过度放大max_concurrency。相关参数如下: - -| GFlag | 默认值 | 说明 | -|-------|--------|------| -| auto_cl_enable_error_punish | true | 是否开启错误请求惩罚。关闭后失败请求不计入延时统计 | -| auto_cl_fail_punish_ratio | 1.0 | 惩罚系数。值越大惩罚越激进,失败请求对平均延时的影响越大 | -| auto_cl_error_rate_punish_threshold | 0 | 错误率惩罚阈值。见下文详细说明 | - -#### 错误率惩罚阈值 +## 错误率惩罚阈值 `auto_cl_error_rate_punish_threshold`用于设置错误率"死区",低于该阈值的错误率不会产生惩罚,避免少量错误请求对max_concurrency的过度影响。 -- **默认值为0**:保持原有行为,所有失败请求都会产生惩罚 -- **设置为正值(如0.1)**: - - 错误率 ≤ 阈值时:惩罚为0,平均延时仅由成功请求决定 - - 错误率 > 阈值时:惩罚线性增长,从0逐步恢复到完整惩罚 - -线性衰减公式:`punish_factor = (error_rate - threshold) / (1.0 - threshold)` +| GFlag | 默认值 | 有效范围 | 说明 | +|-------|--------|----------|------| +| auto_cl_error_rate_punish_threshold | 0 | [0, 1) | 错误率惩罚阈值,0表示禁用 | -**使用场景**:当服务存在少量固有错误(如个别请求参数异常)时,这些错误不应影响对服务处理能力的判断。通过设置合理的阈值(如0.05或0.1),可以过滤掉这部分噪声。 +- **默认值为0**:禁用该功能,保持原有行为 +- **设置为有效值(如0.1)**:错误率 ≤ 阈值时惩罚为0;错误率 > 阈值时惩罚线性增长 +- **无效值处理**:≥1 的值会被忽略,等同于0 **示例**: ``` # 错误率低于10%时不惩罚,高于10%时线性增加惩罚 --auto_cl_error_rate_punish_threshold=0.1 ``` - -### 其他参数 - -| GFlag | 默认值 | 说明 | -|-------|--------|------| -| auto_cl_sample_window_size_ms | 1000 | 采样窗口时长(毫秒) | -| auto_cl_min_sample_count | 100 | 采样窗口内的最小样本数,不足则丢弃该窗口 | -| auto_cl_max_sample_count | 200 | 采样窗口内的最大样本数,超过则提前提交窗口 | -| auto_cl_initial_max_concurrency | 40 | 初始最大并发数 | -| auto_cl_alpha_factor_for_ema | 0.1 | EMA平滑系数,值越小单次采样窗口对结果影响越小 | -| auto_cl_max_explore_ratio | 0.3 | 最大探索比例,值越大对延时波动的容忍度越高 | -| auto_cl_min_explore_ratio | 0.06 | 最小探索比例,用于判断服务负载情况 | -| auto_cl_noload_latency_remeasure_interval_ms | 50000 | 重测noload_latency的间隔(毫秒) | diff --git a/src/brpc/policy/auto_concurrency_limiter.cpp b/src/brpc/policy/auto_concurrency_limiter.cpp index 220d8a1693..51ea56d765 100644 --- a/src/brpc/policy/auto_concurrency_limiter.cpp +++ b/src/brpc/policy/auto_concurrency_limiter.cpp @@ -79,12 +79,11 @@ DEFINE_int32(auto_cl_latency_fluctuation_correction_factor, 1, "when the server is overloaded."); DEFINE_double(auto_cl_error_rate_punish_threshold, 0, "Threshold for error-rate-based punishment attenuation. " - "0 (default): no effect, original punishment logic is used. " - "> 0 (e.g. 0.1): error rates below this threshold produce zero " - "punishment; above it the punishment scales linearly from 0 to " - "full strength. Only effective when auto_cl_enable_error_punish " - "is true. Example: 0.1 means error rates below 10%% are not " - "punished."); + "Valid range: [0, 1). 0 (default) disables the feature. " + "Values >= 1 are ignored and treated as 0. " + "e.g. 0.1: error rates below 10%% produce zero punishment; " + "above it the punishment scales linearly from 0 to full strength. 
" + "Only effective when auto_cl_enable_error_punish is true."); AutoConcurrencyLimiter::AutoConcurrencyLimiter() : _max_concurrency(FLAGS_auto_cl_initial_max_concurrency) @@ -245,12 +244,14 @@ void AutoConcurrencyLimiter::UpdateMaxConcurrency(int64_t sampling_time_us) { int32_t total_succ_req = _total_succ_req.load(butil::memory_order_relaxed); double failed_punish = _sw.total_failed_us * FLAGS_auto_cl_fail_punish_ratio; - // Threshold-based attenuation: when auto_cl_error_rate_punish_threshold > 0, - // attenuate punishment based on error rate. Inspired by Sentinel's threshold- - // based circuit breaker: low error rates should not inflate avg_latency. - // Above threshold, punishment scales linearly from 0 to full strength. - // When threshold is 0 (default), this block is skipped entirely. - if (FLAGS_auto_cl_error_rate_punish_threshold > 0 && _sw.failed_count > 0) { + // Threshold-based attenuation: when 0 < threshold < 1, attenuate punishment + // based on error rate. Inspired by Sentinel's threshold-based circuit breaker: + // low error rates should not inflate avg_latency. Above threshold, punishment + // scales linearly from 0 to full strength. + // Invalid values (<=0 or >=1) skip this block entirely, preserving original behavior. + if (FLAGS_auto_cl_error_rate_punish_threshold > 0 && + FLAGS_auto_cl_error_rate_punish_threshold < 1.0 && + _sw.failed_count > 0) { double threshold = FLAGS_auto_cl_error_rate_punish_threshold; double error_rate = static_cast(_sw.failed_count) / (_sw.succ_count + _sw.failed_count); diff --git a/test/brpc_auto_concurrency_limiter_unittest.cpp b/test/brpc_auto_concurrency_limiter_unittest.cpp index 87619b7e8f..77163e2fb8 100644 --- a/test/brpc_auto_concurrency_limiter_unittest.cpp +++ b/test/brpc_auto_concurrency_limiter_unittest.cpp @@ -17,7 +17,6 @@ #include "brpc/policy/auto_concurrency_limiter.h" #include "butil/time.h" -#include "bthread/bthread.h" #include namespace brpc { @@ -156,9 +155,9 @@ TEST_F(AutoConcurrencyLimiterTest, AboveThresholdLinearScaling) { } // Case B: 90% error rate (near full punishment) - // punish_factor = (0.9 - 0.1) / (1.0 - 0.1) = 0.889 - // failed_punish = 90 * 1000 * 0.889 = 80000us - // avg_latency = (80000 + 10*100) / 10 = 8100us + // punish_factor = (0.9 - 0.1) / (1.0 - 0.1) = 8/9 ≈ 0.889 + // failed_punish = 90 * 1000 * (8/9) = 80000us + // avg_latency = ceil((80000 + 10*100) / 10) = ceil(8100) = 8100us { brpc::policy::AutoConcurrencyLimiter limiter; AddSamplesAndTriggerWindow(limiter, 10, 100, 90, 1000);