diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/Endpoints.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/Endpoints.java index d2fdaa26ef3..c4c0c9372f1 100644 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/Endpoints.java +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/Endpoints.java @@ -55,7 +55,7 @@ import org.apache.accumulo.monitor.next.SystemInformation.TimeOrderedRunningCompactionSet; import org.apache.accumulo.monitor.next.ec.CompactorsSummary; import org.apache.accumulo.monitor.next.ec.CoordinatorSummary; -import org.apache.accumulo.monitor.next.sservers.ScanServerView; +import org.apache.accumulo.monitor.next.views.ServersView; import io.micrometer.core.instrument.Meter.Id; import io.micrometer.core.instrument.cumulative.CumulativeDistributionSummary; @@ -275,7 +275,7 @@ public Map getScanServerAllMetricSummary() { @Path("sservers/view") @Produces(MediaType.APPLICATION_JSON) @Description("Returns a UI-ready view model for the Scan Server status page") - public ScanServerView getScanServerPageView() { + public ServersView getScanServerPageView() { return monitor.getInformationFetcher().getSummaryForEndpoint().getScanServerView(); } diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/SystemInformation.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/SystemInformation.java index 3de740a1b8f..8063c3b828f 100644 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/SystemInformation.java +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/SystemInformation.java @@ -58,7 +58,7 @@ import org.apache.accumulo.core.process.thrift.MetricResponse; import org.apache.accumulo.core.spi.balancer.TableLoadBalancer; import org.apache.accumulo.core.util.compaction.RunningCompactionInfo; -import org.apache.accumulo.monitor.next.sservers.ScanServerView; +import org.apache.accumulo.monitor.next.views.ServersView; import org.apache.accumulo.server.ServerContext; import org.apache.accumulo.server.conf.TableConfiguration; import org.apache.accumulo.server.metrics.MetricResponseWrapper; @@ -418,7 +418,7 @@ public Stream stream() { private final Set configuredCompactionResourceGroups = ConcurrentHashMap.newKeySet(); private long timestamp = 0; - private ScanServerView scanServerView; + private ServersView scanServerView; private final int rgLongRunningCompactionSize; public SystemInformation(Cache allMetrics, ServerContext ctx) { @@ -607,6 +607,9 @@ public void finish() { for (String rg : getResourceGroups()) { Set rgCompactors = getCompactorResourceGroupServers(rg); List metrics = queueMetrics.get(rg); + if (metrics == null || metrics.isEmpty()) { + continue; + } Optional queued = metrics.stream() .filter(fm -> fm.name().equals(Metric.COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_QUEUED.getName())) .findFirst(); @@ -620,6 +623,9 @@ public void finish() { // Check for idle compactors. Map rgMetrics = getCompactorResourceGroupMetricSummary(rg); + if (rgMetrics == null || rgMetrics.isEmpty()) { + continue; + } Optional> idleMetric = rgMetrics.entrySet() .stream().filter(e -> e.getKey().getName().equals(Metric.SERVER_IDLE.getName())) .findFirst(); @@ -642,14 +648,15 @@ public void finish() { } } + timestamp = System.currentTimeMillis(); + + // Compute ScanServer view Set scanServers = new HashSet<>(); sservers.values().forEach(scanServers::addAll); - int problemScanServerCount = (int) problemHosts.stream() - .filter(serverId -> serverId.getType() == ServerId.Type.SCAN_SERVER).count(); - var responses = allMetrics.getAllPresent(scanServers).values(); - timestamp = System.currentTimeMillis(); - scanServerView = ScanServerView.fromMetrics(responses, scanServers.size(), - problemScanServerCount, timestamp); + scanServerView = new ServersView( + scanServers, problemHosts.stream() + .filter(serverId -> serverId.getType() == ServerId.Type.SCAN_SERVER).count(), + allMetrics, timestamp); } public Set getResourceGroups() { @@ -743,7 +750,7 @@ public long getTimestamp() { return this.timestamp; } - public ScanServerView getScanServerView() { + public ServersView getScanServerView() { return this.scanServerView; } diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/sservers/ScanServerView.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/sservers/ScanServerView.java deleted file mode 100644 index 05ee018d82f..00000000000 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/sservers/ScanServerView.java +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.accumulo.monitor.next.sservers; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.stream.Stream; - -import org.apache.accumulo.core.metrics.Metric; -import org.apache.accumulo.core.metrics.flatbuffers.FMetric; -import org.apache.accumulo.core.metrics.flatbuffers.FTag; -import org.apache.accumulo.core.process.thrift.MetricResponse; -import org.apache.accumulo.monitor.next.SystemInformation; -import org.apache.accumulo.server.metrics.MetricResponseWrapper; - -/** - * Data Transfer Object (DTO) for the Monitor Scan Servers page. It transforms backend metrics into - * a UI-ready JSON response consumed by the frontend; each record component is serialized as a JSON - * field. - */ -public record ScanServerView(long lastUpdate, List servers, Status status) { - - /** - * all the data needed for a row in the table in monitor - */ - public record Row(String host, String resourceGroup, long lastContact, boolean metricsAvailable, - Number openFiles, Number queries, Number scannedEntries, Number queryResults, - Number queryResultBytes, Number busyTimeouts, Number reservationConflicts, - Number zombieThreads, Number serverIdle, Number lowMemoryDetected, - Number scansPausedForMemory, Number scansReturnedEarlyForMemory) { - } - - /** - * all the data needed for the ScanServer status indicator(s) - */ - public record Status(boolean hasScanServers, boolean hasProblemScanServers, - boolean hasMissingMetrics, int scanServerCount, int problemScanServerCount, - long missingMetricServerCount, String level, String message) { - } - - private static final String LEVEL_OK = "OK"; - private static final String LEVEL_WARN = "WARN"; - - public static ScanServerView fromMetrics(Collection responses, - int scanServerCount, int problemScanServerCount, long snapshotTime) { - var rows = rows(responses, snapshotTime); - Status status = buildStatus(scanServerCount, problemScanServerCount, rows); - return new ScanServerView(snapshotTime, rows, status); - } - - private static List rows(Collection responses, long nowMs) { - if (responses == null) { - return List.of(); - } - return responses.stream().map(response -> toRow(response, nowMs)) - .sorted(Comparator.comparing(Row::resourceGroup).thenComparing(Row::host)).toList(); - } - - private static Status buildStatus(int scanServerCount, int problemScanServerCount, - List rows) { - long missingMetricServerCount = rows.stream().filter(row -> !row.metricsAvailable()).count(); - boolean hasScanServers = scanServerCount > 0; - boolean hasProblemScanServers = problemScanServerCount > 0; - boolean hasMissingMetrics = missingMetricServerCount > 0; - - List warnings = new ArrayList<>(2); - if (hasProblemScanServers) { - warnings.add("one or more scan servers are unavailable"); - } - if (hasMissingMetrics) { - warnings.add("ScanServer metrics are not present (are metrics enabled?)"); - } - - if (warnings.isEmpty()) { - // no warnings, set status to OK - return new Status(hasScanServers, false, false, scanServerCount, 0, 0, LEVEL_OK, null); - } - - final String message = "WARN: " + String.join("; ", warnings) + "."; - return new Status(hasScanServers, hasProblemScanServers, hasMissingMetrics, scanServerCount, - problemScanServerCount, missingMetricServerCount, LEVEL_WARN, message); - } - - private static Row toRow(MetricResponse response, long nowMs) { - if (response == null) { - return new Row(null, null, 0, false, null, null, null, null, null, null, null, null, null, - null, null, null); - } - - var values = metricValuesByName(response); - var openFiles = values.get(Metric.SCAN_OPEN_FILES.getName()); - var queries = values.get(Metric.SCAN_QUERIES.getName()); - var scannedEntries = values.get(Metric.SCAN_SCANNED_ENTRIES.getName()); - var queryResults = values.get(Metric.SCAN_QUERY_SCAN_RESULTS.getName()); - var queryResultBytes = values.get(Metric.SCAN_QUERY_SCAN_RESULTS_BYTES.getName()); - var busyTimeouts = values.get(Metric.SCAN_BUSY_TIMEOUT_COUNT.getName()); - var reservationConflicts = values.get(Metric.SCAN_RESERVATION_CONFLICT_COUNTER.getName()); - var zombieThreads = values.get(Metric.SCAN_ZOMBIE_THREADS.getName()); - var serverIdle = values.get(Metric.SERVER_IDLE.getName()); - var lowMemoryDetected = values.get(Metric.LOW_MEMORY.getName()); - var scansPausedForMemory = values.get(Metric.SCAN_PAUSED_FOR_MEM.getName()); - var scansReturnedEarlyForMemory = values.get(Metric.SCAN_RETURN_FOR_MEM.getName()); - - boolean allMetricsPresent = - Stream.of(openFiles, queries, scannedEntries, queryResults, queryResultBytes, busyTimeouts, - reservationConflicts, zombieThreads, serverIdle, lowMemoryDetected, - scansPausedForMemory, scansReturnedEarlyForMemory).allMatch(Objects::nonNull); - - long lastContact = Math.max(0, nowMs - response.getTimestamp()); - - return new Row(response.getServer(), response.getResourceGroup(), lastContact, - allMetricsPresent, openFiles, queries, scannedEntries, queryResults, queryResultBytes, - busyTimeouts, reservationConflicts, zombieThreads, serverIdle, lowMemoryDetected, - scansPausedForMemory, scansReturnedEarlyForMemory); - } - - private static Map metricValuesByName(MetricResponse response) { - var values = new HashMap(); - if (response == null || response.getMetrics() == null || response.getMetrics().isEmpty()) { - return values; - } - - for (var binary : response.getMetrics()) { - var metric = FMetric.getRootAsFMetric(binary); - var metricStatistic = extractStatistic(metric); - if (metricStatistic == null || metricStatistic.equals("value") - || metricStatistic.equals("count")) { - values.putIfAbsent(metric.name(), SystemInformation.getMetricValue(metric)); - } - } - return values; - } - - private static String extractStatistic(FMetric metric) { - for (int i = 0; i < metric.tagsLength(); i++) { - FTag tag = metric.tags(i); - if (MetricResponseWrapper.STATISTIC_TAG.equals(tag.key())) { - return normalizeStatistic(tag.value()); - } - } - return null; - } - - private static String normalizeStatistic(String statistic) { - if (statistic == null) { - return null; - } - return statistic.toLowerCase(); - } - -} diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/MetricColumnMappings.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/MetricColumnMappings.java new file mode 100644 index 00000000000..61766961ce6 --- /dev/null +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/MetricColumnMappings.java @@ -0,0 +1,433 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.accumulo.monitor.next.views; + +import static org.apache.accumulo.core.metrics.Metric.BLOCKCACHE_DATA_EVICTIONCOUNT; +import static org.apache.accumulo.core.metrics.Metric.BLOCKCACHE_DATA_HITCOUNT; +import static org.apache.accumulo.core.metrics.Metric.BLOCKCACHE_DATA_REQUESTCOUNT; +import static org.apache.accumulo.core.metrics.Metric.BLOCKCACHE_INDEX_EVICTIONCOUNT; +import static org.apache.accumulo.core.metrics.Metric.BLOCKCACHE_INDEX_HITCOUNT; +import static org.apache.accumulo.core.metrics.Metric.BLOCKCACHE_INDEX_REQUESTCOUNT; +import static org.apache.accumulo.core.metrics.Metric.BLOCKCACHE_SUMMARY_EVICTIONCOUNT; +import static org.apache.accumulo.core.metrics.Metric.BLOCKCACHE_SUMMARY_HITCOUNT; +import static org.apache.accumulo.core.metrics.Metric.BLOCKCACHE_SUMMARY_REQUESTCOUNT; +import static org.apache.accumulo.core.metrics.Metric.COMPACTION_META_SVC_ERRORS; +import static org.apache.accumulo.core.metrics.Metric.COMPACTION_ROOT_SVC_ERRORS; +import static org.apache.accumulo.core.metrics.Metric.COMPACTION_USER_SVC_ERRORS; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_ENTRIES_READ; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_ENTRIES_WRITTEN; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_JOB_PRIORITY_QUEUES; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_AVG_AGE; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_DEQUEUED; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_MAX_AGE; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_MIN_AGE; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_POLL_TIMER; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_PRIORITY; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_QUEUED; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_REJECTED; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_SIZE; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_MAJC_CANCELLED; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_MAJC_COMPLETED; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_MAJC_FAILED; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_MAJC_FAILURES_CONSECUTIVE; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_MAJC_FAILURES_TERMINATION; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_MAJC_IN_PROGRESS; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_MAJC_STUCK; +import static org.apache.accumulo.core.metrics.Metric.COMPACTOR_MINC_STUCK; +import static org.apache.accumulo.core.metrics.Metric.FATE_ERRORS; +import static org.apache.accumulo.core.metrics.Metric.FATE_OPS; +import static org.apache.accumulo.core.metrics.Metric.FATE_OPS_ACTIVITY; +import static org.apache.accumulo.core.metrics.Metric.FATE_OPS_THREADS_INACTIVE; +import static org.apache.accumulo.core.metrics.Metric.FATE_OPS_THREADS_TOTAL; +import static org.apache.accumulo.core.metrics.Metric.FATE_TX; +import static org.apache.accumulo.core.metrics.Metric.FATE_TYPE_IN_PROGRESS; +import static org.apache.accumulo.core.metrics.Metric.GC_CANDIDATES; +import static org.apache.accumulo.core.metrics.Metric.GC_DELETED; +import static org.apache.accumulo.core.metrics.Metric.GC_ERRORS; +import static org.apache.accumulo.core.metrics.Metric.GC_FINISHED; +import static org.apache.accumulo.core.metrics.Metric.GC_IN_USE; +import static org.apache.accumulo.core.metrics.Metric.GC_POST_OP_DURATION; +import static org.apache.accumulo.core.metrics.Metric.GC_RUN_CYCLE; +import static org.apache.accumulo.core.metrics.Metric.GC_STARTED; +import static org.apache.accumulo.core.metrics.Metric.GC_WAL_CANDIDATES; +import static org.apache.accumulo.core.metrics.Metric.GC_WAL_DELETED; +import static org.apache.accumulo.core.metrics.Metric.GC_WAL_ERRORS; +import static org.apache.accumulo.core.metrics.Metric.GC_WAL_FINISHED; +import static org.apache.accumulo.core.metrics.Metric.GC_WAL_IN_USE; +import static org.apache.accumulo.core.metrics.Metric.GC_WAL_STARTED; +import static org.apache.accumulo.core.metrics.Metric.LOW_MEMORY; +import static org.apache.accumulo.core.metrics.Metric.MAJC_PAUSED; +import static org.apache.accumulo.core.metrics.Metric.MANAGER_BALANCER_MIGRATIONS_NEEDED; +import static org.apache.accumulo.core.metrics.Metric.MANAGER_GOAL_STATE; +import static org.apache.accumulo.core.metrics.Metric.MANAGER_META_TGW_ERRORS; +import static org.apache.accumulo.core.metrics.Metric.MANAGER_ROOT_TGW_ERRORS; +import static org.apache.accumulo.core.metrics.Metric.MANAGER_USER_TGW_ERRORS; +import static org.apache.accumulo.core.metrics.Metric.MINC_PAUSED; +import static org.apache.accumulo.core.metrics.Metric.MINC_QUEUED; +import static org.apache.accumulo.core.metrics.Metric.MINC_RUNNING; +import static org.apache.accumulo.core.metrics.Metric.RECOVERIES_AVG_PROGRESS; +import static org.apache.accumulo.core.metrics.Metric.RECOVERIES_IN_PROGRESS; +import static org.apache.accumulo.core.metrics.Metric.RECOVERIES_LONGEST_RUNTIME; +import static org.apache.accumulo.core.metrics.Metric.SCAN_BUSY_TIMEOUT_COUNT; +import static org.apache.accumulo.core.metrics.Metric.SCAN_CLOSE; +import static org.apache.accumulo.core.metrics.Metric.SCAN_CONTINUE; +import static org.apache.accumulo.core.metrics.Metric.SCAN_OPEN_FILES; +import static org.apache.accumulo.core.metrics.Metric.SCAN_PAUSED_FOR_MEM; +import static org.apache.accumulo.core.metrics.Metric.SCAN_QUERIES; +import static org.apache.accumulo.core.metrics.Metric.SCAN_QUERY_SCAN_RESULTS; +import static org.apache.accumulo.core.metrics.Metric.SCAN_QUERY_SCAN_RESULTS_BYTES; +import static org.apache.accumulo.core.metrics.Metric.SCAN_RESERVATION_CONFLICT_COUNTER; +import static org.apache.accumulo.core.metrics.Metric.SCAN_RESERVATION_TOTAL_TIMER; +import static org.apache.accumulo.core.metrics.Metric.SCAN_RESERVATION_WRITEOUT_TIMER; +import static org.apache.accumulo.core.metrics.Metric.SCAN_RESULTS; +import static org.apache.accumulo.core.metrics.Metric.SCAN_RETURN_FOR_MEM; +import static org.apache.accumulo.core.metrics.Metric.SCAN_SCANNED_ENTRIES; +import static org.apache.accumulo.core.metrics.Metric.SCAN_START; +import static org.apache.accumulo.core.metrics.Metric.SCAN_TABLET_METADATA_CACHE; +import static org.apache.accumulo.core.metrics.Metric.SCAN_TIMES; +import static org.apache.accumulo.core.metrics.Metric.SCAN_YIELDS; +import static org.apache.accumulo.core.metrics.Metric.SCAN_ZOMBIE_THREADS; +import static org.apache.accumulo.core.metrics.Metric.SERVER_IDLE; +import static org.apache.accumulo.core.metrics.Metric.THRIFT_EXECUTE; +import static org.apache.accumulo.core.metrics.Metric.THRIFT_IDLE; +import static org.apache.accumulo.core.metrics.Metric.TSERVER_ENTRIES; +import static org.apache.accumulo.core.metrics.Metric.TSERVER_HOLD; +import static org.apache.accumulo.core.metrics.Metric.TSERVER_INGEST_BYTES; +import static org.apache.accumulo.core.metrics.Metric.TSERVER_INGEST_ENTRIES; +import static org.apache.accumulo.core.metrics.Metric.TSERVER_MEM_ENTRIES; +import static org.apache.accumulo.core.metrics.Metric.TSERVER_MINC_QUEUED; +import static org.apache.accumulo.core.metrics.Metric.TSERVER_MINC_RUNNING; +import static org.apache.accumulo.core.metrics.Metric.TSERVER_MINC_TOTAL; +import static org.apache.accumulo.core.metrics.Metric.TSERVER_TABLETS_FILES; +import static org.apache.accumulo.core.metrics.Metric.TSERVER_TABLETS_LONG_ASSIGNMENTS; +import static org.apache.accumulo.core.metrics.Metric.TSERVER_TABLETS_ONDEMAND_UNLOADED_FOR_MEM; +import static org.apache.accumulo.core.metrics.Metric.TSERVER_TABLETS_ONLINE; +import static org.apache.accumulo.core.metrics.Metric.TSERVER_TABLETS_ONLINE_ONDEMAND; +import static org.apache.accumulo.core.metrics.Metric.TSERVER_TABLETS_OPENING; +import static org.apache.accumulo.core.metrics.Metric.TSERVER_TABLETS_UNOPENED; +import static org.apache.accumulo.core.metrics.Metric.UPDATE_CHECK; +import static org.apache.accumulo.core.metrics.Metric.UPDATE_COMMIT; +import static org.apache.accumulo.core.metrics.Metric.UPDATE_COMMIT_PREP; +import static org.apache.accumulo.core.metrics.Metric.UPDATE_ERRORS; +import static org.apache.accumulo.core.metrics.Metric.UPDATE_LOCK; +import static org.apache.accumulo.core.metrics.Metric.UPDATE_MUTATION_ARRAY_SIZE; +import static org.apache.accumulo.core.metrics.Metric.UPDATE_WALOG_WRITE; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.accumulo.core.metrics.Metric; + +public class MetricColumnMappings { + + public static record ColumnInformation(String name, String description, String uiClass) + implements Comparable { + @Override + public int compareTo(ColumnInformation other) { + int result = this.name.compareTo(other.name); + if (result == 0) { + result = this.description.compareTo(other.description); + if (result == 0) { + result = this.uiClass.compareTo(other.uiClass); + } + } + return result; + } + } + + private static final Map mappings = new HashMap<>(); + + public static final String NUMBER_UI_CLASS = "big-num"; + public static final String BYTES_UI_CLASS = "big-size"; + public static final String DURATION_UI_CLASS = "duration"; + public static final String PERCENT_UI_CLASS = "percent"; + public static final String TIMESTAMP_UI_CLASS = "timestamp"; + + static { + mappings.put(SERVER_IDLE, new ColumnInformation("Server Idle", + "Indicates if the server is idle or not. The value will be 1 when idle and 0 when not idle.", + NUMBER_UI_CLASS)); + mappings.put(LOW_MEMORY, new ColumnInformation("Low Memory", + "Reports 1 when process memory usage is above the threshold, reports 0 when memory is okay.", + NUMBER_UI_CLASS)); + mappings.put(THRIFT_IDLE, new ColumnInformation("Thrift Idle Time", + "Time waiting to execute an RPC request.", DURATION_UI_CLASS)); + mappings.put(THRIFT_EXECUTE, new ColumnInformation("Thrift Execution Time", + "Time to execute an RPC request.", DURATION_UI_CLASS)); + mappings.put(COMPACTION_ROOT_SVC_ERRORS, new ColumnInformation("Root Compaction Service Errors", + "A value of 1 indicates a misconfiguration in the compaction service, while a value of 0 indicates that the configuration is valid.", + NUMBER_UI_CLASS)); + mappings.put(COMPACTION_META_SVC_ERRORS, new ColumnInformation("Meta Compaction Service Errors", + "A value of 1 indicates a misconfiguration in the compaction service, while a value of 0 indicates that the configuration is valid.", + NUMBER_UI_CLASS)); + mappings.put(COMPACTION_USER_SVC_ERRORS, new ColumnInformation("User Compaction Service Errors", + "A value of 1 indicates a misconfiguration in the compaction service, while a value of 0 indicates that the configuration is valid.", + NUMBER_UI_CLASS)); + mappings.put(COMPACTOR_MAJC_CANCELLED, new ColumnInformation("Majc Cancelled", + "Number compactions that have been cancelled on this compactor", NUMBER_UI_CLASS)); + mappings.put(COMPACTOR_MAJC_COMPLETED, new ColumnInformation("Majc Completed", + "Number compactions that have succeeded on this compactor", NUMBER_UI_CLASS)); + mappings.put(COMPACTOR_MAJC_FAILED, new ColumnInformation("Majc Failed", + "Number compactions that have failed on this compactor", NUMBER_UI_CLASS)); + mappings.put(COMPACTOR_MAJC_FAILURES_CONSECUTIVE, + new ColumnInformation("Majc Consecutive Failures", + "Number of consecutive compaction failures. Resets to zero on a successful compaction", + NUMBER_UI_CLASS)); + mappings.put(COMPACTOR_MAJC_FAILURES_TERMINATION, new ColumnInformation( + "Majc Consecutive Failure Termination", + "Will report 1 if the Compactor terminates due to consecutive failures, else 0. Emitting this metric is a best effort before the process terminates", + NUMBER_UI_CLASS)); + mappings.put(COMPACTOR_MAJC_IN_PROGRESS, new ColumnInformation("Majc In Progress", + "Indicator of whether a compaction is in-progress (value: 1) or not (value: 0). An in-progress compaction could also be stuck.", + NUMBER_UI_CLASS)); + mappings.put(COMPACTOR_MAJC_STUCK, new ColumnInformation("Majc Stuck", + "Number and duration of stuck major compactions.", NUMBER_UI_CLASS)); + mappings.put(COMPACTOR_MINC_STUCK, new ColumnInformation("Minc Stuck", + "Number and duration of stuck minor compactions.", NUMBER_UI_CLASS)); + mappings.put(COMPACTOR_ENTRIES_READ, new ColumnInformation("Compaction Entries Read", + "Number of entries read by all compactions that have run on this compactor (majc) or tserver (minc).", + NUMBER_UI_CLASS)); + mappings.put(COMPACTOR_ENTRIES_WRITTEN, new ColumnInformation("Compaction Entries Written", + "Number of entries written by all compactions that have run on this compactor (majc) or tserver (minc).", + NUMBER_UI_CLASS)); + mappings.put(COMPACTOR_JOB_PRIORITY_QUEUES, new ColumnInformation("Compaction Queue Count", + "Number of priority queues for compaction jobs.", NUMBER_UI_CLASS)); + mappings.put(COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_DEQUEUED, new ColumnInformation( + "Compaction Jobs Dequeued", "Count of dequeued jobs.", NUMBER_UI_CLASS)); + mappings.put(COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_QUEUED, + new ColumnInformation("Compaction Jobs Queued", "Count of queued jobs.", NUMBER_UI_CLASS)); + mappings.put(COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_SIZE, new ColumnInformation( + "Compaction Queue Size", "Size of queued jobs in bytes.", BYTES_UI_CLASS)); + mappings.put(COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_REJECTED, new ColumnInformation( + "Compaction Jobs Rejected", "Count of rejected jobs.", NUMBER_UI_CLASS)); + mappings.put(COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_PRIORITY, new ColumnInformation( + "Compaction Job Lowest Priority", "Lowest priority queued job.", NUMBER_UI_CLASS)); + mappings.put(COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_MIN_AGE, + new ColumnInformation("Compaction Job Min Age", + "Minimum age of currently queued jobs in seconds.", TIMESTAMP_UI_CLASS)); + mappings.put(COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_MAX_AGE, + new ColumnInformation("Compaction Job Max Age", + "Maximum age of currently queued jobs in seconds.", TIMESTAMP_UI_CLASS)); + mappings.put(COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_AVG_AGE, + new ColumnInformation("Compaction Job Avg Age", + "Average age of currently queued jobs in seconds.", DURATION_UI_CLASS)); + mappings.put(COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_POLL_TIMER, + new ColumnInformation("Compaction Job Time in Queue", + "Tracks time a job spent in the queue before exiting the queue.", DURATION_UI_CLASS)); + mappings.put(FATE_TYPE_IN_PROGRESS, new ColumnInformation("Fate Ops In Progress By Type", + "Number of FATE operations in progress. The op type is designated by the `op.type` tag.", + NUMBER_UI_CLASS)); + mappings.put(FATE_OPS, new ColumnInformation("Total Current Fate Ops", + "Number of all the current FATE ops in any state.", NUMBER_UI_CLASS)); + mappings.put(FATE_OPS_ACTIVITY, + new ColumnInformation("Total Fate Ops", + "Count of the total number of times fate operations are added, updated, and removed.", + NUMBER_UI_CLASS)); + mappings.put(FATE_ERRORS, new ColumnInformation("Fate Errors", + "Count of errors that occurred when attempting to gather fate metrics.", NUMBER_UI_CLASS)); + mappings.put(FATE_TX, new ColumnInformation("Fate Ops By State", + "Count of FATE operations in a certain state. The state is now in a tag (e.g., state=new, state=in.progress, state=failed, etc.).", + NUMBER_UI_CLASS)); + mappings.put(FATE_OPS_THREADS_INACTIVE, new ColumnInformation("Fate Threads Inactive", + "Keeps track of the number of idle threads (not working on a fate operation) in the thread pool. The pool name can be found in the pool.name tag. The fate instance type can be found in the instanceType tag.", + NUMBER_UI_CLASS)); + mappings.put(FATE_OPS_THREADS_TOTAL, new ColumnInformation("Fate Threads Total", + "Keeps track of the total number of threads in the thread pool. The pool name can be found in the pool.name tag. The fate instance type can be found in the instanceType tag.", + NUMBER_UI_CLASS)); + mappings.put(GC_STARTED, new ColumnInformation("GC File Cycle Start", + "Timestamp GC file collection cycle started.", NUMBER_UI_CLASS)); + mappings.put(GC_FINISHED, new ColumnInformation("GC File Cycle End", + "Timestamp GC file collect cycle finished.", NUMBER_UI_CLASS)); + mappings.put(GC_CANDIDATES, new ColumnInformation("GC File Candidates For Delete", + "Number of files that are candidates for deletion.", NUMBER_UI_CLASS)); + mappings.put(GC_IN_USE, new ColumnInformation("GC File Candidates In Use", + "Number of candidate files still in use.", NUMBER_UI_CLASS)); + mappings.put(GC_DELETED, new ColumnInformation("GC File Candidates Deleted", + "Number of candidate files deleted.", NUMBER_UI_CLASS)); + mappings.put(GC_ERRORS, new ColumnInformation("GC File Candidate Deletion Errors", + "Number of candidate deletion errors.", NUMBER_UI_CLASS)); + mappings.put(GC_WAL_STARTED, new ColumnInformation("GC WAL Cycle Start", + "Timestamp GC WAL collection cycle started.", NUMBER_UI_CLASS)); + mappings.put(GC_WAL_FINISHED, new ColumnInformation("GC WAL Cycle End", + "Timestamp GC WAL collect cycle finished.", NUMBER_UI_CLASS)); + mappings.put(GC_WAL_CANDIDATES, new ColumnInformation("GC WAL Candidates For Delete", + "Number of files that are candidates for deletion.", NUMBER_UI_CLASS)); + mappings.put(GC_WAL_IN_USE, new ColumnInformation("GC WAL Candidates In Use", + "Number of wal file candidates that are still in use.", NUMBER_UI_CLASS)); + mappings.put(GC_WAL_DELETED, new ColumnInformation("GC WAL Candidates Deleted", + "Number of candidate wal files deleted.", NUMBER_UI_CLASS)); + mappings.put(GC_WAL_ERRORS, new ColumnInformation("GC WAL Candidate Deletion Errors", + "Number candidate wal file deletion errors.", NUMBER_UI_CLASS)); + mappings.put(GC_POST_OP_DURATION, new ColumnInformation("GC Metadata PostOp Duration", + "GC metadata table post operation duration in milliseconds.", DURATION_UI_CLASS)); + mappings.put(GC_RUN_CYCLE, new ColumnInformation("GC Cycles", + "Count of gc cycle runs. Value is reset on process start.", NUMBER_UI_CLASS)); + mappings.put(TSERVER_ENTRIES, new ColumnInformation("Entries Assigned", + "Number of entries assigned to a TabletServer.", NUMBER_UI_CLASS)); + mappings.put(TSERVER_MEM_ENTRIES, new ColumnInformation("Entries In Memory", + "Number of entries in memory.", NUMBER_UI_CLASS)); + mappings.put(TSERVER_MINC_QUEUED, new ColumnInformation("Queued Minc", + "Number of queued minor compactions.", NUMBER_UI_CLASS)); + mappings.put(TSERVER_MINC_RUNNING, new ColumnInformation("Running Minc", + "Number of active minor compactions.", NUMBER_UI_CLASS)); + mappings.put(TSERVER_MINC_TOTAL, new ColumnInformation("Minc Completed", + "Total number of minor compactions performed.", NUMBER_UI_CLASS)); + mappings.put(TSERVER_TABLETS_ONLINE, + new ColumnInformation("Tablets Online", "Number of online tablets.", NUMBER_UI_CLASS)); + mappings.put(TSERVER_TABLETS_LONG_ASSIGNMENTS, new ColumnInformation( + "Tablet Assignments Overdue", + "Number of tablet assignments that are taking longer than the configured warning duration.", + NUMBER_UI_CLASS)); + mappings.put(TSERVER_TABLETS_OPENING, + new ColumnInformation("Tablets Opening", "Number of opening tablets.", NUMBER_UI_CLASS)); + mappings.put(TSERVER_TABLETS_UNOPENED, + new ColumnInformation("Tablets Unopened", "Number of unopened tablets.", NUMBER_UI_CLASS)); + mappings.put(TSERVER_TABLETS_FILES, new ColumnInformation("Avg Files Per Tablet", + "Number of files per tablet.", NUMBER_UI_CLASS)); + mappings.put(TSERVER_INGEST_ENTRIES, + new ColumnInformation("Ingested Entries", + "Ingest entry (a key/value) count. The rate can be derived from this metric.", + NUMBER_UI_CLASS)); + mappings.put(TSERVER_INGEST_BYTES, new ColumnInformation("Ingested Bytes", + "Ingest byte count. The rate can be derived from this metric.", BYTES_UI_CLASS)); + mappings.put(TSERVER_HOLD, new ColumnInformation("Ingest Commit Hold Time", + "Duration for which commits have been held in milliseconds.", DURATION_UI_CLASS)); + mappings.put(TSERVER_TABLETS_ONLINE_ONDEMAND, new ColumnInformation("Online On-Demand Tablets", + "Number of online on-demand tablets", NUMBER_UI_CLASS)); + mappings.put(TSERVER_TABLETS_ONDEMAND_UNLOADED_FOR_MEM, + new ColumnInformation("On-Demand Tablets Unloaded For Memory", + "Number of online on-demand tablets unloaded due to low memory", NUMBER_UI_CLASS)); + mappings.put(SCAN_RESERVATION_TOTAL_TIMER, new ColumnInformation("Scan Reservation Total Time", + "Time to reserve a tablet's files for scan.", DURATION_UI_CLASS)); + mappings.put(SCAN_RESERVATION_WRITEOUT_TIMER, + new ColumnInformation("Scan Reservation Write Time", + "Time to write out a tablets file reservations for scan.", DURATION_UI_CLASS)); + mappings.put(SCAN_RESERVATION_CONFLICT_COUNTER, + new ColumnInformation("Scan Reservation Conflicts", + "Count of instances where file reservation attempts for scans encountered conflicts.", + NUMBER_UI_CLASS)); + mappings.put(SCAN_TABLET_METADATA_CACHE, new ColumnInformation("Scan Server Metadata Cache", + "Scan server tablet cache metrics.", NUMBER_UI_CLASS)); + mappings.put(SCAN_BUSY_TIMEOUT_COUNT, new ColumnInformation("Scan Busy Count", + "Count of the scans where a busy timeout happened.", NUMBER_UI_CLASS)); + mappings.put(SCAN_TIMES, new ColumnInformation("Scan Session Total Time", + "Scan session lifetime (creation to close).", DURATION_UI_CLASS)); + mappings.put(SCAN_OPEN_FILES, new ColumnInformation("Scan Files Open", + "Number of files open for scans.", NUMBER_UI_CLASS)); + mappings.put(SCAN_RESULTS, + new ColumnInformation("Scan Result Count", "Results per scan.", NUMBER_UI_CLASS)); + mappings.put(SCAN_YIELDS, new ColumnInformation("Scan Yield Count", + "Counts scans that have yielded.", NUMBER_UI_CLASS)); + mappings.put(SCAN_START, new ColumnInformation("Scan Start Count", + "Number of calls to start a scan or multiscan.", NUMBER_UI_CLASS)); + mappings.put(SCAN_CONTINUE, new ColumnInformation("Scan Continue Count", + "Number of calls to continue a scan or multiscan.", NUMBER_UI_CLASS)); + mappings.put(SCAN_CLOSE, new ColumnInformation("Scan Close Count", + "Number of calls to close a scan or multiscan.", NUMBER_UI_CLASS)); + mappings.put(SCAN_QUERIES, new ColumnInformation("Tablet Lookup Count", + "Number of queries made during scans.", NUMBER_UI_CLASS)); + mappings.put(SCAN_SCANNED_ENTRIES, new ColumnInformation("Scanned Entry Count", + "Count of scanned entries. The rate can be derived from this metric.", NUMBER_UI_CLASS)); + mappings.put(SCAN_QUERY_SCAN_RESULTS, new ColumnInformation("Returned Entry Count", + "Query count. The rate can be derived from this metric.", NUMBER_UI_CLASS)); + mappings.put(SCAN_QUERY_SCAN_RESULTS_BYTES, new ColumnInformation("Returned Bytes Count", + "Query byte count. The rate can be derived from this metric.", BYTES_UI_CLASS)); + mappings.put(SCAN_PAUSED_FOR_MEM, new ColumnInformation("Scans Paused For Low Memory", + "Count of scans paused due to server being low on memory.", NUMBER_UI_CLASS)); + mappings.put(SCAN_RETURN_FOR_MEM, + new ColumnInformation("Scans Returned Early for Low Memory", + "Count of scans that returned results early due to server being low on memory.", + NUMBER_UI_CLASS)); + mappings.put(SCAN_ZOMBIE_THREADS, new ColumnInformation("Scan Zombie Thread Count", + "Number of scan threads that have no associated client session.", NUMBER_UI_CLASS)); + mappings.put(MAJC_PAUSED, new ColumnInformation("Majc Paused", + "Number of paused major compactions.", NUMBER_UI_CLASS)); + mappings.put(MINC_QUEUED, new ColumnInformation("Minc Queued", + "Queued minor compactions time queued.", NUMBER_UI_CLASS)); + mappings.put(MINC_RUNNING, + new ColumnInformation("Minc Running", "Minor compactions time active.", NUMBER_UI_CLASS)); + mappings.put(MINC_PAUSED, new ColumnInformation("Minc Paused", + "Number of paused minor compactions.", NUMBER_UI_CLASS)); + mappings.put(UPDATE_ERRORS, new ColumnInformation("Ingest Errors", + "Count of errors during tablet updates. Type/reason for error is stored in the `type` tag (e.g., type=permission, type=unknown.tablet, type=constraint.violation).", + NUMBER_UI_CLASS)); + mappings.put(UPDATE_LOCK, new ColumnInformation("Condititional Mutation Row Lock Wait Time", + "Average time taken for conditional mutation to get a row lock.", DURATION_UI_CLASS)); + mappings.put(UPDATE_CHECK, new ColumnInformation("Conditional Mutation Condition Check Time", + "Average time taken for conditional mutation to check conditions.", DURATION_UI_CLASS)); + mappings.put(UPDATE_COMMIT, new ColumnInformation("Mutation Commit Avg Total Time", + "Average time taken to commit a mutation.", DURATION_UI_CLASS)); + mappings.put(UPDATE_COMMIT_PREP, new ColumnInformation("Mutation Commit Avg Prep Time", + "Average time taken to prepare to commit a single mutation.", DURATION_UI_CLASS)); + mappings.put(UPDATE_WALOG_WRITE, new ColumnInformation("Mutations Write To WAL Time", + "Time taken to write a batch of mutations to WAL.", DURATION_UI_CLASS)); + mappings.put(UPDATE_MUTATION_ARRAY_SIZE, new ColumnInformation("Mutations Array Size", + "Batch size of mutations from client.", NUMBER_UI_CLASS)); + mappings.put(BLOCKCACHE_INDEX_HITCOUNT, new ColumnInformation("Index Block Cache Hit Count", + "Index block cache hit count.", NUMBER_UI_CLASS)); + mappings.put(BLOCKCACHE_INDEX_REQUESTCOUNT, new ColumnInformation( + "Index Block Cache Request Count", "Index block cache request count.", NUMBER_UI_CLASS)); + mappings.put(BLOCKCACHE_INDEX_EVICTIONCOUNT, new ColumnInformation( + "Index Block Cache Eviction Count", "Index block cache eviction count.", NUMBER_UI_CLASS)); + mappings.put(BLOCKCACHE_DATA_HITCOUNT, new ColumnInformation("Data Block Cache Hit Count", + "Data block cache hit count.", NUMBER_UI_CLASS)); + mappings.put(BLOCKCACHE_DATA_REQUESTCOUNT, new ColumnInformation( + "Data Block Cache Request Count", "Data block cache request count.", NUMBER_UI_CLASS)); + mappings.put(BLOCKCACHE_DATA_EVICTIONCOUNT, new ColumnInformation( + "Data Block Cache Eviction Count", "Data block cache eviction count.", NUMBER_UI_CLASS)); + mappings.put(BLOCKCACHE_SUMMARY_HITCOUNT, new ColumnInformation("Summary Block Cache Hit Count", + "Summary block cache hit count.", NUMBER_UI_CLASS)); + mappings.put(BLOCKCACHE_SUMMARY_REQUESTCOUNT, + new ColumnInformation("Summary Block Cache Request Count", + "Summary block cache request count.", NUMBER_UI_CLASS)); + mappings.put(BLOCKCACHE_SUMMARY_EVICTIONCOUNT, + new ColumnInformation("Summary Block Cache Eviction Count", + "Summary block cache eviction count.", NUMBER_UI_CLASS)); + mappings.put(MANAGER_BALANCER_MIGRATIONS_NEEDED, + new ColumnInformation("Balancer Migrations Needed", + "The number of migrations that need to complete before the system is balanced.", + NUMBER_UI_CLASS)); + mappings.put(MANAGER_ROOT_TGW_ERRORS, + new ColumnInformation("Root Tablet Watcher Errors", + "Error count encountered by the TabletGroupWatcher for the ROOT data level.", + NUMBER_UI_CLASS)); + mappings.put(MANAGER_META_TGW_ERRORS, + new ColumnInformation("Meta Tablet Watcher Errors", + "Error count encountered by the TabletGroupWatcher for the META data level.", + NUMBER_UI_CLASS)); + mappings.put(MANAGER_USER_TGW_ERRORS, + new ColumnInformation("User Tablet Watcher Errors", + "Error count encountered by the TabletGroupWatcher for the USER data level.", + NUMBER_UI_CLASS)); + mappings.put(MANAGER_GOAL_STATE, new ColumnInformation("Manager Goal State", + "Manager goal state: -1=unknown, 0=CLEAN_STOP, 1=SAFE_MODE, 2=NORMAL.", NUMBER_UI_CLASS)); + mappings.put(RECOVERIES_IN_PROGRESS, new ColumnInformation("Tablet Recoveries In Progress", + "The number of recoveries in progress.", NUMBER_UI_CLASS)); + mappings.put(RECOVERIES_LONGEST_RUNTIME, new ColumnInformation("Tablet Recovery Longest Time", + "The time (in milliseconds) of the longest running recovery.", NUMBER_UI_CLASS)); + mappings.put(RECOVERIES_AVG_PROGRESS, + new ColumnInformation("Tablet Recovery Avg Percent Complete", + "The average percentage (0.0 - 99.9) of the in progress recoveries.", NUMBER_UI_CLASS)); + } + + public static ColumnInformation getColumnInformation(Metric m) { + return mappings.get(m); + } + +} diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/ServersView.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/ServersView.java new file mode 100644 index 00000000000..9b5553966b8 --- /dev/null +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/ServersView.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.accumulo.monitor.next.views; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.accumulo.core.client.admin.servers.ServerId; +import org.apache.accumulo.core.metrics.Metric; +import org.apache.accumulo.core.metrics.flatbuffers.FMetric; +import org.apache.accumulo.core.metrics.flatbuffers.FTag; +import org.apache.accumulo.core.process.thrift.MetricResponse; +import org.apache.accumulo.monitor.next.SystemInformation; +import org.apache.accumulo.monitor.next.views.MetricColumnMappings.ColumnInformation; +import org.apache.accumulo.server.metrics.MetricResponseWrapper; + +import com.github.benmanes.caffeine.cache.Cache; + +/** + * Generic Data Transfer Object (DTO) for a set of Accumulo server processes of the same type. The + * response object contains several fields: + * + * + *
+ * columns - contains an array of column definitions that can be used to create the table headers
+ *           and Data Table columns
+ * data    - an array of objects that can be used for the Data Table data definition
+ * status  - overall status information, counts, warnings, etc.
+ * 
+ */ +public class ServersView { + + /** + * all the data needed for the ScanServer status indicator(s) + */ + public record Status(boolean hasScanServers, boolean hasProblemScanServers, + boolean hasMissingMetrics, long scanServerCount, long problemScanServerCount, + long missingMetricServerCount, String level, String message) { + } + + private static final String LEVEL_OK = "OK"; + private static final String LEVEL_WARN = "WARN"; + private static final String TYPE_COL_NAME = "Server Type"; + private static final ColumnInformation TYPE_COL_MAPPING = + new ColumnInformation(TYPE_COL_NAME, "Type of server", ""); + private static final String RG_COL_NAME = "Resource Group"; + private static final ColumnInformation RG_COL_MAPPING = + new ColumnInformation(RG_COL_NAME, "Resource Group", ""); + private static final String ADDR_COL_NAME = "Server Address"; + private static final ColumnInformation ADDR_COL_MAPPING = + new ColumnInformation(ADDR_COL_NAME, "Server address", ""); + private static final String TIME_COL_NAME = "Last Contact"; + private static final ColumnInformation TIME_COL_MAPPING = new ColumnInformation(TIME_COL_NAME, + "Server last contact time", MetricColumnMappings.TIMESTAMP_UI_CLASS); + + public final List> data = new ArrayList<>(); + public final Set columns = new TreeSet<>(); + public final Status status; + public final long timestamp; + + public ServersView(final Set servers, final long problemServerCount, + final Cache allMetrics, final long timestamp) { + + AtomicInteger serversMissingMetrics = new AtomicInteger(0); + servers.forEach(sid -> { + Map convertedMetrics = new TreeMap<>(); + + convertedMetrics.put(TYPE_COL_NAME, sid.getType().name()); + convertedMetrics.put(RG_COL_NAME, sid.getResourceGroup().canonical()); + convertedMetrics.put(ADDR_COL_NAME, sid.toHostPortString()); + + columns.add(TYPE_COL_MAPPING); + columns.add(RG_COL_MAPPING); + columns.add(ADDR_COL_MAPPING); + + MetricResponse mr = allMetrics.getIfPresent(sid); + if (mr != null) { + convertedMetrics.put(TIME_COL_NAME, mr.getTimestamp()); + columns.add(TIME_COL_MAPPING); + + Map serverMetrics = metricValuesByName(mr); + for (Entry e : serverMetrics.entrySet()) { + Metric m = Metric.fromName(e.getKey()); + ColumnInformation colInfo = MetricColumnMappings.getColumnInformation(m); + convertedMetrics.put(colInfo.name(), e.getValue()); + columns.add(colInfo); + } + data.add(convertedMetrics); + } else { + serversMissingMetrics.incrementAndGet(); + } + }); + status = buildStatus(servers.size(), problemServerCount, serversMissingMetrics.get()); + this.timestamp = timestamp; + } + + private static Status buildStatus(int serverCount, long problemServerCount, + int serversMissingMetrics) { + final boolean hasServers = serverCount > 0; + final boolean hasProblemServers = problemServerCount > 0; + final boolean hasMissingMetrics = serversMissingMetrics > 0; + + List warnings = new ArrayList<>(2); + if (hasProblemServers) { + warnings.add("One or more servers are unavailable"); + } + if (hasMissingMetrics) { + warnings.add("Metrics are not present (are metrics enabled?)"); + } + + if (warnings.isEmpty()) { + // no warnings, set status to OK + return new Status(hasServers, false, false, serverCount, 0, 0, LEVEL_OK, null); + } + + final String message = "WARN: " + String.join("; ", warnings) + "."; + return new Status(hasServers, hasProblemServers, hasMissingMetrics, serverCount, + problemServerCount, serversMissingMetrics, LEVEL_WARN, message); + } + + public static Map metricValuesByName(MetricResponse response) { + var values = new HashMap(); + if (response == null || response.getMetrics() == null || response.getMetrics().isEmpty()) { + return values; + } + + for (var binary : response.getMetrics()) { + var metric = FMetric.getRootAsFMetric(binary); + var metricStatistic = extractStatistic(metric); + if (metricStatistic == null || metricStatistic.equals("value") + || metricStatistic.equals("count")) { + values.putIfAbsent(metric.name(), SystemInformation.getMetricValue(metric)); + } + } + return values; + } + + private static String extractStatistic(FMetric metric) { + for (int i = 0; i < metric.tagsLength(); i++) { + FTag tag = metric.tags(i); + if (MetricResponseWrapper.STATISTIC_TAG.equals(tag.key())) { + return normalizeStatistic(tag.value()); + } + } + return null; + } + + private static String normalizeStatistic(String statistic) { + if (statistic == null) { + return null; + } + return statistic.toLowerCase(); + } + +} diff --git a/server/monitor/src/main/resources/org/apache/accumulo/monitor/resources/js/sservers.js b/server/monitor/src/main/resources/org/apache/accumulo/monitor/resources/js/sservers.js index 011ce89c5b2..b21477a5bbc 100644 --- a/server/monitor/src/main/resources/org/apache/accumulo/monitor/resources/js/sservers.js +++ b/server/monitor/src/main/resources/org/apache/accumulo/monitor/resources/js/sservers.js @@ -33,12 +33,20 @@ function getStoredView() { return JSON.parse(sessionStorage[SSERVERS_VIEW_SESSION_KEY]); } +function getStoredColumns() { + var view = getStoredView(); + if (!Array.isArray(view.columns)) { + return []; + } + return view.columns; +} + function getStoredRows() { var view = getStoredView(); - if (!Array.isArray(view.servers)) { + if (!Array.isArray(view.data)) { return []; } - return view.servers; + return view.data; } function getStoredStatus() { @@ -46,7 +54,44 @@ function getStoredStatus() { return view.status || null; } +function getDataTableCols() { + var dataTableColumns = []; + var storedColumns = getStoredColumns(); + $.each(storedColumns, function (index, col) { + dataTableColumns.push({ + data: col.name + }); + }); + return dataTableColumns; +} + function refreshScanServersTable() { + + // Destroy the DataTable and clear the HTML table + if (sserversTable != null) { + sserversTable.destroy(); + $('#sservers').empty(); + } + + // Create the HTML table columns + var sserversHtmlTable = $('#sservers'); + var thead = $(document.createElement("thead")); + var theadRow = $(document.createElement("tr")); + + var storedColumns = getStoredColumns(); + $.each(storedColumns, function (index, col) { + //console.log('Adding column: ' + JSON.stringify(col)); + var th = $(document.createElement("th")); + th.addClass(col.uiClass); + th.text(col.name); + th.attr("title", col.description); + theadRow.append(th); + }); + thead.append(theadRow); + sserversHtmlTable.append(thead); + + // Create the DataTable + createDataTable(); ajaxReloadTable(sserversTable); } @@ -76,7 +121,8 @@ function refreshScanServers() { refreshSserversBanner(getStoredStatus()); }).fail(function () { sessionStorage[SSERVERS_VIEW_SESSION_KEY] = JSON.stringify({ - servers: [], + data: [], + columns: [], status: null }); refreshScanServersTable(); @@ -88,12 +134,7 @@ function refresh() { refreshScanServers(); } -$(function () { - sessionStorage[SSERVERS_VIEW_SESSION_KEY] = JSON.stringify({ - servers: [], - status: null - }); - +function createDataTable() { sserversTable = $('#sservers').DataTable({ "autoWidth": false, "ajax": function (data, callback) { @@ -136,54 +177,16 @@ $(function () { } } ], - "columns": [{ - "data": "host" - }, - { - "data": "resourceGroup" - }, - { - "data": "lastContact" - }, - { - "data": "openFiles" - }, - { - "data": "queries" - }, - { - "data": "scannedEntries" - }, - { - "data": "queryResults" - }, - { - "data": "queryResultBytes" - }, - { - "data": "busyTimeouts" - }, - { - "data": "reservationConflicts" - }, - { - "data": "zombieThreads" - }, - { - "data": "serverIdle", - "render": renderActivityState - }, - { - "data": "lowMemoryDetected", - "render": renderMemoryState - }, - { - "data": "scansPausedForMemory" - }, - { - "data": "scansReturnedEarlyForMemory" - } - ] + "columns": getDataTableCols() + }); + +} + +$(function () { + sessionStorage[SSERVERS_VIEW_SESSION_KEY] = JSON.stringify({ + data: [], + columns: [], + status: null }); refreshScanServers(); diff --git a/server/monitor/src/main/resources/org/apache/accumulo/monitor/templates/sservers.ftl b/server/monitor/src/main/resources/org/apache/accumulo/monitor/templates/sservers.ftl index 16210181a3b..6136736c1ee 100644 --- a/server/monitor/src/main/resources/org/apache/accumulo/monitor/templates/sservers.ftl +++ b/server/monitor/src/main/resources/org/apache/accumulo/monitor/templates/sservers.ftl @@ -32,6 +32,7 @@ Scan Servers
The following scan servers reported status and scan metrics when available.
+