diff --git a/helm/bundles/cortex-nova/alerts/nova.alerts.yaml b/helm/bundles/cortex-nova/alerts/nova.alerts.yaml
index 2449fa390..e3271f119 100644
--- a/helm/bundles/cortex-nova/alerts/nova.alerts.yaml
+++ b/helm/bundles/cortex-nova/alerts/nova.alerts.yaml
@@ -592,3 +592,20 @@ groups:
         corruption, bugs in reservation creation, or external modifications.
         Reservations are automatically repaired, but the root cause should be
         investigated if this alert persists.
+
+  - alert: CortexNovaDoesntFindValidKVMHosts
+    expr: sum by (az, hvtype) (cortex_vm_faults{hvtype=~"CH|QEMU",faultmsg=~".*No valid host was found.*"}) > 0
+    for: 5m
+    labels:
+      context: scheduling
+      dashboard: cortex/cortex
+      service: cortex
+      severity: warning
+      support_group: workload-management
+    annotations:
+      summary: "Nova scheduling cannot find valid KVM hosts"
+      description: >
+        Cortex is seeing faulty vms in `{{$labels.az}}` where Nova scheduling
+        failed to find a valid `{{$labels.hvtype}}` host. This may indicate
+        capacity issues, misconfigured filters, or resource constraints in the
+        datacenter. Investigate the affected VMs and hypervisor availability.
diff --git a/helm/bundles/cortex-nova/templates/kpis.yaml b/helm/bundles/cortex-nova/templates/kpis.yaml
index af01c10c5..bc5666926 100644
--- a/helm/bundles/cortex-nova/templates/kpis.yaml
+++ b/helm/bundles/cortex-nova/templates/kpis.yaml
@@ -110,6 +110,23 @@ spec:
 ---
 apiVersion: cortex.cloud/v1alpha1
 kind: KPI
+metadata:
+  name: vm-faults
+spec:
+  schedulingDomain: nova
+  impl: vm_faults_kpi
+  dependencies:
+    datasources:
+      - name: nova-servers
+      - name: nova-flavors
+  description: |
+    This kpi tracks vm faults in the datacenter. It exposes helpful information
+    about the faults, such as the availability zone, hypervisor type, vm state,
+    and error info if available. This can be used to identify issues in the
+    datacenter and to monitor the overall health of the vms.
+---
+apiVersion: cortex.cloud/v1alpha1
+kind: KPI
 metadata:
   name: cortex-nova-datasource-state
 spec:
diff --git a/internal/knowledge/datasources/plugins/openstack/nova/nova_types.go b/internal/knowledge/datasources/plugins/openstack/nova/nova_types.go
index 322b05d69..1be2b7a29 100644
--- a/internal/knowledge/datasources/plugins/openstack/nova/nova_types.go
+++ b/internal/knowledge/datasources/plugins/openstack/nova/nova_types.go
@@ -108,9 +108,24 @@ type Server struct {
 	OSEXTSTSVmState                string  `json:"OS-EXT-STS:vm_state" db:"os_ext_sts_vm_state"`
 	OSEXTSTSPowerState             int     `json:"OS-EXT-STS:power_state" db:"os_ext_sts_power_state"`
 
-	// From nested JSON
+	// From nested server.flavor JSON
 	FlavorName string `json:"-" db:"flavor_name"`
 
+	// From nested server.fault JSON
+
+	// The error response code.
+	FaultCode *uint `json:"-" db:"fault_code"`
+	// The date and time when the exception was raised. The date and time stamp
+	// format is ISO 8601 (CCYY-MM-DDThh:mm:ss±hh:mm). For example,
+	// 2015-08-27T09:49:58-05:00. The ±hh:mm value if included, is the time zone
+	// as an offset from UTC. In the previous example, the offset value is -05:00.
+	FaultCreated *string `json:"-" db:"fault_created"`
+	// The error message.
+	FaultMessage *string `json:"-" db:"fault_message"`
+	// The stack trace. It is available if the response code is not 500 or you
+	// have the administrator privilege.
+	FaultDetails *string `json:"-" db:"fault_details"`
+
 	// Note: there are some more fields that are omitted. To include them again, add
 	// custom unmarshalers and marshalers for the struct below.
 }
@@ -119,7 +134,8 @@ type Server struct {
 func (s *Server) UnmarshalJSON(data []byte) error {
 	type Alias Server
 	aux := &struct {
-		Flavor json.RawMessage `json:"flavor"`
+		Flavor json.RawMessage  `json:"flavor"`
+		Fault  *json.RawMessage `json:"fault,omitempty"`
 		*Alias
 	}{
 		Alias: (*Alias)(s),
@@ -135,31 +151,63 @@ func (s *Server) UnmarshalJSON(data []byte) error {
 		return err
 	}
 	s.FlavorName = flavor.Name
+	var fault struct {
+		Code    uint    `json:"code"`
+		Created string  `json:"created"`
+		Message string  `json:"message"`
+		Details *string `json:"details,omitempty"`
+	}
+	if aux.Fault != nil {
+		if err := json.Unmarshal(*aux.Fault, &fault); err != nil {
+			return err
+		}
+		s.FaultCode = &fault.Code
+		s.FaultCreated = &fault.Created
+		s.FaultMessage = &fault.Message
+		s.FaultDetails = fault.Details
+	}
 	return nil
 }
 
 // Custom marshaler for OpenStackServer to handle nested JSON.
 func (s *Server) MarshalJSON() ([]byte, error) {
 	type Alias Server
+	type flavor struct {
+		// Starting in microversion 2.47, "id" was removed...
+		Name string `json:"original_name"`
+	}
+	flavorVal := flavor{
+		Name: s.FlavorName,
+	}
+	type fault struct {
+		Code    uint    `json:"code"`
+		Created string  `json:"created"`
+		Message string  `json:"message"`
+		Details *string `json:"details,omitempty"`
+	}
+	var faultVal *fault
+	if s.FaultCode != nil && s.FaultCreated != nil && s.FaultMessage != nil {
+		faultVal = &fault{
+			Code:    *s.FaultCode,
+			Created: *s.FaultCreated,
+			Message: *s.FaultMessage,
+			Details: s.FaultDetails,
+		}
+	}
 	aux := &struct {
-		Flavor struct {
-			// Starting in microversion 2.47, "id" was removed...
-			Name string `json:"original_name"`
-		} `json:"flavor"`
+		Flavor flavor `json:"flavor"`
+		Fault  *fault `json:"fault,omitempty"`
 		*Alias
 	}{
-		Alias: (*Alias)(s),
-		Flavor: struct {
-			Name string `json:"original_name"`
-		}{
-			Name: s.FlavorName,
-		},
+		Alias:  (*Alias)(s),
+		Flavor: flavorVal,
+		Fault:  faultVal,
 	}
 	return json.Marshal(aux)
 }
 
 // Table in which the openstack model is stored.
-func (Server) TableName() string { return "openstack_servers" }
+func (Server) TableName() string { return "openstack_servers_v2" }
 
 // Index for the openstack model.
 func (Server) Indexes() map[string][]string { return nil }
@@ -285,6 +333,54 @@ type Flavor struct {
 	ExtraSpecs string `json:"extra_specs" db:"extra_specs"`
 }
 
+// FlavorHypervisorType is a string type enumerating the specific values the
+// hypervisor type contained in flavor extra specs may have.
+type FlavorHypervisorType string
+
+const (
+	// FlavorHypervisorTypeQEMU maps flavors to QEMU/KVM hypervisors.
+	FlavorHypervisorTypeQEMU FlavorHypervisorType = "QEMU"
+	// FlavorHypervisorTypeCH maps flavors to Cloud-Hypervisor/KVM hypervisors.
+	FlavorHypervisorTypeCH FlavorHypervisorType = "CH"
+	// FlavorHypervisorTypeVMware maps flavors to VMware hypervisors.
+	FlavorHypervisorTypeVMware FlavorHypervisorType = "VMware vCenter Server"
+	// FlavorHypervisorTypeIronic maps flavors to Ironic baremetal instances.
+	FlavorHypervisorTypeIronic FlavorHypervisorType = "Ironic"
+	// FlavorHypervisorTypeOther marks flavors whose extra specs set the
+	// hypervisor type to a value that is not one of the known ones above.
+	FlavorHypervisorTypeOther FlavorHypervisorType = "Other"
+	// FlavorHypervisorTypeUnspecified marks flavors whose extra specs do not
+	// set the hypervisor type at all.
+	FlavorHypervisorTypeUnspecified FlavorHypervisorType = "Unspecified"
+)
+
+// GetHypervisorType returns the hypervisor type of the flavor based on its
+// extra specs.
+func (f Flavor) GetHypervisorType() (FlavorHypervisorType, error) {
+	var extraSpecs map[string]string
+	if f.ExtraSpecs == "" {
+		extraSpecs = map[string]string{}
+	} else if err := json.Unmarshal([]byte(f.ExtraSpecs), &extraSpecs); err != nil {
+		return "", err // Return an error if the extra specs cannot be parsed.
+	}
+	hypervisorType, ok := extraSpecs["capabilities:hypervisor_type"]
+	if !ok {
+		return FlavorHypervisorTypeUnspecified, nil
+	}
+	switch hypervisorType {
+	case string(FlavorHypervisorTypeQEMU):
+		return FlavorHypervisorTypeQEMU, nil
+	case string(FlavorHypervisorTypeCH):
+		return FlavorHypervisorTypeCH, nil
+	case string(FlavorHypervisorTypeVMware):
+		return FlavorHypervisorTypeVMware, nil
+	case string(FlavorHypervisorTypeIronic):
+		return FlavorHypervisorTypeIronic, nil
+	default:
+		return FlavorHypervisorTypeOther, nil
+	}
+}
+
 // Custom unmarshaler for OpenStackFlavor to handle nested JSON.
 func (f *Flavor) UnmarshalJSON(data []byte) error {
 	type Alias Flavor
diff --git a/internal/knowledge/extractor/plugins/compute/libvirt_domain_cpu_steal_pct.sql b/internal/knowledge/extractor/plugins/compute/libvirt_domain_cpu_steal_pct.sql
index ea2b9c97a..ab3c7b8a7 100644
--- a/internal/knowledge/extractor/plugins/compute/libvirt_domain_cpu_steal_pct.sql
+++ b/internal/knowledge/extractor/plugins/compute/libvirt_domain_cpu_steal_pct.sql
@@ -3,6 +3,6 @@ SELECT
     os.os_ext_srv_attr_host AS host,
     MAX(value) AS max_steal_time_pct
 FROM kvm_libvirt_domain_metrics kvm
-JOIN openstack_servers os ON os.os_ext_srv_attr_instance_name = kvm.domain
+JOIN openstack_servers_v2 os ON os.os_ext_srv_attr_instance_name = kvm.domain
 WHERE kvm.name = 'kvm_libvirt_domain_steal_pct' AND os.id IS NOT NULL
 GROUP BY os.os_ext_srv_attr_host, os.id;
\ No newline at end of file
diff --git a/internal/knowledge/extractor/plugins/compute/libvirt_domain_cpu_steal_pct_test.go b/internal/knowledge/extractor/plugins/compute/libvirt_domain_cpu_steal_pct_test.go
index b9f84b188..bc28218b5 100644
--- a/internal/knowledge/extractor/plugins/compute/libvirt_domain_cpu_steal_pct_test.go
+++ b/internal/knowledge/extractor/plugins/compute/libvirt_domain_cpu_steal_pct_test.go
@@ -56,7 +56,7 @@ func TestLibvirtDomainCPUStealPctExtractor_Extract(t *testing.T) {
 		t.Fatalf("expected no error, got %v", err)
 	}
 
-	// Insert mock data into the openstack_servers table
+	// Insert mock data into the openstack servers table
 	servers := []any{
 		&nova.Server{
 			ID:                       "uuid-1",
diff --git a/internal/knowledge/extractor/plugins/compute/vm_host_residency.sql b/internal/knowledge/extractor/plugins/compute/vm_host_residency.sql
index fff0086c4..c2b4b8846 100644
--- a/internal/knowledge/extractor/plugins/compute/vm_host_residency.sql
+++ b/internal/knowledge/extractor/plugins/compute/vm_host_residency.sql
@@ -21,7 +21,7 @@ WITH durations AS (
             )) AS BIGINT)
         ) AS duration
     FROM openstack_migrations AS migrations
-    LEFT JOIN openstack_servers AS servers ON servers.id = migrations.instance_uuid
+    LEFT JOIN openstack_servers_v2 AS servers ON servers.id = migrations.instance_uuid
     LEFT JOIN openstack_flavors_v2 AS flavors ON flavors.name = servers.flavor_name
 )
 SELECT
diff --git a/internal/knowledge/extractor/plugins/compute/vm_life_span.sql b/internal/knowledge/extractor/plugins/compute/vm_life_span.sql
index daaa0a470..1fad31536 100644
--- a/internal/knowledge/extractor/plugins/compute/vm_life_span.sql
+++ b/internal/knowledge/extractor/plugins/compute/vm_life_span.sql
@@ -13,7 +13,7 @@ running_servers AS (
         EXTRACT(EPOCH FROM (NOW()::timestamp - servers.created::timestamp))::BIGINT AS duration,
         COALESCE(flavors.name, 'unknown')::TEXT AS flavor_name,
         false::BOOLEAN AS deleted
-    FROM openstack_servers servers
+    FROM openstack_servers_v2 servers
     LEFT JOIN openstack_flavors_v2 flavors ON flavors.name = servers.flavor_name
     WHERE servers.created IS NOT NULL
 )
diff --git a/internal/knowledge/extractor/plugins/compute/vrops_hostsystem_resolver.sql b/internal/knowledge/extractor/plugins/compute/vrops_hostsystem_resolver.sql
index e2c6ad4b2..8ab0a2c70 100644
--- a/internal/knowledge/extractor/plugins/compute/vrops_hostsystem_resolver.sql
+++ b/internal/knowledge/extractor/plugins/compute/vrops_hostsystem_resolver.sql
@@ -3,5 +3,5 @@ SELECT DISTINCT
     m.hostsystem AS vrops_hostsystem,
     s.os_ext_srv_attr_host AS nova_compute_host
 FROM vrops_vm_metrics m
-LEFT JOIN openstack_servers s ON m.instance_uuid = s.id
+LEFT JOIN openstack_servers_v2 s ON m.instance_uuid = s.id
 WHERE s.os_ext_srv_attr_host IS NOT NULL;
diff --git a/internal/knowledge/extractor/plugins/compute/vrops_project_noisiness.sql b/internal/knowledge/extractor/plugins/compute/vrops_project_noisiness.sql
index 334668b22..0b0067790 100644
--- a/internal/knowledge/extractor/plugins/compute/vrops_project_noisiness.sql
+++ b/internal/knowledge/extractor/plugins/compute/vrops_project_noisiness.sql
@@ -19,7 +19,7 @@ host_cpu_usage AS (
         s.tenant_id,
         h.service_host,
         AVG(p.avg_cpu) AS avg_cpu_of_project
-    FROM openstack_servers s
+    FROM openstack_servers_v2 s
     JOIN vrops_vm_metrics m ON s.id = m.instance_uuid
     JOIN projects_avg_cpu p ON s.tenant_id = p.tenant_id
     JOIN openstack_hypervisors h ON s.os_ext_srv_attr_hypervisor_hostname = h.hostname
diff --git a/internal/knowledge/kpis/plugins/compute/vm_faults.go b/internal/knowledge/kpis/plugins/compute/vm_faults.go
new file mode 100644
index 000000000..fec71247c
--- /dev/null
+++ b/internal/knowledge/kpis/plugins/compute/vm_faults.go
@@ -0,0 +1,151 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package compute
+
+import (
+	"errors"
+	"strconv"
+	"strings"
+
+	"github.com/cobaltcore-dev/cortex/internal/knowledge/datasources/plugins/openstack/nova"
+	"github.com/cobaltcore-dev/cortex/internal/knowledge/db"
+	"github.com/cobaltcore-dev/cortex/internal/knowledge/kpis/plugins"
+	"github.com/cobaltcore-dev/cortex/pkg/conf"
+	"github.com/prometheus/client_golang/prometheus"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+var vmFaultsKPIlogger = ctrl.Log.WithName("vm-faults-kpi")
+
+// VMFaultsKPI tracks vm faults in the datacenter. It exposes helpful
+// information about the faults, such as the availability zone, hypervisor
+// type, vm state, and error info if available. This can be used to identify
+// issues in the datacenter and to monitor the overall health of the vms.
+type VMFaultsKPI struct {
+	plugins.BaseKPI[struct{} /* No opts */]
+
+	// vmFaultsDesc describes the prometheus metric for vm faults.
+	vmFaultsDesc *prometheus.Desc
+}
+
+// GetName returns the unique name of this kpi, which is used for
+// registration and configuration.
+func (VMFaultsKPI) GetName() string { return "vm_faults_kpi" }
+
+// Init sets up the base kpi with the given database, client and options and,
+// if that succeeds, creates the Prometheus descriptor of the vm faults
+// metric. Errors of the base kpi initialization are returned unchanged.
+func (k *VMFaultsKPI) Init(db *db.DB, client client.Client, opts conf.RawOpts) error {
+	err := k.BaseKPI.Init(db, client, opts)
+	if err != nil {
+		return err
+	}
+	labelNames := []string{"az", "hvtype", "state", "faultcode", "faultmsg", "faultyvm"}
+	k.vmFaultsDesc = prometheus.NewDesc(
+		"cortex_vm_faults", "Number of vm faults in the datacenter", labelNames, nil)
+	return nil
+}
+
+// Describe sends the single descriptor of this kpi, the vm faults metric, to
+// the provided channel. The descriptor is created in Init.
+func (k *VMFaultsKPI) Describe(ch chan<- *prometheus.Desc) { ch <- k.vmFaultsDesc }
+
+// Collect reads all servers and flavors from the database, counts the servers
+// per label combination and sends the counts as gauges to the provided channel.
+func (k *VMFaultsKPI) Collect(ch chan<- prometheus.Metric) {
+	vmFaultsKPIlogger.Info("collecting metrics")
+
+	// This can happen when no datasource is provided that connects to a database.
+	if k.DB == nil {
+		err := errors.New("no database connection")
+		vmFaultsKPIlogger.Error(err, "cannot collect metric")
+		return
+	}
+
+	// Get all vms with their current state from the database.
+	var servers []nova.Server
+	nServers, err := k.DB.Select(&servers, "SELECT * FROM "+nova.Server{}.TableName())
+	if err != nil {
+		vmFaultsKPIlogger.Error(err, "failed to query servers from database")
+		return
+	}
+	vmFaultsKPIlogger.Info("queried servers from database", "nServers", nServers)
+
+	// Get all flavors from the database to map them to the vms.
+	var flavors []nova.Flavor
+	nFlavors, err := k.DB.Select(&flavors, "SELECT * FROM "+nova.Flavor{}.TableName())
+	if err != nil {
+		vmFaultsKPIlogger.Error(err, "failed to query flavors from database")
+		return
+	}
+	vmFaultsKPIlogger.Info("queried flavors from database", "nFlavors", nFlavors)
+
+	flavorsByName := make(map[string]nova.Flavor, len(flavors))
+	for _, flavor := range flavors {
+		flavorsByName[flavor.Name] = flavor
+	}
+
+	type labels struct {
+		az         string
+		hvtype     string
+		state      string
+		errcode    string
+		errmessage string
+		faultyVM   string
+	}
+	counts := make(map[labels]float64)
+
+	// Count the vms (faulty AND healthy ones, for comparison) per label set.
+	// Indexing avoids copying the server struct on every iteration.
+	for i := range servers {
+		server := &servers[i]
+		flavor, ok := flavorsByName[server.FlavorName]
+		if !ok {
+			vmFaultsKPIlogger.Info("warning: flavor not found for server", "server",
+				server.ID, "flavor", server.FlavorName)
+			continue
+		}
+		hypervisorType, err := flavor.GetHypervisorType()
+		if err != nil {
+			vmFaultsKPIlogger.Error(err, "failed to get hypervisor type for server",
+				"server", server.ID, "flavor", flavor.Name)
+			continue
+		}
+		var errcode uint // Stays 0 if the server has no fault code.
+		if server.FaultCode != nil {
+			errcode = *server.FaultCode
+		}
+		errmsg := "n/a"
+		if server.FaultMessage != nil {
+			errmsg = *server.FaultMessage
+			// Replace the VM ID in the message to keep the metric cardinality
+			// low. An empty ID would match between all characters, so skip it.
+			if server.ID != "" {
+				errmsg = strings.ReplaceAll(errmsg, server.ID, "<vm_id>")
+			}
+		}
+		// Only faulty VMs expose their ID, to limit the metric cardinality.
+		faultyVM := "no"
+		if server.FaultCode != nil || server.FaultMessage != nil {
+			faultyVM = server.ID
+		}
+		key := labels{
+			az:         server.OSEXTAvailabilityZone,
+			hvtype:     string(hypervisorType),
+			state:      server.Status,
+			errcode:    strconv.FormatUint(uint64(errcode), 10),
+			errmessage: errmsg,
+			faultyVM:   faultyVM,
+		}
+		counts[key]++
+	}
+
+	// Emit metrics to prometheus.
+	for key, count := range counts {
+		ch <- prometheus.MustNewConstMetric(k.vmFaultsDesc, prometheus.GaugeValue, count,
+			key.az, key.hvtype, key.state, key.errcode, key.errmessage, key.faultyVM)
+	}
+	vmFaultsKPIlogger.Info("collected metrics", "nMetrics", len(counts))
+}
diff --git a/internal/knowledge/kpis/plugins/compute/vm_faults_test.go b/internal/knowledge/kpis/plugins/compute/vm_faults_test.go
new file mode 100644
index 000000000..a5b248b55
--- /dev/null
+++ b/internal/knowledge/kpis/plugins/compute/vm_faults_test.go
@@ -0,0 +1,408 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package compute
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/cobaltcore-dev/cortex/internal/knowledge/datasources/plugins/openstack/nova"
+	"github.com/cobaltcore-dev/cortex/internal/knowledge/db"
+	testlibDB "github.com/cobaltcore-dev/cortex/internal/knowledge/db/testing"
+	"github.com/cobaltcore-dev/cortex/pkg/conf"
+	testlib "github.com/cobaltcore-dev/cortex/pkg/testing"
+	"github.com/prometheus/client_golang/prometheus"
+	prometheusgo "github.com/prometheus/client_model/go"
+)
+
+func TestVMFaultsKPI_GetName(t *testing.T) {
+	kpi := VMFaultsKPI{}
+	if kpi.GetName() != "vm_faults_kpi" {
+		t.Errorf("expected 'vm_faults_kpi', got %q", kpi.GetName())
+	}
+}
+
+func TestVMFaultsKPI_Init(t *testing.T) {
+	dbEnv := testlibDB.SetupDBEnv(t)
+	testDB := db.DB{DbMap: dbEnv.DbMap}
+	defer dbEnv.Close()
+
+	kpi := &VMFaultsKPI{}
+	if err := kpi.Init(&testDB, nil, conf.NewRawOpts("{}")); err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+	if kpi.vmFaultsDesc == nil {
+		t.Error("vmFaultsDesc should be initialized")
+	}
+}
+
+func TestVMFaultsKPI_Describe(t *testing.T) {
+	kpi := &VMFaultsKPI{}
+	if err := kpi.Init(nil, nil, conf.NewRawOpts("{}")); err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+
+	ch := make(chan *prometheus.Desc, 1)
+	kpi.Describe(ch)
+	close(ch)
+
+	desc := <-ch
+	if desc == nil {
+		t.Error("expected descriptor to be sent to channel")
+	}
+}
+
+func TestVMFaultsKPI_Collect_NoDB(t *testing.T) {
+	kpi := &VMFaultsKPI{}
+	if err := kpi.Init(nil, nil, conf.NewRawOpts("{}")); err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+
+	// Collect should not panic when no database is provided
+	ch := make(chan prometheus.Metric, 100)
+	kpi.Collect(ch)
+	close(ch)
+
+	count := 0
+	for range ch {
+		count++
+	}
+	if count != 0 {
+		t.Errorf("expected 0 metrics when no DB, got %d", count)
+	}
+}
+
+func TestVMFaultsKPI_Collect(t *testing.T) {
+	dbEnv := testlibDB.SetupDBEnv(t)
+	testDB := db.DB{DbMap: dbEnv.DbMap}
+	defer dbEnv.Close()
+
+	if err := testDB.CreateTable(
+		testDB.AddTable(nova.Server{}),
+		testDB.AddTable(nova.Flavor{}),
+	); err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+
+	// Insert mock flavors: QEMU, VMware, and one without a hypervisor type
+	flavors := []any{
+		&nova.Flavor{
+			ID:         "flavor-qemu",
+			Name:       "qemu-small",
+			VCPUs:      2,
+			RAM:        4096,
+			ExtraSpecs: `{"capabilities:hypervisor_type":"QEMU"}`,
+		},
+		&nova.Flavor{
+			ID:         "flavor-vmware",
+			Name:       "vmware-medium",
+			VCPUs:      4,
+			RAM:        8192,
+			ExtraSpecs: `{"capabilities:hypervisor_type":"VMware vCenter Server"}`,
+		},
+		&nova.Flavor{
+			ID:         "flavor-unspecified",
+			Name:       "generic-large",
+			VCPUs:      8,
+			RAM:        16384,
+			ExtraSpecs: `{}`,
+		},
+	}
+	if err := testDB.Insert(flavors...); err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+
+	// Insert mock servers covering the healthy, faulty and skipped cases
+	servers := []any{
+		// Normal server without fault
+		&nova.Server{
+			ID:                    "server-1",
+			Name:                  "normal-vm",
+			Status:                "ACTIVE",
+			FlavorName:            "qemu-small",
+			OSEXTAvailabilityZone: "az1",
+		},
+		// Server with fault code and message
+		&nova.Server{
+			ID:                    "server-2",
+			Name:                  "faultyvm",
+			Status:                "ERROR",
+			FlavorName:            "qemu-small",
+			OSEXTAvailabilityZone: "az1",
+			FaultCode:             testlib.Ptr(uint(500)),
+			FaultMessage:          testlib.Ptr("Internal error"),
+		},
+		// Another faulty server in different AZ
+		&nova.Server{
+			ID:                    "server-3",
+			Name:                  "another-faulty",
+			Status:                "ERROR",
+			FlavorName:            "vmware-medium",
+			OSEXTAvailabilityZone: "az2",
+			FaultCode:             testlib.Ptr(uint(400)),
+			FaultMessage:          testlib.Ptr("Bad request"),
+		},
+		// Server with only fault message (no code)
+		&nova.Server{
+			ID:                    "server-4",
+			Name:                  "partial-fault",
+			Status:                "BUILD",
+			FlavorName:            "generic-large",
+			OSEXTAvailabilityZone: "az1",
+			FaultMessage:          testlib.Ptr("Some warning"),
+		},
+		// Server whose flavor is not in the flavors table (should be skipped)
+		&nova.Server{
+			ID:                    "server-5",
+			Name:                  "orphan-vm",
+			Status:                "ACTIVE",
+			FlavorName:            "nonexistent-flavor",
+			OSEXTAvailabilityZone: "az1",
+		},
+	}
+	if err := testDB.Insert(servers...); err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+
+	kpi := &VMFaultsKPI{}
+	if err := kpi.Init(&testDB, nil, conf.NewRawOpts("{}")); err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+
+	ch := make(chan prometheus.Metric, 100)
+	kpi.Collect(ch)
+	close(ch)
+
+	type vmFaultsMetric struct {
+		az           string
+		hvtype       string
+		state        string
+		faultCode    string
+		faultMessage string
+		faultyVM     string
+		value        float64
+	}
+
+	metrics := make(map[string]vmFaultsMetric)
+	for metric := range ch {
+		var m prometheusgo.Metric
+		if err := metric.Write(&m); err != nil {
+			t.Fatalf("failed to write metric: %v", err)
+		}
+
+		labels := make(map[string]string)
+		for _, label := range m.Label {
+			labels[label.GetName()] = label.GetValue()
+		}
+
+		key := labels["az"] + "|" + labels["hvtype"] + "|" + labels["state"] + "|" +
+			labels["faultcode"] + "|" + labels["faultyvm"]
+
+		metrics[key] = vmFaultsMetric{
+			az:           labels["az"],
+			hvtype:       labels["hvtype"],
+			state:        labels["state"],
+			faultCode:    labels["faultcode"],
+			faultMessage: labels["faultmsg"],
+			faultyVM:     labels["faultyvm"],
+			value:        m.GetGauge().GetValue(),
+		}
+	}
+
+	expectedMetrics := map[string]vmFaultsMetric{
+		// Normal VM without fault
+		"az1|QEMU|ACTIVE|0|no": {
+			az:           "az1",
+			hvtype:       "QEMU",
+			state:        "ACTIVE",
+			faultCode:    "0",
+			faultMessage: "n/a",
+			faultyVM:     "no",
+			value:        1,
+		},
+		// Faulty VM with code 500
+		"az1|QEMU|ERROR|500|server-2": {
+			az:           "az1",
+			hvtype:       "QEMU",
+			state:        "ERROR",
+			faultCode:    "500",
+			faultMessage: "Internal error",
+			faultyVM:     "server-2",
+			value:        1,
+		},
+		// Faulty VM with code 400 in az2
+		"az2|VMware vCenter Server|ERROR|400|server-3": {
+			az:           "az2",
+			hvtype:       "VMware vCenter Server",
+			state:        "ERROR",
+			faultCode:    "400",
+			faultMessage: "Bad request",
+			faultyVM:     "server-3",
+			value:        1,
+		},
+		// Server with only fault message (code=0 but has message)
+		"az1|Unspecified|BUILD|0|server-4": {
+			az:           "az1",
+			hvtype:       "Unspecified",
+			state:        "BUILD",
+			faultCode:    "0",
+			faultMessage: "Some warning",
+			faultyVM:     "server-4",
+			value:        1,
+		},
+	}
+
+	if len(expectedMetrics) != len(metrics) {
+		t.Errorf("expected %d metrics, got %d", len(expectedMetrics), len(metrics))
+		t.Logf("actual metrics: %+v", metrics)
+	}
+
+	for key, expected := range expectedMetrics {
+		actual, ok := metrics[key]
+		if !ok {
+			t.Errorf("expected metric %q not found", key)
+			continue
+		}
+
+		if !reflect.DeepEqual(expected, actual) {
+			t.Errorf("metric %q: expected %+v, got %+v", key, expected, actual)
+		}
+	}
+}
+
+func TestVMFaultsKPI_Collect_InvalidExtraSpecs(t *testing.T) {
+	dbEnv := testlibDB.SetupDBEnv(t)
+	testDB := db.DB{DbMap: dbEnv.DbMap}
+	defer dbEnv.Close()
+
+	if err := testDB.CreateTable(
+		testDB.AddTable(nova.Server{}),
+		testDB.AddTable(nova.Flavor{}),
+	); err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+
+	// Insert flavor with invalid extra specs JSON
+	flavors := []any{
+		&nova.Flavor{
+			ID:         "flavor-bad",
+			Name:       "bad-flavor",
+			VCPUs:      2,
+			RAM:        4096,
+			ExtraSpecs: `invalid-json`,
+		},
+	}
+	if err := testDB.Insert(flavors...); err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+
+	servers := []any{
+		&nova.Server{
+			ID:                    "server-bad",
+			Name:                  "bad-vm",
+			Status:                "ACTIVE",
+			FlavorName:            "bad-flavor",
+			OSEXTAvailabilityZone: "az1",
+		},
+	}
+	if err := testDB.Insert(servers...); err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+
+	kpi := &VMFaultsKPI{}
+	if err := kpi.Init(&testDB, nil, conf.NewRawOpts("{}")); err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+
+	// Should not panic, but should skip the server with invalid flavor
+	ch := make(chan prometheus.Metric, 100)
+	kpi.Collect(ch)
+	close(ch)
+
+	count := 0
+	for range ch {
+		count++
+	}
+	// Should have 0 metrics since the server's flavor has invalid extra specs
+	if count != 0 {
+		t.Errorf("expected 0 metrics, got %d", count)
+	}
+}
+
+func TestVMFaultsKPI_Collect_MultipleSameLabels(t *testing.T) {
+	dbEnv := testlibDB.SetupDBEnv(t)
+	testDB := db.DB{DbMap: dbEnv.DbMap}
+	defer dbEnv.Close()
+
+	if err := testDB.CreateTable(
+		testDB.AddTable(nova.Server{}),
+		testDB.AddTable(nova.Flavor{}),
+	); err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+
+	flavors := []any{
+		&nova.Flavor{
+			ID:         "flavor-1",
+			Name:       "small",
+			VCPUs:      2,
+			RAM:        4096,
+			ExtraSpecs: `{"capabilities:hypervisor_type":"QEMU"}`,
+		},
+	}
+	if err := testDB.Insert(flavors...); err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+
+	// Insert multiple fault-free servers that share az, flavor and status
+	servers := []any{
+		&nova.Server{
+			ID:                    "server-1",
+			Name:                  "vm-1",
+			Status:                "ACTIVE",
+			FlavorName:            "small",
+			OSEXTAvailabilityZone: "az1",
+		},
+		&nova.Server{
+			ID:                    "server-2",
+			Name:                  "vm-2",
+			Status:                "ACTIVE",
+			FlavorName:            "small",
+			OSEXTAvailabilityZone: "az1",
+		},
+		&nova.Server{
+			ID:                    "server-3",
+			Name:                  "vm-3",
+			Status:                "ACTIVE",
+			FlavorName:            "small",
+			OSEXTAvailabilityZone: "az1",
+		},
+	}
+	if err := testDB.Insert(servers...); err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+
+	kpi := &VMFaultsKPI{}
+	if err := kpi.Init(&testDB, nil, conf.NewRawOpts("{}")); err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+
+	ch := make(chan prometheus.Metric, 100)
+	kpi.Collect(ch)
+	close(ch)
+
+	// Identical label sets must be aggregated into exactly one series;
+	// otherwise the value check below would depend on the emission order.
+	if n := len(ch); n != 1 {
+		t.Fatalf("expected exactly 1 metric, got %d", n)
+	}
+	var m prometheusgo.Metric
+	if err := (<-ch).Write(&m); err != nil {
+		t.Fatalf("failed to write metric: %v", err)
+	}
+	// All 3 VMs should be counted together since they have the same labels
+	if value := m.GetGauge().GetValue(); value != 3 {
+		t.Errorf("expected metric value 3, got %f", value)
+	}
+}
diff --git a/internal/knowledge/kpis/supported_kpis.go b/internal/knowledge/kpis/supported_kpis.go
index 274c5ace5..2623ff8bd 100644
--- a/internal/knowledge/kpis/supported_kpis.go
+++ b/internal/knowledge/kpis/supported_kpis.go
@@ -21,6 +21,7 @@ var supportedKPIs = map[string]plugins.KPI{
 	"vm_migration_statistics_kpi":  &compute.VMMigrationStatisticsKPI{},
 	"vm_life_span_kpi":             &compute.VMLifeSpanKPI{},
 	"vm_commitments_kpi":           &compute.VMCommitmentsKPI{},
+	"vm_faults_kpi":                &compute.VMFaultsKPI{},
 
 	"netapp_storage_pool_cpu_usage_kpi": &storage.NetAppStoragePoolCPUUsageKPI{},
 
diff --git a/internal/scheduling/reservations/commitments/controller.go b/internal/scheduling/reservations/commitments/controller.go
index 9c238aeee..d38c6e1d8 100644
--- a/internal/scheduling/reservations/commitments/controller.go
+++ b/internal/scheduling/reservations/commitments/controller.go
@@ -445,7 +445,7 @@ func (r *CommitmentReservationController) listServersByProjectID(ctx context.Con
 	// Query servers from the database cache.
 	var servers []nova.Server
 	_, err := r.DB.Select(&servers,
-		"SELECT * FROM openstack_servers WHERE tenant_id = $1",
+		"SELECT * FROM "+nova.Server{}.TableName()+" WHERE tenant_id = $1",
 		projectID)
 	if err != nil {
 		return nil, fmt.Errorf("failed to query servers from database: %w", err)
diff --git a/tools/plutono/provisioning/dashboards/cortex-status.json b/tools/plutono/provisioning/dashboards/cortex-status.json
index f83e2926b..37f4b2479 100644
--- a/tools/plutono/provisioning/dashboards/cortex-status.json
+++ b/tools/plutono/provisioning/dashboards/cortex-status.json
@@ -16,7 +16,7 @@
   "editable": true,
   "gnetId": null,
   "graphTooltip": 0,
-  "id": 3,
+  "id": 1,
   "links": [],
   "panels": [
     {
@@ -557,6 +557,7 @@
       "dashLength": 10,
       "dashes": false,
       "datasource": "prometheus-openstack",
+      "description": "",
       "fieldConfig": {
         "defaults": {
           "unit": "short"
@@ -567,11 +568,117 @@
       "fillGradient": 0,
       "gridPos": {
         "h": 12,
-        "w": 24,
+        "w": 12,
         "x": 0,
         "y": 31
       },
       "hiddenSeries": false,
+      "id": 58,
+      "interval": null,
+      "legend": {
+        "alignAsTable": false,
+        "avg": false,
+        "current": false,
+        "hideEmpty": false,
+        "hideZero": true,
+        "max": false,
+        "min": false,
+        "rightSide": false,
+        "show": true,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.5.37",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": true,
+      "steppedLine": false,
+      "targets": [
+        {
+          "exemplar": false,
+          "expr": "sum by (faultmsg,state) (cortex_vm_faults{faultyvm!=\"no\"})",
+          "format": "time_series",
+          "instant": false,
+          "interval": "",
+          "intervalFactor": 1,
+          "legendFormat": "{{state}} {{faultmsg}}",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [],
+      "timeShift": null,
+      "title": "Nova: faults in vm scheduling lifecycle",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "$$hashKey": "object:234",
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        },
+        {
+          "$$hashKey": "object:235",
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": "prometheus-openstack",
+      "fieldConfig": {
+        "defaults": {
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 12,
+        "w": 24,
+        "x": 0,
+        "y": 43
+      },
+      "hiddenSeries": false,
       "id": 39,
       "legend": {
         "avg": false,
@@ -669,7 +776,7 @@
         "h": 11,
         "w": 6,
         "x": 0,
-        "y": 43
+        "y": 55
       },
       "hiddenSeries": false,
       "id": 31,
@@ -766,7 +873,7 @@
         "h": 11,
         "w": 6,
         "x": 6,
-        "y": 43
+        "y": 55
       },
       "hiddenSeries": false,
       "id": 33,
@@ -878,7 +985,7 @@
         "h": 11,
         "w": 6,
         "x": 12,
-        "y": 43
+        "y": 55
       },
       "hiddenSeries": false,
       "id": 35,
@@ -990,7 +1097,7 @@
         "h": 11,
         "w": 6,
         "x": 18,
-        "y": 43
+        "y": 55
       },
       "hiddenSeries": false,
       "id": 37,
@@ -1100,7 +1207,7 @@
         "h": 12,
         "w": 12,
         "x": 0,
-        "y": 54
+        "y": 66
       },
       "hiddenSeries": false,
       "id": 27,
@@ -1208,7 +1315,7 @@
         "h": 12,
         "w": 12,
         "x": 12,
-        "y": 54
+        "y": 66
       },
       "hiddenSeries": false,
       "id": 29,
@@ -1296,7 +1403,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 66
+        "y": 78
       },
       "id": 5,
       "panels": [],
@@ -1321,7 +1428,7 @@
         "h": 11,
         "w": 12,
         "x": 0,
-        "y": 67
+        "y": 79
       },
       "hiddenSeries": false,
       "id": 2,
@@ -1441,7 +1548,7 @@
         "h": 11,
         "w": 12,
         "x": 12,
-        "y": 67
+        "y": 79
       },
       "hiddenSeries": false,
       "id": 3,
@@ -1580,7 +1687,7 @@
         "h": 12,
         "w": 24,
         "x": 0,
-        "y": 78
+        "y": 90
       },
       "id": 50,
       "options": {
@@ -1621,7 +1728,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 90
+        "y": 102
       },
       "id": 25,
       "panels": [],
@@ -1644,7 +1751,7 @@
         "h": 14,
         "w": 12,
         "x": 0,
-        "y": 91
+        "y": 103
       },
       "hiddenSeries": false,
       "id": 21,
@@ -1746,7 +1853,7 @@
         "h": 14,
         "w": 12,
         "x": 12,
-        "y": 91
+        "y": 103
       },
       "hiddenSeries": false,
       "id": 23,
@@ -1839,7 +1946,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 105
+        "y": 117
       },
       "id": 19,
       "panels": [],
@@ -1862,7 +1969,7 @@
         "h": 13,
         "w": 12,
         "x": 0,
-        "y": 106
+        "y": 118
       },
       "hiddenSeries": false,
       "id": 17,
@@ -1960,7 +2067,7 @@
         "h": 13,
         "w": 12,
         "x": 12,
-        "y": 106
+        "y": 118
       },
       "hiddenSeries": false,
       "id": 15,
@@ -2057,7 +2164,7 @@
         "h": 12,
         "w": 12,
         "x": 0,
-        "y": 119
+        "y": 131
       },
       "hiddenSeries": false,
       "id": 11,
@@ -2155,7 +2262,7 @@
         "h": 12,
         "w": 12,
         "x": 12,
-        "y": 119
+        "y": 131
       },
       "hiddenSeries": false,
       "id": 13,
diff --git a/tools/visualize-reservations/main.go b/tools/visualize-reservations/main.go
index 9b5880be5..c99ff2eb1 100644
--- a/tools/visualize-reservations/main.go
+++ b/tools/visualize-reservations/main.go
@@ -52,6 +52,7 @@ import (
 	"time"
 
 	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
+	"github.com/cobaltcore-dev/cortex/internal/knowledge/datasources/plugins/openstack/nova"
 	hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
 	_ "github.com/lib/pq"
 	corev1 "k8s.io/api/core/v1"
@@ -1761,9 +1762,10 @@ func connectToPostgres(
 
 	// Query servers with host information
 	serverMap = make(map[string]serverInfo)
-	rows, err := db.QueryContext(ctx, "SELECT id, flavor_name, COALESCE(host_id, ''), COALESCE(os_ext_srv_attr_host, '') FROM openstack_servers")
+	//nolint:gosec // The table name comes from nova.Server{}.TableName(), not from user input, so this concatenated query is not vulnerable to SQL injection
+	rows, err := db.QueryContext(ctx, "SELECT id, flavor_name, COALESCE(host_id, ''), COALESCE(os_ext_srv_attr_host, '') FROM "+nova.Server{}.TableName())
 	if err != nil {
-		fmt.Fprintf(os.Stderr, "Warning: Could not query openstack_servers: %v\n", err)
+		fmt.Fprintf(os.Stderr, "Warning: Could not query "+nova.Server{}.TableName()+": %v\n", err)
 	} else {
 		defer rows.Close()
 		for rows.Next() {