Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 59 additions & 15 deletions collector/hwmon_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,19 +161,9 @@ func collectSensorData(dir string, data map[string]map[string]string) error {
return nil
}

func (c *hwMonCollector) updateHwmon(ch chan<- prometheus.Metric, dir string) error {
hwmonName, err := c.hwmonName(dir)
if err != nil {
return err
}

if c.deviceFilter.ignored(hwmonName) {
c.logger.Debug("ignoring hwmon chip", "chip", hwmonName)
return nil
}

func (c *hwMonCollector) updateHwmon(ch chan<- prometheus.Metric, dir, hwmonName string) error {
data := make(map[string]map[string]string)
err = collectSensorData(dir, data)
err := collectSensorData(dir, data)
if err != nil {
return err
}
Expand Down Expand Up @@ -452,7 +442,7 @@ func (c *hwMonCollector) hwmonHumanReadableChipName(dir string) (string, error)

func (c *hwMonCollector) Update(ch chan<- prometheus.Metric) error {
// Step 1: scan /sys/class/hwmon, resolve all symlinks and call
// updatesHwmon for each folder
// updateHwmon for each folder.

hwmonPathName := filepath.Join(sysFilePath("class"), "hwmon")

Expand All @@ -466,7 +456,20 @@ func (c *hwMonCollector) Update(ch chan<- prometheus.Metric) error {
return err
}

var lastErr error
// Pass 1: enumerate hwmon directories and pre-compute the device-derived
// chip name. Multiple hwmon nodes can share a single parent device (for
// example, asus-nb-wmi exposes one hwmon for fan control and another for
// WMI sensors), which makes hwmonName collide and triggers the "metric
// collected before with the same name and label values" registry error.
type hwmonEntry struct {
dir string
baseName string
nameFile string
}

entries := make([]hwmonEntry, 0, len(hwmonFiles))
chipCounts := make(map[string]int)
nameCounts := make(map[string]int)
for _, hwDir := range hwmonFiles {
hwmonXPathName := filepath.Join(hwmonPathName, hwDir.Name())
fileInfo, err := os.Lstat(hwmonXPathName)
Expand All @@ -485,7 +488,48 @@ func (c *hwMonCollector) Update(ch chan<- prometheus.Metric) error {
continue
}

if err = c.updateHwmon(ch, hwmonXPathName); err != nil {
baseName, err := c.hwmonName(hwmonXPathName)
if err != nil {
c.logger.Debug("failed to derive hwmon chip name", "dir", hwmonXPathName, "err", err)
continue
}

nameFile := ""
if raw, err := os.ReadFile(filepath.Join(hwmonXPathName, "name")); err == nil {
nameFile = strings.TrimSpace(string(raw))
}

entries = append(entries, hwmonEntry{
dir: hwmonXPathName,
baseName: baseName,
nameFile: nameFile,
})
chipCounts[baseName]++
nameCounts[baseName+"\x00"+nameFile]++
}

// Pass 2: emit metrics. For each entry, resolve a unique chip label
// (disambiguating colliding base names with the chip's `name` file
// content when distinct, else with the hwmonX basename) and only then
// apply the include/exclude filter so user regexes match the label
// that ends up in the metrics.
var lastErr error
for _, e := range entries {
chipName := e.baseName
if chipCounts[e.baseName] > 1 {
suffix := cleanMetricName(e.nameFile)
if suffix == "" || nameCounts[e.baseName+"\x00"+e.nameFile] > 1 {
suffix = cleanMetricName(filepath.Base(e.dir))
}
chipName = e.baseName + "_" + suffix
}

if c.deviceFilter.ignored(chipName) {
c.logger.Debug("ignoring hwmon chip", "chip", chipName)
continue
}

if err := c.updateHwmon(ch, e.dir, chipName); err != nil {
lastErr = err
}
}
Expand Down
275 changes: 275 additions & 0 deletions collector/hwmon_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
// Copyright 2026 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !nohwmon

package collector

import (
"io"
"log/slog"
"os"
"path/filepath"
"testing"

"github.com/prometheus/client_golang/prometheus"
)

// fakeHwmon describes a single hwmon node to materialize under a temporary
// /sys layout. The collector's hwmonName function reads the parent of the
// `device` symlink to derive a chip label, which is what produces the
// collision we need to test.
type fakeHwmon struct {
hwmonDir string // e.g. "hwmon3"
device string // parent device dir name, e.g. "asus-nb-wmi". Empty disables the device symlink.
name string // contents of the optional `name` file
files map[string]string // sensor file basename -> content
}

// buildFakeSysfs writes a minimal /sys tree containing the supplied hwmon
// nodes and returns the path that should be passed as *sysPath. Each
// non-empty `device` shares a common /sys/devices/platform/<device>
// directory so two hwmon entries can collide on chip name.
func buildFakeSysfs(t *testing.T, hwmons []fakeHwmon) string {
t.Helper()

sysRoot := t.TempDir()
classHwmon := filepath.Join(sysRoot, "class", "hwmon")
if err := os.MkdirAll(classHwmon, 0o755); err != nil {
t.Fatalf("mkdir class/hwmon: %v", err)
}

for _, h := range hwmons {
var hwmonReal string
if h.device != "" {
hwmonReal = filepath.Join(sysRoot, "devices", "platform", h.device, "hwmon", h.hwmonDir)
} else {
hwmonReal = filepath.Join(sysRoot, "devices", "virtual", "hwmon", h.hwmonDir)
}
if err := os.MkdirAll(hwmonReal, 0o755); err != nil {
t.Fatalf("mkdir %s: %v", hwmonReal, err)
}

if h.device != "" {
deviceTarget := filepath.Join(sysRoot, "devices", "platform", h.device)
if err := os.Symlink(deviceTarget, filepath.Join(hwmonReal, "device")); err != nil {
t.Fatalf("symlink device: %v", err)
}
}

if h.name != "" {
if err := os.WriteFile(filepath.Join(hwmonReal, "name"), []byte(h.name+"\n"), 0o644); err != nil {
t.Fatalf("write name: %v", err)
}
}

for fname, content := range h.files {
if err := os.WriteFile(filepath.Join(hwmonReal, fname), []byte(content+"\n"), 0o644); err != nil {
t.Fatalf("write %s: %v", fname, err)
}
}

if err := os.Symlink(hwmonReal, filepath.Join(classHwmon, h.hwmonDir)); err != nil {
t.Fatalf("symlink class/hwmon: %v", err)
}
}

return sysRoot
}

// gatherChipLabels runs the collector through a registry and returns the
// observed `chip` label values across all metrics. It also surfaces any
// gather error — which is the failure mode #3637 reported.
func gatherChipLabels(t *testing.T, c *hwMonCollector) ([]string, error) {
t.Helper()
reg := prometheus.NewRegistry()
if err := reg.Register(testHwmonCollector{c: c}); err != nil {
t.Fatalf("register: %v", err)
}
families, err := reg.Gather()
if err != nil {
return nil, err
}
var chips []string
for _, fam := range families {
for _, m := range fam.Metric {
for _, l := range m.Label {
if l.GetName() == "chip" {
chips = append(chips, l.GetValue())
}
}
}
}
return chips, nil
}

// testHwmonCollector adapts hwMonCollector.Update for prometheus.Registry
// so we can exercise duplicate detection at the gather step.
type testHwmonCollector struct {
c *hwMonCollector
}

func (t testHwmonCollector) Describe(ch chan<- *prometheus.Desc) {
prometheus.DescribeByCollect(t, ch)
}

func (t testHwmonCollector) Collect(ch chan<- prometheus.Metric) {
if err := t.c.Update(ch); err != nil {
panic(err)
}
}

func newTestHwmonCollector() *hwMonCollector {
return &hwMonCollector{logger: slog.New(slog.NewTextHandler(io.Discard, nil))}
}

func contains(haystack []string, needle string) bool {
for _, h := range haystack {
if h == needle {
return true
}
}
return false
}

// Two hwmon entries sharing the same parent device — the configuration
// that triggered #3637 on ASUS WMI laptops — must produce distinct chip
// labels and not error during gather.
func TestHwmonDuplicateChipNamesAreDisambiguated(t *testing.T) {
hwmons := []fakeHwmon{
{
hwmonDir: "hwmon6",
device: "asus-nb-wmi",
name: "asus",
files: map[string]string{
"pwm1": "128",
"pwm1_enable": "2",
},
},
{
hwmonDir: "hwmon7",
device: "asus-nb-wmi",
name: "asus_wmi_sensors",
files: map[string]string{
"pwm1": "200",
"pwm1_enable": "2",
},
},
}

sysRoot := buildFakeSysfs(t, hwmons)
prev := *sysPath
t.Cleanup(func() { *sysPath = prev })
*sysPath = sysRoot

chips, err := gatherChipLabels(t, newTestHwmonCollector())
if err != nil {
t.Fatalf("gather: %v", err)
}

if !contains(chips, "platform_asus_nb_wmi_asus") {
t.Errorf("expected disambiguated chip 'platform_asus_nb_wmi_asus', got %v", uniq(chips))
}
if !contains(chips, "platform_asus_nb_wmi_asus_wmi_sensors") {
t.Errorf("expected disambiguated chip 'platform_asus_nb_wmi_asus_wmi_sensors', got %v", uniq(chips))
}
for _, chip := range chips {
if chip == "platform_asus_nb_wmi" {
t.Errorf("colliding chip should not be emitted with bare base name, got %q", chip)
}
}
}

// When chip names are already unique, the collector must leave them alone
// — no surprise suffixes for unaffected users.
func TestHwmonUniqueChipNamesAreUnchanged(t *testing.T) {
hwmons := []fakeHwmon{
{
hwmonDir: "hwmon0",
device: "coretemp.0",
name: "coretemp",
files: map[string]string{"temp1_input": "42000"},
},
{
hwmonDir: "hwmon1",
device: "coretemp.1",
name: "coretemp",
files: map[string]string{"temp1_input": "43000"},
},
}

sysRoot := buildFakeSysfs(t, hwmons)
prev := *sysPath
t.Cleanup(func() { *sysPath = prev })
*sysPath = sysRoot

chips, err := gatherChipLabels(t, newTestHwmonCollector())
if err != nil {
t.Fatalf("gather: %v", err)
}
if !contains(chips, "platform_coretemp_0") {
t.Errorf("expected platform_coretemp_0, got %v", uniq(chips))
}
if !contains(chips, "platform_coretemp_1") {
t.Errorf("expected platform_coretemp_1, got %v", uniq(chips))
}
}

// When colliding entries share the same `name` file content, the `name`
// disambiguator collapses too. We must still emit unique chip labels by
// falling back to the hwmonX basename.
func TestHwmonDuplicateChipNamesWithSameNameFile(t *testing.T) {
hwmons := []fakeHwmon{
{
hwmonDir: "hwmon3",
device: "asus-nb-wmi",
name: "asus",
files: map[string]string{"pwm1_enable": "2"},
},
{
hwmonDir: "hwmon4",
device: "asus-nb-wmi",
name: "asus",
files: map[string]string{"pwm1_enable": "2"},
},
}

sysRoot := buildFakeSysfs(t, hwmons)
prev := *sysPath
t.Cleanup(func() { *sysPath = prev })
*sysPath = sysRoot

chips, err := gatherChipLabels(t, newTestHwmonCollector())
if err != nil {
t.Fatalf("gather: %v", err)
}
if !contains(chips, "platform_asus_nb_wmi_hwmon3") {
t.Errorf("expected platform_asus_nb_wmi_hwmon3, got %v", uniq(chips))
}
if !contains(chips, "platform_asus_nb_wmi_hwmon4") {
t.Errorf("expected platform_asus_nb_wmi_hwmon4, got %v", uniq(chips))
}
}

func uniq(in []string) []string {
seen := map[string]struct{}{}
out := make([]string, 0, len(in))
for _, s := range in {
if _, ok := seen[s]; ok {
continue
}
seen[s] = struct{}{}
out = append(out, s)
}
return out
}