Skip to content

Commit 3747d0e

Browse files
fix: don't fail merticbeat/windows/perfmon when no data is available (#42803) (#42948)
* fix: don't fail merticbeat/windows/perfmon when no data is available * add CHANGELOG.next.asciidoc entry * fix: linter issue * fix: linter issue * fix: fix code according to PR comments * refactor: make CollectData error handling testable * fix: add mossing imports to windows/perfmon/reader_test.go * fix: add mossing imports to windows/perfmon/reader_test.go --------- Co-authored-by: Pierre HILBERT <pierre.hilbert@elastic.co> (cherry picked from commit 0ea31fd) Co-authored-by: stefans-elastic <stefan.stas@elastic.co>
1 parent fd2ed54 commit 3747d0e

File tree

3 files changed

+61
-7
lines changed

3 files changed

+61
-7
lines changed

Diff for: CHANGELOG.next.asciidoc

+1
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]
157157
- Use namespace for GetListMetrics when exists in AWS {pull}41022[41022]
158158
- Fixed panic caused by uninitialized meraki device wifi0 and wifi1 struct pointers in the device WiFi data fetching. {issue}42745[42745] {pull}42746[42746]
159159
- Only fetch cluster-level index stats summary {issue}36019[36019] {pull}42901[42901]
160+
- Fixed an issue in Metricbeat's Windows module where data collection would fail if the data was unavailable. {issue}42802[42802] {pull}42803[42803]
160161

161162
*Osquerybeat*
162163

Diff for: metricbeat/module/windows/perfmon/reader.go

+15-7
Original file line numberDiff line numberDiff line change
@@ -96,13 +96,8 @@ func (re *Reader) Read() ([]mb.Event, error) {
9696
// Some counters, such as rate counters, require two counter values in order to compute a displayable value. In this case we must call PdhCollectQueryData twice before calling PdhGetFormattedCounterValue.
9797
// For more information, see Collecting Performance Data (https://docs.microsoft.com/en-us/windows/desktop/PerfCtrs/collecting-performance-data).
9898
if err := re.query.CollectData(); err != nil {
99-
// users can encounter the case no counters are found (services/processes stopped), this should not generate an event with the error message,
100-
//could be the case the specific services are started after and picked up by the next RefreshCounterPaths func
101-
if err == pdh.PDH_NO_COUNTERS { //nolint:errorlint // Bad linter! This is always errno or nil.
102-
re.log.Warnf("%s %v", collectFailedMsg, err)
103-
} else {
104-
return nil, fmt.Errorf("%v: %w", collectFailedMsg, err)
105-
}
99+
err = re.handleCollectDataError(err)
100+
return nil, err
106101
}
107102

108103
// Get the values.
@@ -121,6 +116,19 @@ func (re *Reader) Read() ([]mb.Event, error) {
121116
return events, nil
122117
}
123118

119+
func (re *Reader) handleCollectDataError(err error) error {
120+
// users can encounter the case no counters are found (services/processes stopped), this should not generate an event with the error message,
121+
//could be the case the specific services are started after and picked up by the next RefreshCounterPaths func
122+
if err == pdh.PDH_NO_COUNTERS || err == pdh.PDH_NO_DATA { //nolint:errorlint // linter complains about comparing error using '==' operator but here error is always of type pdh.PdhErrno (or nil) so `errors.Is` is redundant here
123+
re.log.Warnf("%s %v", collectFailedMsg, err)
124+
125+
// Ensure the returned error is nil to prevent the Elastic Agent from transitioning to an UNHEALTHY state.
126+
return nil
127+
}
128+
129+
return fmt.Errorf("%v: %w", collectFailedMsg, err)
130+
}
131+
124132
func (re *Reader) getValues() (map[string][]pdh.CounterValue, error) {
125133
var val map[string][]pdh.CounterValue
126134
// Sleep for one second before collecting the second raw value-

Diff for: metricbeat/module/windows/perfmon/reader_test.go

+45
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,13 @@
2020
package perfmon
2121

2222
import (
23+
"errors"
2324
"testing"
2425

2526
"github.com/stretchr/testify/assert"
2627

2728
"github.com/elastic/beats/v7/metricbeat/helper/windows/pdh"
29+
"github.com/elastic/elastic-agent-libs/logp"
2830
)
2931

3032
func TestGetCounter(t *testing.T) {
@@ -153,3 +155,46 @@ func TestIsWildcard(t *testing.T) {
153155
result = isWildcard(queries, instance)
154156
assert.False(t, result)
155157
}
158+
159+
func Test_handleCollectDataError(t *testing.T) {
160+
tests := []struct {
161+
name string
162+
163+
mockErr error
164+
expectedErrMsg string
165+
}{
166+
{
167+
name: "no counters error",
168+
169+
mockErr: pdh.PDH_NO_COUNTERS,
170+
expectedErrMsg: "",
171+
},
172+
{
173+
name: "no data error",
174+
175+
mockErr: pdh.PDH_NO_DATA,
176+
expectedErrMsg: "",
177+
},
178+
{
179+
name: "unexpected error",
180+
181+
mockErr: errors.New("test error"),
182+
expectedErrMsg: "failed collecting counter values: test error",
183+
},
184+
}
185+
186+
reader := &Reader{
187+
log: logp.NewLogger("perfmon"),
188+
}
189+
190+
for _, tt := range tests {
191+
t.Run(tt.name, func(t *testing.T) {
192+
err := reader.handleCollectDataError(tt.mockErr)
193+
if tt.expectedErrMsg == "" {
194+
assert.NoError(t, err)
195+
} else {
196+
assert.EqualError(t, err, tt.expectedErrMsg)
197+
}
198+
})
199+
}
200+
}

0 commit comments

Comments
 (0)