Adding Jobstats support to Lustre2 input plugin
Lustre Jobstats allows for RPCs to be tagged with a value, such as a job's ID. This allows for per job statistics. This plugin collects statistics and tags the data with the jobid. closes #1107
This commit is contained in:
committed by
Cameron Sparr
parent
89f2c0b0a4
commit
a7b0861436
@@ -38,6 +38,23 @@ cache_hit 7393729777 samples [pages] 1 1 7393729777
|
||||
cache_miss 11653333250 samples [pages] 1 1 11653333250
|
||||
`
|
||||
|
||||
const obdfilterJobStatsContents = `job_stats:
|
||||
- job_id: testjob1
|
||||
snapshot_time: 1461772761
|
||||
read_bytes: { samples: 1, unit: bytes, min: 4096, max: 4096, sum: 4096 }
|
||||
write_bytes: { samples: 25, unit: bytes, min: 1048576, max: 1048576, sum: 26214400 }
|
||||
getattr: { samples: 0, unit: reqs }
|
||||
setattr: { samples: 0, unit: reqs }
|
||||
punch: { samples: 1, unit: reqs }
|
||||
sync: { samples: 0, unit: reqs }
|
||||
destroy: { samples: 0, unit: reqs }
|
||||
create: { samples: 0, unit: reqs }
|
||||
statfs: { samples: 0, unit: reqs }
|
||||
get_info: { samples: 0, unit: reqs }
|
||||
set_info: { samples: 0, unit: reqs }
|
||||
quotactl: { samples: 0, unit: reqs }
|
||||
`
|
||||
|
||||
const mdtProcContents = `snapshot_time 1438693238.20113 secs.usecs
|
||||
open 1024577037 samples [reqs]
|
||||
close 873243496 samples [reqs]
|
||||
@@ -57,6 +74,27 @@ samedir_rename 259625 samples [reqs]
|
||||
crossdir_rename 369571 samples [reqs]
|
||||
`
|
||||
|
||||
const mdtJobStatsContents = `job_stats:
|
||||
- job_id: testjob1
|
||||
snapshot_time: 1461772761
|
||||
open: { samples: 5, unit: reqs }
|
||||
close: { samples: 4, unit: reqs }
|
||||
mknod: { samples: 6, unit: reqs }
|
||||
link: { samples: 8, unit: reqs }
|
||||
unlink: { samples: 90, unit: reqs }
|
||||
mkdir: { samples: 521, unit: reqs }
|
||||
rmdir: { samples: 520, unit: reqs }
|
||||
rename: { samples: 9, unit: reqs }
|
||||
getattr: { samples: 11, unit: reqs }
|
||||
setattr: { samples: 1, unit: reqs }
|
||||
getxattr: { samples: 3, unit: reqs }
|
||||
setxattr: { samples: 4, unit: reqs }
|
||||
statfs: { samples: 1205, unit: reqs }
|
||||
sync: { samples: 2, unit: reqs }
|
||||
samedir_rename: { samples: 705, unit: reqs }
|
||||
crossdir_rename: { samples: 200, unit: reqs }
|
||||
`
|
||||
|
||||
func TestLustre2GeneratesMetrics(t *testing.T) {
|
||||
|
||||
tempdir := os.TempDir() + "/telegraf/proc/fs/lustre/"
|
||||
@@ -83,6 +121,7 @@ func TestLustre2GeneratesMetrics(t *testing.T) {
|
||||
err = ioutil.WriteFile(obddir+"/"+ost_name+"/stats", []byte(obdfilterProcContents), 0644)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Begin by testing standard Lustre stats
|
||||
m := &Lustre2{
|
||||
Ost_procfiles: []string{obddir + "/*/stats", osddir + "/*/stats"},
|
||||
Mds_procfiles: []string{mdtdir + "/*/md_stats"},
|
||||
@@ -128,3 +167,82 @@ func TestLustre2GeneratesMetrics(t *testing.T) {
|
||||
err = os.RemoveAll(os.TempDir() + "/telegraf")
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestLustre2GeneratesJobstatsMetrics(t *testing.T) {
|
||||
|
||||
tempdir := os.TempDir() + "/telegraf/proc/fs/lustre/"
|
||||
ost_name := "OST0001"
|
||||
job_name := "testjob1"
|
||||
|
||||
mdtdir := tempdir + "/mdt/"
|
||||
err := os.MkdirAll(mdtdir+"/"+ost_name, 0755)
|
||||
require.NoError(t, err)
|
||||
|
||||
obddir := tempdir + "/obdfilter/"
|
||||
err = os.MkdirAll(obddir+"/"+ost_name, 0755)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = ioutil.WriteFile(mdtdir+"/"+ost_name+"/job_stats", []byte(mdtJobStatsContents), 0644)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = ioutil.WriteFile(obddir+"/"+ost_name+"/job_stats", []byte(obdfilterJobStatsContents), 0644)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Test Lustre Jobstats
|
||||
m := &Lustre2{
|
||||
Ost_procfiles: []string{obddir + "/*/job_stats"},
|
||||
Mds_procfiles: []string{mdtdir + "/*/job_stats"},
|
||||
}
|
||||
|
||||
var acc testutil.Accumulator
|
||||
|
||||
err = m.Gather(&acc)
|
||||
require.NoError(t, err)
|
||||
|
||||
tags := map[string]string{
|
||||
"name": ost_name,
|
||||
"jobid": job_name,
|
||||
}
|
||||
|
||||
fields := map[string]interface{}{
|
||||
"jobstats_read_calls": uint64(1),
|
||||
"jobstats_read_min_size": uint64(4096),
|
||||
"jobstats_read_max_size": uint64(4096),
|
||||
"jobstats_read_bytes": uint64(4096),
|
||||
"jobstats_write_calls": uint64(25),
|
||||
"jobstats_write_min_size": uint64(1048576),
|
||||
"jobstats_write_max_size": uint64(1048576),
|
||||
"jobstats_write_bytes": uint64(26214400),
|
||||
"jobstats_ost_getattr": uint64(0),
|
||||
"jobstats_ost_setattr": uint64(0),
|
||||
"jobstats_punch": uint64(1),
|
||||
"jobstats_ost_sync": uint64(0),
|
||||
"jobstats_destroy": uint64(0),
|
||||
"jobstats_create": uint64(0),
|
||||
"jobstats_ost_statfs": uint64(0),
|
||||
"jobstats_get_info": uint64(0),
|
||||
"jobstats_set_info": uint64(0),
|
||||
"jobstats_quotactl": uint64(0),
|
||||
"jobstats_open": uint64(5),
|
||||
"jobstats_close": uint64(4),
|
||||
"jobstats_mknod": uint64(6),
|
||||
"jobstats_link": uint64(8),
|
||||
"jobstats_unlink": uint64(90),
|
||||
"jobstats_mkdir": uint64(521),
|
||||
"jobstats_rmdir": uint64(520),
|
||||
"jobstats_rename": uint64(9),
|
||||
"jobstats_getattr": uint64(11),
|
||||
"jobstats_setattr": uint64(1),
|
||||
"jobstats_getxattr": uint64(3),
|
||||
"jobstats_setxattr": uint64(4),
|
||||
"jobstats_statfs": uint64(1205),
|
||||
"jobstats_sync": uint64(2),
|
||||
"jobstats_samedir_rename": uint64(705),
|
||||
"jobstats_crossdir_rename": uint64(200),
|
||||
}
|
||||
|
||||
acc.AssertContainsTaggedFields(t, "lustre2", fields, tags)
|
||||
|
||||
err = os.RemoveAll(os.TempDir() + "/telegraf")
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user