diff --git a/README.md b/README.md index d3accaf..8735bb9 100644 --- a/README.md +++ b/README.md @@ -2,36 +2,43 @@ ### CGROUP-METRICS -Node Module for reading [cgroup](https://www.kernel.org/doc/Documentation/cgroup-v1/) metrics. Reads from `/sys/fs/cgroup/`. +Node Module for reading [cgroup v1](https://www.kernel.org/doc/Documentation/cgroup-v1/) and [cgroup v2](https://docs.kernel.org/admin-guide/cgroup-v2.html) metrics. Reads from `/sys/fs/cgroup/`. ### Memory Metrics: -[Memory](https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt) reads from path `/sys/fs/cgroup/memory/memory`: -Raw values: -- `stat.rss`: # of bytes of anonymous and swap cache memory -- `kmem.usage_in_bytes`: current kernel memory allocation -- `limit_in_bytes`: limit of memory usage +#### Raw values ([cgroup v1](https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt)): +- `memory.stat.rss`: Resident Set Size - anonymous and swap cache memory (bytes) +- `memory.kmem.usage_in_bytes`: Kernel memory usage (bytes) +- `memory.limit_in_bytes`: Memory limit for the cgroup (bytes) + +#### Raw values ([cgroup v2](https://docs.kernel.org/admin-guide/cgroup-v2.html#memory)): +- `memory.stat.anon`: Anonymous memory usage (bytes) +- `memory.stat`: Sum of `kernel_stack + slab + percpu + sock + vmalloc` (bytes) +- `memory.max`: Memory limit for the cgroup (bytes, or "max" for unlimited) + +#### Calculated values: +- `containerUsage()`: Total memory usage (combines anonymous memory + kernel memory) +- `containerUsagePercentage()`: Memory usage as percentage of limit -Calculated values: -- `containerUsage()`: `stats.rss` + `kmem.usage_in_bytes` -- `containerUsagePercentage()`:`stats.rss` + `kmem.usage_in_bytes` / `limit_in_bytes` ### CPU Metrics: -Raw CPU values: -[CPU](https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt) reads from path `/sys/fs/cgroup/`: +#### Raw values ([cgroup v1](https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt)): +- `cpuacct.usage`: Total CPU time 
consumed by all tasks (nanoseconds) +- `cpuacct.stat.user`: CPU time spent in user mode +- `cpuacct.stat.system`: CPU time spent in kernel mode -- `cpuacct.usage`: total CPU time (in nanoseconds) since the start of the container obtained by this cgroup (CPU time obtained by all the tasks) in the system -- `cpuacct.stat`: reports the user and system CPU time consumed by all tasks in this cgroup (including tasks lower in the hierarchy) - - `user`: CPU time (in nanoseconds) spent by tasks of the cgroup in user mode - - `system`: CPU time (in nanoseconds) spent by tasks of the cgroup in kernel mode - - `timestamp`: timestamp of when the measurement was taken +#### Raw values ([cgroup v2](https://docs.kernel.org/admin-guide/cgroup-v2.html#cpu)): +- `cpu.stat.usage_usec`: Total CPU time consumed by all tasks (microseconds) +- `cpu.stat.user_usec`: CPU time spent in user mode (microseconds) +- `cpu.stat.system_usec`: CPU time spent in kernel mode (microseconds) -Both calls will return an object containing one or more `CpuMetric` objects for a specific cpu task: -- `cpuNanosSinceContainerStart`: total CPU time (in nanoseconds) since the start of the container obtained by this cgroup in the system +#### Returned CPU metrics format: +All CPU metrics include: +- `cpuNanosSinceContainerStart`: total CPU time since container start - `timestamp`: timestamp of when the measurement was taken -Calculated CPU values: +#### Calculated CPU values: - `calculateUsage`: takes two instances of calls to `cpuacct.usage` or `cpuacct.stat` and returns the calculated usage in percentage of CPU time: ` second time since container start - first time since container start / total time` @@ -113,23 +120,40 @@ console.log(`Memory usage in the container: ${metrics["memory.containerUsage"]}` ### Error Handling -If there is no container running or there is an issue reading the file path, the function call will error something like this: +#### File System Errors + +If there is no container running or 
cgroup files are missing: +``` +Error: Error reading file /sys/fs/cgroup/memory/memory.stat. ENOENT: no such file or directory, open '/sys/fs/cgroup/memory/memory.stat' +``` + +If cgroup files are empty: ``` -Error: Error reading file /sys/fs/cgroup/memory/memory.stat, Message: ENOENT: no such file or directory, open '/sys/fs/cgroup/memory/memory.stat' +Error: Error reading file /sys/fs/cgroup/memory/memory.stat. File is empty: /sys/fs/cgroup/memory/memory.stat. ``` -If one of the files is empty, it will return an error like this: +#### Data Validation Errors + +For malformed memory metrics: +``` +Error: One or more metrics are malformed. rss: 1234, kmemUsage: NaN +``` + +For malformed CPU metrics: ``` -Error: Error reading file /sys/fs/cgroup/memory/memory.stat, Message: File is empty +Error: Error reading file /sys/fs/cgroup/cpuacct/cpuacct.stat. Malformed cpuacct.stat file: invalid CPU fields ``` -If a file is malformed, it will return an error like this: +#### cgroup v2 Specific Errors + +For malformed cgroup v2 memory data: ``` -Error: One or more metrics are malformed. 
/**
 * Read one cgroup metric from `/sys/fs/cgroup/` and parse it.
 *
 * Callers always pass the cgroup v1 relative path (e.g. 'memory/memory.stat').
 * When the host uses cgroup v2 (detected by the presence of
 * `/sys/fs/cgroup/cgroup.controllers`) the path is translated to the v2 file
 * that carries the equivalent data.
 *
 * Return value by metric:
 *  - 'memory/memory.stat'                -> number: `rss` (v1) or `anon` (v2)
 *  - 'memory/memory.kmem.usage_in_bytes' -> number: file content (v1) or the sum of
 *                                           kernel_stack + slab + percpu + sock + vmalloc (v2)
 *  - 'memory/memory.limit_in_bytes'      -> number: parsed integer; NaN when the file does
 *                                           not start with digits (e.g. v2 `memory.max` = "max")
 *  - 'cpuacct/cpuacct.usage'             -> { cpuNanosSinceContainerStart, timestamp }
 *  - 'cpuacct/cpuacct.stat'              -> { user: {...}, system: {...} } with the same shape
 *  - any other metric                    -> number: the file content parsed as a base-10 integer
 *
 * @param {string} metric Metric path to read (using cgroup v1 format) e.g., 'memory/memory.stat'
 * @returns {number|Object} Parsed metric value
 * @throws {Error} "Error reading file <path>. <reason>" when the file cannot be read,
 *                 is empty, or lacks a required numeric field. The underlying error is
 *                 attached as `cause` on runtimes that support it.
 */
function readandFormatMetric(metric) {
  // The unified (v2) hierarchy exposes cgroup.controllers at the mount root.
  const isV2 = fs.existsSync('/sys/fs/cgroup/cgroup.controllers');

  // Map v1 paths to the v2 file holding the same information.
  let filePath = metric;
  if (isV2) {
    const pathMap = {
      'memory/memory.stat': 'memory.stat',
      'memory/memory.kmem.usage_in_bytes': 'memory.stat',
      'memory/memory.limit_in_bytes': 'memory.max',
      'cpuacct/cpuacct.usage': 'cpu.stat',
      'cpuacct/cpuacct.stat': 'cpu.stat'
    };
    filePath = pathMap[metric] || metric;
  }

  try {
    const data = fs.readFileSync(`/sys/fs/cgroup/${filePath}`).toString();
    if (data.length === 0) {
      throw new Error(`File is empty: /sys/fs/cgroup/${filePath}.`);
    }

    switch (metric) {
      case 'memory/memory.stat': {
        // v2 reports anonymous memory as `anon`; v1 reports it as `rss`.
        const field = isV2 ? 'anon' : 'rss';
        const stats = parseKeyValueData(data);
        if (!isValidNumber(stats[field])) {
          throw new Error(`Malformed memory.stat file: invalid ${field} field`);
        }
        return stats[field];
      }

      case 'memory/memory.kmem.usage_in_bytes': {
        if (isV2) {
          // v2 has no single kernel-memory counter; approximate it from memory.stat.
          // Fields that are absent count as 0.
          const stats = parseKeyValueData(data);
          return ['kernel_stack', 'slab', 'percpu', 'sock', 'vmalloc']
            .reduce((sum, field) => sum + (stats[field] || 0), 0);
        }
        return parseInt(data.trim(), 10);
      }

      case 'memory/memory.limit_in_bytes':
        // Same parsing for v1 and v2. A v2 `memory.max` of "max" yields NaN here,
        // which the caller is expected to detect.
        return parseInt(data.trim(), 10);

      case 'cpuacct/cpuacct.usage': {
        if (isV2) {
          const stats = parseKeyValueData(data);
          if (!isValidNumber(stats.usage_usec)) {
            throw new Error('Malformed cpu.stat file: invalid usage_usec field');
          }
          return {
            cpuNanosSinceContainerStart: stats.usage_usec * 1000, // microseconds -> nanoseconds
            timestamp: Date.now()
          };
        }
        const timestamp = Date.now();
        return { cpuNanosSinceContainerStart: parseInt(data.trim(), 10), timestamp };
      }

      case 'cpuacct/cpuacct.stat': {
        const timestamp = Date.now();
        const stats = parseKeyValueData(data);
        if (isV2) {
          if (!isValidNumber(stats.user_usec) || !isValidNumber(stats.system_usec)) {
            throw new Error('Malformed cpu.stat file: invalid CPU fields');
          }
          // Convert microseconds to USER_HZ ticks so v2 results match what v1 reports.
          // https://docs.kernel.org/admin-guide/cgroup-v1/cpuacct.html
          const USER_HZ = 100;
          const userTicks = Math.round(stats.user_usec / (1000000 / USER_HZ));
          const systemTicks = Math.round(stats.system_usec / (1000000 / USER_HZ));
          return {
            user: { cpuNanosSinceContainerStart: userTicks, timestamp },
            system: { cpuNanosSinceContainerStart: systemTicks, timestamp }
          };
        }
        if (!isValidNumber(stats.user) || !isValidNumber(stats.system)) {
          throw new Error('Malformed cpuacct.stat file: invalid CPU fields');
        }
        return {
          user: { cpuNanosSinceContainerStart: stats.user, timestamp },
          system: { cpuNanosSinceContainerStart: stats.system, timestamp }
        };
      }

      default:
        // Any other metric is treated as a single-integer file. Without this
        // fallback the function would silently return undefined.
        return parseInt(data.trim(), 10);
    }
  } catch (e) {
    // Keep the message format callers and tests match on; attach the original error for debugging.
    throw new Error(`Error reading file /sys/fs/cgroup/${filePath}. ${e.message || e}`, { cause: e });
  }
}

/**
 * Tell whether a value is a usable number.
 * @param {*} value Value to check
 * @returns {boolean} true only for number primitives other than NaN
 */
function isValidNumber(value) {
  return typeof value === 'number' && !Number.isNaN(value);
}

/**
 * Parse "key value" formatted text (one pair per line) into an object.
 * Leading/trailing whitespace and blank lines are ignored; lines with fewer
 * than two tokens, or whose second token does not parse as a base-10 integer,
 * are skipped.
 * @param {string} data Raw file content
 * @returns {Object} Map of key -> integer value
 */
function parseKeyValueData(data) {
  const stats = {};
  for (const line of data.split('\n')) {
    const parts = line.trim().split(/\s+/);
    if (parts.length < 2) {
      continue;
    }
    const value = parseInt(parts[1], 10);
    if (!Number.isNaN(value)) {
      stats[parts[0]] = value;
    }
  }
  return stats;
}
'/sys/fs/cgroup/memory/memory.stat'") + assert.equal(e.message, "Error reading file /sys/fs/cgroup/memory/memory.stat. ENOENT: no such file or directory, open '/sys/fs/cgroup/memory/memory.stat'") } }); @@ -208,7 +208,7 @@ describe('cgroup Metrics', function() { assert.fail('failure expected'); } catch (e) { console.log(`test expected to fail: ${e}`) - assert.equal(e.message, "Error reading file /sys/fs/cgroup/cpuacct/cpuacct.usage, Message: ENOENT: no such file or directory, open '/sys/fs/cgroup/cpuacct/cpuacct.usage'") + assert.equal(e.message, "Error reading file /sys/fs/cgroup/cpuacct/cpuacct.usage. ENOENT: no such file or directory, open '/sys/fs/cgroup/cpuacct/cpuacct.usage'") } }); @@ -220,7 +220,7 @@ describe('cgroup Metrics', function() { assert.fail('failure expected'); } catch (e) { console.log(`test expected to fail: ${e}`) - assert.equal(e.message, "Error reading file /sys/fs/cgroup/memory/memory.stat, Message: File is empty") + assert.equal(e.message, "Error reading file /sys/fs/cgroup/memory/memory.stat. File is empty: /sys/fs/cgroup/memory/memory.stat.") } }); @@ -273,7 +273,7 @@ describe('cgroup Metrics', function() { } catch (e) { threw = true; console.log(`test expected to fail: ${e}`) - assert.equal(e.message, "Error reading file /sys/fs/cgroup/memory/memory.stat, Message: Cannot read property 'split' of undefined") + assert.equal(e.message, "Error reading file /sys/fs/cgroup/memory/memory.stat. Malformed memory.stat file: invalid rss field") } assert.ok(threw); @@ -285,11 +285,34 @@ describe('cgroup Metrics', function() { } catch (e) { threw = true; console.log(`test expected to fail: ${e}`) - assert.equal(e.message, "Error reading file /sys/fs/cgroup/cpuacct/cpuacct.stat, Message: Cannot read property 'split' of undefined") + assert.equal(e.message, "Error reading file /sys/fs/cgroup/cpuacct/cpuacct.stat. 
Malformed cpuacct.stat file: invalid CPU fields") } assert.ok(threw); }); + it('should handle cgroup v1 parsing with extra whitespace and formatting variations', async () => { + mockFs({ + '/sys/fs/cgroup': { + 'memory': { + 'memory.stat':' cache 2453 \nrss 1234\n\n', // extra whitespace + 'memory.kmem.usage_in_bytes':'5432', + 'memory.limit_in_bytes': '9999' + }, + 'cpuacct': { + 'cpuacct.usage': '1000', + 'cpuacct.stat': ' user 2000 \n system 3000\n' // extra whitespace + } + } + }) + + const containerUsage = memory.containerUsage(); + assert.equal(containerUsage, 6666); + + const stat = cpu.stat(); + assert.equal(stat.user.cpuNanosSinceContainerStart, 2000); + assert.equal(stat.system.cpuNanosSinceContainerStart, 3000); + }); + it('should use the total memory of the host if the memory limit is set to unlimited', async () => { mockFs({ '/sys/fs/cgroup/memory': { @@ -310,4 +333,129 @@ describe('cgroup Metrics', function() { osMock.totalmem = originalOSTotalMem; }); + // Test cgroup v2 support + describe('cgroup v2 support', function() { + it('should work with cgroup v2 memory metrics', async () => { + mockFs({ + '/sys/fs/cgroup': { + 'cgroup.controllers': 'memory cpu', + 'memory.stat': 'anon 1234\nfile 2000\nkernel_stack 500\nslab 1000\npercpu 200\nsock 100\nvmalloc 50\n', + 'memory.max': '9999' + } + }); + + const containerUsage = memory.containerUsage(); + // 1234+500+1000+200+100+50 = 3084 + assert.equal(containerUsage, 3084); + + const containerUsagePercentage = memory.containerUsagePercentage(); + assert.equal(containerUsagePercentage, 3084/9999 * 100); + }); + + it('should work with cgroup v2 CPU metrics', async () => { + mockFs({ + '/sys/fs/cgroup': { + 'cgroup.controllers': 'memory cpu', + 'cpu.stat': 'usage_usec 1000000\nuser_usec 20000000\nsystem_usec 30000000\n' + } + }); + + const usage = cpu.usage(); + assert.equal(usage.cpuNanosSinceContainerStart, 1000000000); + assert.equal(typeof usage.timestamp, "number"); + + const stat = cpu.stat(); + 
assert.equal(stat.user.cpuNanosSinceContainerStart, 2000); // 20,000,000 usec converted to USER_HZ ticks + assert.equal(stat.system.cpuNanosSinceContainerStart, 3000); // 30,000,000 usec converted to USER_HZ ticks + assert.equal(typeof stat.user.timestamp, "number"); + assert.equal(typeof stat.system.timestamp, "number"); + }); + + it('should throw error for cgroup v2 malformed memory limit (memory.max = "max")', async () => { + mockFs({ + '/sys/fs/cgroup': { + 'cgroup.controllers': 'memory cpu', + 'memory.stat': 'anon 1234\nfile 2000\nkernel_stack 500\nslab 1000\npercpu 200\nsock 100\nvmalloc 50\n', + 'memory.max': 'max' + } + }); + + try { + memory.containerUsagePercentage(); + assert.fail('Expected error was not thrown'); + } catch (e) { + console.log(`test expected to fail: ${e}`) + assert.equal(e.message, 'One or more metrics are malformed. containerUsage: 3084, limit: NaN'); + } + }); + + it('should handle cgroup v2 error cases', async () => { + // Missing files + mockFs({ + '/sys/fs/cgroup': { + 'cgroup.controllers': 'memory cpu' + } + }); + + try { + memory.containerUsage(); + assert.fail('failure expected'); + } catch (e) { + + assert.equal(e.message, "Error reading file /sys/fs/cgroup/memory.stat. ENOENT: no such file or directory, open '/sys/fs/cgroup/memory.stat'"); + } + + try { + cpu.usage(); + assert.fail('failure expected'); + } catch (e) { + + assert.equal(e.message, "Error reading file /sys/fs/cgroup/cpu.stat. ENOENT: no such file or directory, open '/sys/fs/cgroup/cpu.stat'"); + } + + // Missing required fields + mockFs({ + '/sys/fs/cgroup': { + 'cgroup.controllers': 'memory cpu', + 'memory.stat': 'file 2000\n', // Missing anon + 'cpu.stat': 'some_field 1000\n' // Missing usage_usec + } + }); + + try { + memory.containerUsage(); + assert.fail('failure expected'); + } catch (e) { + + assert.equal(e.message, "Error reading file /sys/fs/cgroup/memory.stat. 
Malformed memory.stat file: invalid anon field"); + } + + try { + cpu.usage(); + assert.fail('failure expected'); + } catch (e) { + + assert.equal(e.message, "Error reading file /sys/fs/cgroup/cpu.stat. Malformed cpu.stat file: invalid usage_usec field"); + } + }); + + it('should get all metrics with cgroup v2', async () => { + mockFs({ + '/sys/fs/cgroup': { + 'cgroup.controllers': 'memory cpu', + 'memory.stat': 'anon 1234\nfile 2000\nkernel_stack 500\nslab 1000\npercpu 200\nsock 100\nvmalloc 50\n', + 'memory.max': '9999', + 'cpu.stat': 'usage_usec 1000000\nuser_usec 20000000\nsystem_usec 30000000\n' + } + }); + + const metrics_object = metrics(); + assert.equal(metrics_object.memory.containerUsage, 3084); + assert.equal(metrics_object.memory.containerUsagePercentage, 3084/9999 * 100); + assert.equal(metrics_object.cpuacct.stat.user.cpuNanosSinceContainerStart, 2000); + assert.equal(metrics_object.cpuacct.stat.system.cpuNanosSinceContainerStart, 3000); + assert.equal(metrics_object.cpuacct.usage.cpuNanosSinceContainerStart, 1000000000); + }); + }); + });