Search K
Appearance
Appearance
📊 SEO元描述:2024年最新Node.js进程监控教程,详解内存CPU监控、性能指标收集、日志管理、告警系统。包含完整监控工具集成和故障诊断,适合高级开发者构建企业级监控体系。
核心关键词:Node.js进程监控2024、性能监控、内存监控、CPU监控、日志管理、告警系统
长尾关键词:Node.js监控怎么做、进程性能监控、内存泄漏检测、CPU使用率监控、Node.js日志管理
通过本节Node.js进程监控,你将系统性掌握:
进程监控是什么?这是对运行中的Node.js应用进行实时观察、数据收集和分析的过程。进程监控是保障生产环境稳定运行的核心技术。
💡 学习建议:监控是生产环境的生命线,完善的监控体系是高质量服务的基础保障
// 🎉 内存监控系统实现
const EventEmitter = require('events');
const fs = require('fs');
const path = require('path');
/**
 * Periodically samples `process.memoryUsage()`, keeps a bounded in-memory
 * history of the samples and raises threshold alerts.
 *
 * Events emitted:
 *  - 'started' / 'stopped' when the sampling timer is armed / cleared
 *  - 'data'  (sample)  after every collection
 *  - 'alert' (alert)   when rss, heapUsed or external exceeds its threshold;
 *                      rate-limited per metric by `alertCooldown`
 *
 * @param {object} [options]
 * @param {number} [options.interval=5000]        Sampling period in ms.
 * @param {number} [options.rssThreshold]         Bytes, default 500 MiB.
 * @param {number} [options.heapThreshold]        Bytes, default 400 MiB.
 * @param {number} [options.externalThreshold]    Bytes, default 100 MiB.
 * @param {number} [options.maxHistorySize=1000]  Max samples retained.
 * @param {number} [options.alertCooldown=60000]  Min ms between alerts of one metric.
 */
class MemoryMonitor extends EventEmitter {
  constructor(options = {}) {
    super();
    this.interval = options.interval || 5000; // sample every 5 seconds
    this.thresholds = {
      rss: options.rssThreshold || 500 * 1024 * 1024, // 500MB
      heapUsed: options.heapThreshold || 400 * 1024 * 1024, // 400MB
      external: options.externalThreshold || 100 * 1024 * 1024 // 100MB
    };
    this.history = [];
    this.maxHistorySize = options.maxHistorySize || 1000;
    this.alertCooldown = options.alertCooldown || 60000; // 1 minute cooldown
    this.lastAlerts = new Map();
    this.isRunning = false;
    this.timer = null;
  }

  /** Arms the sampling timer; a no-op when already running. */
  start() {
    if (this.isRunning) return;
    this.isRunning = true;
    console.log('内存监控启动');
    this.timer = setInterval(() => {
      this.collectMemoryData();
    }, this.interval);
    this.emit('started');
  }

  /** Clears the sampling timer; a no-op when not running. */
  stop() {
    if (!this.isRunning) return;
    this.isRunning = false;
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
    console.log('内存监控停止');
    this.emit('stopped');
  }

  /**
   * Takes one sample, appends it to the history (evicting the oldest entry
   * once `maxHistorySize` is exceeded), checks thresholds and emits 'data'.
   * @returns {object} The sample that was recorded.
   */
  collectMemoryData() {
    const memoryUsage = process.memoryUsage();
    const timestamp = new Date();
    const data = {
      timestamp,
      pid: process.pid,
      rss: memoryUsage.rss,
      heapTotal: memoryUsage.heapTotal,
      heapUsed: memoryUsage.heapUsed,
      external: memoryUsage.external,
      arrayBuffers: memoryUsage.arrayBuffers || 0
    };
    // Append to the bounded history
    this.history.push(data);
    if (this.history.length > this.maxHistorySize) {
      this.history.shift();
    }
    // Threshold checks run before listeners see the sample
    this.checkThresholds(data);
    this.emit('data', data);
    return data;
  }

  /**
   * Triggers an alert for every monitored metric whose value is strictly
   * above its configured threshold.
   * @param {object} data A sample produced by collectMemoryData().
   */
  checkThresholds(data) {
    const checks = [
      { name: 'rss', value: data.rss, threshold: this.thresholds.rss },
      { name: 'heapUsed', value: data.heapUsed, threshold: this.thresholds.heapUsed },
      { name: 'external', value: data.external, threshold: this.thresholds.external }
    ];
    checks.forEach(check => {
      if (check.value > check.threshold) {
        this.triggerAlert(check.name, check.value, check.threshold);
      }
    });
  }

  /**
   * Logs and emits an 'alert' unless one for the same metric was emitted
   * within the last `alertCooldown` milliseconds.
   * @param {string} type       Metric name ('rss', 'heapUsed', 'external').
   * @param {number} value      Observed value in bytes.
   * @param {number} threshold  Configured limit in bytes.
   */
  triggerAlert(type, value, threshold) {
    const now = Date.now();
    const lastAlert = this.lastAlerts.get(type) || 0;
    // Still inside the cooldown window for this metric
    if (now - lastAlert < this.alertCooldown) {
      return;
    }
    this.lastAlerts.set(type, now);
    const alert = {
      type: 'memory_threshold',
      metric: type,
      value: value,
      threshold: threshold,
      percentage: ((value / threshold) * 100).toFixed(2),
      timestamp: new Date(),
      pid: process.pid
    };
    console.warn(`内存告警: ${type} = ${this.formatBytes(value)} (阈值: ${this.formatBytes(threshold)})`);
    this.emit('alert', alert);
  }

  /**
   * Formats a byte count with a binary (1024-based) unit, rounded to two
   * decimals, e.g. 1536 -> "1.5 KB".
   *
   * The unit index is clamped to the unit table, so sub-byte values stay in
   * "Bytes" and very large values stay in the largest unit instead of
   * producing "... undefined".
   * @param {number} bytes
   * @returns {string}
   */
  formatBytes(bytes) {
    const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB'];
    if (bytes === 0) return '0 Bytes';
    // NaN / Infinity / non-numbers have no meaningful unit; echo them as-is.
    if (!Number.isFinite(bytes)) return `${bytes} Bytes`;
    // Repeated division avoids floating-point log() rounding at unit
    // boundaries and keeps the index inside the table.
    let magnitude = Math.abs(bytes);
    let unitIndex = 0;
    while (magnitude >= 1024 && unitIndex < sizes.length - 1) {
      magnitude /= 1024;
      unitIndex++;
    }
    return Math.round(bytes / Math.pow(1024, unitIndex) * 100) / 100 + ' ' + sizes[unitIndex];
  }

  /**
   * Compares the first and last sample recorded within the last `minutes`.
   * @param {number} [minutes=10] Look-back window.
   * @returns {object|null} Trend summary, or null with fewer than two samples
   *   in the window. `elapsedMs` is the real time between the two compared
   *   samples, which can be much shorter than the requested window.
   */
  getMemoryTrend(minutes = 10) {
    const cutoff = new Date(Date.now() - minutes * 60 * 1000);
    const recentData = this.history.filter(d => d.timestamp > cutoff);
    if (recentData.length < 2) return null;
    const first = recentData[0];
    const last = recentData[recentData.length - 1];
    const summarize = (start, end) => {
      const change = end - start;
      let trend = 'stable';
      if (change > 0) trend = 'increasing';
      else if (change < 0) trend = 'decreasing';
      return { start, end, change, trend };
    };
    return {
      duration: minutes,
      elapsedMs: last.timestamp - first.timestamp,
      samples: recentData.length,
      rss: summarize(first.rss, last.rss),
      heapUsed: summarize(first.heapUsed, last.heapUsed)
    };
  }

  /**
   * Simple leak heuristic: flags a leak when rss or heapUsed grew faster than
   * 1,000,000 bytes per second between the first and last sample of the last
   * 30 minutes.
   *
   * The rate is computed over the time actually covered by those samples, so
   * a monitor that has only been running for a short while is not diluted by
   * the full 30-minute window. Short histories are correspondingly noisy.
   * @returns {object|null} null when there is not enough data to compute a rate.
   */
  detectMemoryLeak() {
    const trend = this.getMemoryTrend(30); // 30-minute look-back
    if (!trend) return null;
    const elapsedSeconds = trend.elapsedMs / 1000;
    if (!(elapsedSeconds > 0)) return null; // identical timestamps: no rate
    const leakThresholdBytesPerSecond = 1000 * 1000;
    const rssGrowthRate = trend.rss.change / elapsedSeconds;
    const heapGrowthRate = trend.heapUsed.change / elapsedSeconds;
    const isLeak = rssGrowthRate > leakThresholdBytesPerSecond ||
      heapGrowthRate > leakThresholdBytesPerSecond;
    return {
      detected: isLeak,
      rssGrowthRate: rssGrowthRate, // bytes per second
      heapGrowthRate: heapGrowthRate,
      recommendation: isLeak ? '检测到可能的内存泄漏,建议检查代码' : '内存使用正常'
    };
  }

  /**
   * Min / max / average of rss and heapUsed over the whole retained history.
   * @returns {object|null} null when no sample has been collected yet.
   */
  getStatistics() {
    if (this.history.length === 0) return null;
    const latest = this.history[this.history.length - 1];
    const rssValues = this.history.map(d => d.rss);
    const heapValues = this.history.map(d => d.heapUsed);
    return {
      current: latest,
      rss: {
        min: Math.min(...rssValues),
        max: Math.max(...rssValues),
        avg: rssValues.reduce((a, b) => a + b, 0) / rssValues.length
      },
      heap: {
        min: Math.min(...heapValues),
        max: Math.max(...heapValues),
        avg: heapValues.reduce((a, b) => a + b, 0) / heapValues.length
      },
      samples: this.history.length,
      // Milliseconds between the oldest and newest retained sample
      timespan: this.history.length > 1 ?
        this.history[this.history.length - 1].timestamp - this.history[0].timestamp : 0
    };
  }

  /**
   * Synchronously writes metadata, statistics, the leak verdict and the full
   * history to `filePath` as pretty-printed JSON.
   * @param {string} filePath Destination file.
   */
  exportData(filePath) {
    const data = {
      metadata: {
        pid: process.pid,
        nodeVersion: process.version,
        platform: process.platform,
        exportTime: new Date().toISOString()
      },
      statistics: this.getStatistics(),
      memoryLeak: this.detectMemoryLeak(),
      history: this.history
    };
    fs.writeFileSync(filePath, JSON.stringify(data, null, 2));
    console.log(`内存监控数据已导出到: ${filePath}`);
  }
}
// Example: run the memory monitor with a 3-second sampling period
const memoryMonitor = new MemoryMonitor({
  interval: 3000,
  rssThreshold: 200 * 1024 * 1024, // 200MB
  heapThreshold: 150 * 1024 * 1024 // 150MB
});

// Print every sample as it is collected
memoryMonitor.on('data', (data) => {
  console.log(`内存使用: RSS=${memoryMonitor.formatBytes(data.rss)}, Heap=${memoryMonitor.formatBytes(data.heapUsed)}`);
});

// Surface threshold alerts on stderr
memoryMonitor.on('alert', (alert) => {
  console.error('内存告警:', alert);
});

memoryMonitor.start();

// Print a statistics summary once a minute
setInterval(() => {
  const stats = memoryMonitor.getStatistics();
  const leak = memoryMonitor.detectMemoryLeak();
  if (!stats) {
    return; // nothing sampled yet
  }
  console.log('\n=== 内存统计 ===');
  console.log(`当前RSS: ${memoryMonitor.formatBytes(stats.current.rss)}`);
  console.log(`当前Heap: ${memoryMonitor.formatBytes(stats.current.heapUsed)}`);
  console.log(`RSS范围: ${memoryMonitor.formatBytes(stats.rss.min)} - ${memoryMonitor.formatBytes(stats.rss.max)}`);
  if (leak) {
    console.log(`内存泄漏检测: ${leak.detected ? '⚠️ 检测到泄漏' : '✅ 正常'}`);
    console.log(`建议: ${leak.recommendation}`);
  }
  console.log('================\n');
}, 60000);

// CPU monitoring helps identify performance bottlenecks and resource usage:
// 🎉 CPU监控系统实现
const os = require('os');
const { performance } = require('perf_hooks');
/**
 * Periodically samples this process's CPU time via `process.cpuUsage()` and
 * records it together with a snapshot of `os.cpus()` / `os.loadavg()`.
 *
 * Events emitted:
 *  - 'started' / 'stopped'
 *  - 'data'  (sample) after every collection
 *  - 'alert' (alert)  when the process CPU percentage exceeds `threshold`
 *
 * @param {object} [options]
 * @param {number} [options.interval=5000]       Sampling period in ms.
 * @param {number} [options.threshold=80]        Alert threshold in percent.
 * @param {number} [options.maxHistorySize=500]  Max samples retained.
 */
class CPUMonitor extends EventEmitter {
  constructor(options = {}) {
    super();
    this.interval = options.interval || 5000;
    this.threshold = options.threshold || 80; // alert above 80% CPU
    this.history = [];
    this.maxHistorySize = options.maxHistorySize || 500;
    this.isRunning = false;
    this.timer = null;
    // Baseline for the first delta computed by collectCPUData()
    this.lastCpuUsage = process.cpuUsage();
    this.lastTime = performance.now();
  }

  /** Arms the sampling timer; a no-op when already running. */
  start() {
    if (this.isRunning) return;
    this.isRunning = true;
    console.log('CPU监控启动');
    this.timer = setInterval(() => {
      this.collectCPUData();
    }, this.interval);
    this.emit('started');
  }

  /** Clears the sampling timer; a no-op when not running. */
  stop() {
    if (!this.isRunning) return;
    this.isRunning = false;
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
    console.log('CPU监控停止');
    this.emit('stopped');
  }

  /**
   * Takes one sample: CPU time consumed since the previous sample divided by
   * the wall-clock time elapsed, plus system-level information.
   * @returns {object} The sample that was recorded.
   */
  collectCPUData() {
    const currentTime = performance.now();
    const currentCpuUsage = process.cpuUsage();
    const timeDiff = currentTime - this.lastTime; // milliseconds
    const userDiff = currentCpuUsage.user - this.lastCpuUsage.user; // microseconds
    const systemDiff = currentCpuUsage.system - this.lastCpuUsage.system;
    const totalDiff = userDiff + systemDiff;
    // timeDiff * 1000 converts ms to microseconds to match cpuUsage units.
    // A zero or negative interval would yield NaN/Infinity, so report 0.
    const cpuPercent = timeDiff > 0 ? (totalDiff / (timeDiff * 1000)) * 100 : 0;
    const cpus = os.cpus();
    const loadAvg = os.loadavg();
    const data = {
      timestamp: new Date(),
      pid: process.pid,
      process: {
        cpu: Math.min(cpuPercent, 100), // capped at 100%
        user: userDiff,
        system: systemDiff
      },
      system: {
        cores: cpus.length,
        loadAvg: {
          '1min': loadAvg[0],
          '5min': loadAvg[1],
          '15min': loadAvg[2]
        },
        cpuInfo: this.getSystemCPUUsage(cpus)
      }
    };
    // The current reading becomes the baseline for the next delta
    this.lastCpuUsage = currentCpuUsage;
    this.lastTime = currentTime;
    this.history.push(data);
    if (this.history.length > this.maxHistorySize) {
      this.history.shift();
    }
    this.checkThreshold(data);
    this.emit('data', data);
    return data;
  }

  /**
   * Derives a per-core usage percentage from the `times` counters of each
   * entry returned by `os.cpus()` (non-idle time over total time).
   * @param {Array<object>} cpus Result of `os.cpus()`.
   * @returns {Array<object>} One entry per core; `usage` is a 2-decimal string.
   */
  getSystemCPUUsage(cpus) {
    return cpus.map((cpu, index) => {
      const total = Object.values(cpu.times).reduce((acc, time) => acc + time, 0);
      const idle = cpu.times.idle;
      // Guard against all-zero counters, which would otherwise give NaN
      const usage = total > 0 ? ((total - idle) / total) * 100 : 0;
      return {
        core: index,
        model: cpu.model,
        speed: cpu.speed,
        usage: usage.toFixed(2)
      };
    });
  }

  /**
   * Logs and emits an 'alert' when the sample's process CPU percentage is
   * strictly above the configured threshold.
   * @param {object} data A sample produced by collectCPUData().
   */
  checkThreshold(data) {
    if (data.process.cpu > this.threshold) {
      const alert = {
        type: 'cpu_threshold',
        value: data.process.cpu,
        threshold: this.threshold,
        timestamp: data.timestamp,
        pid: process.pid
      };
      console.warn(`CPU告警: 使用率 ${data.process.cpu.toFixed(2)}% (阈值: ${this.threshold}%)`);
      this.emit('alert', alert);
    }
  }

  /**
   * Summarizes process CPU usage over the last `minutes`.
   * @param {number} [minutes=10] Look-back window.
   * @returns {object|null} null with fewer than two samples in the window.
   */
  getCPUTrend(minutes = 10) {
    const cutoff = new Date(Date.now() - minutes * 60 * 1000);
    const recentData = this.history.filter(d => d.timestamp > cutoff);
    if (recentData.length < 2) return null;
    const cpuValues = recentData.map(d => d.process.cpu);
    const avg = cpuValues.reduce((a, b) => a + b, 0) / cpuValues.length;
    const max = Math.max(...cpuValues);
    const min = Math.min(...cpuValues);
    return {
      duration: minutes,
      samples: recentData.length,
      average: avg.toFixed(2),
      maximum: max.toFixed(2),
      minimum: min.toFixed(2),
      trend: this.calculateTrend(cpuValues)
    };
  }

  /**
   * Compares the average of the first third of `values` with the average of
   * the last third; differences under 5 percentage points count as stable.
   *
   * Each window holds at least one value, so two-sample inputs compare the
   * first value with the last rather than averaging an empty slice.
   * @param {number[]} values
   * @returns {'stable'|'increasing'|'decreasing'}
   */
  calculateTrend(values) {
    if (values.length < 2) return 'stable';
    const windowSize = Math.max(1, Math.floor(values.length / 3));
    const first = values.slice(0, windowSize);
    const last = values.slice(-windowSize);
    const firstAvg = first.reduce((a, b) => a + b, 0) / first.length;
    const lastAvg = last.reduce((a, b) => a + b, 0) / last.length;
    const diff = lastAvg - firstAvg;
    if (Math.abs(diff) < 5) return 'stable';
    return diff > 0 ? 'increasing' : 'decreasing';
  }

  /**
   * Min / max / average process CPU percentage over the retained history.
   * @returns {object|null} null when no sample has been collected yet.
   */
  getStatistics() {
    if (this.history.length === 0) return null;
    const latest = this.history[this.history.length - 1];
    const cpuValues = this.history.map(d => d.process.cpu);
    return {
      current: latest,
      cpu: {
        min: Math.min(...cpuValues),
        max: Math.max(...cpuValues),
        avg: cpuValues.reduce((a, b) => a + b, 0) / cpuValues.length
      },
      samples: this.history.length,
      // Milliseconds between the oldest and newest retained sample
      timespan: this.history.length > 1 ?
        this.history[this.history.length - 1].timestamp - this.history[0].timestamp : 0
    };
  }
}

// Comprehensive performance monitoring combines performance data from multiple dimensions:
// 🎉 综合性能监控系统
/**
 * Facade that owns a MemoryMonitor and a CPUMonitor, re-emits their alerts
 * tagged with a `source`, and tracks simple request counters.
 *
 * Events emitted: 'started', 'stopped', 'alert'.
 *
 * @param {object} [options]
 * @param {object} [options.memory] Options forwarded to MemoryMonitor.
 * @param {object} [options.cpu]    Options forwarded to CPUMonitor.
 */
class PerformanceMonitor extends EventEmitter {
  constructor(options = {}) {
    super();
    this.memoryMonitor = new MemoryMonitor(options.memory);
    this.cpuMonitor = new CPUMonitor(options.cpu);
    this.metrics = {
      requests: 0,
      errors: 0,
      responseTime: [],
      // Snapshot taken at construction; reports read process.uptime() live
      uptime: process.uptime()
    };
    this.setupEventHandlers();
  }

  /** Forwards child-monitor alerts as this object's 'alert' event. */
  setupEventHandlers() {
    this.memoryMonitor.on('alert', (alert) => {
      this.emit('alert', { source: 'memory', ...alert });
    });
    this.cpuMonitor.on('alert', (alert) => {
      this.emit('alert', { source: 'cpu', ...alert });
    });
  }

  /** Starts both child monitors. */
  start() {
    this.memoryMonitor.start();
    this.cpuMonitor.start();
    console.log('性能监控系统启动');
    this.emit('started');
  }

  /** Stops both child monitors. */
  stop() {
    this.memoryMonitor.stop();
    this.cpuMonitor.stop();
    console.log('性能监控系统停止');
    this.emit('stopped');
  }

  /**
   * Counts one request and records its duration.
   *
   * The request is always counted; the duration is only stored when it is a
   * finite number, so a bad measurement cannot turn the average into NaN.
   * Only the most recent 1000 durations are kept.
   * @param {number} responseTime Duration of the request.
   */
  recordRequest(responseTime) {
    this.metrics.requests++;
    if (!Number.isFinite(responseTime)) {
      return;
    }
    this.metrics.responseTime.push(responseTime);
    if (this.metrics.responseTime.length > 1000) {
      this.metrics.responseTime.shift();
    }
  }

  /** Counts one failed request. */
  recordError() {
    this.metrics.errors++;
  }

  /**
   * Builds a point-in-time report from both monitors and the request counters.
   * `errorRate` and `avgResponseTime` are always two-decimal strings, also
   * when no request has been recorded yet.
   * @returns {object}
   */
  getPerformanceReport() {
    const memoryStats = this.memoryMonitor.getStatistics();
    const cpuStats = this.cpuMonitor.getStatistics();
    const responseTimes = this.metrics.responseTime;
    const avgResponseTime = responseTimes.length > 0 ?
      responseTimes.reduce((a, b) => a + b, 0) / responseTimes.length : 0;
    const errorRate = this.metrics.requests > 0 ?
      this.metrics.errors / this.metrics.requests * 100 : 0;
    return {
      timestamp: new Date(),
      uptime: process.uptime(),
      memory: memoryStats,
      cpu: cpuStats,
      requests: {
        total: this.metrics.requests,
        errors: this.metrics.errors,
        errorRate: errorRate.toFixed(2),
        avgResponseTime: avgResponseTime.toFixed(2)
      },
      system: {
        platform: process.platform,
        nodeVersion: process.version,
        pid: process.pid
      }
    };
  }

  /**
   * Synchronously writes the current report to `filePath` as pretty JSON.
   * @param {string} filePath Destination file.
   */
  exportReport(filePath) {
    const report = this.getPerformanceReport();
    fs.writeFileSync(filePath, JSON.stringify(report, null, 2));
    console.log(`性能报告已导出到: ${filePath}`);
  }
}
// Express中间件示例
/**
 * Builds an Express-style middleware that reports each finished response to
 * `monitor`.
 *
 * Durations are measured with the monotonic `performance.now()` clock rather
 * than `Date.now()`, so a wall-clock adjustment during a request cannot
 * produce a negative or inflated response time.
 *
 * @param {{recordRequest: function(number): void, recordError: function(): void}} monitor
 * @returns {function(object, object, function): void} Middleware `(req, res, next)`.
 */
function createPerformanceMiddleware(monitor) {
  return (req, res, next) => {
    const startedAt = performance.now();
    res.on('finish', () => {
      const responseTime = performance.now() - startedAt; // milliseconds
      monitor.recordRequest(responseTime);
      // Any 4xx/5xx status is counted as an error
      if (res.statusCode >= 400) {
        monitor.recordError();
      }
    });
    next();
  };
}

// Structured logging provides better searchability and analysis capabilities:
// 🎉 结构化日志系统
const winston = require('winston');
const path = require('path');
/**
 * Thin wrapper around a winston logger that writes JSON logs to the console
 * and to `app.log`, `error.log` and `performance.log` under `logDir`.
 *
 * @param {object} [options]
 * @param {string} [options.logDir='./logs']        Directory for log files (created if missing).
 * @param {string} [options.level='info']           Minimum level for the logger.
 * @param {number} [options.maxFiles=14]            Rotated files kept per transport.
 * @param {number|string} [options.maxSize='20m']   Rotation size: bytes, or a string
 *                                                  with a k/m/g suffix (1024-based).
 */
class LogManager {
  constructor(options = {}) {
    this.logDir = options.logDir || './logs';
    this.level = options.level || 'info';
    this.maxFiles = options.maxFiles || 14;
    this.maxSize = options.maxSize || '20m';
    // The winston File transport's `maxsize` is a byte count; a string such
    // as '20m' would never compare true and rotation would never happen.
    this.maxSizeBytes = LogManager.parseSize(this.maxSize);
    this.createLogDirectory();
    this.setupLogger();
  }

  /**
   * Converts a size such as 1048576, '512k', '20m' or '1g' into bytes.
   * @param {number|string} size
   * @returns {number} Whole number of bytes.
   * @throws {TypeError} When `size` cannot be interpreted.
   */
  static parseSize(size) {
    if (typeof size === 'number' && Number.isFinite(size) && size >= 0) {
      return Math.floor(size);
    }
    const match = /^(\d+(?:\.\d+)?)\s*([kmg])?b?$/i.exec(String(size).trim());
    if (!match) {
      throw new TypeError(`Invalid log size: ${size}`);
    }
    const multipliers = { k: 1024, m: 1024 * 1024, g: 1024 * 1024 * 1024 };
    const unit = match[2] ? match[2].toLowerCase() : null;
    return Math.floor(Number(match[1]) * (unit ? multipliers[unit] : 1));
  }

  /** Creates `logDir` (including parents) when it does not exist yet. */
  createLogDirectory() {
    if (!fs.existsSync(this.logDir)) {
      fs.mkdirSync(this.logDir, { recursive: true });
    }
  }

  /** Builds the winston logger and its four transports. */
  setupLogger() {
    const logFormat = winston.format.combine(
      winston.format.timestamp(),
      winston.format.errors({ stack: true }),
      winston.format.json()
    );
    // Drops every entry that is not tagged type === 'performance'
    const performanceOnly = winston.format((info) => {
      return info.type === 'performance' ? info : false;
    });
    this.logger = winston.createLogger({
      level: this.level,
      format: logFormat,
      defaultMeta: {
        service: 'node-app',
        pid: process.pid,
        hostname: os.hostname()
      },
      transports: [
        // Console output
        new winston.transports.Console({
          format: winston.format.combine(
            winston.format.colorize(),
            winston.format.simple()
          )
        }),
        // Application log
        new winston.transports.File({
          filename: path.join(this.logDir, 'app.log'),
          maxsize: this.maxSizeBytes,
          maxFiles: this.maxFiles
        }),
        // Error log
        new winston.transports.File({
          filename: path.join(this.logDir, 'error.log'),
          level: 'error',
          maxsize: this.maxSizeBytes,
          maxFiles: this.maxFiles
        }),
        // Performance log: filter first so other entries are discarded
        // before any formatting work is done on them
        new winston.transports.File({
          filename: path.join(this.logDir, 'performance.log'),
          level: 'info',
          maxsize: this.maxSizeBytes,
          maxFiles: this.maxFiles,
          format: winston.format.combine(
            performanceOnly(),
            winston.format.timestamp(),
            winston.format.json()
          )
        })
      ]
    });
  }

  /**
   * Logs a performance record at 'info'.
   *
   * The fields are passed as metadata next to a message string so that they
   * (including `type`) end up at the top level of the log entry, where the
   * performance-log filter looks for them. A lone object without a `message`
   * property would be nested under `message` instead.
   * @param {object} data Fields to record.
   */
  logPerformance(data) {
    this.logger.info('performance', {
      type: 'performance',
      ...data
    });
  }

  /**
   * Logs an alert at 'warn'. A `type` carried by the alert itself overrides
   * the default 'alert' tag.
   * @param {object} alert Alert fields to record.
   */
  logAlert(alert) {
    this.logger.warn('alert', {
      type: 'alert',
      ...alert
    });
  }

  /**
   * Logs an Error's message and stack at 'error', merged with `context`.
   * @param {Error} error
   * @param {object} [context={}] Extra fields.
   */
  logError(error, context = {}) {
    this.logger.error({
      type: 'error',
      message: error.message,
      stack: error.stack,
      ...context
    });
  }

  /** Pass-through to `logger.info`. */
  info(message, meta = {}) {
    this.logger.info(message, meta);
  }

  /** Pass-through to `logger.warn`. */
  warn(message, meta = {}) {
    this.logger.warn(message, meta);
  }

  /** Pass-through to `logger.error`. */
  error(message, meta = {}) {
    this.logger.error(message, meta);
  }
}

// Monitoring system integration:
💼 生产环境建议:集成专业监控工具如Prometheus、Grafana、ELK Stack等,构建完整的可观测性体系
通过本节Node.js进程监控的学习,你已经掌握:
A: 基于历史数据分析,设置P95或P99分位数作为阈值,结合业务特点和SLA要求进行调整。
A: 使用数据采样、聚合和压缩技术,设置合理的数据保留策略,重要指标保留更长时间。
A: 使用异步采集、批量发送、本地缓存等技术,控制监控开销在1-2%以内。
A: 结合内存监控趋势、堆快照分析、代码审查等方法,重点关注事件监听器、定时器、闭包等。
A: 分级告警、告警聚合、冷却期设置、多渠道通知,避免告警风暴和疲劳。
// Problem: metric collection / delivery is unreliable
// Solution: buffer the data and retry failed sends
/**
 * Buffers metric records and ships them in batches, retrying failed sends
 * with a linearly growing delay.
 *
 * Subclasses (or instances) must supply `sendToMonitoringSystem(batch)`.
 *
 * @param {object} [options]
 * @param {number} [options.maxBufferSize=1000] Max buffered records; the oldest are dropped beyond this.
 * @param {number} [options.retryAttempts=3]    Send attempts per batch.
 * @param {number} [options.retryDelayMs=1000]  Base delay; attempt N waits N * retryDelayMs.
 */
class ReliableMetricsCollector {
  constructor(options = {}) {
    this.buffer = [];
    this.maxBufferSize = options.maxBufferSize || 1000;
    this.retryAttempts = options.retryAttempts || 3;
    this.retryDelayMs = options.retryDelayMs !== undefined ? options.retryDelayMs : 1000;
  }

  /**
   * Queues one record and flushes once 10 or more records are buffered.
   * The buffer is capped at `maxBufferSize`; when the cap is hit the oldest
   * records are discarded so memory stays bounded while the backend is down.
   * @param {*} data Record to send.
   * @returns {Promise<void>}
   */
  async sendMetrics(data) {
    this.buffer.push(data);
    const overflow = this.buffer.length - this.maxBufferSize;
    if (overflow > 0) {
      this.buffer.splice(0, overflow);
    }
    if (this.buffer.length >= 10) {
      await this.flushBuffer();
    }
  }

  /**
   * Removes up to 100 records from the buffer and tries to send them,
   * retrying up to `retryAttempts` times. After the final failure the error
   * is logged and the batch is dropped, without waiting again.
   * @returns {Promise<void>}
   */
  async flushBuffer() {
    const batch = this.buffer.splice(0, 100);
    if (batch.length === 0) {
      return;
    }
    for (let attempt = 1; attempt <= this.retryAttempts; attempt++) {
      try {
        await this.sendToMonitoringSystem(batch);
        return;
      } catch (error) {
        if (attempt === this.retryAttempts) {
          console.error('监控数据发送失败:', error.message);
          // The batch could be written to a local file here instead
          return;
        }
        // Back off before the next attempt only; no delay after the last one
        await new Promise(resolve => setTimeout(resolve, this.retryDelayMs * attempt));
      }
    }
  }

  /**
   * Transport hook: deliver one batch to the monitoring backend.
   * The default implementation always rejects; override it.
   * @param {Array<*>} batch Records to deliver.
   * @returns {Promise<void>}
   */
  async sendToMonitoringSystem(batch) {
    throw new Error('sendToMonitoringSystem() is not implemented');
  }
}
// Problem: alerts fire too frequently
// Solution: aggregate identical alerts and suppress repeats
/**
 * Aggregates alerts by `${type}_${metric}` and forwards at most one per key
 * per suppression window.
 *
 * Subclasses (or instances) must supply `sendAlert(aggregatedAlert)`.
 *
 * @param {object} [options]
 * @param {number} [options.suppressionMs=300000] Suppression window in ms (5 minutes).
 */
class AlertManager {
  constructor(options = {}) {
    this.alerts = new Map();
    this.suppressionRules = new Map();
    this.suppressionMs = options.suppressionMs !== undefined ? options.suppressionMs : 300000;
  }

  /**
   * Records one occurrence of `alert` and forwards the aggregate unless the
   * key is currently suppressed.
   *
   * Every occurrence is counted, including suppressed ones, so `count` on
   * the aggregate reflects how often the condition fired and not only how
   * often it was forwarded.
   * @param {object} alert Alert with at least a `type`; `metric` is optional.
   */
  processAlert(alert) {
    const key = `${alert.type}_${alert.metric}`;
    const now = Date.now();

    // Aggregate first so suppressed occurrences are not lost
    const existing = this.alerts.get(key);
    if (existing) {
      existing.count++;
      existing.lastOccurrence = now;
    } else {
      this.alerts.set(key, {
        ...alert,
        count: 1,
        firstOccurrence: now,
        lastOccurrence: now
      });
    }

    // Inside the suppression window: counted above, but not forwarded
    const suppression = this.suppressionRules.get(key);
    if (suppression && now < suppression.until) {
      return;
    }

    // Open a new suppression window, then forward the aggregate
    this.suppressionRules.set(key, {
      until: now + this.suppressionMs
    });
    this.sendAlert(this.alerts.get(key));
  }

  /**
   * Delivery hook: forward one aggregated alert to a notification channel.
   * The default implementation always throws; override it.
   * @param {object} alert Aggregated alert (original fields plus count and
   *   firstOccurrence / lastOccurrence timestamps).
   */
  sendAlert(alert) {
    throw new Error('sendAlert() is not implemented');
  }
}
"完善的监控体系是生产环境稳定运行的基石,掌握监控技术让你能够主动发现问题、快速解决故障,成为真正的系统守护者!";