Add threshold calculation methods

This commit is contained in:
Stephen Papierski 2023-11-08 14:50:03 -07:00
parent 2ff8e579e1
commit d73a17fd8a
5 changed files with 171 additions and 81 deletions

View file

@ -3,9 +3,11 @@ exports.up = function (knex) {
return knex.schema
.alterTable("monitor", function(table) {
table.boolean("slow_response_notification").notNullable().defaultTo(false);
table.integer("slow_response_notification_threshold").notNullable().defaultTo(0);
table.integer("slow_response_notification_range").notNullable().defaultTo(0);
table.string("slow_response_notification_method").notNullable().defaultTo("");
table.integer("slow_response_notification_range").notNullable().defaultTo(0);
table.string("slow_response_notification_threshold_method").notNullable().defaultTo("");
table.integer("slow_response_notification_threshold").notNullable().defaultTo(0);
table.float("slow_response_notification_threshold_multiplier").notNullable().defaultTo(0.0);
table.integer("slow_response_notification_resend_interval").notNullable().defaultTo(0);
})
.alterTable("heartbeat", function(table) {
@ -14,15 +16,18 @@ exports.up = function (knex) {
}
exports.down = function (knex) {
// remove various slow_response_notification parameters
return knex.schema
.alterTable("monitor", function(table) {
table.boolean("slow_response_notification").notNullable().defaultTo(false);
table.integer("slow_response_notification_threshold").notNullable().defaultTo(0);
table.integer("slow_response_notification_range").notNullable().defaultTo(0);
table.string("slow_response_notification_method").notNullable().defaultTo("");
table.integer("slow_response_notification_resend_interval").notNullable().defaultTo(0);
table.dropColumn("slow_response_notification");
table.dropColumn("slow_response_notification_method");
table.dropColumn("slow_response_notification_range");
table.dropColumn("slow_response_notification_threshold_method");
table.dropColumn("slow_response_notification_threshold");
table.dropColumn("slow_response_notification_threshold_multiplier");
table.dropColumn("slow_response_notification_resend_interval");
})
.alterTable("heartbeat", function(table) {
table.integer("slow_response_count").notNullable().defaultTo(0);
table.dropColumn("slow_response_count");
});
}

View file

@ -133,9 +133,11 @@ class Monitor extends BeanModel {
databaseQuery: this.databaseQuery,
authMethod: this.authMethod,
slowResponseNotification: this.isEnabledSlowResponseNotification(),
slowResponseNotificationThreshold: this.slowResponseNotificationThreshold,
slowResponseNotificationRange: this.slowResponseNotificationRange,
slowResponseNotificationMethod: this.slowResponseNotificationMethod,
slowResponseNotificationRange: this.slowResponseNotificationRange,
slowResponseNotificationThresholdMethod: this.slowResponseNotificationThresholdMethod,
slowResponseNotificationThreshold: this.slowResponseNotificationThreshold,
slowResponseNotificationThresholdMultiplier: this.slowResponseNotificationThresholdMultiplier,
slowResponseNotificationResendInterval: this.slowResponseNotificationResendInterval,
grpcUrl: this.grpcUrl,
grpcProtobuf: this.grpcProtobuf,
@ -949,7 +951,7 @@ class Monitor extends BeanModel {
bean.end_time = R.isoDateTimeMillis(endTimeDayjs);
// Check if response time is slow
if (this.isEnabledSlowResponseNotification()) {
if (this.isEnabledSlowResponseNotification() && !isFirstBeat) {
log.debug("monitor", `[${this.name}] Check if response is slow`);
await this.checkSlowResponseNotification(this, bean);
}
@ -1423,28 +1425,33 @@ class Monitor extends BeanModel {
}
/**
* Check heartbeat response time is slower than threshold.
* Check if heartbeat response time is slower than threshold.
* @param {Monitor} monitor The monitor to send a notification about
* @param {Bean} bean Status information about monitor
* @returns {Promise<void>}
*/
async checkSlowResponseNotification(monitor, bean) {
//Get recent heartbeat list with range of time
const afterThisDate = new Date(Date.now() - (1000 * (monitor.slowResponseNotificationRange + 1))); // add 1 second otherwise we grab 0 previous beats when Time Range == Heartbeat Interval
const previousBeats = await R.getAll(`
SELECT * FROM heartbeat
WHERE monitor_id = ? AND time > datetime(?) AND status = ?`,
[
monitor.id,
afterThisDate.toISOString(),
UP,
]);
const method = monitor.slowResponseNotificationMethod;
const thresholdResponseTime = monitor.slowResponseNotificationThreshold;
const thresholdMethod = monitor.slowResponseNotificationThresholdMethod;
const thresholdMultipler = monitor.slowResponseNotificationThresholdMultiplier;
const windowDuration = monitor.slowResponseNotificationRange;
let actualResponseTime = 0;
let previousBeats;
if (method != "last") {
//Get recent heartbeat list with range of time
const afterThisDate = new Date(Date.now() - (1000 * (monitor.slowResponseNotificationRange + 1))); // add 1 second otherwise we grab 0 previous beats when Time Range == Heartbeat Interval
previousBeats = await R.getAll(`
SELECT * FROM heartbeat
WHERE monitor_id = ? AND time > datetime(?) AND status = ?`,
[
monitor.id,
afterThisDate.toISOString(),
UP,
]);
}
switch (method) {
case "average":
previousBeats.forEach(beat => {
@ -1460,58 +1467,98 @@ class Monitor extends BeanModel {
break;
case "last":
actualResponseTime = bean.ping
actualResponseTime = bean.ping;
break;
default:
log.error("monitor", `[${this.name}] Unknown slow response notification method ${method}`);
log.error("monitor", `[${this.name}] Unknown response time calculation method for slow response notification: ${method}`);
return;
}
let threshold;
let thresholdDescription;
switch (thresholdMethod) {
case "threshold-static":
threshold = monitor.slowResponseNotificationThreshold;
thresholdDescription = "static";
break;
case "threshold-relative-24-hour":
//Get average response time over last 24 hours
const afterThisDate = new Date(Date.now() - (1000 * (24 * 60 * 60))); // 24 hours in milliseconds
const avgPing = parseInt(await R.getCell(`
SELECT AVG(ping) FROM heartbeat
WHERE time > datetime(?)
AND ping IS NOT NULL
AND monitor_id = ?
AND status = ?
`,
[ afterThisDate.toISOString(), monitor.id, UP ]
));
//calculate threshold
threshold = Math.round(avgPing * thresholdMultipler);
thresholdDescription = `${thresholdMultipler}x 24H Avg`;
break;
default:
log.error("monitor", `[${this.name}] Unknown threshold calculation method for slow response notification: ${thresholdMethod}`);
return;
}
// Create stats to append to messages/logs
let msgStats = `\nResponse: ${actualResponseTime}ms | Threshold: ${thresholdResponseTime}ms | Method: ${method}`
const methodDescription = ["average", "max"].includes(method) ? `${method} of ${windowDuration}s` : method;
let msgStats = `Response: ${actualResponseTime}ms (${methodDescription}) | Threshold: ${threshold}ms (${thresholdDescription})`
// Add window duration for methods that make sense
if (["average", "max"].includes(method)) {
msgStats += ` over ${windowDuration}s`
// Verify valid response time was calculated
if (actualResponseTime == 0 || !Number.isInteger(actualResponseTime)) {
log.debug("monitor", `[${this.name}] Failed to calculate valid response time`);
return;
}
// Verify something was actually calculated
if (actualResponseTime != 0 && Number.isInteger(actualResponseTime)) {
// Responding normally
if (actualResponseTime < thresholdResponseTime) {
if (bean.slowResponseCount == 0) {
log.debug("monitor", `[${this.name}] Responding normally. No need to send slow response notification ${msgStats}`);
} else {
log.debug("monitor", `[${this.name}] Returned to normal response time ${msgStats}`);
let msg = `[${this.name}] Returned to Normal Response Time ${msgStats}`;
Monitor.sendSlowResponseNotification(monitor, bean, msg);
}
// Verify valid threshold was calculated
if (!Number.isInteger(threshold)) {
log.debug("monitor", `[${this.name}] Failed to calculate valid threshold`);
return;
}
// Reset slow response count
bean.slowResponseCount = 0;
return;
// Responding slowly
// Responding normally
if (actualResponseTime < threshold) {
if (bean.slowResponseCount == 0) {
log.debug("monitor", `[${this.name}] Responding normally. No need to send slow response notification | ${msgStats}`);
} else {
++bean.slowResponseCount;
// Always send first notification
if (bean.slowResponseCount == 1) {
log.debug("monitor", `[${this.name}] Responded slowly, sending notification ${msgStats}`);
let msg = `[${this.name}] Responded Slowly ${msgStats}`;
Monitor.sendSlowResponseNotification(monitor, bean, msg);
} else if (this.slowResponseNotificationResendInterval > 0){
// Send notification every x times
if (((bean.slowResponseCount) % this.slowResponseNotificationResendInterval) == 0) {
// Send notification again, because we are still responding slow
log.debug("monitor", `[${this.name}] sendSlowResponseNotification again ${msgStats}`);
let msg = `[${this.name}] Still Responding Slowly ${msgStats}`;
Monitor.sendSlowResponseNotification(monitor, bean, msg);
}
}
msgStats += ` | Slow for: ${bean.slowResponseCount * monitor.interval}s`;
log.debug("monitor", `[${this.name}] Returned to normal response time | ${msgStats}`);
let msg = `[${this.name}] Returned to Normal Response Time \n${msgStats}`;
Monitor.sendSlowResponseNotification(monitor, bean, msg);
}
// Reset slow response count
bean.slowResponseCount = 0;
// Responding slowly
} else {
log.debug("monitor", `[${this.name}] Failed to calculate valid response time`);
++bean.slowResponseCount;
// Always send first notification
if (bean.slowResponseCount == 1) {
log.debug("monitor", `[${this.name}] Responded slowly, sending notification | ${msgStats}`);
let msg = `[${this.name}] Responded Slowly \n${msgStats}`;
Monitor.sendSlowResponseNotification(monitor, bean, msg);
// Send notification every x times
} else if (this.slowResponseNotificationResendInterval > 0){
if (((bean.slowResponseCount) % this.slowResponseNotificationResendInterval) == 0) {
// Send notification again, because we are still responding slow
msgStats += ` | Slow for: ${bean.slowResponseCount * monitor.interval}s`;
log.debug("monitor", `[${this.name}] Still responding slowly, sendSlowResponseNotification again | ${msgStats}`);
let msg = `[${this.name}] Still Responding Slowly \n${msgStats}`;
Monitor.sendSlowResponseNotification(monitor, bean, msg);
} else {
log.debug("monitor", `[${this.name}] Still responding slowly, waiting for resend interal | ${msgStats}`);
}
} else {
log.debug("monitor", `[${this.name}] Still responding slowly, but resend is disabled | ${msgStats}`);
}
}
}

View file

@ -804,9 +804,11 @@ let needSetup = false;
bean.authWorkstation = monitor.authWorkstation;
bean.authDomain = monitor.authDomain;
bean.slowResponseNotification = monitor.slowResponseNotification;
bean.slowResponseNotificationThreshold = monitor.slowResponseNotificationThreshold;
bean.slowResponseNotificationRange = monitor.slowResponseNotificationRange;
bean.slowResponseNotificationMethod = monitor.slowResponseNotificationMethod;
bean.slowResponseNotificationRange = monitor.slowResponseNotificationRange;
bean.slowResponseNotificationThresholdMethod = monitor.slowResponseNotificationThresholdMethod;
bean.slowResponseNotificationThreshold = monitor.slowResponseNotificationThreshold;
bean.slowResponseNotificationThresholdMultiplier = monitor.slowResponseNotificationThresholdMultiplier;
bean.slowResponseNotificationResendInterval = monitor.slowResponseNotificationResendInterval;
bean.grpcUrl = monitor.grpcUrl;
bean.grpcProtobuf = monitor.grpcProtobuf;

View file

@ -491,9 +491,16 @@
"slowResponseNotificationUseDescription": "Send a notification when service response time is slow.",
"slowResponseNotificationThreshold": "Threshold (ms)",
"slowResponseNotificationThresholdDescription": "Send a notification if calculated response time is greater than {0} ms.",
"slowResponseNotificationThresholdMethod": "Threshold Calculation",
"slowResponseNotificationThresholdMethodStatic": "Static Threshold",
"slowResponseNotificationThresholdMethodStaticDescription": "Define a static threshold.",
"slowResponseNotificationThresholdMethodRelative24Hour": "Relative to Avg. Response",
"slowResponseNotificationThresholdMethodRelative24HourDescription": "Calculate the threshold ({0}x the 24-hour average response time).",
"slowResponseNotificationThresholdMultiplier": "Threshold Multiplier",
"slowResponseNotificationThresholdMultiplierDescription": "Send notification if response time is greater than {0}x the 24-hour average.",
"slowResponseNotificationRange": "Window Duration (seconds)",
"slowResponseNotificationRangeDescription": "Window duration for calculating the {0}.",
"slowResponseNotificationMethod": "Calculation Method",
"slowResponseNotificationMethod": "Response Time Calculation",
"slowResponseNotificationMethodAverage": "Average",
"slowResponseNotificationMethodAverageDescription": "Get the average response time over the last {0} seconds.",
"slowResponseNotificationMethodMax": "Max",

View file

@ -426,8 +426,8 @@
<!-- Method -->
<div v-if="monitor.slowResponseNotification" class="my-3">
<label for="method" class="form-label">{{ $t("slowResponseNotificationMethod") }}</label>
<select id="method" v-model="monitor.slowResponseNotificationMethod" class="form-select">
<label for="slow-response-notification-method" class="form-label">{{ $t("slowResponseNotificationMethod") }}</label>
<select id="slow-response-notification-method" v-model="monitor.slowResponseNotificationMethod" class="form-select">
<option value="average">
{{ $t("slowResponseNotificationMethodAverage") }}
</option>
@ -449,14 +449,7 @@
</div>
</div>
<div v-if="monitor.slowResponseNotification" class="my-3">
<label for="slow-response-notification-threshold" class="form-label">{{ $t("slowResponseNotificationThreshold") }}</label>
<input id="slow-response-notification-threshold" v-model="monitor.slowResponseNotificationThreshold" type="number" class="form-control" required min="0" step="1">
<div class="form-text">
{{ $t("slowResponseNotificationThresholdDescription", [monitor.slowResponseNotificationThreshold]) }}
</div>
</div>
<!-- Window Duration -->
<div v-if="monitor.slowResponseNotification && monitor.slowResponseNotificationMethod !== 'last'" class="my-3">
<label for="slow-response-notification-range" class="form-label">{{ $t("slowResponseNotificationRange") }}</label>
<input id="slow-response-notification-range" v-model="monitor.slowResponseNotificationRange" type="number" class="form-control" required :min="monitor.interval" step="1">
@ -465,6 +458,44 @@
</div>
</div>
<!-- Threshold Method -->
<div v-if="monitor.slowResponseNotification" class="my-3">
<label for="slow-response-notification-threshold-method" class="form-label">{{ $t("slowResponseNotificationThresholdMethod") }}</label>
<select id="slow-response-notification-threshold-method" v-model="monitor.slowResponseNotificationThresholdMethod" class="form-select">
<option value="threshold-static">
{{ $t("slowResponseNotificationThresholdMethodStatic") }}
</option>
<option value="threshold-relative-24-hour">
{{ $t("slowResponseNotificationThresholdMethodRelative24Hour") }}
</option>
</select>
<div v-if="monitor.slowResponseNotificationThresholdMethod === 'threshold-static'" class="form-text">
{{ $t("slowResponseNotificationThresholdMethodStaticDescription") }}
</div>
<div v-if="monitor.slowResponseNotificationThresholdMethod === 'threshold-relative-24-hour'" class="form-text">
{{ $t("slowResponseNotificationThresholdMethodRelative24HourDescription", [monitor.slowResponseNotificationThresholdMultiplier]) }}
</div>
</div>
<!-- Threshold -->
<div v-if="monitor.slowResponseNotification && monitor.slowResponseNotificationThresholdMethod == 'threshold-static'" class="my-3">
<label for="slow-response-notification-threshold" class="form-label">{{ $t("slowResponseNotificationThreshold") }}</label>
<input id="slow-response-notification-threshold" v-model="monitor.slowResponseNotificationThreshold" type="number" class="form-control" required min="0" step="1">
<div class="form-text">
{{ $t("slowResponseNotificationThresholdDescription", [monitor.slowResponseNotificationThreshold]) }}
</div>
</div>
<!-- Threshold Multiplier -->
<div v-if="monitor.slowResponseNotification && monitor.slowResponseNotificationThresholdMethod == 'threshold-relative-24-hour'" class="my-3">
<label for="slow-response-notification-threshold-multiplier" class="form-label">{{ $t("slowResponseNotificationThresholdMultiplier") }}</label>
<input id="slow-response-notification-threshold-multiplier" v-model="monitor.slowResponseNotificationThresholdMultiplier" type="number" class="form-control" required min="1" step="0.1">
<div class="form-text">
{{ $t("slowResponseNotificationThresholdMultiplierDescription", [monitor.slowResponseNotificationThresholdMultiplier]) }}
</div>
</div>
<!-- Slow Response Resend Interval -->
<div v-if="monitor.slowResponseNotification" class="my-3">
<label for="slow-response-notification-resend-interval" class="form-label">
{{ $t("slowResponseNotificationResendInterval", [monitor.slowResponseNotificationInterval]) }}
@ -945,9 +976,11 @@ const monitorDefaults = {
kafkaProducerSsl: false,
gamedigGivenPortOnly: true,
slowResponseNotification: false,
slowResponseNotificationThreshold: 5000,
slowResponseNotificationRange: 60,
slowResponseNotificationMethod: "average",
slowResponseNotificationRange: 300,
slowResponseNotificationThresholdMethod:"threshold-relative-24-hour",
slowResponseNotificationThreshold: 2500,
slowResponseNotificationThresholdMultiplier: 5.0,
slowResponseNotificationResendInterval: 0,
};
@ -1214,11 +1247,7 @@ message HealthCheckResponse {
if (this.monitor.retryInterval === oldValue) {
this.monitor.retryInterval = value;
}
// Link interval and slowResponseNotificationRange if the are the same value
if (this.monitor.slowResponseNotificationRange === oldValue) {
this.monitor.slowResponseNotificationRange = value;
}
// But always keep slowResponseNotificationRange >= interval
// Always keep slowResponseNotificationRange >= interval
if (this.monitor.slowResponseNotificationRange < value) {
this.monitor.slowResponseNotificationRange = value;
}