Improvement of the FullTS performance

Improvement of the Full Theil-Sen performance by using a Binary Search Tree for the Median Calculation, which is much more efficient for larger values of maxSeriesLength
This commit is contained in:
Jaap van Ekris 2023-12-03 20:30:27 +01:00 committed by GitHub
parent 6681cfcc5e
commit f6573cb0d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 17 additions and 37 deletions

View File

@ -2,9 +2,9 @@
/*
Open Rowing Monitor, https://github.com/laberning/openrowingmonitor
The TSLinearSeries is a datatype that represents a Linear Series. It allows
The FullTSLinearSeries is a datatype that represents a Linear Series. It allows
values to be retrieved (like a FiFo buffer, or Queue) but it also includes
a Theilââ¬âœSen estimator Linear Regressor to determine the slope of this timeseries.
a Theil-Sen estimator Linear Regressor to determine the slope of this timeseries.
At creation its length is determined. After it is filled, the oldest will be pushed
out of the queue) automatically.
@ -19,6 +19,7 @@
*/
import { createSeries } from './Series.js'
import { createLabelledBinarySearchTree } from './BinarySearchTree.js'
import loglevel from 'loglevel'
const log = loglevel.getLogger('RowingEngine')
@ -26,43 +27,37 @@ const log = loglevel.getLogger('RowingEngine')
function createTSLinearSeries (maxSeriesLength = 0) {
const X = createSeries(maxSeriesLength)
const Y = createSeries(maxSeriesLength)
const slopes = []
const A = createLabelledBinarySearchTree()
let _A = 0
let _B = 0
let _goodnessOfFit = 0
function push (x, y) {
// Invariant: A contains all a's (as in the general formula y = a * x^2 + b * x + c)
// Where the a's are labeled in the Binary Search Tree with their xi when they BEGIN in the point (xi, yi)
if (maxSeriesLength > 0 && X.length() >= maxSeriesLength) {
// The maximum of the array has been reached, so when pushing the x,y the array gets shifted,
// thus we have to remove the a's belonging to the current position X0 as well before this value is trashed
A.remove(X.get(0))
}
X.push(x)
Y.push(y)
if (maxSeriesLength > 0 && slopes.length >= maxSeriesLength) {
// The maximum of the array has been reached, we have to create room
// in the 2D array by removing the first row from the table
removeFirstRow()
}
// Invariant: the indices of the X and Y array now match up with the
// row numbers of the slopes array. So, the slope of (X[0],Y[0]) and (X[1],Y[1]
// will be stored in slopes[0][.].
// Calculate the slopes of this new point
// Calculate all the slopes of the newly added point
if (X.length() > 1) {
// There are at least two points in the X and Y arrays, so let's add the new datapoint
let i = 0
let result = 0
while (i < slopes.length) {
result = calculateSlope(i, slopes.length)
slopes[i].push(result)
while (i < X.length() - 1) {
A.push(X.get(i), calculateSlope(i, X.length() - 1))
i++
}
}
// Add an empty array at the end to store futurs results for the most recent points
slopes.push([])
// Calculate the median of the slopes
if (X.length() > 1) {
_A = median()
_A = A.median()
} else {
_A = 0
}
@ -164,10 +159,6 @@ function createTSLinearSeries (maxSeriesLength = 0) {
return Y.series()
}
function removeFirstRow () {
slopes.shift()
}
function calculateSlope (pointOne, pointTwo) {
if (pointOne !== pointTwo && X.get(pointOne) !== X.get(pointTwo)) {
return ((Y.get(pointTwo) - Y.get(pointOne)) / (X.get(pointTwo) - X.get(pointOne)))
@ -177,21 +168,10 @@ function createTSLinearSeries (maxSeriesLength = 0) {
}
}
function median () {
if (slopes.length > 1) {
const sortedArray = [...slopes.flat()].sort((a, b) => a - b)
const mid = Math.floor(sortedArray.length / 2)
return (sortedArray.length % 2 !== 0 ? sortedArray[mid] : ((sortedArray[mid - 1] + sortedArray[mid]) / 2))
} else {
log.error('TS Linear Regressor, Median calculation on empty dataset attempted!')
return 0
}
}
function reset () {
X.reset()
Y.reset()
slopes.splice(0, slopes.length)
A.reset()
_A = 0
_B = 0
_goodnessOfFit = 0