From 0ec584cd01332c6431147f759d2163d37831afc9 Mon Sep 17 00:00:00 2001 From: lujs <327240969@qq.com> Date: Tue, 19 Nov 2024 10:55:52 +0800 Subject: [PATCH] feat(transform): add exponential smoothing data transform methods (#6522) * feat: add exponential smoothing data transform methods * feat: update test * feat: add unit tests; add docs; * feat: update test --- .../integration/snapshots/static/emaBasic.svg | 900 ++++++++++++++++++ __tests__/plots/static/ema-basic.ts | 37 + __tests__/plots/static/index.ts | 1 + __tests__/unit/data/ema.spec.ts | 101 ++ __tests__/unit/lib/core.spec.ts | 2 + __tests__/unit/lib/std.spec.ts | 2 + site/docs/spec/data/ema.en.md | 6 + site/docs/spec/data/ema.zh.md | 116 +++ src/data/ema.ts | 62 ++ src/data/index.ts | 2 + src/lib/core.ts | 2 + src/spec/dataTransform.ts | 11 +- 12 files changed, 1241 insertions(+), 1 deletion(-) create mode 100644 __tests__/integration/snapshots/static/emaBasic.svg create mode 100644 __tests__/plots/static/ema-basic.ts create mode 100644 __tests__/unit/data/ema.spec.ts create mode 100644 site/docs/spec/data/ema.en.md create mode 100644 site/docs/spec/data/ema.zh.md create mode 100644 src/data/ema.ts diff --git a/__tests__/integration/snapshots/static/emaBasic.svg b/__tests__/integration/snapshots/static/emaBasic.svg new file mode 100644 index 0000000000..9eb8e106f8 --- /dev/null +++ b/__tests__/integration/snapshots/static/emaBasic.svg @@ -0,0 +1,900 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 2008 + + + + + + + 2009 + + + + + + + 2010 + + + + + + + 2011 + + + + + + + 2012 + + + + + + + + + date + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 100 + + + + + + + 200 + + + + + + + 300 + + + + + + + 400 + + + + + + + 500 + + + + + + + 600 + + + + + + + + + close + + + + + + + + + + + + + + + + + + 
+ + + + + \ No newline at end of file diff --git a/__tests__/plots/static/ema-basic.ts b/__tests__/plots/static/ema-basic.ts new file mode 100644 index 0000000000..120c5a61f3 --- /dev/null +++ b/__tests__/plots/static/ema-basic.ts @@ -0,0 +1,37 @@ +import { G2Spec } from '../../../src'; + + /** Static test plot: a view with two line marks sharing the date/close encoding. The first mark fetches data/aapl.csv and runs the ema transform on the close field with alpha 0.8; the second draws the untransformed file at opacity 0.3 for comparison. */ export function emaBasic(): G2Spec { + return { + type: 'view', + children: [ + { + type: 'line', + data: { + type: 'fetch', + value: 'data/aapl.csv', + transform: [ + { + type: 'ema', + field: 'close', + alpha: 0.8, + }, + ], + }, + }, + { + type: 'line', + style: { + opacity: 0.3, + }, + data: { + type: 'fetch', + value: 'data/aapl.csv', + }, + }, + ], + encode: { + x: 'date', + y: 'close', + }, + }; +} diff --git a/__tests__/plots/static/index.ts b/__tests__/plots/static/index.ts index 4fd18d2d69..3e920ba4b7 100644 --- a/__tests__/plots/static/index.ts +++ b/__tests__/plots/static/index.ts @@ -62,6 +62,7 @@ export { aaplLineAreaBasicSample } from './aapl-line-area-basic-sample'; export { aaplAreaLineSmoothSample } from './aapl-area-line-smooth-sample'; export { aaplLinePointBasicSample } from './aapl-line-point-basic-sample'; export { speciesDensityBasic } from './species-density-basic'; +export { emaBasic } from './ema-basic'; export { speciesViolinBasic } from './species-violin-basic'; export { speciesViolinBasicPolar } from './species-violin-basic-polar'; export { unemploymentLineMultiSeries } from './unemployment-line-multi-series'; diff --git a/__tests__/unit/data/ema.spec.ts b/__tests__/unit/data/ema.spec.ts new file mode 100644 index 0000000000..e4749cba81 --- /dev/null +++ b/__tests__/unit/data/ema.spec.ts @@ -0,0 +1,101 @@ +import { EMA } from '../../../src/data'; + +describe('EMA', () => { + it('EMA({...}) returns a function that is used to exponentially smooth the data', async () => { /* NOTE(review): the function returned by EMA is synchronous and the other cases in this suite call it without await, so async/await looks unnecessary here. */ + const transform = EMA({ alpha: 0.6, field: 'y' }); + const data = [ + { x: 1, y: 2 }, + { x: 4, y: 5 }, + { x: 5, y: 8 }, + ]; + const r = await transform(data); + r.forEach((d, i)
=> { + if (i > 0) { + expect(d.y).not.toBe(data[i].y); + } else { + expect(d.y).toBe(data[i].y); + } + expect(d.x).toBe(data[i].x); + }); + }); + + /* Smoothing x instead of y: only the configured field may change, the first row is the seed and stays equal to the input. */ it('The "field" field determines the smoothed data', () => { + const transform = EMA({ alpha: 0.6, field: 'x' }); + const data = [ + { x: 1, y: 2 }, + { x: 4, y: 5 }, + { x: 5, y: 8 }, + ]; + const r = transform(data); + r.forEach((d, i) => { + if (i > 0) { + expect(d.x).not.toBe(data[i].x); + } else { + expect(d.x).toBe(data[i].x); + } + expect(d.y).toBe(data[i].y); + }); + }); + + /* Expected values follow s_t = 0.6 * s_(t-1) + 0.4 * y_t: 0.6 * 2 + 0.4 * 5 = 3.2, then 0.6 * 3.2 + 0.4 * 8 = 5.12. The source field y must stay untouched because the result is written to smooth. */ it('The as field will avoid overwriting the original data', () => { + const transform = EMA({ alpha: 0.6, field: 'y', as: 'smooth' }); + const data = [ + { x: 1, y: 2 }, + { x: 4, y: 5 }, + { x: 5, y: 8 }, + ]; + const r = transform(data); + expect(r).toEqual([ + { + x: 1, + y: 2, + smooth: 2, + }, + { + x: 4, + y: 5, + smooth: 3.2, + }, + { + x: 5, + y: 8, + smooth: 5.12, + }, + ]); + }); + + /* Rows that lack the field keep undefined in the output, while rows that have it are still smoothed. */ it('should handle missing field values', function () { + const data = [{ x: 1 }, { y: 2 }, { y: 3 }, { x: 4 }]; + const result = EMA({ field: 'y' })(data); + expect(result[0].y).toBe(undefined); + expect(result[1].y).not.toBe(undefined); + expect(result[2].y).not.toBe(undefined); + expect(result[3].y).toBe(undefined); + }); + + /* With alpha omitted the transform falls back to its default of 0.6, so every row after the seed still changes. */ it('should handle missing alpha value', function () { + const data = [{ y: 1 }, { y: 2 }, { y: 3 }]; + const r = EMA({ field: 'y' })(data); + r.forEach((d, i) => { + if (i > 0) { + expect(d.y).not.toBe(data[i].y); + } else { + expect(d.y).toBe(data[i].y); + } + }); + }); + /* NOTE(review): the title says strictly between zero and one, but ema() only throws for alpha < 0 or alpha > 1, so the bounds 0 and 1 themselves are accepted; only out-of-range values are exercised here. */ it('The value of alpha should be greater than zero and less than one', function () { + const data = [{ y: 1 }, { y: 2 }, { y: 3 }]; + let alpha = 1.1; + expect(() => EMA({ field: 'y', alpha })(data)).toThrowError(); + + alpha = -0.1; + expect(() => EMA({ field: 'y', alpha })(data)).toThrowError(); + }); + + /* Empty input yields an empty output array. */ it('Returns an empty array if entered', function () { + const data = []; + const result = EMA({ field: 'y' })(data); + expect(result).toEqual([]); + });
+}); diff --git a/__tests__/unit/lib/core.spec.ts b/__tests__/unit/lib/core.spec.ts index 225cba8c6c..43e7b1f0dc 100644 --- a/__tests__/unit/lib/core.spec.ts +++ b/__tests__/unit/lib/core.spec.ts @@ -156,6 +156,7 @@ import { KDE, Log as DataLog, WordCloud, + EMA, } from '../../../src/data'; import { OverflowHide, @@ -182,6 +183,7 @@ describe('corelib', () => { 'data.join': Join, 'data.kde': KDE, 'data.log': DataLog, + 'data.ema': EMA, 'data.wordCloud': WordCloud, 'transform.stackY': StackY, 'transform.binX': BinX, diff --git a/__tests__/unit/lib/std.spec.ts b/__tests__/unit/lib/std.spec.ts index 9ded77f4f7..b05314eaff 100644 --- a/__tests__/unit/lib/std.spec.ts +++ b/__tests__/unit/lib/std.spec.ts @@ -170,6 +170,7 @@ import { Arc, Log as DataLog, WordCloud, + EMA, } from '../../../src/data'; import { OverflowHide, @@ -196,6 +197,7 @@ describe('stdlib', () => { 'data.join': Join, 'data.kde': KDE, 'data.venn': Venn, + 'data.ema': EMA, 'data.wordCloud': WordCloud, 'data.cluster': Cluster, 'data.arc': Arc, diff --git a/site/docs/spec/data/ema.en.md b/site/docs/spec/data/ema.en.md new file mode 100644 index 0000000000..a13951ead9 --- /dev/null +++ b/site/docs/spec/data/ema.en.md @@ -0,0 +1,6 @@ +--- +title: ema +order: 1 +--- + + diff --git a/site/docs/spec/data/ema.zh.md b/site/docs/spec/data/ema.zh.md new file mode 100644 index 0000000000..58db6c999e --- /dev/null +++ b/site/docs/spec/data/ema.zh.md @@ -0,0 +1,116 @@ +--- +title: ema +order: 1 +--- + + +EMA(Exponential Moving Average,指数移动平均)是一种常用的平滑算法,用于计算数据的指数移动平均值。它通过给较近的数据赋予更高的权重来平滑数据,从而减少噪声和波动。 + +在模型训练中,可以使用 EMA 来平滑数据,观察数据变化趋势。 + +由下面的公式可知,α 越大,平滑效果越明显(注意:这里的 α 是上一个平滑值 $EMA_{t-1}$ 的权重,与维基百科中 α 作用于当前观测值的写法相反)。 + + +$EMA_t = (1 - \alpha) \cdot P_t + \alpha \cdot EMA_{t-1}$ + +具体细节可参考[文档](https://en.wikipedia.org/wiki/Exponential_smoothing) + + + +## 开始使用 + +```ts +const data = [ + { x: 1, y: 2 }, + { x: 4, y: 5 }, + { x: 5, y: 8 }, +]; + +chart + .data({ + type: 'inline', + value: data, + transform: [ + { + type: 'ema', + field: 'y', + alpha: 0.6, + as: 'other' + }, + ], + }); +``` +
import type { DataComponent as DC } from '../runtime';
import type { EMADataTransform } from '../spec';

/**
 * Exponentially smooths a numeric series.
 *
 * The recurrence is `s_0 = x_0` and `s_t = alpha * s_(t-1) + (1 - alpha) * x_t`.
 * Note that `alpha` weights the PREVIOUS smoothed value, so a larger `alpha`
 * gives a smoother curve (this is the opposite of the convention where alpha
 * weights the current observation).
 *
 * `null` / `undefined` entries are passed through unchanged at the same index
 * and do not affect the running value; smoothing resumes with the next
 * defined entry.
 *
 * @param values Series to smooth; it is not mutated.
 * @param alpha Weight of the previous smoothed value, within `[0, 1]`.
 * @returns A new array with the same length as `values`.
 * @throws Error when `alpha` is `NaN` or lies outside `[0, 1]`.
 */
export function ema(values: number[], alpha: number): number[] {
  // Negated range test: every comparison with NaN is false, so a plain
  // `alpha < 0 || alpha > 1` check would let NaN through and silently turn
  // the whole series into NaN.
  if (!(alpha >= 0 && alpha <= 1)) {
    throw new Error('alpha must be between 0 and 1.');
  }

  const smoothed: number[] = [];
  let last: number | undefined;
  let hasMissing = false;

  for (const point of values) {
    if (point === null || point === undefined) {
      // Keep the hole as-is so the output stays index-aligned with the input.
      smoothed.push(point);
      hasMissing = true;
      continue;
    }

    // The first defined point seeds the series verbatim; this avoids the
    // round-off of computing `point * alpha + (1 - alpha) * point`.
    last = last === undefined ? point : last * alpha + (1 - alpha) * point;
    smoothed.push(last);
  }

  if (hasMissing) {
    // Warn once per call instead of once per missing entry, which used to
    // dump the full array for every hole.
    console.warn('EMA:The value is null or undefined', values);
  }

  return smoothed;
}

/** Options of the `ema` data transform: the spec without its `type` tag. */
export type EMAOptions = Omit<EMADataTransform, 'type'>;

/**
 * Data transform that exponentially smooths one field of every datum.
 *
 * https://en.wikipedia.org/wiki/Exponential_smoothing
 *
 * @param options `field` is the property to read (default `'y'`), `alpha` the
 * weight of the previous smoothed value (default `0.6`), and `as` the property
 * to write (defaults to `field`, i.e. the source values are overwritten in the
 * returned copies).
 * @returns A function mapping the data array to a new array of shallow copies
 * carrying the smoothed value; the input rows are not mutated.
 */
export const EMA: DC<EMAOptions> = (options) => {
  const { field = 'y', alpha = 0.6, as = field } = options;

  return (data) => {
    const values = data.map((d) => d[field]);
    const out = ema(values, alpha);

    return data.map((d, i) => ({ ...d, [as]: out[i] }));
  };
};

EMA.props = {};
DataTransformTypes = | 'sortBy' @@ -29,6 +30,7 @@ export type DataTransformTypes = | 'venn' | 'log' | 'custom' + | 'ema' | DataComponent; export type SortByTransform = { @@ -188,3 +190,10 @@ export type CustomTransform = { type?: DataComponent; [key: string]: any; }; + +export type EMADataTransform = { + type?: 'ema'; + field?: string; // The field to be smoothed, default: 'y' + alpha?: number; // Smoothing factor in [0, 1], applied as the weight of the previous smoothed value, default: 0.6 + as?: string; // Field that receives the smoothed values, default: the value of `field` +};