From 6d1204a36c590dd453db949b9a84e72843d0ca80 Mon Sep 17 00:00:00 2001 From: ljs Date: Fri, 15 Nov 2024 17:57:32 +0800 Subject: [PATCH 1/4] feat: add exponential smoothing data transform methods --- __tests__/plots/static/ema-basic.ts | 37 +++++++++++ __tests__/plots/static/index.ts | 1 + __tests__/unit/data/ema.spec.ts | 27 ++++++++ site/docs/spec/data/ema.en.md | 6 ++ site/docs/spec/data/ema.zh.md | 99 +++++++++++++++++++++++++++++ src/data/ema.ts | 50 +++++++++++++++ src/data/index.ts | 2 + src/lib/core.ts | 2 + src/spec/dataTransform.ts | 6 ++ 9 files changed, 230 insertions(+) create mode 100644 __tests__/plots/static/ema-basic.ts create mode 100644 __tests__/unit/data/ema.spec.ts create mode 100644 site/docs/spec/data/ema.en.md create mode 100644 site/docs/spec/data/ema.zh.md create mode 100644 src/data/ema.ts diff --git a/__tests__/plots/static/ema-basic.ts b/__tests__/plots/static/ema-basic.ts new file mode 100644 index 0000000000..120c5a61f3 --- /dev/null +++ b/__tests__/plots/static/ema-basic.ts @@ -0,0 +1,37 @@ +import { G2Spec } from '../../../src'; + +export function emaBasic(): G2Spec { + return { + type: 'view', + children: [ + { + type: 'line', + data: { + type: 'fetch', + value: 'data/aapl.csv', + transform: [ + { + type: 'ema', + field: 'close', + alpha: 0.8, + }, + ], + }, + }, + { + type: 'line', + style: { + opacity: 0.3, + }, + data: { + type: 'fetch', + value: 'data/aapl.csv', + }, + }, + ], + encode: { + x: 'date', + y: 'close', + }, + }; +} diff --git a/__tests__/plots/static/index.ts b/__tests__/plots/static/index.ts index 4fd18d2d69..3e920ba4b7 100644 --- a/__tests__/plots/static/index.ts +++ b/__tests__/plots/static/index.ts @@ -62,6 +62,7 @@ export { aaplLineAreaBasicSample } from './aapl-line-area-basic-sample'; export { aaplAreaLineSmoothSample } from './aapl-area-line-smooth-sample'; export { aaplLinePointBasicSample } from './aapl-line-point-basic-sample'; export { speciesDensityBasic } from './species-density-basic'; +export { 
emaBasic } from './ema-basic'; export { speciesViolinBasic } from './species-violin-basic'; export { speciesViolinBasicPolar } from './species-violin-basic-polar'; export { unemploymentLineMultiSeries } from './unemployment-line-multi-series'; diff --git a/__tests__/unit/data/ema.spec.ts b/__tests__/unit/data/ema.spec.ts new file mode 100644 index 0000000000..8385ff7c78 --- /dev/null +++ b/__tests__/unit/data/ema.spec.ts @@ -0,0 +1,27 @@ +import { EMA } from '../../../src/data'; + +describe('EMA', () => { + it('EMA({...}) returns a function that is used to exponentially smooth the data', async () => { + const transform = EMA({ alpha: 0.6, field: 'y' }); + const data = [ + { x: 1, y: 2 }, + { x: 4, y: 5 }, + { x: 5, y: 8 }, + ]; + const r = await transform(data); + expect(r).toEqual([ + { + x: 1, + y: 2, + }, + { + x: 4, + y: 3.2, + }, + { + x: 5, + y: 5.12, + }, + ]); + }); +}); diff --git a/site/docs/spec/data/ema.en.md b/site/docs/spec/data/ema.en.md new file mode 100644 index 0000000000..a13951ead9 --- /dev/null +++ b/site/docs/spec/data/ema.en.md @@ -0,0 +1,6 @@ +--- +title: ema +order: 1 +--- + + diff --git a/site/docs/spec/data/ema.zh.md b/site/docs/spec/data/ema.zh.md new file mode 100644 index 0000000000..77918d123c --- /dev/null +++ b/site/docs/spec/data/ema.zh.md @@ -0,0 +1,99 @@ +--- +title: ema +order: 1 +--- + +对数据进行指数平滑 + +## 开始使用 + +```ts +const data = [ + { x: 1, y: 2 }, + { x: 4, y: 5 }, + { x: 5, y: 8 }, +]; + +chart + .data({ + type: 'inline', + value: data, + transform: [ + { + type: 'ema', + field: 'y', + alpha: 0.6, + }, + ], + }); +``` + +上述例子处理之后,数据变成为: + +```js +[ + { + "x": 1, + "y": 2 + }, + { + "x": 4, + "y": 3.2 + }, + { + "x": 5, + "y": 5.12 + } +]; +``` + +## 开始使用 +渲染了处理前,处理后的数据 +```js | ob +(() => { + const chart = new G2.Chart(); + + chart.options({ + type: 'view', + children: [ + { + type: 'line', + data: { + type: 'fetch', + value: 'https://gw.alipayobjects.com/os/bmw-prod/551d80c6-a6be-4f3c-a82a-abd739e12977.csv', + transform: [ +
{ + type: 'ema', + field: 'close', + alpha: 0.8, + }, + ], + }, + }, + { + type: 'line', + style: { + opacity: 0.3, + }, + data: { + type: 'fetch', + value: 'https://gw.alipayobjects.com/os/bmw-prod/551d80c6-a6be-4f3c-a82a-abd739e12977.csv', + }, + }, + ], + encode: { + x: 'date', + y: 'close', + }, + }); + + return chart.render().then((chart) => chart.getContainer()); +})(); +``` + +## 选项 + +| 属性 | 描述 | 类型 | 默认值| +| -------------| ----------------------------------------------------------- | -----------------------------| --------------------| +| field | 需要处理的字段列表 | string | y | +| alpha | 平滑因子,范围在0-1 | number | 0.6 | diff --git a/src/data/ema.ts b/src/data/ema.ts new file mode 100644 index 0000000000..0e15765988 --- /dev/null +++ b/src/data/ema.ts @@ -0,0 +1,50 @@ +import { DataComponent as DC } from '../runtime'; +import { EMADataTransform } from '../spec'; + +export function ema(values: number[], alpha: number): number[] { + if (alpha < 0 || alpha > 1) { + throw new Error('alpha must be between 0 and 1.'); + } + if (values.length === 0) { + return []; + } + + let last = values[0]; + const smoothed: number[] = []; + + for (const point of values) { + const smoothedVal = last * alpha + (1 - alpha) * point; + smoothed.push(smoothedVal); + last = smoothedVal; + } + + return smoothed; +} + +export type EMAOptions = Omit; + +/** + * https://en.wikipedia.org/wiki/Exponential_smoothing + * @param options + * @returns + */ + +export const EMA: DC = (options) => { + const { field = 'y', alpha = 0.6 } = options; + return (data) => { + const values = data.map((d) => { + return d[field]; + }); + + const out = ema(values, alpha); + + return data.map((d, i) => { + return { + ...d, + [field]: out[i], + }; + }); + }; +}; + +EMA.props = {}; diff --git a/src/data/index.ts b/src/data/index.ts index 204daaa9ce..8be36e13a6 100644 --- a/src/data/index.ts +++ b/src/data/index.ts @@ -18,6 +18,7 @@ export { Slice } from './slice'; export { KDE } from './kde'; export { Venn } from 
'./venn'; export { Log } from './log'; +export { EMA } from './ema'; export type { FetchOptions } from './fetch'; export type { FoldOptions } from './fold'; @@ -39,3 +40,4 @@ export type { SliceOptions } from './slice'; export type { KDEOptions } from './kde'; export type { VennOptions } from './venn'; export type { LogDataOptions } from './log'; +export type { EMAOptions } from './ema'; diff --git a/src/lib/core.ts b/src/lib/core.ts index c1f01003c0..f96820b7a3 100644 --- a/src/lib/core.ts +++ b/src/lib/core.ts @@ -154,6 +154,7 @@ import { Sort as DataSort, KDE as DataKDE, Log as DataLog, + EMA as DataEMA, WordCloud, } from '../data'; import { @@ -181,6 +182,7 @@ export function corelib() { 'data.kde': DataKDE, 'data.log': DataLog, 'data.wordCloud': WordCloud, + 'data.ema': DataEMA, 'transform.stackY': StackY, 'transform.binX': BinX, 'transform.bin': Bin, diff --git a/src/spec/dataTransform.ts b/src/spec/dataTransform.ts index 08312cece5..a9dc5b7ab6 100644 --- a/src/spec/dataTransform.ts +++ b/src/spec/dataTransform.ts @@ -188,3 +188,9 @@ export type CustomTransform = { type?: DataComponent; [key: string]: any; }; + +export type EMADataTransform = { + type?: 'ema'; + field: string; + alpha: number; // smooth factor +}; From 85ddb324370daacb807e7f1bd3e31f807c6051d9 Mon Sep 17 00:00:00 2001 From: ljs Date: Mon, 18 Nov 2024 11:16:40 +0800 Subject: [PATCH 2/4] feat: update test --- .../integration/snapshots/static/emaBasic.svg | 900 ++++++++++++++++++ __tests__/unit/lib/core.spec.ts | 2 + __tests__/unit/lib/std.spec.ts | 2 + 3 files changed, 904 insertions(+) create mode 100644 __tests__/integration/snapshots/static/emaBasic.svg diff --git a/__tests__/integration/snapshots/static/emaBasic.svg b/__tests__/integration/snapshots/static/emaBasic.svg new file mode 100644 index 0000000000..9eb8e106f8 --- /dev/null +++ b/__tests__/integration/snapshots/static/emaBasic.svg @@ -0,0 +1,900 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 2008 + + + + + + + 2009 + + + + + + + 2010 + + + + + + + 2011 + + + + + + + 2012 + + + + + + + + + date + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 100 + + + + + + + 200 + + + + + + + 300 + + + + + + + 400 + + + + + + + 500 + + + + + + + 600 + + + + + + + + + close + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/__tests__/unit/lib/core.spec.ts b/__tests__/unit/lib/core.spec.ts index 225cba8c6c..43e7b1f0dc 100644 --- a/__tests__/unit/lib/core.spec.ts +++ b/__tests__/unit/lib/core.spec.ts @@ -156,6 +156,7 @@ import { KDE, Log as DataLog, WordCloud, + EMA, } from '../../../src/data'; import { OverflowHide, @@ -182,6 +183,7 @@ describe('corelib', () => { 'data.join': Join, 'data.kde': KDE, 'data.log': DataLog, + 'data.ema': EMA, 'data.wordCloud': WordCloud, 'transform.stackY': StackY, 'transform.binX': BinX, diff --git a/__tests__/unit/lib/std.spec.ts b/__tests__/unit/lib/std.spec.ts index 9ded77f4f7..b05314eaff 100644 --- a/__tests__/unit/lib/std.spec.ts +++ b/__tests__/unit/lib/std.spec.ts @@ -170,6 +170,7 @@ import { Arc, Log as DataLog, WordCloud, + EMA, } from '../../../src/data'; import { OverflowHide, @@ -196,6 +197,7 @@ describe('stdlib', () => { 'data.join': Join, 'data.kde': KDE, 'data.venn': Venn, + 'data.ema': EMA, 'data.wordCloud': WordCloud, 'data.cluster': Cluster, 'data.arc': Arc, From ca72c8b06d6aba068280c92ddf6f7bf7c9d3d483 Mon Sep 17 00:00:00 2001 From: ljs Date: Mon, 18 Nov 2024 21:20:16 +0800 Subject: [PATCH 3/4] feat: add unit tests; add docs; --- __tests__/unit/data/ema.spec.ts | 75 ++++++++++++++++++++++++++++++++- site/docs/spec/data/ema.zh.md | 45 ++++++++++++++------ src/data/ema.ts | 11 +++-- src/spec/dataTransform.ts | 9 ++-- 4 files changed, 118 insertions(+), 22 deletions(-) diff --git a/__tests__/unit/data/ema.spec.ts 
b/__tests__/unit/data/ema.spec.ts index 8385ff7c78..aa5e9db892 100644 --- a/__tests__/unit/data/ema.spec.ts +++ b/__tests__/unit/data/ema.spec.ts @@ -9,19 +9,90 @@ describe('EMA', () => { { x: 5, y: 8 }, ]; const r = await transform(data); + r.forEach((d, i) => { + if (i > 0) { + expect(d.y).not.toBe(data[i].y); + } else { + expect(d.y).toBe(data[i].y); + } + expect(d.x).toBe(data[i].x); + }); + }); + + it('The "field" field determines the smoothed data', () => { + const transform = EMA({ alpha: 0.6, field: 'x' }); + const data = [ + { x: 1, y: 2 }, + { x: 4, y: 5 }, + { x: 5, y: 8 }, + ]; + const r = transform(data); + r.forEach((d, i) => { + if (i > 0) { + expect(d.x).not.toBe(data[i].x); + } else { + expect(d.x).toBe(data[i].x); + } + expect(d.y).toBe(data[i].y); + }); + }); + + it('The as field will avoid overwriting the original data', () => { + const transform = EMA({ alpha: 0.6, field: 'y', as: 'smooth' }); + const data = [ + { x: 1, y: 2 }, + { x: 4, y: 5 }, + { x: 5, y: 8 }, + ]; + const r = transform(data); expect(r).toEqual([ { x: 1, y: 2, + smooth: 2, }, { x: 4, - y: 3.2, + y: 5, + smooth: 3.2, }, { x: 5, - y: 5.12, + y: 8, + smooth: 5.12, }, ]); }); + + it('should handle missing field values', function () { + const data = [{ x: 1 }, { y: 2 }, { y: 3 }, { x: 4 }]; + const result = EMA({ field: 'y', alpha: 0.5 })(data); + expect(result[0].y).toBe(null); + }); + + it('should handle missing alpha value', function () { + const data = [{ y: 1 }, { y: 2 }, { y: 3 }]; + const r = EMA({ field: 'y' })(data); + r.forEach((d, i) => { + if (i > 0) { + expect(d.y).not.toBe(data[i].y); + } else { + expect(d.y).toBe(data[i].y); + } + }); + }); + it('The value of alpha should be greater than zero and less than one', function () { + const data = [{ y: 1 }, { y: 2 }, { y: 3 }]; + let alpha = 1.1; + expect(EMA({ field: 'y', alpha })(data)).toThrow(); + + alpha = -0.1; + expect(EMA({ field: 'y', alpha })(data)).toThrow(); + }); + + it('Returns an empty array if entered', 
function () { + const data = []; + const result = EMA({ field: 'y' })(data); + expect(result).toEqual([]); + }); }); diff --git a/site/docs/spec/data/ema.zh.md b/site/docs/spec/data/ema.zh.md index 77918d123c..58db6c999e 100644 --- a/site/docs/spec/data/ema.zh.md +++ b/site/docs/spec/data/ema.zh.md @@ -3,7 +3,19 @@ title: ema order: 1 --- -对数据进行指数平滑 + +EMA(Exponential Moving Average)是一种常用的平滑算法,用于计算数据的指数移动平均值。它通过给较近的数据赋予权重来平滑数据,从而减少噪声和波动。 + +在模型训练中,可以使用EMA来平滑数据,观察数据变化趋势。 + +如下公式显示,α越大平滑效果更明显 + + +$EMA_t = (1 - \alpha) \cdot P_t + \alpha \cdot EMA_{t-1}$ + +具体细节可参考[文档](https://en.wikipedia.org/wiki/Exponential_smoothing) + + ## 开始使用 @@ -23,6 +35,7 @@ chart type: 'ema', field: 'y', alpha: 0.6, + as: 'other' }, ], }); @@ -32,23 +45,26 @@ chart ```js [ - { - "x": 1, - "y": 2 - }, - { - "x": 4, - "y": 3.2 - }, - { - "x": 5, - "y": 5.12 - } + { + "x": 1, + "y": 2, + "other": 2, + }, + { + "x": 4, + "y": 5, + "other": 3.2, + }, + { + "x": 5, + "y": 8, + "other": 5.12, + } ]; ``` ## 开始使用 -渲染了处理前,处理后的数据 + ```js | ob (() => { const chart = new G2.Chart(); @@ -97,3 +113,4 @@ chart | -------------| ----------------------------------------------------------- | -----------------------------| --------------------| | field | 需要处理的字段列表 | string | y | | alpha | 平滑因子,范围在0-1 | number | 0.6 | +| as | 存储的字段, 默认是field传入的值,可自定义字段避免覆盖原字段数据 | string | y | diff --git a/src/data/ema.ts b/src/data/ema.ts index 0e15765988..950fb829d2 100644 --- a/src/data/ema.ts +++ b/src/data/ema.ts @@ -10,9 +10,13 @@ export function ema(values: number[], alpha: number): number[] { } let last = values[0]; - const smoothed: number[] = []; + let smoothed: number[] = []; for (const point of values) { + if (point === null || point === undefined) { + smoothed = values.map(() => null); + break; + } const smoothedVal = last * alpha + (1 - alpha) * point; smoothed.push(smoothedVal); last = smoothedVal; @@ -30,7 +34,8 @@ export type EMAOptions = Omit; */ export const EMA: DC = (options) => { - const { field = 'y', 
alpha = 0.6 } = options; + const { field = 'y', alpha = 0.6, as = field } = options; + return (data) => { const values = data.map((d) => { return d[field]; @@ -41,7 +46,7 @@ export const EMA: DC = (options) => { return data.map((d, i) => { return { ...d, - [field]: out[i], + [as]: out[i], }; }); }; diff --git a/src/spec/dataTransform.ts b/src/spec/dataTransform.ts index a9dc5b7ab6..fcafe8d5fb 100644 --- a/src/spec/dataTransform.ts +++ b/src/spec/dataTransform.ts @@ -13,7 +13,8 @@ export type DataTransform = | KDEDataTransform | VennDataTransform | LogDataTransform - | CustomTransform; + | CustomTransform + | EMADataTransform; export type DataTransformTypes = | 'sortBy' @@ -29,6 +30,7 @@ export type DataTransformTypes = | 'venn' | 'log' | 'custom' + | 'ema' | DataComponent; export type SortByTransform = { @@ -191,6 +193,7 @@ export type CustomTransform = { export type EMADataTransform = { type?: 'ema'; - field: string; - alpha: number; // smooth factor + field?: string; // The field to be smoothed, default: 'y' + alpha?: number; // smooth factor, default: 0.6 + as?: string; // Set the generated field, default: 'y' }; From c116c7fbbca45daafaac29803693fe1d43b16e13 Mon Sep 17 00:00:00 2001 From: ljs Date: Mon, 18 Nov 2024 21:54:38 +0800 Subject: [PATCH 4/4] feat: update test --- __tests__/unit/data/ema.spec.ts | 11 +++++++---- src/data/ema.ts | 13 ++++++++++--- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/__tests__/unit/data/ema.spec.ts b/__tests__/unit/data/ema.spec.ts index aa5e9db892..e4749cba81 100644 --- a/__tests__/unit/data/ema.spec.ts +++ b/__tests__/unit/data/ema.spec.ts @@ -66,8 +66,11 @@ describe('EMA', () => { it('should handle missing field values', function () { const data = [{ x: 1 }, { y: 2 }, { y: 3 }, { x: 4 }]; - const result = EMA({ field: 'y', alpha: 0.5 })(data); - expect(result[0].y).toBe(null); + const result = EMA({ field: 'y' })(data); + expect(result[0].y).toBe(undefined); + expect(result[1].y).not.toBe(undefined); + 
expect(result[2].y).not.toBe(undefined); + expect(result[3].y).toBe(undefined); }); it('should handle missing alpha value', function () { @@ -84,10 +87,10 @@ describe('EMA', () => { it('The value of alpha should be greater than zero and less than one', function () { const data = [{ y: 1 }, { y: 2 }, { y: 3 }]; let alpha = 1.1; - expect(EMA({ field: 'y', alpha })(data)).toThrow(); + expect(() => EMA({ field: 'y', alpha })(data)).toThrowError(); alpha = -0.1; - expect(EMA({ field: 'y', alpha })(data)).toThrow(); + expect(() => EMA({ field: 'y', alpha })(data)).toThrowError(); }); it('Returns an empty array if entered', function () { diff --git a/src/data/ema.ts b/src/data/ema.ts index 950fb829d2..2642146aab 100644 --- a/src/data/ema.ts +++ b/src/data/ema.ts @@ -10,13 +10,20 @@ export function ema(values: number[], alpha: number): number[] { } let last = values[0]; - let smoothed: number[] = []; + const smoothed: number[] = []; for (const point of values) { if (point === null || point === undefined) { - smoothed = values.map(() => null); - break; + // Missing value: pass it through unchanged and leave the running average untouched. + smoothed.push(point); + console.warn('EMA:The value is null or undefined', values); + continue; } + + if (last === null || last === undefined) { + last = point; + } + const smoothedVal = last * alpha + (1 - alpha) * point; smoothed.push(smoothedVal); last = smoothedVal;