From 0ec584cd01332c6431147f759d2163d37831afc9 Mon Sep 17 00:00:00 2001
From: lujs <327240969@qq.com>
Date: Tue, 19 Nov 2024 10:55:52 +0800
Subject: [PATCH] feat(transform): add exponential smoothing data transform
methods (#6522)
* feat: add exponential smoothing data transform methods
* feat: update test
* feat: add unit tests; add docs;
* feat: update test
---
.../integration/snapshots/static/emaBasic.svg | 900 ++++++++++++++++++
__tests__/plots/static/ema-basic.ts | 37 +
__tests__/plots/static/index.ts | 1 +
__tests__/unit/data/ema.spec.ts | 101 ++
__tests__/unit/lib/core.spec.ts | 2 +
__tests__/unit/lib/std.spec.ts | 2 +
site/docs/spec/data/ema.en.md | 6 +
site/docs/spec/data/ema.zh.md | 116 +++
src/data/ema.ts | 62 ++
src/data/index.ts | 2 +
src/lib/core.ts | 2 +
src/spec/dataTransform.ts | 11 +-
12 files changed, 1241 insertions(+), 1 deletion(-)
create mode 100644 __tests__/integration/snapshots/static/emaBasic.svg
create mode 100644 __tests__/plots/static/ema-basic.ts
create mode 100644 __tests__/unit/data/ema.spec.ts
create mode 100644 site/docs/spec/data/ema.en.md
create mode 100644 site/docs/spec/data/ema.zh.md
create mode 100644 src/data/ema.ts
diff --git a/__tests__/integration/snapshots/static/emaBasic.svg b/__tests__/integration/snapshots/static/emaBasic.svg
new file mode 100644
index 0000000000..9eb8e106f8
--- /dev/null
+++ b/__tests__/integration/snapshots/static/emaBasic.svg
@@ -0,0 +1,900 @@
+
\ No newline at end of file
diff --git a/__tests__/plots/static/ema-basic.ts b/__tests__/plots/static/ema-basic.ts
new file mode 100644
index 0000000000..120c5a61f3
--- /dev/null
+++ b/__tests__/plots/static/ema-basic.ts
@@ -0,0 +1,37 @@
+import { G2Spec } from '../../../src';
+
+export function emaBasic(): G2Spec { // Snapshot spec: an EMA-smoothed line drawn together with the raw series.
+ return {
+ type: 'view',
+ children: [
+ {
+ type: 'line', // Smoothed series.
+ data: {
+ type: 'fetch',
+ value: 'data/aapl.csv',
+ transform: [
+ {
+ type: 'ema',
+ field: 'close', // No `as` is given, so the smoothed value is written back to 'close'.
+ alpha: 0.8,
+ },
+ ],
+ },
+ },
+ {
+ type: 'line', // Raw series: same file, no transform, drawn at reduced opacity.
+ style: {
+ opacity: 0.3,
+ },
+ data: {
+ type: 'fetch',
+ value: 'data/aapl.csv',
+ },
+ },
+ ],
+ encode: { // Declared once on the view rather than on each child mark.
+ x: 'date',
+ y: 'close',
+ },
+ };
+}
diff --git a/__tests__/plots/static/index.ts b/__tests__/plots/static/index.ts
index 4fd18d2d69..3e920ba4b7 100644
--- a/__tests__/plots/static/index.ts
+++ b/__tests__/plots/static/index.ts
@@ -62,6 +62,7 @@ export { aaplLineAreaBasicSample } from './aapl-line-area-basic-sample';
export { aaplAreaLineSmoothSample } from './aapl-area-line-smooth-sample';
export { aaplLinePointBasicSample } from './aapl-line-point-basic-sample';
export { speciesDensityBasic } from './species-density-basic';
+export { emaBasic } from './ema-basic';
export { speciesViolinBasic } from './species-violin-basic';
export { speciesViolinBasicPolar } from './species-violin-basic-polar';
export { unemploymentLineMultiSeries } from './unemployment-line-multi-series';
diff --git a/__tests__/unit/data/ema.spec.ts b/__tests__/unit/data/ema.spec.ts
new file mode 100644
index 0000000000..e4749cba81
--- /dev/null
+++ b/__tests__/unit/data/ema.spec.ts
@@ -0,0 +1,101 @@
+import { EMA } from '../../../src/data';
+
+describe('EMA', () => { // Unit tests for the EMA data transform factory.
+ it('EMA({...}) returns a function that is used to exponentially smooth the data', async () => {
+ const transform = EMA({ alpha: 0.6, field: 'y' });
+ const data = [
+ { x: 1, y: 2 },
+ { x: 4, y: 5 },
+ { x: 5, y: 8 },
+ ];
+ const r = await transform(data); // The other cases call the transform without await.
+ r.forEach((d, i) => { // Row 0 keeps its value; later rows must differ from the raw input.
+ if (i > 0) {
+ expect(d.y).not.toBe(data[i].y);
+ } else {
+ expect(d.y).toBe(data[i].y);
+ }
+ expect(d.x).toBe(data[i].x); // Fields other than `field` are untouched.
+ });
+ });
+
+ it('The "field" field determines the smoothed data', () => {
+ const transform = EMA({ alpha: 0.6, field: 'x' });
+ const data = [
+ { x: 1, y: 2 },
+ { x: 4, y: 5 },
+ { x: 5, y: 8 },
+ ];
+ const r = transform(data);
+ r.forEach((d, i) => { // Mirror of the previous case with the roles of x and y swapped.
+ if (i > 0) {
+ expect(d.x).not.toBe(data[i].x);
+ } else {
+ expect(d.x).toBe(data[i].x);
+ }
+ expect(d.y).toBe(data[i].y);
+ });
+ });
+
+ it('The as field will avoid overwriting the original data', () => {
+ const transform = EMA({ alpha: 0.6, field: 'y', as: 'smooth' });
+ const data = [
+ { x: 1, y: 2 },
+ { x: 4, y: 5 },
+ { x: 5, y: 8 },
+ ];
+ const r = transform(data);
+ expect(r).toEqual([
+ {
+ x: 1,
+ y: 2,
+ smooth: 2, // First value seeds the average.
+ },
+ {
+ x: 4,
+ y: 5,
+ smooth: 3.2, // 0.6 * 2 + 0.4 * 5
+ },
+ {
+ x: 5,
+ y: 8,
+ smooth: 5.12, // 0.6 * 3.2 + 0.4 * 8
+ },
+ ]);
+ });
+
+ it('should handle missing field values', function () {
+ const data = [{ x: 1 }, { y: 2 }, { y: 3 }, { x: 4 }]; // Rows 0 and 3 have no `y`.
+ const result = EMA({ field: 'y' })(data);
+ expect(result[0].y).toBe(undefined);
+ expect(result[1].y).not.toBe(undefined);
+ expect(result[2].y).not.toBe(undefined);
+ expect(result[3].y).toBe(undefined);
+ });
+
+ it('should handle missing alpha value', function () {
+ const data = [{ y: 1 }, { y: 2 }, { y: 3 }];
+ const r = EMA({ field: 'y' })(data); // No alpha is passed here.
+ r.forEach((d, i) => {
+ if (i > 0) {
+ expect(d.y).not.toBe(data[i].y);
+ } else {
+ expect(d.y).toBe(data[i].y);
+ }
+ });
+ });
+ it('The value of alpha should be greater than zero and less than one', function () {
+ const data = [{ y: 1 }, { y: 2 }, { y: 3 }];
+ let alpha = 1.1; // Only out-of-range values are checked; alpha = 0 and alpha = 1 are not exercised.
+ expect(() => EMA({ field: 'y', alpha })(data)).toThrowError();
+
+ alpha = -0.1;
+ expect(() => EMA({ field: 'y', alpha })(data)).toThrowError();
+ });
+
+ it('Returns an empty array if entered', function () {
+ const data = []; // Empty input must yield empty output.
+ const result = EMA({ field: 'y' })(data);
+ expect(result).toEqual([]);
+ });
+});
diff --git a/__tests__/unit/lib/core.spec.ts b/__tests__/unit/lib/core.spec.ts
index 225cba8c6c..43e7b1f0dc 100644
--- a/__tests__/unit/lib/core.spec.ts
+++ b/__tests__/unit/lib/core.spec.ts
@@ -156,6 +156,7 @@ import {
KDE,
Log as DataLog,
WordCloud,
+ EMA,
} from '../../../src/data';
import {
OverflowHide,
@@ -182,6 +183,7 @@ describe('corelib', () => {
'data.join': Join,
'data.kde': KDE,
'data.log': DataLog,
+ 'data.ema': EMA,
'data.wordCloud': WordCloud,
'transform.stackY': StackY,
'transform.binX': BinX,
diff --git a/__tests__/unit/lib/std.spec.ts b/__tests__/unit/lib/std.spec.ts
index 9ded77f4f7..b05314eaff 100644
--- a/__tests__/unit/lib/std.spec.ts
+++ b/__tests__/unit/lib/std.spec.ts
@@ -170,6 +170,7 @@ import {
Arc,
Log as DataLog,
WordCloud,
+ EMA,
} from '../../../src/data';
import {
OverflowHide,
@@ -196,6 +197,7 @@ describe('stdlib', () => {
'data.join': Join,
'data.kde': KDE,
'data.venn': Venn,
+ 'data.ema': EMA,
'data.wordCloud': WordCloud,
'data.cluster': Cluster,
'data.arc': Arc,
diff --git a/site/docs/spec/data/ema.en.md b/site/docs/spec/data/ema.en.md
new file mode 100644
index 0000000000..a13951ead9
--- /dev/null
+++ b/site/docs/spec/data/ema.en.md
@@ -0,0 +1,6 @@
+---
+title: ema
+order: 1
+---
+
+
diff --git a/site/docs/spec/data/ema.zh.md b/site/docs/spec/data/ema.zh.md
new file mode 100644
index 0000000000..58db6c999e
--- /dev/null
+++ b/site/docs/spec/data/ema.zh.md
@@ -0,0 +1,116 @@
+---
+title: ema
+order: 1
+---
+
+
+EMA(Exponential Moving Average,指数移动平均)是一种常用的平滑算法,用于计算数据的指数移动平均值。它通过给较近的数据赋予更高的权重来平滑数据,从而减少噪声和波动。
+
+在模型训练中,可以使用EMA来平滑数据,观察数据变化趋势。
+
+如下公式所示,α 越大,历史值所占权重越高,平滑效果越明显:
+
+
+$EMA_t = (1 - \alpha) \cdot P_t + \alpha \cdot EMA_{t-1}$
+
+具体细节可参考[文档](https://en.wikipedia.org/wiki/Exponential_smoothing)
+
+
+
+## 开始使用
+
+```ts
+const data = [
+ { x: 1, y: 2 },
+ { x: 4, y: 5 },
+ { x: 5, y: 8 },
+];
+
+chart
+ .data({
+ type: 'inline',
+ value: data,
+ transform: [
+ {
+ type: 'ema',
+ field: 'y',
+ alpha: 0.6,
+ as: 'other'
+ },
+ ],
+ });
+```
+
+上述例子处理之后,数据变为(由于指定了 `as: 'other'`,原字段 `y` 保持不变):
+
+```js
+[
+ {
+ "x": 1,
+ "y": 2,
+ "other": 2,
+ },
+ {
+ "x": 4,
+ "y": 5,
+ "other": 3.2,
+ },
+ {
+ "x": 5,
+ "y": 8,
+ "other": 5.12,
+ }
+];
+```
+
+## 示例
+
+```js | ob
+(() => {
+ const chart = new G2.Chart();
+
+ chart.options({
+ type: 'view',
+ children: [
+ {
+ type: 'line',
+ data: {
+ type: 'fetch',
+ value: 'https://gw.alipayobjects.com/os/bmw-prod/551d80c6-a6be-4f3c-a82a-abd739e12977.csv',
+ transform: [
+ {
+ type: 'ema',
+ field: 'close',
+ alpha: 0.8,
+ },
+ ],
+ },
+ },
+ {
+ type: 'line',
+ style: {
+ opacity: 0.3,
+ },
+ data: {
+ type: 'fetch',
+ value: 'https://gw.alipayobjects.com/os/bmw-prod/551d80c6-a6be-4f3c-a82a-abd739e12977.csv',
+ },
+ },
+ ],
+ encode: {
+ x: 'date',
+ y: 'close',
+ },
+ });
+
+ return chart.render().then((chart) => chart.getContainer());
+})();
+```
+
+## 选项
+
+| 属性 | 描述 | 类型 | 默认值|
+| -------------| ----------------------------------------------------------- | -----------------------------| --------------------|
+| field | 需要平滑的字段名 | string | y |
+| alpha | 平滑因子,取值范围为 [0, 1],值越大平滑效果越明显 | number | 0.6 |
+| as | 存储平滑结果的字段,默认与 field 相同;可指定其他字段以避免覆盖原字段数据 | string | 同 field |
diff --git a/src/data/ema.ts b/src/data/ema.ts
new file mode 100644
index 0000000000..2642146aab
--- /dev/null
+++ b/src/data/ema.ts
@@ -0,0 +1,62 @@
+import { DataComponent as DC } from '../runtime';
+import { EMADataTransform } from '../spec';
+
+export function ema(values: number[], alpha: number): number[] { // Exponential smoothing: out[i] = alpha * out[i - 1] + (1 - alpha) * values[i].
+ if (alpha < 0 || alpha > 1) { // Bounds are inclusive: 0 and 1 are accepted. NOTE(review): NaN also passes this check.
+ throw new Error('alpha must be between 0 and 1.');
+ }
+ if (values.length === 0) {
+ return [];
+ }
+
+ let last = values[0]; // Previous smoothed value; seeded with the first input, which may itself be missing.
+ const smoothed: number[] = [];
+
+ for (const point of values) {
+ if (point === null || point === undefined) {
+ // Missing value: it is emitted unchanged (not filled in) and `last` is left as it was.
+ smoothed.push(point);
+ console.warn('EMA:The value is null or undefined', values); // NOTE(review): logs the whole input once per missing entry.
+ continue;
+ }
+
+ if (last === null || last === undefined) { // No usable history yet: the current value seeds the average.
+ last = point;
+ }
+
+ const smoothedVal = last * alpha + (1 - alpha) * point; // A larger alpha weights history more, i.e. smooths more.
+ smoothed.push(smoothedVal);
+ last = smoothedVal;
+ }
+
+ return smoothed; // Same length as `values`; missing entries stay missing.
+}
+
+export type EMAOptions = Omit<EMADataTransform, 'type'>; // Spec options without the `type` discriminator.
+
+/**
+ * Exponential-moving-average data transform; see https://en.wikipedia.org/wiki/Exponential_smoothing
+ * @param options `field` to smooth (default 'y'), `alpha` in [0, 1] (default 0.6), `as` output field (default: `field`).
+ * @returns A function mapping each row to a shallow copy whose `as` field holds the smoothed value.
+ */
+
+export const EMA: DC<EMAOptions> = (options) => {
+ const { field = 'y', alpha = 0.6, as = field } = options; // `as` falls back to `field`, which overwrites the source field.
+
+ return (data) => {
+ const values = data.map((d) => {
+ return d[field]; // Rows lacking `field` contribute undefined, which `ema` passes through.
+ });
+
+ const out = ema(values, alpha);
+
+ return data.map((d, i) => {
+ return {
+ ...d, // Copy the row so the input data is not mutated.
+ [as]: out[i],
+ };
+ });
+ };
+};
+
+EMA.props = {};
diff --git a/src/data/index.ts b/src/data/index.ts
index 204daaa9ce..8be36e13a6 100644
--- a/src/data/index.ts
+++ b/src/data/index.ts
@@ -18,6 +18,7 @@ export { Slice } from './slice';
export { KDE } from './kde';
export { Venn } from './venn';
export { Log } from './log';
+export { EMA } from './ema';
export type { FetchOptions } from './fetch';
export type { FoldOptions } from './fold';
@@ -39,3 +40,4 @@ export type { SliceOptions } from './slice';
export type { KDEOptions } from './kde';
export type { VennOptions } from './venn';
export type { LogDataOptions } from './log';
+export type { EMAOptions } from './ema';
diff --git a/src/lib/core.ts b/src/lib/core.ts
index c1f01003c0..f96820b7a3 100644
--- a/src/lib/core.ts
+++ b/src/lib/core.ts
@@ -154,6 +154,7 @@ import {
Sort as DataSort,
KDE as DataKDE,
Log as DataLog,
+ EMA as DataEMA,
WordCloud,
} from '../data';
import {
@@ -181,6 +182,7 @@ export function corelib() {
'data.kde': DataKDE,
'data.log': DataLog,
'data.wordCloud': WordCloud,
+ 'data.ema': DataEMA,
'transform.stackY': StackY,
'transform.binX': BinX,
'transform.bin': Bin,
diff --git a/src/spec/dataTransform.ts b/src/spec/dataTransform.ts
index 08312cece5..fcafe8d5fb 100644
--- a/src/spec/dataTransform.ts
+++ b/src/spec/dataTransform.ts
@@ -13,7 +13,8 @@ export type DataTransform =
| KDEDataTransform
| VennDataTransform
| LogDataTransform
- | CustomTransform;
+ | CustomTransform
+ | EMADataTransform;
export type DataTransformTypes =
| 'sortBy'
@@ -29,6 +30,7 @@ export type DataTransformTypes =
| 'venn'
| 'log'
| 'custom'
+ | 'ema'
| DataComponent;
export type SortByTransform = {
@@ -188,3 +190,10 @@ export type CustomTransform = {
type?: DataComponent;
[key: string]: any;
};
+
+export type EMADataTransform = {
+ type?: 'ema';
+ field?: string; // The field to be smoothed, default: 'y'
+ alpha?: number; // Smoothing factor in [0, 1], default: 0.6; larger values smooth more
+ as?: string; // Field that receives the smoothed value, default: the value of `field`
+};