Commit f22f0a89 by Marcus Andersson Committed by GitHub

Transformations: merge will properly handle empty frames and frames with…

Transformations: merge will properly handle empty frames and frames with multiple rows where values are overlapping. (#27362)

* wip.

* Fixed issue with merge not behaving exactly as the old table panel did.

* Fixed so empty data frames will be excluded prior to trying to merge the result.

* Changed so that if only empty frames are passed, only the first one is returned.

* de-duplication of configuration in tests.
parent 5c31f079
......@@ -7,16 +7,16 @@ import { ArrayVector } from '../../vector';
import { mergeTransformer, MergeTransformerOptions } from './merge';
describe('Merge multipe to single', () => {
const cfg: DataTransformerConfig<MergeTransformerOptions> = {
id: DataTransformerID.merge,
options: {},
};
beforeAll(() => {
mockTransformationsRegistry([mergeTransformer]);
});
it('combine two series into one', () => {
const cfg: DataTransformerConfig<MergeTransformerOptions> = {
id: DataTransformerID.merge,
options: {},
};
const seriesA = toDataFrame({
name: 'A',
fields: [
......@@ -43,11 +43,6 @@ describe('Merge multipe to single', () => {
});
it('combine two series with multiple values into one', () => {
const cfg: DataTransformerConfig<MergeTransformerOptions> = {
id: DataTransformerID.merge,
options: {},
};
const seriesA = toDataFrame({
name: 'A',
fields: [
......@@ -74,11 +69,6 @@ describe('Merge multipe to single', () => {
});
it('combine three series into one', () => {
const cfg: DataTransformerConfig<MergeTransformerOptions> = {
id: DataTransformerID.merge,
options: {},
};
const seriesA = toDataFrame({
name: 'A',
fields: [
......@@ -113,11 +103,6 @@ describe('Merge multipe to single', () => {
});
it('combine one serie and two tables into one table', () => {
const cfg: DataTransformerConfig<MergeTransformerOptions> = {
id: DataTransformerID.merge,
options: {},
};
const tableA = toDataFrame({
name: 'A',
fields: [
......@@ -155,11 +140,6 @@ describe('Merge multipe to single', () => {
});
it('combine one serie and two tables with ISO dates into one table', () => {
const cfg: DataTransformerConfig<MergeTransformerOptions> = {
id: DataTransformerID.merge,
options: {},
};
const tableA = toDataFrame({
name: 'A',
fields: [
......@@ -196,12 +176,118 @@ describe('Merge multipe to single', () => {
expect(unwrap(result[0].fields)).toEqual(expected);
});
it('combine three tables with multiple values into one', () => {
const cfg: DataTransformerConfig<MergeTransformerOptions> = {
id: DataTransformerID.merge,
options: {},
};
// Frame A holds Country/AgeGroup/Sum rows and frame B holds one Count per AgeGroup.
// B's '18 - 24' group does not occur in A, so it is expected as a trailing row
// with null Country and Sum.
it('combine two tables, where first is partial overlapping, into one', () => {
  const countries = ['United States', 'United States', 'Mexico', 'Germany', 'Canada', 'Canada'];
  const ageGroupPerCountry = ['50 or over', '35 - 49', '0 - 17', '35 - 49', '35 - 49', '25 - 34'];
  const sums = [998, 1193, 1675, 146, 166, 219];

  // Copies are handed to the frames so the expectation below never shares arrays with the input.
  const sumsFrame = toDataFrame({
    name: 'A',
    fields: [
      { name: 'Country', type: FieldType.string, values: [...countries] },
      { name: 'AgeGroup', type: FieldType.string, values: [...ageGroupPerCountry] },
      { name: 'Sum', type: FieldType.number, values: [...sums] },
    ],
  });

  const countsFrame = toDataFrame({
    name: 'B',
    fields: [
      { name: 'AgeGroup', type: FieldType.string, values: ['0 - 17', '18 - 24', '25 - 34', '35 - 49', '50 or over'] },
      { name: 'Count', type: FieldType.number, values: [1, 3, 2, 4, 2] },
    ],
  });

  const merged = transformDataFrame([cfg], [sumsFrame, countsFrame]);

  const expected: Field[] = [
    createField('Country', FieldType.string, [...countries, null]),
    createField('AgeGroup', FieldType.string, [...ageGroupPerCountry, '18 - 24']),
    createField('Sum', FieldType.number, [...sums, null]),
    createField('Count', FieldType.number, [2, 4, 1, 4, 4, 2, 3]),
  ];

  expect(unwrap(merged[0].fields)).toEqual(expected);
});
it('combine two tables, where second is partial overlapping, into one', () => {
  /**
   * The outcome depends on the order of the input frames, which is surprising:
   * one would expect the same rows either way. The old table panel produced
   * exactly this result though, so the expectation below pins that behavior.
   */
  const ageGroups = ['0 - 17', '18 - 24', '25 - 34', '35 - 49', '50 or over'];
  const counts = [1, 3, 2, 4, 2];

  // Copies are handed to the frames so the expectation below never shares arrays with the input.
  const countsFrame = toDataFrame({
    name: 'A',
    fields: [
      { name: 'AgeGroup', type: FieldType.string, values: [...ageGroups] },
      { name: 'Count', type: FieldType.number, values: [...counts] },
    ],
  });

  const sumsFrame = toDataFrame({
    name: 'B',
    fields: [
      {
        name: 'Country',
        type: FieldType.string,
        values: ['United States', 'United States', 'Mexico', 'Germany', 'Canada', 'Canada'],
      },
      {
        name: 'AgeGroup',
        type: FieldType.string,
        values: ['50 or over', '35 - 49', '0 - 17', '35 - 49', '35 - 49', '25 - 34'],
      },
      { name: 'Sum', type: FieldType.number, values: [998, 1193, 1675, 146, 166, 219] },
    ],
  });

  const merged = transformDataFrame([cfg], [countsFrame, sumsFrame]);

  // The first five rows follow frame A's order; the two extra '35 - 49' rows carry no Count.
  const expected: Field[] = [
    createField('AgeGroup', FieldType.string, [...ageGroups, '35 - 49', '35 - 49']),
    createField('Count', FieldType.number, [...counts, null, null]),
    createField('Country', FieldType.string, [
      'Mexico',
      null,
      'Canada',
      'United States',
      'United States',
      'Germany',
      'Canada',
    ]),
    createField('Sum', FieldType.number, [1675, null, 219, 1193, 998, 146, 166]),
  ];

  expect(unwrap(merged[0].fields)).toEqual(expected);
});
it('combine three tables with multiple values into one', () => {
const tableA = toDataFrame({
name: 'A',
fields: [
......@@ -242,11 +328,6 @@ describe('Merge multipe to single', () => {
});
it('combine two time series, where first serie fields has displayName, into one', () => {
const cfg: DataTransformerConfig<MergeTransformerOptions> = {
id: DataTransformerID.merge,
options: {},
};
const serieA = toDataFrame({
name: 'A',
fields: [
......@@ -276,11 +357,6 @@ describe('Merge multipe to single', () => {
});
it('combine two time series, where first serie fields has units, into one', () => {
const cfg: DataTransformerConfig<MergeTransformerOptions> = {
id: DataTransformerID.merge,
options: {},
};
const serieA = toDataFrame({
name: 'A',
fields: [
......@@ -310,11 +386,6 @@ describe('Merge multipe to single', () => {
});
it('combine two time series, where second serie fields has units, into one', () => {
const cfg: DataTransformerConfig<MergeTransformerOptions> = {
id: DataTransformerID.merge,
options: {},
};
const serieA = toDataFrame({
name: 'A',
fields: [
......@@ -342,6 +413,91 @@ describe('Merge multipe to single', () => {
expect(fields[1].config).toEqual({});
expect(fields).toEqual(expected);
});
// A frame without fields next to a populated one: the populated frame's fields are expected unchanged.
it('combine one regular serie with an empty serie should return the regular serie', () => {
  const populated = toDataFrame({
    name: 'A',
    fields: [
      { name: 'Time', type: FieldType.time, values: [100, 150, 200] },
      { name: 'Temp', type: FieldType.number, values: [1, 4, 5] },
    ],
  });
  const withoutFields = toDataFrame({ name: 'B', fields: [] });

  const merged = transformDataFrame([cfg], [populated, withoutFields]);

  const expected: Field[] = [
    createField('Time', FieldType.time, [100, 150, 200]),
    createField('Temp', FieldType.number, [1, 4, 5]),
  ];
  const mergedFields = unwrap(merged[0].fields);

  expect(mergedFields[1].config).toEqual({});
  expect(mergedFields).toEqual(expected);
});
// The field-less frame sits between two populated frames that share the Time field.
it('combine two regular series with an empty serie should return the combination of the regular series', () => {
  const temperatures = toDataFrame({
    name: 'A',
    fields: [
      { name: 'Time', type: FieldType.time, values: [100, 150, 200] },
      { name: 'Temp', type: FieldType.number, values: [1, 4, 5] },
    ],
  });
  const withoutFields = toDataFrame({ name: 'B', fields: [] });
  const humidities = toDataFrame({
    name: 'C',
    fields: [
      { name: 'Time', type: FieldType.time, values: [100, 150, 200] },
      { name: 'Humidity', type: FieldType.number, values: [6, 7, 8] },
    ],
  });

  const merged = transformDataFrame([cfg], [temperatures, withoutFields, humidities]);

  const expected: Field[] = [
    createField('Time', FieldType.time, [100, 150, 200]),
    createField('Temp', FieldType.number, [1, 4, 5]),
    createField('Humidity', FieldType.number, [6, 7, 8]),
  ];
  const mergedFields = unwrap(merged[0].fields);

  expect(mergedFields[1].config).toEqual({});
  expect(mergedFields).toEqual(expected);
});
// Only field-less frames are passed in; exactly one frame without fields is expected back.
it('combine multiple empty series should return one empty serie', () => {
  const emptyFrames = ['A', 'B', 'C'].map(name => toDataFrame({ name, fields: [] }));

  const merged = transformDataFrame([cfg], emptyFrames);

  const expected: Field[] = [];
  const mergedFields = unwrap(merged[0].fields);

  expect(mergedFields).toEqual(expected);
  expect(merged.length).toEqual(1);
});
});
const createField = (name: string, type: FieldType, values: any[], config = {}): Field => {
......
......@@ -5,7 +5,10 @@ import { omit } from 'lodash';
import { ArrayVector } from '../../vector/ArrayVector';
import { MutableDataFrame } from '../../dataframe';
/**
 * Signature of a function that receives an already stored row (`existing`) and an
 * incoming row (`value`), both keyed by field name, and returns a string.
 */
type MergeDetailsKeyFactory = (existing: Record<string, any>, value: Record<string, any>) => string;
/**
 * Locates one row inside a `Record<string, Array<...>>` of rows grouped by key:
 * `key` selects the group and `index` the position within that group's array.
 */
interface ValuePointer {
key: string;
index: number;
}
/** Options type of the merge transformer; it currently declares no members. */
export interface MergeTransformerOptions {}
......@@ -15,12 +18,18 @@ export const mergeTransformer: DataTransformerInfo<MergeTransformerOptions> = {
description: 'Merges multiple series/tables into a single serie/table',
defaultOptions: {},
transformer: (options: MergeTransformerOptions) => {
return (data: DataFrame[]) => {
if (!Array.isArray(data) || data.length <= 1) {
return data;
return (dataFrames: DataFrame[]) => {
if (!Array.isArray(dataFrames) || dataFrames.length === 0) {
return dataFrames;
}
const fieldByName = new Set<string>();
const data = dataFrames.filter(frame => frame.fields.length > 0);
if (data.length === 0) {
return [dataFrames[0]];
}
const fieldNames = new Set<string>();
const fieldIndexByName: Record<string, Record<number, number>> = {};
const fieldNamesForKey: string[] = [];
const dataFrame = new MutableDataFrame();
......@@ -31,9 +40,9 @@ export const mergeTransformer: DataTransformerInfo<MergeTransformerOptions> = {
for (let fieldIndex = 0; fieldIndex < frame.fields.length; fieldIndex++) {
const field = frame.fields[fieldIndex];
if (!fieldByName.has(field.name)) {
if (!fieldNames.has(field.name)) {
dataFrame.addField(copyFieldStructure(field));
fieldByName.add(field.name);
fieldNames.add(field.name);
}
fieldIndexByName[field.name] = fieldIndexByName[field.name] || {};
......@@ -43,20 +52,20 @@ export const mergeTransformer: DataTransformerInfo<MergeTransformerOptions> = {
continue;
}
if (Object.keys(fieldIndexByName[field.name]).length === data.length) {
if (fieldExistsInAllFrames(fieldIndexByName, field, data)) {
fieldNamesForKey.push(field.name);
}
}
}
if (fieldNamesForKey.length === 0) {
return data;
return dataFrames;
}
const dataFrameIndexByKey: Record<string, number> = {};
const valuesByKey: Record<string, Array<Record<string, any>>> = {};
const valuesInOrder: ValuePointer[] = [];
const keyFactory = createKeyFactory(data, fieldIndexByName, fieldNamesForKey);
const detailsKeyFactory = createDetailsKeyFactory(fieldByName, fieldNamesForKey);
const valueMapper = createValueMapper(data, fieldByName, fieldIndexByName);
const valueMapper = createValueMapper(data, fieldNames, fieldIndexByName);
for (let frameIndex = 0; frameIndex < data.length; frameIndex++) {
const frame = data[frameIndex];
......@@ -64,14 +73,38 @@ export const mergeTransformer: DataTransformerInfo<MergeTransformerOptions> = {
for (let valueIndex = 0; valueIndex < frame.length; valueIndex++) {
const key = keyFactory(frameIndex, valueIndex);
const value = valueMapper(frameIndex, valueIndex);
mergeOrAdd(key, value, dataFrame, dataFrameIndexByKey, detailsKeyFactory);
if (!Array.isArray(valuesByKey[key])) {
valuesByKey[key] = [value];
valuesInOrder.push(createPointer(key, valuesByKey));
continue;
}
let valueWasMerged = false;
valuesByKey[key] = valuesByKey[key].map(existing => {
if (!isMergable(existing, value)) {
return existing;
}
valueWasMerged = true;
return { ...existing, ...value };
});
if (!valueWasMerged) {
valuesByKey[key].push(value);
valuesInOrder.push(createPointer(key, valuesByKey));
}
}
}
for (const pointer of valuesInOrder) {
const value = valuesByKey[pointer.key][pointer.index];
if (value) {
dataFrame.add(value, false);
}
}
// const timeIndex = dataFrame.fields.findIndex(field => field.type === FieldType.time);
// if (typeof timeIndex === 'number') {
// return [sortDataFrame(dataFrame, timeIndex, true)];
// }
return [dataFrame];
};
},
......@@ -107,30 +140,6 @@ const createKeyFactory = (
};
};
/**
 * Creates a function that builds a string from the fields on which two rows disagree.
 *
 * Only fields listed in `fieldByName` that are NOT part of `fieldNamesForKey` are
 * compared, in the iteration order of `fieldByName`.
 *
 * @param fieldByName - names of all known fields.
 * @param fieldNamesForKey - names of the key fields, which are skipped in the comparison.
 * @returns a function that, given an existing row and an incoming row, concatenates the
 * incoming values of every compared field that is defined in both rows and differs
 * between them. An empty string means no such field was found.
 */
const createDetailsKeyFactory = (fieldByName: Set<string>, fieldNamesForKey: string[]): MergeDetailsKeyFactory => {
  // A Set is used rather than a plain object lookup: with `{}` as a dictionary, a field
  // named like an Object.prototype member ("constructor", "toString", ...) would read as
  // truthy and be dropped from the comparison even though it is not a key field.
  const fieldNamesToExclude = new Set(fieldNamesForKey);
  const checkOrder = Array.from(fieldByName).filter(fieldName => !fieldNamesToExclude.has(fieldName));

  return (existing: Record<string, any>, value: Record<string, any>): string => {
    let key = '';

    for (const fieldName of checkOrder) {
      const existingValue = existing[fieldName];
      const incomingValue = value[fieldName];

      // A field missing on either side cannot distinguish the two rows.
      if (typeof existingValue === 'undefined' || typeof incomingValue === 'undefined') {
        continue;
      }

      if (existingValue === incomingValue) {
        continue;
      }

      key = key + incomingValue;
    }

    return key;
  };
};
const createValueMapper = (
data: DataFrame[],
fieldByName: Set<string>,
......@@ -189,28 +198,17 @@ const isMergable = (existing: Record<string, any>, value: Record<string, any>):
return mergable;
};
const mergeOrAdd = (
key: string,
value: Record<string, any>,
dataFrame: MutableDataFrame,
dataFrameIndexByKey: Record<string, number>,
detailsKeyFactory: MergeDetailsKeyFactory
const fieldExistsInAllFrames = (
fieldIndexByName: Record<string, Record<number, number>>,
field: Field,
data: DataFrame[]
) => {
if (typeof dataFrameIndexByKey[key] === 'undefined') {
dataFrame.add(value);
dataFrameIndexByKey[key] = dataFrame.length - 1;
return;
}
const dataFrameIndex = dataFrameIndexByKey[key];
const existing = dataFrame.get(dataFrameIndex);
if (isMergable(existing, value)) {
const merged = { ...existing, ...value };
dataFrame.set(dataFrameIndex, merged);
return;
}
return Object.keys(fieldIndexByName[field.name]).length === data.length;
};
const nextKey = key + detailsKeyFactory(existing, value);
mergeOrAdd(nextKey, value, dataFrame, dataFrameIndexByKey, detailsKeyFactory);
/**
 * Builds a pointer to the last row currently stored under `key`.
 *
 * @param key - group to point into; `valuesByKey[key]` must already be an array.
 * @param valuesByKey - rows grouped by key.
 * @returns the key together with the index of the last element of its group.
 */
const createPointer = (key: string, valuesByKey: Record<string, Array<Record<string, any>>>): ValuePointer => {
  const lastIndex = valuesByKey[key].length - 1;
  return { key, index: lastIndex };
};
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment