Commit 0112bdeb by Ryan McKinley Committed by Torkel Ödegaard

Feat: More robust csv support (#16170)

* stream csv

* merged master

* merged master

* fix test failures

* add csv files

* update boolean parsing

* add toCSV

* add toCSV

* add toCSV

* add streaming datasource

* set time range

* streaming to a graph

* streaming datasource

* streaming table

* add server to the streaming

* remove react streaming
parent 749c76f2
......@@ -15,9 +15,9 @@ TableInputStories.add('default', () => {
<div style={{ width: '90%', height: '90vh' }}>
<TableInputCSV
text={'a,b,c\n1,2,3'}
onTableParsed={(table: SeriesData, text: string) => {
console.log('Table', table, text);
action('Table')(table, text);
onSeriesParsed={(data: SeriesData[], text: string) => {
console.log('Data', data, text);
action('Data')(data, text);
}}
/>
</div>
......
......@@ -10,7 +10,7 @@ describe('TableInputCSV', () => {
.create(
<TableInputCSV
text={'a,b,c\n1,2,3'}
onTableParsed={(table: SeriesData, text: string) => {
onSeriesParsed={(data: SeriesData[], text: string) => {
// console.log('Table:', table, 'from:', text);
}}
/>
......
import React from 'react';
import debounce from 'lodash/debounce';
import { parseCSV, TableParseOptions, TableParseDetails } from '../../utils/processSeriesData';
import { SeriesData } from '../../types/data';
import { AutoSizer } from 'react-virtualized';
import { CSVConfig, readCSV } from '../../utils/csv';
interface Props {
options?: TableParseOptions;
config?: CSVConfig;
text: string;
onTableParsed: (table: SeriesData, text: string) => void;
onSeriesParsed: (data: SeriesData[], text: string) => void;
}
interface State {
text: string;
table: SeriesData;
details: TableParseDetails;
data: SeriesData[];
}
/**
......@@ -23,68 +22,61 @@ class TableInputCSV extends React.PureComponent<Props, State> {
constructor(props: Props) {
super(props);
// Should this happen in componentDidMount?
const { text, options, onTableParsed } = props;
const details = {};
const table = parseCSV(text, options, details);
const { text, config } = props;
this.state = {
text,
table,
details,
data: readCSV(text, { config }),
};
onTableParsed(table, text);
}
readCSV = debounce(() => {
const details = {};
const table = parseCSV(this.state.text, this.props.options, details);
this.setState({ table, details });
const { config } = this.props;
const { text } = this.state;
this.setState({ data: readCSV(text, { config }) });
}, 150);
componentDidUpdate(prevProps: Props, prevState: State) {
const { text } = this.state;
if (text !== prevState.text || this.props.options !== prevProps.options) {
if (text !== prevState.text || this.props.config !== prevProps.config) {
this.readCSV();
}
// If the props text has changed, replace our local version
if (this.props.text !== prevProps.text && this.props.text !== text) {
this.setState({ text: this.props.text });
}
if (this.state.table !== prevState.table) {
this.props.onTableParsed(this.state.table, this.state.text);
if (this.state.data !== prevState.data) {
this.props.onSeriesParsed(this.state.data, this.state.text);
}
}
onFooterClicked = (event: any) => {
console.log('Errors', this.state);
const message = this.state.details
.errors!.map(err => {
return err.message;
})
.join('\n');
alert('CSV Parsing Errors:\n' + message);
};
onTextChange = (event: any) => {
this.setState({ text: event.target.value });
};
render() {
const { table, details } = this.state;
const hasErrors = details.errors && details.errors.length > 0;
const footerClassNames = hasErrors ? 'gf-table-input-csv-err' : '';
const { data } = this.state;
return (
<AutoSizer>
{({ height, width }) => (
<div className="gf-table-input-csv" style={{ width, height }}>
<textarea placeholder="Enter CSV here..." value={this.state.text} onChange={this.onTextChange} />
<footer onClick={this.onFooterClicked} className={footerClassNames}>
Rows:{table.rows.length}, Columns:{table.fields.length} &nbsp;
{hasErrors ? <i className="fa fa-exclamation-triangle" /> : <i className="fa fa-check-circle" />}
{data && (
<footer>
{data.map((series, index) => {
return (
<span key={index}>
Rows:{series.rows.length}, Columns:{series.fields.length} &nbsp;
<i className="fa fa-check-circle" />
</span>
);
})}
</footer>
)}
</div>
)}
</AutoSizer>
......
......@@ -18,7 +18,3 @@
font-size: 80%;
cursor: pointer;
}
.gf-table-input-csv footer.gf-table-input-csv-err {
background: yellow;
}
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`processSeriesData basic processing should generate a header and fix widths 1`] = `
exports[`read csv should get X and y 1`] = `
Object {
"fields": Array [
Object {
"name": "Field 1",
"name": "Column 1",
"type": "number",
},
Object {
"name": "Field 2",
"name": "Column 2",
"type": "number",
},
Object {
"name": "Field 3",
"name": "Column 3",
"type": "number",
},
Object {
"name": "Field 4",
"type": "number",
},
],
"rows": Array [
Array [
1,
null,
null,
],
Array [
2,
3,
4,
null,
],
Array [
5,
6,
null,
null,
],
Array [
null,
null,
null,
7,
],
],
}
`;
exports[`processSeriesData basic processing should read header and two rows 1`] = `
exports[`read csv should read csv from local file system 1`] = `
Object {
"fields": Array [
Object {
"name": "a",
"type": "number",
},
Object {
"name": "b",
"type": "number",
},
Object {
"name": "c",
"type": "number",
},
],
"rows": Array [
Array [
1,
2,
3,
10,
20,
30,
],
Array [
4,
5,
6,
40,
50,
60,
],
],
}
`;
exports[`read csv should read csv with headers 1`] = `
Object {
"fields": Array [
Object {
"name": "a",
"type": "number",
"unit": "ms",
},
Object {
"name": "b",
"type": "string",
"unit": "lengthm",
},
Object {
"name": "c",
"type": "boolean",
"unit": "s",
},
],
"rows": Array [
Array [
10,
"20",
true,
],
Array [
40,
"50",
false,
],
Array [
40,
"500",
false,
],
Array [
40,
"50",
true,
],
],
}
......
import { readCSV, toCSV, CSVHeaderStyle } from './csv';
// Test with local CSV files
const fs = require('fs');
// Reading tests: parse inline text and fixture files, then compare the first
// (and only) returned series against the stored snapshots.
describe('read csv', () => {
it('should get X and y', () => {
// Ragged input with no header line: four lines of differing widths.
// The assertions below expect 4 fields but only 3 rows.
const text = ',1\n2,3,4\n5,6\n,,,7';
const data = readCSV(text);
expect(data.length).toBe(1);
const series = data[0];
expect(series.fields.length).toBe(4);
expect(series.rows.length).toBe(3);
// Make sure everything is padded properly
for (const row of series.rows) {
expect(row.length).toBe(series.fields.length);
}
expect(series).toMatchSnapshot();
});
it('should read csv from local file system', () => {
// The fixture is resolved relative to this test file
const path = __dirname + '/testdata/simple.csv';
expect(fs.existsSync(path)).toBeTruthy();
const csv = fs.readFileSync(path, 'utf8');
const data = readCSV(csv);
expect(data.length).toBe(1);
expect(data[0]).toMatchSnapshot();
});
it('should read csv with headers', () => {
// Same flow as above, using the withHeaders.csv fixture
const path = __dirname + '/testdata/withHeaders.csv';
expect(fs.existsSync(path)).toBeTruthy();
const csv = fs.readFileSync(path, 'utf8');
const data = readCSV(csv);
expect(data.length).toBe(1);
expect(data[0]).toMatchSnapshot();
});
});
/**
 * Normalizes CSV text for comparison: surrounding whitespace is trimmed first,
 * then every carriage return is dropped so that "\r\n" and "\n" files compare equal.
 */
function norm(csv: string): string {
  const trimmed = csv.trim();
  return trimmed.split('\r').join('');
}
// Writing tests: CSV text -> series -> CSV text must survive a round trip.
describe('write csv', () => {
it('should write the same CSV that we read', () => {
const path = __dirname + '/testdata/roundtrip.csv';
const csv = fs.readFileSync(path, 'utf8');
const data = readCSV(csv);
// Full header style is requested so the output can be compared with the fixture text
const out = toCSV(data, { headerStyle: CSVHeaderStyle.full });
expect(data.length).toBe(1);
expect(data[0].fields.length).toBe(3);
// Compare after trimming and removing carriage returns (see norm)
expect(norm(out)).toBe(norm(csv));
// Keep the name even without special formatting
const again = readCSV(out);
const shorter = toCSV(again, { headerStyle: CSVHeaderStyle.name });
const f = readCSV(shorter);
const fields = f[0].fields;
expect(fields.length).toBe(3);
expect(fields.map(f => f.name).join(',')).toEqual('a,b,c'); // the names
});
});
// Libraries
import Papa, { ParseResult, ParseConfig, Parser } from 'papaparse';
import defaults from 'lodash/defaults';
import isNumber from 'lodash/isNumber';
// Types
import { SeriesData, Field, FieldType } from '../types/index';
import { guessFieldTypeFromValue } from './processSeriesData';
/**
 * Controls which header lines toCSV writes before the rows of each series.
 */
export enum CSVHeaderStyle {
// One "#key#" line for each of name, type, unit and dateFormat that some field defines
full,
// A single line holding the field names
name,
// No header line at all
none,
}
// Subset of all parse options
// When reading, these values are spread into the PapaParse config (see CSVReader.readCSV).
// When writing, toCSV fills in the defaults noted on each line.
export interface CSVConfig {
delimiter?: string; // default: ","
newline?: string; // default: "\r\n"
quoteChar?: string; // default: '"'
encoding?: string; // default: "",
// Header layout used by toCSV; it defaults to CSVHeaderStyle.name there
headerStyle?: CSVHeaderStyle;
}
/**
 * Optional listeners for consuming a CSV as it is parsed.
 * When a callback is supplied to CSVReader, only the first row is stored in the
 * series; every row is delivered through onRow.
 */
export interface CSVParseCallbacks {
/**
 * Called with the current series before its first row is delivered, and
 * again if a later row is wider than the known fields.
 * The callback returns nothing; the series object it receives is the one the
 * reader keeps using, so column configuration can be adjusted on it in place.
 */
onHeader: (table: SeriesData) => void;
// Called after each row is read and converted to typed values
onRow: (row: any[]) => void;
}
/**
 * Options accepted by readCSV and the CSVReader constructor.
 */
export interface CSVOptions {
// Parser settings; an empty config is used when omitted
config?: CSVConfig;
// Optional streaming listeners (see CSVParseCallbacks)
callback?: CSVParseCallbacks;
}
/**
 * Parses CSV text into a list of series using a fresh CSVReader.
 *
 * @param csv the CSV text to parse
 * @param options optional parser config and streaming callbacks
 * @returns every series found in the text
 */
export function readCSV(csv: string, options?: CSVOptions): SeriesData[] {
  const reader = new CSVReader(options);
  return reader.readCSV(csv);
}
// Tracks how far CSVReader has progressed through the current table
enum ParseState {
// No header or data line has been accepted yet
Starting,
// Field definitions have been seen, but no data row yet
InHeader,
// At least one data row has been accepted
ReadingRows,
}
// Converts the raw text of a single cell into its typed value
type FieldParser = (value: string) => any;
/**
 * Converts CSV text into one or more SeriesData tables.
 *
 * PapaParse feeds the text to `step` line by line. A line whose first cell looks
 * like `#name#a`, `#type#number`, `#unit#ms` or `#dateFormat#...` sets that
 * property on every field; seeing such a line after data rows starts a new table.
 * Other lines starting with '#' are treated as comments, except that the very
 * first one (without a second '#') is used as the list of field names.
 *
 * Without a callback all rows are collected in the returned series. With a
 * callback only the first row is stored and every row is passed to `onRow`.
 */
export class CSVReader {
  /** Parser settings; spread into the PapaParse config in readCSV */
  config: CSVConfig;
  /** Optional streaming listeners */
  callback?: CSVParseCallbacks;
  /** Value parser per column index, created from the first non-empty value seen */
  field: FieldParser[];
  /** The table currently being filled */
  series: SeriesData;
  /** Progress through the current table */
  state: ParseState;
  /** Every table produced so far */
  data: SeriesData[];

  constructor(options?: CSVOptions) {
    if (!options) {
      options = {};
    }
    this.config = options.config || {};
    this.callback = options.callback;

    this.field = [];
    this.state = ParseState.Starting;
    this.series = {
      fields: [],
      rows: [],
    };
    this.data = [];
  }

  // PapaParse callback on each line
  private step = (results: ParseResult, parser: Parser): void => {
    for (let i = 0; i < results.data.length; i++) {
      const line: string[] = results.data[i];
      if (line.length < 1) {
        continue;
      }
      const first = line[0]; // falsy when the first cell is empty
      if (first) {
        // Comment or header queue
        if (first.startsWith('#')) {
          // Look for special header column
          // #{columkey}#a,b,c
          const idx = first.indexOf('#', 2);
          if (idx > 0) {
            const k = first.substr(1, idx - 1);

            // Simple object used to check if headers match
            const headerKeys: Field = {
              name: '#',
              type: FieldType.number,
              unit: '#',
              dateFormat: '#',
            };

            // Check if it is a known/supported column
            if (headerKeys.hasOwnProperty(k)) {
              // Starting a new table after reading rows
              if (this.state === ParseState.ReadingRows) {
                this.series = {
                  fields: [],
                  rows: [],
                };
                this.data.push(this.series);
                // The cached parsers belong to the previous table's columns;
                // drop them so the new table infers its own types.
                this.field = [];
              }

              padColumnWidth(this.series.fields, line.length);
              const fields: any[] = this.series.fields; // cast to any so we can lookup by key
              const v = first.substr(idx + 1);
              fields[0][k] = v;
              for (let j = 1; j < fields.length; j++) {
                fields[j][k] = line[j];
              }
              this.state = ParseState.InHeader;
              continue;
            }
          } else if (this.state === ParseState.Starting) {
            // First line starts with '#' but is not a "#key#" line: use it as the names
            this.series.fields = makeFieldsFor(line);
            this.state = ParseState.InHeader;
            continue;
          }
          // Ignore comment lines
          continue;
        }

        if (this.state === ParseState.Starting) {
          const type = guessFieldTypeFromValue(first);
          if (type === FieldType.string) {
            // A text value in the first cell means this line is the header
            this.series.fields = makeFieldsFor(line);
            this.state = ParseState.InHeader;
            continue;
          }
          // Otherwise the line is data: generate default names and keep the guessed type
          this.series.fields = makeFieldsFor(new Array(line.length));
          this.series.fields[0].type = type;
          this.state = ParseState.InHeader; // fall through to read rows
        }
      }
      // NOTE: a line with an empty first cell that arrives while still in the
      // Starting state matches none of the branches below and is dropped.

      if (this.state === ParseState.InHeader) {
        padColumnWidth(this.series.fields, line.length);
        this.state = ParseState.ReadingRows;
      }

      if (this.state === ParseState.ReadingRows) {
        // Make sure colum structure is valid
        if (line.length > this.series.fields.length) {
          padColumnWidth(this.series.fields, line.length);
          if (this.callback) {
            this.callback.onHeader(this.series);
          } else {
            // Expand all rows with nulls
            for (let x = 0; x < this.series.rows.length; x++) {
              const earlier = this.series.rows[x];
              while (earlier.length < line.length) {
                earlier.push(null);
              }
            }
          }
        }

        const row: any[] = [];
        for (let j = 0; j < line.length; j++) {
          const v = line[j];
          if (v) {
            if (!this.field[j]) {
              this.field[j] = makeFieldParser(v, this.series.fields[j]);
            }
            row.push(this.field[j](v));
          } else {
            row.push(null);
          }
        }

        // Pad short rows so that every row holds one value per field, even
        // when no wider line follows to trigger the expansion above.
        while (row.length < this.series.fields.length) {
          row.push(null);
        }

        if (this.callback) {
          // Send the header after we guess the type
          if (this.series.rows.length === 0) {
            this.callback.onHeader(this.series);
            this.series.rows.push(row); // Only add the first row
          }
          this.callback.onRow(row);
        } else {
          this.series.rows.push(row);
        }
      }
    }
  };

  /**
   * Parses the given text and returns every table found in it.
   * Typing is done by this class (PapaParse dynamicTyping is off) and comment
   * lines are kept so that `step` can interpret "#key#" header lines.
   */
  readCSV(text: string): SeriesData[] {
    this.data = [this.series];

    const papacfg = {
      ...this.config,
      dynamicTyping: false,
      skipEmptyLines: true,
      comments: false, // Keep comment lines
      step: this.step,
    } as ParseConfig;

    Papa.parse(text, papacfg);
    return this.data;
  }
}
/**
 * Builds the parser used for every value of one column.
 *
 * When the field has no type yet, it is set here: fields named 'time' or 'Time'
 * become time fields, anything else is guessed from the sample value.
 *
 * @param value the first non-empty value seen for the column
 * @param field the column definition; its `type` may be assigned by this call
 */
function makeFieldParser(value: string, field: Field): FieldParser {
  if (!field.type) {
    const namedTime = field.name === 'time' || field.name === 'Time';
    field.type = namedTime ? FieldType.time : guessFieldTypeFromValue(value);
  }

  switch (field.type) {
    case FieldType.number:
      return (text: string) => parseFloat(text);

    case FieldType.boolean:
      // Anything that does not start with 'F', 'f' or '0' is treated as true
      return (text: string) => {
        const lead = text[0];
        return lead !== 'F' && lead !== 'f' && lead !== '0';
      };

    default:
      // Just pass the string back
      return (text: string) => text;
  }
}
/**
 * Builds one field per entry of the given line. Entries that are empty or
 * missing are named 'Column N', where N is the 1-based position.
 */
function makeFieldsFor(line: string[]): Field[] {
  // Array.from visits every index, including the holes of `new Array(n)`
  return Array.from(line, (label, position) => {
    return { name: label ? label : 'Column ' + (position + 1) };
  });
}
/**
 * Grows the field list in place until it holds `width` entries. Added fields
 * are named 'Field N' (1-based); a list that is already wide enough is untouched.
 */
function padColumnWidth(fields: Field[], width: number) {
  let count = fields.length;
  while (count < width) {
    count++;
    fields.push({ name: 'Field ' + count });
  }
}
// Converts a single cell value into the text written to the CSV output
type FieldWriter = (value: any) => string;
/**
 * Formats one value as a CSV cell.
 *
 * The value is converted with `toString()`. It is wrapped in the quote
 * character when it contains that character, a line break or the delimiter;
 * embedded quote characters are doubled (every occurrence, not just the first).
 *
 * @param value a non-null value to write
 * @param config supplies `quoteChar` and `delimiter`; `"` and `,` are used
 *               when they are not set
 */
function writeValue(value: any, config: CSVConfig): string {
  const str: string = value.toString();
  const quote = config.quoteChar === undefined ? '"' : config.quoteChar;
  const delimiter = config.delimiter === undefined ? ',' : config.delimiter;

  if (quote.length > 0 && str.includes(quote)) {
    // Escape every quote character by doubling it
    return quote + str.split(quote).join(quote + quote) + quote;
  }
  if (str.includes('\n') || str.includes('\r') || str.includes(delimiter)) {
    return quote + str + quote;
  }
  return str;
}
/**
 * Picks the cell writer for a column based on the field type:
 * booleans become 'true'/'false', numeric values of number fields are written
 * with `toString()`, and everything else goes through writeValue.
 */
function makeFieldWriter(field: Field, config: CSVConfig): FieldWriter {
  const generic: FieldWriter = (value: any) => writeValue(value, config);

  if (!field.type) {
    return generic;
  }
  if (field.type === FieldType.boolean) {
    return (value: any) => (value ? 'true' : 'false');
  }
  if (field.type === FieldType.number) {
    // Non-numeric content in a number column still needs escaping
    return (value: any) => (isNumber(value) ? value.toString() : writeValue(value, config));
  }
  return generic;
}
/**
 * Builds the "#key#v1,v2,..." header line for one field property.
 *
 * @returns the line including the trailing newline, or '' when no field has
 *          `key` as an own property. Fields with a falsy value get an empty cell.
 */
function getHeaderLine(key: string, fields: Field[], config: CSVConfig): string {
  const present = fields.some(candidate => candidate.hasOwnProperty(key));
  if (!present) {
    return '';
  }

  let text = '#' + key + '#';
  fields.forEach((entry, position) => {
    if (position > 0) {
      text = text + config.delimiter;
    }
    const attribute = (entry as any)[key];
    if (attribute) {
      text = text + writeValue(attribute, config);
    }
  });
  return text + config.newline;
}
/**
 * Serializes a list of series as CSV text.
 *
 * Each series is written as an optional header (see CSVHeaderStyle), one line
 * per row, and a trailing blank line. Null or undefined cells are written as
 * empty cells.
 *
 * @param data the series to write
 * @param config optional settings; missing values default to ',' delimiter,
 *               '\r\n' newline, '"' quote character and the `name` header style.
 *               The object passed in is not modified.
 */
export function toCSV(data: SeriesData[], config?: CSVConfig): string {
  // lodash `defaults` mutates its first argument, so fill a fresh object
  // instead of writing the defaults into the caller's config.
  const cfg: CSVConfig = defaults({}, config, {
    delimiter: ',',
    newline: '\r\n',
    quoteChar: '"',
    encoding: '',
    headerStyle: CSVHeaderStyle.name,
  });

  let csv = '';
  for (const series of data) {
    const { rows, fields } = series;

    if (cfg.headerStyle === CSVHeaderStyle.full) {
      csv =
        csv +
        getHeaderLine('name', fields, cfg) +
        getHeaderLine('type', fields, cfg) +
        getHeaderLine('unit', fields, cfg) +
        getHeaderLine('dateFormat', fields, cfg);
    } else if (cfg.headerStyle === CSVHeaderStyle.name) {
      for (let i = 0; i < fields.length; i++) {
        if (i > 0) {
          csv += cfg.delimiter;
        }
        // Names may contain the delimiter or quotes, so escape them like any cell
        const name = fields[i].name;
        if (name) {
          csv += writeValue(name, cfg);
        }
      }
      csv += cfg.newline;
    }

    const writers = fields.map(field => makeFieldWriter(field, cfg));
    for (const row of rows) {
      for (let j = 0; j < row.length; j++) {
        if (j > 0) {
          csv = csv + cfg.delimiter;
        }
        const v = row[j];
        if (v !== null && v !== undefined) {
          // A row can be wider than the field list; fall back to the generic writer
          const writer = writers[j];
          csv = csv + (writer ? writer(v) : writeValue(v, cfg));
        }
      }
      csv = csv + cfg.newline;
    }
    // Blank line between series
    csv = csv + cfg.newline;
  }

  return csv;
}
......@@ -4,6 +4,7 @@ export * from './colors';
export * from './namedColorsPalette';
export * from './thresholds';
export * from './string';
export * from './csv';
export * from './statsCalculator';
export * from './displayValue';
export * from './deprecationWarning';
......
import { parseCSV, toSeriesData, guessFieldTypes, guessFieldTypeFromValue } from './processSeriesData';
import { toSeriesData, guessFieldTypes, guessFieldTypeFromValue } from './processSeriesData';
import { FieldType } from '../types/data';
import moment from 'moment';
describe('processSeriesData', () => {
describe('basic processing', () => {
it('should read header and two rows', () => {
const text = 'a,b,c\n1,2,3\n4,5,6';
expect(parseCSV(text)).toMatchSnapshot();
});
it('should generate a header and fix widths', () => {
const text = '1\n2,3,4\n5,6';
const series = parseCSV(text, {
headerIsFirstLine: false,
});
expect(series.rows.length).toBe(3);
expect(series).toMatchSnapshot();
});
});
});
describe('toSeriesData', () => {
it('converts timeseries to series', () => {
const input1 = {
......
......@@ -4,135 +4,9 @@ import isString from 'lodash/isString';
import isBoolean from 'lodash/isBoolean';
import moment from 'moment';
import Papa, { ParseError, ParseMeta } from 'papaparse';
// Types
import { SeriesData, Field, TimeSeries, FieldType, TableData } from '../types/index';
// Subset of all parse options
// parseCSV spreads this object into the config it passes to Papa.parse
export interface TableParseOptions {
// When explicitly false, parseCSV does not consume the first line as the header
headerIsFirstLine?: boolean; // Not a papa-parse option
delimiter?: string; // default: ","
newline?: string; // default: "\r\n"
quoteChar?: string; // default: '"'
encoding?: string; // default: ""
comments?: boolean | string; // default: false
}
// Debugging details that parseCSV fills in when an object of this type is passed to it
export interface TableParseDetails {
// The `meta` value of the Papa.parse result
meta?: ParseMeta;
// The `errors` value of the Papa.parse result
errors?: ParseError[];
}
/**
 * Ensures the field list and every row share the same length.
 *
 * The target width is the larger of the field count and the longest row.
 * Missing fields are appended as 'Field N' and short rows are padded with null.
 * Neither the input series nor its arrays are modified.
 *
 * @param series (immutable)
 * @returns the same series object when every row already matches the field
 *          count, otherwise a new series with padded fields and/or rows
 */
export function matchRowSizes(series: SeriesData): SeriesData {
  const { rows, fields } = series;

  if (rows.every(row => row.length === fields.length)) {
    return series;
  }

  const width = rows.reduce((widest, row) => Math.max(widest, row.length), fields.length);

  // Pad Fields (copy only when something has to be added)
  let paddedFields = fields;
  if (width !== fields.length) {
    paddedFields = [...fields];
    for (let n = fields.length + 1; n <= width; n++) {
      paddedFields.push({ name: 'Field ' + n });
    }
  }

  // Pad Rows (rows that are already wide enough are reused as-is)
  const paddedRows: any[] = rows.map(row => {
    if (row.length >= width) {
      return row;
    }
    const copy = [...row];
    while (copy.length < width) {
      copy.push(null);
    }
    return copy;
  });

  return {
    fields: paddedFields,
    rows: paddedRows,
  };
}
/**
 * Turns a header row into field definitions. Falsy entries are named
 * 'Field N' (1-based); other entries are stringified and trimmed.
 */
function makeFields(values: any[]): Field[] {
  return values.map((raw, position) => ({
    name: (raw || 'Field ' + (position + 1)).toString().trim(),
  }));
}
/**
 * Convert CSV text into a valid SeriesData object
 *
 * The first parsed line is used as the header unless
 * `options.headerIsFirstLine` is explicitly false; the result is passed through
 * matchRowSizes so the header and all rows have equal length.
 *
 * @param text the CSV text
 * @param options parse options, spread into the Papa.parse config
 * @param details if given, receives the parser's `errors` and `meta`
 * @returns the parsed series, or an empty series when nothing was parsed
 */
export function parseCSV(text: string, options?: TableParseOptions, details?: TableParseDetails): SeriesData {
  const parsed = Papa.parse(text, { ...options, dynamicTyping: true, skipEmptyLines: true });
  const lines = parsed.data;
  const problems = parsed.errors;

  // Fill the parse details for debugging
  if (details) {
    details.errors = problems;
    details.meta = parsed.meta;
  }

  const nothingParsed = !lines || lines.length < 1;
  if (nothingParsed) {
    // Show a more reasonable warning on empty input text
    if (details && !text) {
      problems.length = 0;
      problems.push({
        code: 'empty',
        message: 'Empty input text',
        type: 'warning',
        row: 0,
      });
      details.errors = problems;
    }
    return {
      fields: [],
      rows: [],
    };
  }

  // Assume the first line is the header unless the config says its not
  const skipHeader = options && options.headerIsFirstLine === false;
  const header = skipHeader ? [] : lines.shift();

  return matchRowSizes({
    fields: makeFields(header),
    rows: lines,
  });
}
function convertTableToSeriesData(table: TableData): SeriesData {
return {
// rename the 'text' to 'name' field
......
import { parseCSV } from './processSeriesData';
import { getStatsCalculators, StatID, calculateStats } from './statsCalculator';
import _ from 'lodash';
describe('Stats Calculators', () => {
const basicTable = parseCSV('a,b,c\n10,20,30\n20,30,40');
const basicTable = {
fields: [{ name: 'a' }, { name: 'b' }, { name: 'c' }],
rows: [[10, 20, 30], [20, 30, 40]],
};
it('should load all standard stats', () => {
const names = [
......
#name#a,b,c
#type#number,string,boolean
#unit#ms,,s
10,AA,true
20,XX,false
30,YY,false
40,ZZ,true
50,"X,Y",true
60,"X
Y",true
70,BB,false
#name#a,b,c
#unit#ms,lengthm,s
#type#number,string,boolean
10,20,True
40,50,FALSE
"40","500",0
40,50,1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment