fix: Add support for xls,json,tsv file types in file widget (#23159)

Fixes #17946

This PR adds support for parsing XLS, XLSX, TSV, JSON, and CSV files in
the File Picker widget v2.

#### Type of change
- New feature (non-breaking change which adds functionality)
- This change requires a documentation update

## Testing
>
#### How Has This Been Tested?
- [x] Manual
- [x] Cypress
>

### Test Plan
https://github.com/appsmithorg/TestSmith/issues/2411

#### Test Plan
> Import the following file types to verify that the feature works.
1. Import XLS, XLSX, JSON, TSV, and CSV files.
2. Import large files (> 1 MB and > 5 MB) to test the feature.
3. Import files using the Text, Binary, and Base64 data formats to verify the
existing functionality, since the whole import code has been refactored.


## Checklist:
#### Dev activity
- [x] My code follows the style guidelines of this project
- [x] I have performed a self-review of my own code
- [x] I have commented my code, particularly in hard-to-understand areas
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my
feature works
- [x] New and existing unit tests pass locally with my changes
- [ ] PR is being merged under a feature flag
This commit is contained in:
Rajat Agrawal 2023-06-06 08:53:45 +05:30 committed by GitHub
parent ea6009f0ca
commit c1e8e17df9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 518 additions and 98 deletions

View File

@ -2,25 +2,28 @@ const commonlocators = require("../../../../../locators/commonlocators.json");
const dsl = require("../../../../../fixtures/filePickerTableDSL.json");
const widgetName = "filepickerwidgetv2";
const ARRAY_CSV_HELPER_TEXT = `All non csv filetypes will have an empty value`;
const ARRAY_CSV_HELPER_TEXT = `All non CSV, XLS(X), JSON or TSV filetypes will have an empty value`;
const ObjectsRegistry =
require("../../../../../support/Objects/Registry").ObjectsRegistry;
let propPane = ObjectsRegistry.PropertyPane;
describe("File picker widget v2", () => {
before(() => {
cy.addDsl(dsl);
});
it("1. Parse CSV data to table Widget", () => {
it("1. Parse CSV,XLS,JSON,TSV,Binary,Text and Base64 file data to table Widget", () => {
cy.openPropertyPane(widgetName);
cy.get(
`.t--property-control-dataformat ${commonlocators.helperText}`,
).should("not.exist");
cy.selectDropdownValue(
commonlocators.filePickerDataFormat,
"Array (CSVs only)",
"Array of Objects (CSV, XLS(X), JSON, TSV)",
);
cy.get(commonlocators.filePickerDataFormat)
.last()
.should("have.text", "Array (CSVs only)");
.should("have.text", "Array of Objects (CSV, XLS(X), JSON, TSV)");
cy.get(
`.t--property-control-dataformat ${commonlocators.helperText}`,
).should("exist");
@ -32,20 +35,113 @@ describe("File picker widget v2", () => {
.selectFile("cypress/fixtures/Test_csv.csv", {
force: true,
});
// wait for file to get uploaded
cy.wait(3000);
cy.readTableV2dataPublish("1", "1").then((tabData) => {
const tabValue = tabData;
expect(tabValue).to.be.equal("Black");
cy.log("the value is" + tabValue);
});
cy.readTableV2dataPublish("1", "2").then((tabData) => {
const tabValue = tabData;
expect(tabValue).to.be.equal("1000");
cy.log("the value is" + tabValue);
});
cy.get(
`.t--widget-tablewidgetv2 .tbody .td[data-rowindex=${1}][data-colindex=${3}] input`,
).should("not.be.checked");
cy.get(".uppy-Dashboard-Item-action--remove").click({ force: true });
// Test for XLSX file
cy.get(commonlocators.filePickerInput)
.first()
.selectFile("cypress/fixtures/TestSpreadsheet.xlsx", { force: true });
// wait for file to get uploaded
cy.wait(3000);
cy.readTableV2dataPublish("0", "0").then((tabData) => {
expect(tabData).to.be.equal("Sheet1");
});
cy.readTableV2dataPublish("0", "1").then((tabData) => {
expect(tabData).contains("Column A");
});
cy.get(".uppy-Dashboard-Item-action--remove").click({ force: true });
// Test for XLS file
cy.get(commonlocators.filePickerInput)
.first()
.selectFile("cypress/fixtures/SampleXLS.xls", { force: true });
// wait for file to get uploaded
cy.wait(3000);
cy.readTableV2dataPublish("0", "0").then((tabData) => {
expect(tabData).to.be.equal("Sheet1");
});
cy.readTableV2dataPublish("0", "1").then((tabData) => {
expect(tabData).contains("Dulce");
});
cy.get(".uppy-Dashboard-Item-action--remove").click({ force: true });
// Test for JSON File
cy.get(commonlocators.filePickerInput)
.first()
.selectFile("cypress/fixtures/largeJSONData.json", { force: true });
// wait for file to get uploaded
cy.wait(3000);
cy.readTableV2dataPublish("0", "2").then((tabData) => {
expect(tabData).to.contain("sunt aut facere");
});
cy.get(".uppy-Dashboard-Item-action--remove").click({ force: true });
// Test for TSV File
cy.get(commonlocators.filePickerInput)
.first()
.selectFile("cypress/fixtures/Sample.tsv", { force: true });
// wait for file to get uploaded
cy.wait(3000);
cy.readTableV2dataPublish("0", "0").then((tabData) => {
expect(tabData).to.be.equal("CONST");
});
cy.get(".uppy-Dashboard-Item-action--remove").click({ force: true });
// Drag and drop a text widget for binding file data
cy.dragAndDropToCanvas("textwidget", { x: 100, y: 100 });
cy.openPropertyPane("textwidget");
propPane.UpdatePropertyFieldValue("Text", `{{FilePicker1.files[0].data}}`);
// Test for Base64
cy.openPropertyPane(widgetName);
cy.selectDropdownValue(commonlocators.filePickerDataFormat, "Base64");
cy.get(commonlocators.filePickerInput)
.first()
.selectFile("cypress/fixtures/testdata.json", { force: true });
cy.get(".t--widget-textwidget").should(
"contain",
"data:application/json;base64",
);
cy.get(".uppy-Dashboard-Item-action--remove").click({ force: true });
// Test for Text file
cy.selectDropdownValue(commonlocators.filePickerDataFormat, "Text");
cy.get(commonlocators.filePickerInput)
.first()
.selectFile("cypress/fixtures/testdata.json", { force: true });
cy.get(".t--widget-textwidget").should("contain", "baseUrl");
cy.get(".uppy-Dashboard-Item-action--remove").click({ force: true });
cy.wait(3000);
cy.get(".t--widget-textwidget").should("have.text", "");
cy.selectDropdownValue(commonlocators.filePickerDataFormat, "Binary");
cy.get(commonlocators.filePickerInput)
.first()
.selectFile("cypress/fixtures/testdata.json", { force: true });
cy.get(".t--widget-textwidget").should("contain", "baseUrl");
cy.get(".uppy-Dashboard-Item-action--remove").click({ force: true });
});
});

View File

@ -0,0 +1,3 @@
Some parameter Other parameter Last parameter
CONST 123456 12.45
Row2C1 Row2C2 Row2C3
1 Some parameter Other parameter Last parameter
2 CONST 123456 12.45
3 Row2C1 Row2C2 Row2C3

Binary file not shown.

Binary file not shown.

View File

@ -139,11 +139,11 @@ export default function XlsxViewer(props: { blob?: Blob }) {
const sheetsData: RawSheetData[] = [];
const sheetNames: string[] = [];
workbook.SheetNames.forEach((name, index) => {
sheetNames.push(name);
workbook.SheetNames.forEach((sheetName) => {
sheetNames.push(sheetName);
const result: RawSheetData = XLSX.utils.sheet_to_json(
workbook.Sheets[workbook.SheetNames[index]],
workbook.Sheets[sheetName],
{ header: 1 },
);
sheetsData.push(result);

View File

@ -0,0 +1,210 @@
import FileDataTypes from "../constants";
import parseFileData from "./FileParser";
import fs from "fs";
const path = require("path");
// Unit tests for parseFileData: every supported data format is exercised
// against a real fixture from cypress/fixtures.
describe("File parser formats different file types correctly", () => {
  const FIXTURES_DIR = path.resolve(__dirname, "../../../../cypress/fixtures");

  /** Loads a Cypress fixture from disk and wraps its raw bytes in a Blob. */
  function loadFixtureBlob(fileName: string): Blob {
    return new Blob([fs.readFileSync(path.join(FIXTURES_DIR, fileName))]);
  }

  it("parses csv file correctly", async () => {
    const result = await parseFileData(
      loadFixtureBlob("Test_csv.csv"),
      FileDataTypes.Array,
      "text/csv",
      "csv",
      false,
    );
    // dynamicTyping is off, so every cell is expected to stay a string.
    const expectedResult = [
      {
        "Data Id": "hsa-miR-942-5p",
        String: "Blue",
        Number: "23.788",
        Boolean: "TRUE",
        Empty: "",
        Date: "Wednesday, 20 January 1999",
      },
      {
        "Data Id": "hsa-miR-943",
        String: "Black",
        Number: "1000",
        Boolean: "FALSE",
        Empty: "",
        Date: "2022-09-15",
      },
    ];
    expect(result).toStrictEqual(expectedResult);
  });

  it("parses json file correctly", async () => {
    const result = (await parseFileData(
      loadFixtureBlob("testdata.json"),
      FileDataTypes.Array,
      "application/json",
      "json",
      false,
    )) as Record<string, unknown>;
    expect(result["APPURL"]).toStrictEqual(
      "http://localhost:8081/app/app1/page1-63d38854252ca15b7ec9fabb",
    );
  });

  it("parses tsv file correctly", async () => {
    const result = await parseFileData(
      loadFixtureBlob("Sample.tsv"),
      FileDataTypes.Array,
      "text/tab-separated-values",
      "tsv",
      false,
    );
    const expectedResult = [
      {
        "Last parameter": "12.45",
        "Other parameter": "123456",
        "Some parameter": "CONST",
      },
      {
        "Last parameter": "Row2C3",
        "Other parameter": "Row2C2",
        "Some parameter": "Row2C1",
      },
    ];
    expect(result).toStrictEqual(expectedResult);
  });

  it("parses xlsx file correctly", async () => {
    const result = await parseFileData(
      loadFixtureBlob("TestSpreadsheet.xlsx"),
      FileDataTypes.Array,
      "openxmlformats-officedocument.spreadsheet",
      "xlsx",
      false,
    );
    // One entry per worksheet; `data` holds the rows, header row first.
    const expectedResult = [
      {
        data: [
          ["Column A", "Column B", "Column C"],
          ["r1a", "r1b", "r1c"],
          ["r2a", "r2b", "r2c"],
          ["r3a", "r3b", "r3c"],
        ],
        name: "Sheet1",
      },
    ];
    expect(result).toStrictEqual(expectedResult);
  });

  it("parses xls file correctly", async () => {
    // Empty MIME type on purpose: the parser must recognise the file from its
    // extension alone.
    const result = (await parseFileData(
      loadFixtureBlob("SampleXLS.xls"),
      FileDataTypes.Array,
      "",
      "xls",
      false,
    )) as { name: string; data: unknown[][] }[];
    const expectedFirstRow = [
      1,
      "Dulce",
      "Abril",
      "Female",
      "United States",
      32,
      "15/10/2017",
      1562,
    ];
    expect(result[0].name).toStrictEqual("Sheet1");
    // Index 0 is the header row, so the first data row lives at index 1.
    expect(result[0].data[1]).toStrictEqual(expectedFirstRow);
  });

  it("parses text file correctly", async () => {
    const result = await parseFileData(
      loadFixtureBlob("testdata.json"),
      FileDataTypes.Text,
      "",
      "",
      false,
    );
    expect(typeof result).toStrictEqual("string");
    expect(result).toContain(
      "http://localhost:8081/app/app1/page1-63d38854252ca15b7ec9fabb",
    );
  });

  it("parses binary file correctly", async () => {
    const result = await parseFileData(
      loadFixtureBlob("testdata.json"),
      FileDataTypes.Binary,
      "",
      "",
      false,
    );
    expect(typeof result).toStrictEqual("string");
    expect(result).toContain(
      "http://localhost:8081/app/app1/page1-63d38854252ca15b7ec9fabb",
    );
  });

  it("parses base64 file correctly", async () => {
    const result = await parseFileData(
      loadFixtureBlob("testdata.json"),
      FileDataTypes.Base64,
      "",
      "",
      false,
    );
    expect(typeof result).toStrictEqual("string");
    expect(result).toContain(
      "data:application/octet-stream;base64,ewogICJiYXNlVXJsIjogImh0",
    );
  });
});

View File

@ -0,0 +1,179 @@
import Papa from "papaparse";
import FileDataTypes from "../constants";
import log from "loglevel";
import * as XLSX from "xlsx";
/** One worksheet extracted from a parsed Excel workbook. */
interface ExcelSheetData {
  /** Worksheet name as listed in the workbook. */
  name: string;
  /** Rows produced by `XLSX.utils.sheet_to_json` for this worksheet. */
  data: unknown[];
}

/**
 * One parsed CSV/TSV row: key represents column name, value represents cell
 * value. Cell values are `unknown` because they are strings by default but may
 * be converted to other primitives when `dynamicTyping` is enabled.
 */
type CSVRowData = Record<string, unknown>;
/**
 * Entry point of the file parser: converts an uploaded blob into the
 * representation selected by the widget's data format.
 *
 * @param data - Raw file contents.
 * @param type - Requested data format (Base64, Binary, Text or Array).
 * @param fileType - MIME type of the file; only forwarded for the Array format.
 * @param extension - File extension; only forwarded for the Array format.
 * @param dynamicTyping - Forwarded to the Array parser; defaults to `false`.
 * @returns A promise of the parsed value. There is no `default` branch, so a
 *   `type` outside the four handled members yields no promise at all.
 */
function parseFileData(
  data: Blob,
  type: FileDataTypes,
  fileType: string,
  extension: string,
  dynamicTyping = false,
): Promise<unknown> {
  switch (type) {
    // Structured formats need the MIME type / extension to pick a parser.
    case FileDataTypes.Array:
      return parseArrayTypeFile(data, fileType, extension, dynamicTyping);
    // The remaining formats are plain FileReader conversions of the blob.
    case FileDataTypes.Text:
      return parseText(data);
    case FileDataTypes.Binary:
      return parseBinaryString(data);
    case FileDataTypes.Base64:
      return parseBase64Blob(data);
  }
}
/**
 * Reads a blob with `FileReader.readAsDataURL`.
 *
 * @param data - Blob to read.
 * @returns A promise resolved with `reader.result` once `loadend` fires.
 *   NOTE: there is no separate error handling; the promise settles with
 *   whatever the reader holds at that point.
 */
function parseBase64Blob(data: Blob): Promise<string> {
  const fileReader = new FileReader();
  return new Promise<string>((done) => {
    fileReader.onloadend = () => {
      done(fileReader.result as string);
    };
    fileReader.readAsDataURL(data);
  });
}
/**
 * Reads a blob with `FileReader.readAsBinaryString`.
 *
 * @param data - Blob to read.
 * @returns A promise resolved with `reader.result` once `loadend` fires.
 *   NOTE: there is no separate error handling; the promise settles with
 *   whatever the reader holds at that point.
 */
function parseBinaryString(data: Blob): Promise<string> {
  const fileReader = new FileReader();
  return new Promise<string>((done) => {
    fileReader.onloadend = () => {
      done(fileReader.result as string);
    };
    fileReader.readAsBinaryString(data);
  });
}
/**
 * Reads a blob with `FileReader.readAsText`.
 *
 * @param data - Blob to read.
 * @returns A promise resolved with `reader.result` once `loadend` fires.
 *   NOTE: there is no separate error handling; the promise settles with
 *   whatever the reader holds at that point.
 */
function parseText(data: Blob): Promise<string> {
  const fileReader = new FileReader();
  return new Promise<string>((done) => {
    fileReader.onloadend = () => {
      done(fileReader.result as string);
    };
    fileReader.readAsText(data);
  });
}
/** Structured formats the Array data format knows how to parse. */
type ArrayFileFormat = "csv" | "xls" | "json" | "tsv";

/**
 * Decides which parser handles a file, from its MIME type and extension.
 *
 * The MIME-based checks run first and in their original order, so any file
 * that was recognised before is routed exactly as before. The extension is
 * then used as a fallback for CSV/JSON/TSV (it already was for XLS), because
 * the reported MIME type can be empty. Matching is case-insensitive.
 */
function detectArrayFileFormat(
  filetype: string,
  extension: string,
): ArrayFileFormat | undefined {
  // Guard against missing values coming from untyped callers.
  const mime = (filetype ?? "").toLowerCase();
  const ext = (extension ?? "").toLowerCase();

  if (mime.includes("csv")) return "csv";
  if (
    mime.includes("openxmlformats-officedocument.spreadsheet") ||
    ext.includes("xls") // substring match keeps xls, xlsx, ... working as before
  ) {
    return "xls";
  }
  if (mime.includes("json")) return "json";
  if (mime.includes("text/tab-separated-values")) return "tsv";

  // Fallback: no usable MIME type, trust the extension.
  switch (ext) {
    case "csv":
      return "csv";
    case "json":
      return "json";
    case "tsv":
      return "tsv";
    default:
      return undefined;
  }
}

/**
 * Parses a file into structured data for the Array data format.
 *
 * @param data - Raw file contents.
 * @param filetype - MIME type of the file (may be empty).
 * @param extension - File extension without the dot (may be empty).
 * @param dynamicTyping - Forwarded to the CSV/TSV parser; defaults to `false`.
 * @returns The parsed content, or an empty array for unsupported file types.
 *   A failure inside a parser now rejects the returned promise instead of
 *   leaving it pending forever.
 */
async function parseArrayTypeFile(
  data: Blob,
  filetype: string,
  extension: string,
  dynamicTyping = false,
): Promise<unknown> {
  switch (detectArrayFileFormat(filetype, extension)) {
    case "csv":
    case "tsv":
      // TSV shares the CSV parser; parseCSVString sets no delimiter, so this
      // presumably relies on PapaParse's delimiter auto-detection.
      return parseCSVBlob(data, dynamicTyping);
    case "xls":
      return parseXLSFile(data);
    case "json":
      return parseJSONFile(data);
    default:
      return [];
  }
}
/**
 * Reads a blob as text and parses it as JSON.
 *
 * @param data - Blob expected to contain JSON text.
 * @returns A promise of the parsed value; an empty object is used when
 *   `JSON.parse` throws, so the promise never rejects on malformed input.
 */
function parseJSONFile(data: Blob): Promise<Record<string, unknown>> {
  const fileReader = new FileReader();

  // Best-effort parse: malformed input degrades to an empty object.
  const parseOrEmpty = (): Record<string, unknown> => {
    try {
      return JSON.parse(fileReader.result as string);
    } catch {
      return {};
    }
  };

  return new Promise<Record<string, unknown>>((done) => {
    fileReader.onloadend = () => {
      done(parseOrEmpty());
    };
    fileReader.readAsText(data);
  });
}
/**
 * Reads an Excel workbook (XLS/XLSX) and converts every worksheet to rows.
 *
 * @param data - Blob containing the workbook.
 * @returns A promise of one `{ name, data }` entry per readable worksheet, in
 *   workbook order. Worksheets that fail to convert are skipped. A workbook
 *   that cannot be read at all yields an empty array rather than leaving the
 *   promise pending forever.
 */
function parseXLSFile(data: Blob): Promise<ExcelSheetData[]> {
  return new Promise((resolve) => {
    const reader = new FileReader();
    reader.onloadend = () => {
      const sheetsData: ExcelSheetData[] = [];
      try {
        // XLSX.read throws on corrupt or unsupported input. It runs inside an
        // event handler, so an uncaught throw would never settle the promise.
        const workbook = XLSX.read(reader.result as ArrayBuffer, {
          type: "array",
        });
        for (const sheetName of workbook.SheetNames) {
          try {
            // header: 1 makes each row an array of cell values.
            const rows = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {
              header: 1,
            });
            sheetsData.push({ name: sheetName, data: rows });
          } catch {
            // Best effort: skip a worksheet that cannot be converted.
          }
        }
      } catch {
        // Unreadable workbook: fall through and resolve what was collected.
      }
      resolve(sheetsData);
    };
    reader.readAsArrayBuffer(data);
  });
}
/**
 * Reads a blob as text and hands the text to `parseCSVString`.
 *
 * @param data - Blob containing delimited text.
 * @param dynamicTyping - Forwarded to `parseCSVString`; defaults to `false`.
 * @returns A promise of the parsed rows; an empty array is used when parsing
 *   throws, so the promise never rejects on malformed input.
 */
function parseCSVBlob(
  data: Blob,
  dynamicTyping = false,
): Promise<CSVRowData[]> {
  const fileReader = new FileReader();

  // Best-effort parse: a throwing parser degrades to "no rows".
  const rowsOrEmpty = (): CSVRowData[] => {
    try {
      return parseCSVString(fileReader.result as string, dynamicTyping);
    } catch {
      return [];
    }
  };

  return new Promise<CSVRowData[]>((done) => {
    fileReader.onloadend = () => {
      done(rowsOrEmpty());
    };
    fileReader.readAsText(data);
  });
}
/**
 * Parses delimited text into one object per row, keyed by the header row.
 *
 * @param data - Text to parse.
 * @param dynamicTyping - Passed to PapaParse's `dynamicTyping` option;
 *   defaults to `false`.
 * @returns All rows delivered by PapaParse, in order. Parse errors reported by
 *   PapaParse are logged as a warning instead of being silently discarded;
 *   they do not change the returned rows.
 */
function parseCSVString(data: string, dynamicTyping = false): CSVRowData[] {
  const rows: CSVRowData[] = [];
  const errors: Papa.ParseError[] = [];

  const config = {
    header: true,
    dynamicTyping,
    // Accumulate every chunk PapaParse delivers, with any errors it reports.
    chunk(results: Papa.ParseResult<CSVRowData>) {
      if (results?.errors?.length) {
        errors.push(...results.errors);
      }
      rows.push(...results.data);
    },
  };

  const startParsing = performance.now();
  Papa.parse(data, config);
  const endParsing = performance.now();

  log.debug(
    `### FILE_PICKER_WIDGET_V2 - CSV PARSING `,
    `${endParsing - startParsing} ms`,
  );
  if (errors.length > 0) {
    log.warn(`### FILE_PICKER_WIDGET_V2 - CSV PARSING ERRORS `, errors);
  }

  return rows;
}
export default parseFileData;

View File

@ -17,7 +17,7 @@ import { EvaluationSubstitutionType } from "entities/DataTree/dataTreeFactory";
import { klona } from "klona";
import _, { findIndex } from "lodash";
import log from "loglevel";
import Papa from "papaparse";
import React from "react";
import shallowequal from "shallowequal";
import { createGlobalStyle } from "styled-components";
@ -29,15 +29,11 @@ import FilePickerComponent from "../component";
import FileDataTypes from "../constants";
import { DefaultAutocompleteDefinitions } from "widgets/WidgetUtils";
import type { AutocompletionDefinitions } from "widgets/constants";
import parseFileData from "./FileParser";
const CSV_ARRAY_LABEL = "Array (CSVs only)";
const CSV_FILE_TYPE_REGEX = /.+(\/csv)$/;
const CSV_ARRAY_LABEL = "Array of Objects (CSV, XLS(X), JSON, TSV)";
const ARRAY_CSV_HELPER_TEXT = `All non csv filetypes will have an empty value. \n Large files used in widgets directly might slow down the app.`;
const isCSVFileType = (str: string) => CSV_FILE_TYPE_REGEX.test(str);
type Result = string | Buffer | ArrayBuffer | null;
const ARRAY_CSV_HELPER_TEXT = `All non CSV, XLS(X), JSON or TSV filetypes will have an empty value. \n Large files used in widgets directly might slow down the app.`;
const FilePickerGlobalStyles = createGlobalStyle<{
borderRadius?: string;
@ -330,7 +326,7 @@ class FilePickerWidget extends BaseWidget<
propertyName: "dynamicTyping",
label: "Infer data-types from CSV",
helpText:
"Controls if the arrays should try to infer the best possible data type based on the values in csv files",
"Controls if the arrays should try to infer the best possible data type based on the values in CSV file",
controlType: "SWITCH",
isJSConvertible: false,
isBindProperty: true,
@ -641,7 +637,7 @@ class FilePickerWidget extends BaseWidget<
});
}
this.state.uppy.on("file-removed", (file: any, reason: any) => {
this.state.uppy.on("file-removed", (file: UppyFile, reason: any) => {
/**
* The below line will not update the selectedFiles meta prop when cancel-all event is triggered.
* cancel-all event occurs when close or reset function of uppy is executed.
@ -678,42 +674,29 @@ class FilePickerWidget extends BaseWidget<
}
});
this.state.uppy.on("files-added", (files: any[]) => {
this.state.uppy.on("files-added", (files: UppyFile[]) => {
// Deep cloning the selectedFiles
const selectedFiles = this.props.selectedFiles
? klona(this.props.selectedFiles)
: [];
const fileCount = this.props.selectedFiles?.length || 0;
const fileReaderPromises = files.map((file, index) => {
const fileReaderPromises = files.map(async (file, index) => {
return new Promise((resolve) => {
if (file.size < FILE_SIZE_LIMIT_FOR_BLOBS) {
const reader = new FileReader();
if (this.props.fileDataType === FileDataTypes.Base64) {
reader.readAsDataURL(file.data);
} else if (this.props.fileDataType === FileDataTypes.Binary) {
reader.readAsBinaryString(file.data);
(async () => {
let data: unknown;
if (file.size < FILE_SIZE_LIMIT_FOR_BLOBS) {
data = await parseFileData(
file.data,
this.props.fileDataType,
file.type || "",
file.extension,
this.props.dynamicTyping,
);
} else {
reader.readAsText(file.data);
data = createBlobUrl(file.data, this.props.fileDataType);
}
reader.onloadend = () => {
const newFile = {
type: file.type,
id: file.id,
data: this.parseUploadResult(
reader.result,
file.type,
this.props.fileDataType,
),
meta: file.meta,
name: file.meta ? file.meta.name : `File-${index + fileCount}`,
size: file.size,
dataFormat: this.props.fileDataType,
};
resolve(newFile);
};
} else {
const data = createBlobUrl(file.data, this.props.fileDataType);
const newFile = {
type: file.type,
id: file.id,
@ -724,7 +707,7 @@ class FilePickerWidget extends BaseWidget<
dataFormat: this.props.fileDataType,
};
resolve(newFile);
}
})();
});
});
@ -861,57 +844,6 @@ class FilePickerWidget extends BaseWidget<
);
}
parseUploadResult(
result: Result,
fileType: string,
dataFormat: FileDataTypes,
) {
if (
dataFormat !== FileDataTypes.Array ||
!isCSVFileType(fileType) ||
!result
) {
return result;
}
const data: Record<string, string>[] = [];
const errors: Papa.ParseError[] = [];
function chunk(results: Papa.ParseStepResult<any>) {
if (results?.errors?.length) {
errors.push(...results.errors);
}
data.push(...results.data);
}
if (typeof result === "string") {
const config = {
header: true,
dynamicTyping: this.props.dynamicTyping,
chunk,
};
try {
const startParsing = performance.now();
Papa.parse(result, config);
const endParsing = performance.now();
log.debug(
`### FILE_PICKER_WIDGET_V2 - ${this.props.widgetName} - CSV PARSING `,
`${endParsing - startParsing} ms`,
);
return data;
} catch (error) {
log.error(errors);
return [];
}
} else {
return [];
}
}
static getWidgetType(): WidgetType {
return "FILE_PICKER_WIDGET_V2";
}