feat: remove papa parse and use sheetjs for csv to json parsing (#24779)

This commit is contained in:
Sangeeth Sivan 2023-07-11 16:06:33 +05:30 committed by GitHub
parent 6eb8a02e15
commit 77e3778b3e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 67 additions and 43 deletions

View File

@ -133,7 +133,6 @@
"node-forge": "^1.3.0",
"normalizr": "^3.3.0",
"object-hash": "^3.0.0",
"papaparse": "^5.3.2",
"path-to-regexp": "^6.2.0",
"popper.js": "^1.15.0",
"prismjs": "^1.27.0",
@ -243,7 +242,6 @@
"@types/node": "^10.12.18",
"@types/node-forge": "^0.10.0",
"@types/object-hash": "^2.2.1",
"@types/papaparse": "^5.3.5",
"@types/prismjs": "^1.16.1",
"@types/react": "^17.0.2",
"@types/react-beautiful-dnd": "^11.0.4",

View File

@ -40,6 +40,51 @@ describe("File parser formats differenty file types correctly", () => {
expect(result).toStrictEqual(expectedResult);
});
it("parses csv file correclty with dynamic bindig - Infer data types", async () => {
  // A date-only ISO string is interpreted as UTC midnight; adding the local
  // timezone offset (minutes -> ms) moves that instant to local midnight,
  // which is the Date value the assertion below expects from the parser.
  const utcMidnight = new Date("2022-09-15");
  const localMidnight = new Date(
    utcMidnight.getTime() + utcMidnight.getTimezoneOffset() * 60 * 1000,
  );

  // Load the shared CSV fixture and wrap its bytes in a Blob for the parser.
  const csvFixture = path.resolve(
    __dirname,
    "../../../../cypress/fixtures/Test_csv.csv",
  );
  const csvBlob = new Blob([fs.readFileSync(csvFixture)]);

  // Final argument `true` turns on dynamic typing (data type inference).
  const parsedRows = await parseFileData(
    csvBlob,
    FileDataTypes.Array,
    "text/csv",
    "csv",
    true,
  );

  expect(parsedRows).toStrictEqual([
    {
      "Data Id": "hsa-miR-942-5p",
      String: "Blue",
      Number: 23.788,
      Boolean: true,
      Empty: "",
      // This cell is expected to stay a plain string.
      Date: "Wednesday, 20 January 1999",
    },
    {
      "Data Id": "hsa-miR-943",
      String: "Black",
      Number: 1000,
      Boolean: false,
      Empty: "",
      // This cell is expected to come back as a Date object.
      Date: localMidnight,
    },
  ]);
});
it("parses json file correclty", async () => {
const fixturePath = path.resolve(
__dirname,

View File

@ -1,6 +1,4 @@
import Papa from "papaparse";
import FileDataTypes from "../constants";
import log from "loglevel";
import * as XLSX from "xlsx";
interface ExcelSheetData {
@ -149,30 +147,31 @@ function parseCSVBlob(
// NOTE(review): this hunk looks like a rendered diff whose +/- markers were
// stripped: statements from the previous Papa Parse implementation and from
// the new SheetJS (XLSX) implementation are interleaved, so the braces do not
// balance and the block is not compilable as shown. Comments below tag each
// group of lines with the implementation it appears to belong to.
/**
 * Parses CSV text into an array of row objects keyed by the header row.
 *
 * @param data - CSV content as a string.
 * @param dynamicTyping - when true, `raw` is set to false so SheetJS parses
 *   cell values instead of leaving them as text; defaults to false.
 * @returns one `CSVRowData` object per data row (every row after the first).
 */
function parseCSVString(data: string, dynamicTyping = false): CSVRowData[] {
const result: CSVRowData[] = [];
// (Papa Parse version) collected the parse errors reported by each chunk.
const errors: Papa.ParseError[] = [];
// (SheetJS version) read the CSV text as a workbook.
// NOTE(review): `type: "binary"` treats `data` as a binary string; presumably
// fine for ASCII input, but confirm behaviour for non-ASCII CSV content
// (SheetJS also offers `type: "string"`).
const workbook = XLSX.read(data, {
type: "binary",
cellDates: true,
dateNF: "yyyy-mm-dd",
raw: dynamicTyping ? false : true, // parse values
});
// Only the first sheet of the workbook is used.
const sheetName = workbook.SheetNames[0];
const worksheet = workbook.Sheets[sheetName];
// NOTE(review): the rows are consumed below as arrays of cell values (see the
// casts on headerRow/dataRows), so the `XLSX.CellObject[]` annotation does not
// describe the actual shape; verify against the SheetJS `header: 1` docs.
const jsonData: XLSX.CellObject[] = XLSX.utils.sheet_to_json(worksheet, {
header: 1, // to notify that the first row is the header row
defval: "", // to get empty cells as empty strings
});
// First row supplies the column names; the remaining rows are data.
const headerRow: any[] = jsonData[0] as any;
const dataRows: any[][] = jsonData.slice(1) as any;
// (Papa Parse version) chunk callback: record errors, append parsed rows.
function chunk(results: Papa.ParseStepResult<any>) {
if (results?.errors?.length) {
errors.push(...results.errors);
// (SheetJS version) build one object per data row, mapping the cell at
// index i to the header name at index i.
dataRows.forEach((row: string[]) => {
const rowData: CSVRowData = {};
for (let i = 0; i < row.length; i++) {
const columnName = headerRow[i];
const cellValue = row[i];
rowData[columnName] = cellValue;
}
// (Papa Parse version) append this chunk's rows to the result.
result.push(...results.data);
}
// (SheetJS version) append the assembled row object to the result.
result.push(rowData);
});
// (Papa Parse version) parser configuration, timing and debug logging.
const config = {
header: true,
dynamicTyping: dynamicTyping,
chunk,
};
const startParsing = performance.now();
Papa.parse(data, config);
const endParsing = performance.now();
log.debug(
`### FILE_PICKER_WIDGET_V2 - CSV PARSING `,
`${endParsing - startParsing} ms`,
);
return result;
}

View File

@ -7699,15 +7699,6 @@ __metadata:
languageName: node
linkType: hard
"@types/papaparse@npm:^5.3.5":
version: 5.3.5
resolution: "@types/papaparse@npm:5.3.5"
dependencies:
"@types/node": "*"
checksum: f9833662e5536836be9586b9344757d99e22c7cfd6997ab212700c3b623491e28548676e023adfa801fc7c60f3f6d1d417dbf02c5f138442ed9b020b128a7f5f
languageName: node
linkType: hard
"@types/parse-json@npm:^4.0.0":
version: 4.0.0
resolution: "@types/parse-json@npm:4.0.0"
@ -9595,7 +9586,6 @@ __metadata:
"@types/node": ^10.12.18
"@types/node-forge": ^0.10.0
"@types/object-hash": ^2.2.1
"@types/papaparse": ^5.3.5
"@types/prismjs": ^1.16.1
"@types/react": ^17.0.2
"@types/react-beautiful-dnd": ^11.0.4
@ -9731,7 +9721,6 @@ __metadata:
node-forge: ^1.3.0
normalizr: ^3.3.0
object-hash: ^3.0.0
papaparse: ^5.3.2
path-to-regexp: ^6.2.0
plop: ^3.1.1
popper.js: ^1.15.0
@ -22726,13 +22715,6 @@ __metadata:
languageName: node
linkType: hard
"papaparse@npm:^5.3.2":
version: 5.3.2
resolution: "papaparse@npm:5.3.2"
checksum: a5950ef931a42f6759a8d3823a43dd30f375b37a0ddea6ea5448c0c5024cd226819231958c49c24fbcdeab297c63fd1d630130b3439876ea0fd17d8a267738ae
languageName: node
linkType: hard
"param-case@npm:^3.0.3, param-case@npm:^3.0.4":
version: 3.0.4
resolution: "param-case@npm:3.0.4"