Add node.js support + tests + change API

This commit is contained in:
David Bruant 2024-06-18 11:06:41 +02:00
parent 0271b68452
commit 7a93508043
11 changed files with 3933 additions and 269 deletions

View File

@ -16,7 +16,7 @@
<link crossorigin="anonymous" rel="stylesheet" href="./build/bundle.css">
<script src="./build/bundle.js" type="module" crossorigin="anonymous"></script>
<script src="./build/front-end.js" type="module" crossorigin="anonymous"></script>
</head>
<body>
<main>

3597
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -2,17 +2,19 @@
"name": "front-end-template",
"version": "0.2.0",
"type": "module",
"browser": "./scripts/main.js",
"main": "./scripts/main.js",
"scripts": {
"build": "rollup -c",
"dev": "npm-run-all --parallel dev:* start",
"dev:rollup": "rollup -c -w",
"start": "http-server -c-1 ."
"start": "http-server -c-1 .",
"test": "ava"
},
"devDependencies": {
"@rollup/plugin-commonjs": "^25.0.7",
"@rollup/plugin-node-resolve": "^15.2.3",
"@rollup/plugin-terser": "^0.4.4",
"ava": "^6.1.3",
"http-server": "^14.1.1",
"npm-run-all": "^4.1.5",
"rollup": "^4.18.0",
@ -23,6 +25,7 @@
"svelte-preprocess": "^5.1.3"
},
"dependencies": {
"@xmldom/xmldom": "^0.8.10",
"unzipit": "^1.4.3"
}
}

View File

@ -1,6 +1,6 @@
# ods-xlsx
Small lib to parse/understand .ods and .xlsx files in the browser
Small lib to parse/understand .ods and .xlsx files in the browser and node.js
## Usage
@ -17,14 +17,16 @@ npm i github:DavidBruant/ods-xlsx#v0.2.0
#### Basic
```js
import {getTableRawContentFromFile, tableRawContentToObjects} from './main.js'
import {tableRawContentToObjects, tableWithoutEmptyRows, getODSTableRawContent} from 'ods-xlsx'
/**
* @param {File} file - a file like the ones you get from an <input type=file>
* @param {File} file - an .ods file like the ones you get from an <input type=file>
* @return {Promise<any[]>}
*/
async function getFileData(file){
return getTableRawContentFromFile(file).then(tableRawContentToObjects)
return getODSTableRawContent(await file.arrayBuffer())
.then(tableWithoutEmptyRows)
.then(tableRawContentToObjects)
}
```
@ -36,14 +38,7 @@ to the appropriate JavaScript value
#### Low-level
`getTableRawContentFromFile` returns a `Promise` for an array of array of `{value, type}` objects where:
- `value` is a string or `undefined` or `null` and
- `type` is a type defined in the .ods or .xlsx standards
See the `convertCellValue` function in the source code for an example of how to handle the `type` value
`tableRawContentToObjects` performs a conversion on values and also removes empty rows
See exports
### Demo

View File

@ -13,7 +13,7 @@ export default {
output: {
sourcemap: true,
format: 'es',
file: 'build/bundle.js'
dir: 'build'
},
plugins: [
svelte({

View File

@ -1,9 +1,26 @@
<script>
import {getTableRawContentFromFile, tableRawContentToObjects} from './main.js'
//@ts-check
import {tableRawContentToObjects, tableWithoutEmptyRows, getODSTableRawContent, getXLSXTableRawContent} from './main.js'
const ODS_TYPE = "application/vnd.oasis.opendocument.spreadsheet";
const XLSX_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

/**
 * Picks the parser matching the MIME type of the given file and
 * feeds it the file bytes.
 *
 * @param {File} file
 * @returns {Promise<Map<SheetName, SheetRawContent>>}
 * @throws {TypeError} (as a rejection) when the MIME type is neither ODS nor XLSX
 */
async function getTableRawContentFromFile(file){
    switch(file.type){
        case ODS_TYPE:
            return getODSTableRawContent(await file.arrayBuffer())
        case XLSX_TYPE:
            return getXLSXTableRawContent(await file.arrayBuffer())
        default:
            throw new TypeError(`Unsupported file type: ${file.type} (${file.name})`)
    }
}
let files
let tableRawContent;
@ -11,7 +28,7 @@
/** @type {File} */
$: file = files && files[0]
$: tableRawContent = file && getTableRawContentFromFile(file)
$: tableObjectSheets = tableRawContent && tableRawContent.then(tableRawContentToObjects) || []
$: tableObjectSheets = tableRawContent && tableRawContent.then(tableWithoutEmptyRows).then(tableRawContentToObjects) || []
$: Promise.resolve(tableObjectSheets).then(x => console.log('tableObjectSheets', x))
</script>

View File

@ -1,251 +1,50 @@
//@ts-check
import { unzip } from 'unzipit';
let _DOMParser
/**
* @typedef SheetCellRawContent
* @prop {string | null | undefined} value
* @prop {'float' | 'percentage' | 'currency' | 'date' | 'time' | 'boolean' | 'string' | 'b' | 'd' | 'e' | 'inlineStr' | 'n' | 's' | 'str'} type
*/
if(typeof DOMParser !== 'undefined' && Object(DOMParser) === DOMParser && DOMParser.prototype && typeof DOMParser.prototype.parseFromString === 'function'){
console.info('[ods-xlsx] Already existing DOMParser. Certainly in the browser')
_DOMParser = DOMParser
}
else{
console.info('[ods-xlsx] No native DOMParser. Certainly in Node.js')
/** @typedef {SheetCellRawContent[]} SheetRowRawContent */
/** @typedef {SheetRowRawContent[]} SheetRawContent */
/** @typedef {string} SheetName */
const ODS_TYPE = "application/vnd.oasis.opendocument.spreadsheet";
const XLSX_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
/**
* Extracts raw table content from an ODS file.
* @param {File} file - The ODS file.
* @param {Function} unzip - Function to unzip the file.
* @param {Function} parseXML - Function to parse XML content.
* @returns {Promise<Map<SheetName, SheetRawContent>>}
*/
async function getTableRawContentFromODSFile(file, unzip, parseXML) {
const zip = await unzip(file);
const entries = zip.entries;
// Extract the content.xml file which contains the spreadsheet data
const contentXml = await entries['content.xml'].text();
const contentDoc = parseXML(contentXml);
const tableMap = new Map();
const tables = contentDoc.getElementsByTagName('table:table');
for (let table of tables) {
const sheetName = table.getAttribute('table:name');
const rows = table.getElementsByTagName('table:table-row');
const sheetData = [];
for (let row of rows) {
const cells = row.getElementsByTagName('table:table-cell');
const rowData = [];
for (let cell of cells) {
const cellType = cell.getAttribute('office:value-type');
const cellValue = cellType === 'string' ? cell.textContent : cell.getAttribute('office:value');
rowData.push({
value: cellValue,
type: cellType
});
}
sheetData.push(rowData);
}
tableMap.set(sheetName, sheetData);
}
return tableMap;
const xmldom = await import('@xmldom/xmldom')
_DOMParser = xmldom.DOMParser
}
/**
* Extracts raw table content from an XLSX file.
* @param {File} file - The XLSX file.
* @param {Function} unzip - Function to unzip the file.
* @param {Function} parseXML - Function to parse XML content.
* @returns {Promise<Map<SheetName, SheetRawContent>>}
*/
async function getTableRawContentFromXSLXFile(file, unzip, parseXML) {
const zip = await unzip(file);
const entries = zip.entries;
const sharedStringsXml = await entries['xl/sharedStrings.xml'].text();
const sharedStringsDoc = parseXML(sharedStringsXml);
const sharedStrings = Array.from(sharedStringsDoc.getElementsByTagName('sst')[0].getElementsByTagName('si')).map(si => si.textContent);
// Get sheet names and their corresponding XML files
const workbookXml = await entries['xl/workbook.xml'].text();
const workbookDoc = parseXML(workbookXml);
const sheets = Array.from(workbookDoc.getElementsByTagName('sheets')[0].getElementsByTagName('sheet'));
const sheetNames = sheets.map(sheet => sheet.getAttribute('name'));
const sheetIds = sheets.map(sheet => sheet.getAttribute('r:id'));
// Read the relations to get the actual filenames for each sheet
const workbookRelsXml = await entries['xl/_rels/workbook.xml.rels'].text();
const workbookRelsDoc = parseXML(workbookRelsXml);
const sheetRels = Array.from(workbookRelsDoc.getElementsByTagName('Relationship'));
const sheetFiles = sheetIds.map(id => sheetRels.find(rel => rel.getAttribute('Id') === id).getAttribute('Target').replace('worksheets/', ''));
// Read each sheet's XML and extract data in parallel
const sheetDataPs = sheetFiles.map((sheetFile, index) => (
entries[`xl/worksheets/${sheetFile}`].text().then(sheetXml => {
const sheetDoc = parseXML(sheetXml);
const rows = sheetDoc.getElementsByTagName('sheetData')[0].getElementsByTagName('row');
const sheetData = [];
for (let row of rows) {
const cells = row.getElementsByTagName('c');
const rowData = [];
for (let cell of cells) {
const cellType = cell.getAttribute('t') || 'n';
let cellValue = cell.getElementsByTagName('v')[0]?.textContent || '';
if (cellType === 's') {
cellValue = sharedStrings[parseInt(cellValue, 10)];
}
rowData.push({
value: cellValue,
type: cellType
});
}
sheetData.push(rowData);
}
return [sheetNames[index], sheetData];
})
));
return new Map(await Promise.all(sheetDataPs));
}
const parser = new DOMParser();
/**
* @param {string} str
* @returns {Document}
*/
function parseXML(str){
return parser.parseFromString(str, 'application/xml');
return (new _DOMParser()).parseFromString(str, 'application/xml');
}
import {
_getODSTableRawContent,
_getXLSXTableRawContent
} from './shared.js'
/**
*
* @param {File} file
* @returns {Promise<Map<SheetName, SheetRawContent>>}
* @param {ArrayBuffer} odsArrBuff
* @returns {ReturnType<_getODSTableRawContent>}
*/
export function getTableRawContentFromFile(file){
if(file.type === ODS_TYPE)
return getTableRawContentFromODSFile(file, unzip, parseXML)
if(file.type === XLSX_TYPE)
return getTableRawContentFromXSLXFile(file, unzip, parseXML)
throw new TypeError(`Unsupported file type: ${file.type} (${file.name})`)
}
/**
* Converts a cell value to the appropriate JavaScript type based on its cell type.
* @param {SheetCellRawContent} _
* @returns {number | boolean | string | Date} The converted value.
*/
function convertCellValue({value, type}) {
if(value === ''){
return ''
}
if(value === null || value === undefined){
return ''
}
switch (type) {
case 'float':
case 'percentage':
case 'currency':
case 'n': // number
return parseFloat(value);
case 'date':
case 'd': // date
return new Date(value);
case 'boolean':
case 'b': // boolean
return value === '1' || value === 'true';
case 's': // shared string
case 'inlineStr': // inline string
case 'string':
case 'e': // error
case 'time':
default:
return value;
}
// Public entry point for .ods content: forwards the ArrayBuffer to the shared
// implementation (_getODSTableRawContent) and returns whatever it returns.
export function getODSTableRawContent(odsArrBuff){
// parseXML is this module's XML parsing function; the shared code receives it as an argument
return _getODSTableRawContent(odsArrBuff, parseXML)
}
/**
* @param {SheetCellRawContent} rawCellContent
* @returns {boolean}
* @param {ArrayBuffer} xlsxArrBuff
* @returns {ReturnType<_getXLSXTableRawContent>}
*/
function isCellNotEmpty({value}){
return value !== '' && value !== null && value !== undefined
}
/**
* @param {SheetRowRawContent} rawContentRow
* @returns {boolean}
*/
function isRowNotEmpty(rawContentRow){
return rawContentRow.some(isCellNotEmpty)
}
/**
*
* @param {SheetRawContent} rawContent
* @returns {any[]}
*/
function rawContentToObjects(rawContent){
let [firstRow, ...dataRows] = rawContent
/** @type {string[]} */
//@ts-expect-error this type is correct after the filter
const columns = firstRow.filter(({value}) => typeof value === 'string' && value.length >= 1).map(r => r.value)
return dataRows
.filter(isRowNotEmpty) // remove empty rows
.map(row => {
const obj = Object.create(null)
columns.forEach((column, i) => {
const rawValue = row[i]
obj[column] = rawValue ? convertCellValue(rawValue) : ''
})
return obj
})
// Public entry point for .xlsx content: forwards the ArrayBuffer to the shared
// implementation (_getXLSXTableRawContent) and returns whatever it returns.
export function getXLSXTableRawContent(xlsxArrBuff){
// parseXML is this module's XML parsing function; the shared code receives it as an argument
return _getXLSXTableRawContent(xlsxArrBuff, parseXML)
}
/**
*
* @param {Map<SheetName, SheetRawContent>} rawContentSheets
* @returns {Map<SheetName, any[]>}
*/
export function tableRawContentToObjects(rawContentSheets){
return new Map(
[...rawContentSheets].map(([sheetName, rawContent]) => {
return [sheetName, rawContentToObjects(rawContent)]
})
)
}
export {
isRowNotEmpty,
// table-level exports
tableWithoutEmptyRows,
tableRawContentToValues,
tableRawContentToStrings,
tableRawContentToObjects,
} from './shared.js'

263
scripts/shared.js Normal file
View File

@ -0,0 +1,263 @@
//@ts-check
import { unzip } from 'unzipit';
import './types.js'
/**
 * Reads the raw (still textual) value of an ODS cell.
 *
 * In OpenDocument the attribute holding the value depends on the value type:
 * numbers (float, percentage, currency) use `office:value`, while dates, times
 * and booleans use `office:date-value`, `office:time-value` and
 * `office:boolean-value`. Strings are read from the cell text content.
 *
 * @param {Element} cell - a `table:table-cell` element
 * @param {string | null} cellType - value of the `office:value-type` attribute
 * @returns {string | null | undefined}
 */
function readODSCellRawValue(cell, cellType) {
    switch (cellType) {
        case 'string':
            return cell.textContent;
        case 'date':
            return cell.getAttribute('office:date-value');
        case 'time':
            return cell.getAttribute('office:time-value');
        case 'boolean':
            return cell.getAttribute('office:boolean-value');
        default:
            // float, percentage, currency, and cells without a value type
            return cell.getAttribute('office:value');
    }
}

/**
 * Extracts raw table content from an ODS file.
 *
 * Every sheet (`table:table`) becomes one Map entry keyed by its `table:name`;
 * its value is an array of rows, each row being an array of `{value, type}`
 * cells in document order.
 *
 * @param {ArrayBuffer} arrayBuffer - The ODS file.
 * @param {(str: String) => Document} parseXML - Function to parse XML content.
 * @returns {Promise<Map<SheetName, SheetRawContent>>}
 * @throws {TypeError} (as a rejection) when the archive has no `content.xml`
 */
export async function _getODSTableRawContent(arrayBuffer, parseXML) {
    const zip = await unzip(arrayBuffer);
    const entries = zip.entries;

    // content.xml is the part of the archive that contains the spreadsheet data
    const contentEntry = entries['content.xml'];
    if (!contentEntry) {
        throw new TypeError('Invalid .ods file: missing content.xml');
    }

    const contentXml = await contentEntry.text();
    const contentDoc = parseXML(contentXml);

    const tableMap = new Map();
    const tables = contentDoc.getElementsByTagName('table:table');

    // Array.from: node lists are not guaranteed to be iterable outside the browser
    for (const table of Array.from(tables)) {
        const sheetName = table.getAttribute('table:name');
        const rows = table.getElementsByTagName('table:table-row');
        const sheetData = [];

        for (const row of Array.from(rows)) {
            const cells = row.getElementsByTagName('table:table-cell');
            const rowData = [];

            for (const cell of Array.from(cells)) {
                const cellType = cell.getAttribute('office:value-type');
                rowData.push({
                    value: readODSCellRawValue(cell, cellType),
                    type: cellType
                });
            }

            sheetData.push(rowData);
        }

        tableMap.set(sheetName, sheetData);
    }

    return tableMap;
}
/**
 * Extracts raw table content from an XLSX file.
 *
 * Every worksheet listed in `xl/workbook.xml` becomes one Map entry keyed by
 * the sheet name; its value is an array of rows, each row being an array of
 * `{value, type}` cells in document order. Cells of type `s` are resolved
 * against the shared strings table, cells of type `inlineStr` are read from
 * their `<is>` child, every other cell is read from its `<v>` child.
 *
 * @param {ArrayBuffer} arrayBuffer - The XLSX file.
 * @param {(str: String) => Document} parseXML - Function to parse XML content.
 * @returns {Promise<Map<SheetName, SheetRawContent>>}
 * @throws {TypeError} (as a rejection) when a sheet has no matching relationship
 */
export async function _getXLSXTableRawContent(arrayBuffer, parseXML) {
    const zip = await unzip(arrayBuffer);
    const entries = zip.entries;

    // The shared strings part is optional: a workbook may not have one
    const sharedStringsEntry = entries['xl/sharedStrings.xml'];
    /** @type {(string | null)[]} */
    let sharedStrings = [];
    if (sharedStringsEntry) {
        const sharedStringsDoc = parseXML(await sharedStringsEntry.text());
        sharedStrings = Array.from(
            sharedStringsDoc.getElementsByTagName('sst')[0].getElementsByTagName('si')
        ).map(si => si.textContent);
    }

    // Get sheet names and their relationship ids
    const workbookXml = await entries['xl/workbook.xml'].text();
    const workbookDoc = parseXML(workbookXml);
    const sheets = Array.from(workbookDoc.getElementsByTagName('sheets')[0].getElementsByTagName('sheet'));
    const sheetNames = sheets.map(sheet => sheet.getAttribute('name'));
    const sheetIds = sheets.map(sheet => sheet.getAttribute('r:id'));

    // Read the relations to get the actual archive path of each sheet
    const workbookRelsXml = await entries['xl/_rels/workbook.xml.rels'].text();
    const workbookRelsDoc = parseXML(workbookRelsXml);
    const sheetRels = Array.from(workbookRelsDoc.getElementsByTagName('Relationship'));
    const sheetPaths = sheetIds.map(id => {
        const sheetRel = sheetRels.find(rel => rel.getAttribute('Id') === id);
        if (!sheetRel) {
            throw new TypeError(`Invalid .xlsx file: no relationship found for sheet id ${id}`);
        }
        const target = sheetRel.getAttribute('Target') || '';
        // A target is either absolute in the archive ("/xl/worksheets/sheet1.xml")
        // or relative to the folder of workbook.xml ("worksheets/sheet1.xml")
        return target.startsWith('/') ? target.slice(1) : `xl/${target}`;
    });

    // Read each sheet's XML and extract data in parallel
    const sheetDataPs = sheetPaths.map((sheetPath, index) => (
        entries[sheetPath].text().then(sheetXml => {
            const sheetDoc = parseXML(sheetXml);
            const rows = sheetDoc.getElementsByTagName('sheetData')[0].getElementsByTagName('row');
            const sheetData = [];

            // Array.from: node lists are not guaranteed to be iterable outside the browser
            for (const row of Array.from(rows)) {
                const cells = row.getElementsByTagName('c');
                const rowData = [];

                for (const cell of Array.from(cells)) {
                    // A cell without a `t` attribute is a number
                    const cellType = cell.getAttribute('t') || 'n';
                    let cellValue;

                    if (cellType === 'inlineStr') {
                        // Inline strings carry their text in <is>, not in <v>
                        cellValue = cell.getElementsByTagName('is')[0]?.textContent || '';
                    }
                    else {
                        cellValue = cell.getElementsByTagName('v')[0]?.textContent || '';
                        if (cellType === 's') {
                            // <v> holds an index in the shared strings table
                            cellValue = sharedStrings[parseInt(cellValue, 10)];
                        }
                    }

                    rowData.push({
                        value: cellValue,
                        type: cellType
                    });
                }

                sheetData.push(rowData);
            }

            return [sheetNames[index], sheetData];
        })
    ));

    return new Map(await Promise.all(sheetDataPs));
}
/**
 * Turns a raw cell into a JavaScript value according to its declared type.
 *
 * - an empty, null or undefined value always becomes ''
 * - numeric types (float, percentage, currency, n) go through parseFloat
 * - date types (date, d) become a Date
 * - boolean types (boolean, b) are true for '1' and 'true', false otherwise
 * - any other type is returned unchanged
 *
 * @param {SheetCellRawContent} _
 * @returns {number | boolean | string | Date} The converted value.
 */
function convertCellValue({value, type}) {
    const isBlank = value === '' || value === null || value === undefined
    if(isBlank){
        return ''
    }

    const isNumeric = type === 'float' || type === 'percentage' || type === 'currency' || type === 'n'
    if(isNumeric){
        return parseFloat(value)
    }

    if(type === 'date' || type === 'd'){
        return new Date(value)
    }

    if(type === 'boolean' || type === 'b'){
        return value === '1' || value === 'true'
    }

    // strings, errors, times and unknown types are kept as-is
    return value
}
/**
 * Turns a sheet into an array of objects, one per data row.
 *
 * The first row is the header row: each non-empty string cell of that row
 * names a property. Every following row yields one prototype-less object
 * whose properties hold the converted value of the cell located in the same
 * column as the header ('' when the row has no cell in that column).
 * Columns whose header cell is empty are ignored.
 *
 * @param {SheetRawContent} rawContent
 * @returns {any[]} an empty array when the sheet has no row at all
 */
export function rawContentToObjects(rawContent){
    const [firstRow, ...dataRows] = rawContent

    // A sheet without any row has no header, hence nothing to build
    if(!firstRow){
        return []
    }

    // Remember the position of each header in the row: filtering the headers
    // first and indexing afterwards would shift every column located after
    // an empty header cell
    /** @type {{name: string, index: number}[]} */
    const columns = []
    firstRow.forEach(({value}, index) => {
        if(typeof value === 'string' && value.length >= 1){
            columns.push({name: value, index})
        }
    })

    return dataRows.map(row => {
        // no prototype, so that a column named like an Object.prototype member is harmless
        const obj = Object.create(null)

        for(const {name, index} of columns){
            const rawValue = row[index]
            obj[name] = rawValue ? convertCellValue(rawValue) : ''
        }

        return obj
    })
}
/**
 * Builds a new Map with the same sheet names where every raw cell
 * has been replaced by its converted JavaScript value.
 *
 * @param {Map<SheetName, SheetRawContent>} rawContentSheets
 * @returns {Map<SheetName, ReturnType<convertCellValue>[][]>}
 */
export function tableRawContentToValues(rawContentSheets){
    const valuesBySheet = new Map()

    for(const [sheetName, rows] of rawContentSheets){
        const convertedRows = rows.map(cells => cells.map(cell => convertCellValue(cell)))
        valuesBySheet.set(sheetName, convertedRows)
    }

    return valuesBySheet
}
/**
 * Builds a new Map with the same sheet names where every raw cell
 * has been replaced by its raw textual value ('' when the cell has none).
 *
 * @param {Map<SheetName, SheetRawContent>} rawContentSheets
 * @returns {Map<SheetName, string[][]>}
 */
export function tableRawContentToStrings(rawContentSheets){
    const stringsBySheet = new Map()

    for(const [sheetName, rows] of rawContentSheets){
        const textRows = rows.map(cells => cells.map(cell => (cell.value || '')))
        stringsBySheet.set(sheetName, textRows)
    }

    return stringsBySheet
}
/**
 * Builds a new Map with the same sheet names where every sheet
 * has been turned into an array of objects by rawContentToObjects.
 *
 * @param {Map<SheetName, SheetRawContent>} rawContentSheets
 * @returns {Map<SheetName, any[]>}
 */
export function tableRawContentToObjects(rawContentSheets){
    const objectsBySheet = new Map()

    for(const [sheetName, sheet] of rawContentSheets){
        objectsBySheet.set(sheetName, rawContentToObjects(sheet))
    }

    return objectsBySheet
}
/**
 * Tells whether a cell holds something: its value is neither
 * the empty string, nor null, nor undefined.
 *
 * @param {SheetCellRawContent} rawCellContent
 * @returns {boolean}
 */
export function isCellFilled({value}){
    const isBlank = value === '' || value === null || value === undefined
    return !isBlank
}
/**
 * Tells whether a row contains at least one filled cell.
 *
 * @param {SheetRowRawContent} rawContentRow
 * @returns {boolean} false for a row without cells
 */
export function isRowNotEmpty(rawContentRow){
    const hasFilledCell = rawContentRow.some(cell => isCellFilled(cell))
    return hasFilledCell
}
/**
 * Returns a new sheet made of the rows that contain at least one filled cell.
 * The sheet received as argument is left untouched.
 *
 * @param {SheetRawContent} sheet
 * @returns {SheetRawContent}
 */
export function removeEmptyRowsFromSheet(sheet){
    const keptRows = sheet.filter(row => isRowNotEmpty(row))
    return keptRows
}
/**
 * Builds a new Map with the same sheet names where every sheet
 * has been stripped of its empty rows by removeEmptyRowsFromSheet.
 *
 * @param {Map<SheetName, SheetRawContent>} rawContentTable
 * @returns {Map<SheetName, SheetRawContent>}
 */
export function tableWithoutEmptyRows(rawContentTable){
    const cleanedTable = new Map()

    for(const [sheetName, sheet] of rawContentTable){
        cleanedTable.set(sheetName, removeEmptyRowsFromSheet(sheet))
    }

    return cleanedTable
}

11
scripts/types.js Normal file
View File

@ -0,0 +1,11 @@
/**
* @typedef SheetCellRawContent
* @prop {string | null | undefined} value
* @prop {'float' | 'percentage' | 'currency' | 'date' | 'time' | 'boolean' | 'string' | 'b' | 'd' | 'e' | 'inlineStr' | 'n' | 's' | 'str'} type
*/
/** @typedef {SheetCellRawContent[]} SheetRowRawContent */
/** @typedef {SheetRowRawContent[]} SheetRawContent */
/** @typedef {string} SheetName */

19
tests/basic-node.js Normal file
View File

@ -0,0 +1,19 @@
import {readFile} from 'node:fs/promises'
import test from 'ava';
import {getODSTableRawContent} from '../scripts/main.js'
// Fixture: bytes of a small .ods file whose first sheet is named "Feuille1"
// and whose first cell contains the string "Nom" (per the assertions below).
const nomAgeFile = await readFile('./tests/data/nom-age.ods')

// `Buffer#buffer` is the whole backing ArrayBuffer, which is not guaranteed to
// match the file bytes exactly (it can be larger and the data can start at a
// non-zero offset). Copy out exactly the bytes of the file.
const nomAgeContent = nomAgeFile.buffer.slice(
    nomAgeFile.byteOffset,
    nomAgeFile.byteOffset + nomAgeFile.byteLength
)

// Checks the shape of the raw content: Map of sheet name -> rows -> {value, type} cells
test('basic', async t => {
    const table = await getODSTableRawContent(nomAgeContent);

    t.assert(table.has('Feuille1'))

    const feuille1 = table.get('Feuille1')
    t.assert(Array.isArray(feuille1))
    //@ts-ignore
    t.assert(Array.isArray(feuille1[0]))
    //@ts-ignore
    t.deepEqual(feuille1[0][0], {value: 'Nom', type: 'string'})
});

BIN
tests/data/nom-age.ods Normal file

Binary file not shown.