2024-06-18 11:06:41 +02:00
|
|
|
//@ts-check
|
2025-03-25 22:06:53 +01:00
|
|
|
import { Uint8ArrayReader, ZipReader, TextWriter } from '@zip.js/zip.js';
|
2024-06-18 11:06:41 +02:00
|
|
|
|
2025-04-17 17:39:08 +02:00
|
|
|
import {parseXML} from './DOMUtils.js'
|
|
|
|
|
|
2025-03-25 22:06:53 +01:00
|
|
|
/** @import {Entry} from '@zip.js/zip.js'*/
|
2025-09-16 16:43:47 +02:00
|
|
|
/** @import {SheetName, SheetRawContent, SheetRowRawContent, SheetCellRawContent, OdfjsImage} from './types.js' */
|
2024-06-18 11:06:41 +02:00
|
|
|
|
2025-03-25 22:06:53 +01:00
|
|
|
|
2024-07-24 22:09:08 +02:00
|
|
|
// https://dom.spec.whatwg.org/#interface-node
|
|
|
|
|
// Numeric `nodeType` of a DOM Text node (`Node.TEXT_NODE` in the DOM standard).
// NOTE(review): presumably declared locally so this module does not rely on a global `Node` — confirm.
const TEXT_NODE = 3
|
|
|
|
|
|
|
|
|
|
/**
 * Converts one inline node found inside a `<text:p>` into plain text.
 *
 * Recognised constructs:
 * - text nodes: their value
 * - `<text:line-break/>`: a newline
 * - `<text:tab/>`: a tab character
 * - `<text:s text:c="n"/>`: n spaces (one when `text:c` is absent or invalid)
 * - `<text:a>` / `<text:span>`: the concatenated text of their children (recursively)
 *
 * @param {Node} node
 * @returns {string | undefined} the text for `node`, or `undefined` when the node
 *   is not one of the recognised constructs
 */
function extractODSInlineText(node) {
    const ELEMENT_NODE = 1
    const CDATA_SECTION_NODE = 4
    // Defensive cap: a crafted file could otherwise request a gigantic string
    const MAX_REPEATED_SPACES = 1000

    if (node.nodeType === TEXT_NODE) {
        return node.nodeValue || ''
    }

    switch (node.nodeName) {
        case 'text:line-break':
            return '\n'
        case 'text:tab':
            return '\t'
        case 'text:s': {
            const element = /** @type {Element} */ (node)
            const count = parseInt(element.getAttribute('text:c') || '1', 10)
            return ' '.repeat(count >= 1 ? Math.min(count, MAX_REPEATED_SPACES) : 1)
        }
        case 'text:a':
        case 'text:span': {
            let text = ''
            for (const child of Array.from(node.childNodes)) {
                const childText = extractODSInlineText(child)
                if (childText !== undefined) {
                    text += childText
                } else if (child.nodeType === ELEMENT_NODE || child.nodeType === CDATA_SECTION_NODE) {
                    // Unrecognised nested content keeps contributing its plain text,
                    // as it would through `textContent` on the enclosing span/link
                    text += child.textContent || ''
                }
            }
            return text
        }
        default:
            return undefined
    }
}

/**
 * Extracts the plain text of an ODS `<table:table-cell>`.
 *
 * Paragraphs (`<text:p>`) are separated by a newline. Inside a paragraph, text nodes,
 * line breaks, tabs, repeated spaces, links and spans are converted to text; any other
 * direct child of a paragraph is ignored. Text nodes and `<text:line-break/>` placed
 * directly under the cell are handled too. The result is trimmed.
 *
 * @param {Element} cell
 * @returns {string}
 */
function extraxtODSCellText(cell) {
    let text = ''

    for (const child of Array.from(cell.childNodes)) {
        if (child.nodeType === TEXT_NODE) {
            // Direct text node, append the text directly
            text += child.nodeValue
        } else if (child.nodeName === 'text:p') {
            if (text.length > 0) {
                // Separate this paragraph from what was collected before it
                text += '\n'
            }

            for (const pChild of Array.from(child.childNodes)) {
                const inlineText = extractODSInlineText(pChild)
                if (inlineText !== undefined) {
                    text += inlineText
                }
            }
        } else if (child.nodeName === 'text:line-break') {
            // <text:line-break /> directly under <table:table-cell>
            text += '\n'
        }
    }

    return text.trim()
}
|
|
|
|
|
|
|
|
|
|
|
2024-06-18 11:06:41 +02:00
|
|
|
/**
 * Reads the raw (string) value of a cell from the attribute matching its value type.
 *
 * - `string`: the text content of the cell
 * - `date`: `office:date-value`
 * - `boolean`: `office:boolean-value` (falls back to `office:value`)
 * - `time`: `office:time-value` (falls back to `office:value`)
 * - anything else (float, percentage, currency, untyped…): `office:value`
 *
 * @param {Element} cell
 * @param {string | null} cellType - value of the `office:value-type` attribute
 * @returns {string | null}
 */
function readODSCellRawValue(cell, cellType) {
    switch (cellType) {
        case 'string':
            return extraxtODSCellText(cell)
        case 'date':
            return cell.getAttribute('office:date-value')
        case 'boolean':
            return cell.getAttribute('office:boolean-value') || cell.getAttribute('office:value')
        case 'time':
            return cell.getAttribute('office:time-value') || cell.getAttribute('office:value')
        default:
            return cell.getAttribute('office:value')
    }
}

/**
 * Extracts raw table content from an ODS file.
 *
 * The file is opened as a zip archive and its `content.xml` entry is parsed. For every
 * `table:table` element, each `table:table-row` becomes an array of `{value, type}` cells,
 * where `type` is the `office:value-type` attribute and `value` the matching raw string.
 *
 * Cells carrying `table:number-columns-repeated` are duplicated that many times, unless
 * the count is 100 or more (or not a number), in which case the cell is skipped entirely.
 * Only `table:table-cell` elements and the column repetition attribute are read here.
 *
 * @param {ArrayBuffer} arrayBuffer - The ODS file.
 * @returns {Promise<Map<SheetName, SheetRawContent>>}
 * @throws {TypeError} when the archive has no `content.xml` entry
 */
export async function getODSTableRawContent(arrayBuffer) {
    const zipReader = new ZipReader(new Uint8ArrayReader(new Uint8Array(arrayBuffer)))

    /** @type {string} */
    let contentXml

    try {
        const zipEntries = await zipReader.getEntries()

        /** @type {Map<Entry['filename'], Entry>} */
        const entryByFilename = new Map()
        for (const entry of zipEntries) {
            entryByFilename.set(entry.filename, entry)
        }

        const contentXmlEntry = entryByFilename.get('content.xml')

        if (!contentXmlEntry) {
            throw new TypeError(`entry 'content.xml' manquante dans le zip`)
        }

        // content.xml is the entry which contains the spreadsheet data
        //@ts-ignore
        contentXml = await contentXmlEntry.getData(new TextWriter())
    } finally {
        // Close the reader only once the entry data has been read, even on failure
        await zipReader.close()
    }

    const contentDoc = parseXML(contentXml)
    const tableMap = new Map()

    for (const table of Array.from(contentDoc.getElementsByTagName('table:table'))) {
        const sheetName = table.getAttribute('table:name')
        const sheetData = []

        for (const row of Array.from(table.getElementsByTagName('table:table-row'))) {
            const rowData = []

            for (const cell of Array.from(row.getElementsByTagName('table:table-cell'))) {
                const cellType = cell.getAttribute('office:value-type')
                const cellValue = readODSCellRawValue(cell, cellType)

                const numberOfColumnsRepeated = cell.getAttribute('table:number-columns-repeated')
                const repeatCount = numberOfColumnsRepeated ? parseInt(numberOfColumnsRepeated, 10) : 1

                if (repeatCount < 100) { // ignore excessive repetitions
                    for (let i = 0; i < repeatCount; i++) {
                        rowData.push({
                            value: cellValue,
                            type: cellType
                        })
                    }
                }
            }

            sheetData.push(rowData)
        }

        tableMap.set(sheetName, sheetData)
    }

    return tableMap
}
|
|
|
|
|
|
2024-07-08 15:06:36 +02:00
|
|
|
|
2024-06-18 11:06:41 +02:00
|
|
|
/**
 * Turns a raw cell into a JavaScript value chosen from the cell type.
 *
 * - empty string, `null` or `undefined` value: `''`, whatever the type
 * - `float`, `percentage`, `currency`, `n`: number (via `parseFloat`)
 * - `date`, `d`: `Date` built from the raw value
 * - `boolean`, `b`: `true` when the raw value is `'1'` or `'true'`, `false` otherwise
 * - every other type (`s`, `inlineStr`, `string`, `e`, `time`, unknown…): the raw value unchanged
 *
 * @param {SheetCellRawContent} _
 * @returns {number | boolean | string | Date} The converted value.
 */
export function convertCellValue({value, type}) {
    const isBlank = value === '' || value === null || value === undefined
    if (isBlank) {
        return ''
    }

    if (type === 'float' || type === 'percentage' || type === 'currency' || type === 'n') {
        return parseFloat(value)
    }

    if (type === 'date' || type === 'd') {
        return new Date(value)
    }

    if (type === 'boolean' || type === 'b') {
        return value === '1' || value === 'true'
    }

    // Strings, errors, times and unknown types are passed through as-is
    return value
}
|
|
|
|
|
|
|
|
|
|
|
2025-09-16 16:43:47 +02:00
|
|
|
/**
 * Type guard telling whether `value` has the shape of an image object:
 * a non-null object whose `content` is an `ArrayBuffer` and whose
 * `fileName` and `mediaType` are strings.
 *
 * @param {unknown} value
 * @returns {value is OdfjsImage}
 */
export function isOdfjsImage(value) {
    if (typeof value !== 'object' || value === null) {
        return false
    }

    if (!("content" in value) || !(value.content instanceof ArrayBuffer)) {
        return false
    }

    if (!("fileName" in value) || typeof value.fileName !== 'string') {
        return false
    }

    return "mediaType" in value && typeof value.mediaType === 'string'
}
|
|
|
|
|
|
2024-06-18 11:06:41 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Converts every cell of every sheet to a JavaScript value with `convertCellValue`.
 *
 * @param {Map<SheetName, SheetRawContent>} rawContentSheets
 * @returns {Map<SheetName, ReturnType<typeof convertCellValue>[][]>} a new map with the
 *   same sheet names, each sheet being rows of converted values
 */
export function tableRawContentToValues(rawContentSheets){
    const valuesBySheet = new Map()

    for (const [sheetName, rawContent] of rawContentSheets) {
        const convertedRows = rawContent.map(row => {
            return row.map(cell => convertCellValue(cell))
        })
        valuesBySheet.set(sheetName, convertedRows)
    }

    return valuesBySheet
}
|
|
|
|
|
|
2024-06-18 11:49:57 +02:00
|
|
|
/**
|
|
|
|
|
* Convert values to strings
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
 * Gives the raw string value of a cell, or `''` when the cell has no (truthy) value.
 *
 * @param {SheetCellRawContent} rawContentCell
 * @returns {string}
 */
export function cellRawContentToStrings(rawContentCell){
    const { value } = rawContentCell
    return value ? value : ''
}
|
|
|
|
|
|
|
|
|
|
/**
 * Maps a raw row to the string value of each of its cells.
 *
 * @param {SheetRowRawContent} rawContentRow
 * @returns {string[]}
 */
export function rowRawContentToStrings(rawContentRow){
    const cellStrings = rawContentRow.map(cell => cellRawContentToStrings(cell))
    return cellStrings
}
|
|
|
|
|
|
|
|
|
|
/**
 * Maps a raw sheet to rows of cell strings.
 *
 * @param {SheetRawContent} rawContentSheet
 * @returns {string[][]}
 */
export function sheetRawContentToStrings(rawContentSheet){
    const rowsOfStrings = rawContentSheet.map(row => rowRawContentToStrings(row))
    return rowsOfStrings
}
|
|
|
|
|
|
2024-06-18 11:06:41 +02:00
|
|
|
/**
 * Converts every sheet of a table to rows of cell strings.
 *
 * @param {Map<SheetName, SheetRawContent>} rawContentSheets
 * @returns {Map<SheetName, string[][]>} a new map with the same sheet names
 */
export function tableRawContentToStrings(rawContentSheets){
    const stringsBySheet = new Map()

    for (const [sheetName, rawContent] of rawContentSheets) {
        stringsBySheet.set(sheetName, sheetRawContentToStrings(rawContent))
    }

    return stringsBySheet
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-06-18 11:49:57 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert rows to objects
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
 * Converts the rows of a sheet to objects.
 *
 * This function expects the first row to contain string values which are used as column names.
 * It outputs an array of objects (one per remaining row) whose keys are those column names
 * and whose values are the cells converted with `convertCellValue`.
 *
 * A header cell with no value (`undefined`, `null` or `''`) is named `Column N`, N being its
 * 1-based position. A data row shorter than the header gets `''` for its missing cells.
 * A sheet with no rows at all yields an empty array.
 *
 * @param {SheetRawContent} rawContent
 * @returns {any[]} prototype-less objects, one per data row
 */
export function sheetRawContentToObjects(rawContent){
    // No header row (e.g. a sheet whose empty rows were all removed): nothing to convert
    if (rawContent.length === 0) {
        return []
    }

    const [firstRow, ...dataRows] = rawContent

    /** @type {string[]} */
    const columns = firstRow.map((r, i) => {
        if (r.value === undefined || r.value === null || r.value === "") {
            return `Column ${i+1}`
        }

        return r.value
    })

    return dataRows
        .map(row => {
            // No prototype, so that any column name is safe to use as a key
            const obj = Object.create(null)
            columns.forEach((column, i) => {
                const rawValue = row[i]
                obj[column] = rawValue ? convertCellValue(rawValue) : ''
            })
            return obj
        })
}
|
|
|
|
|
|
2024-06-18 11:06:41 +02:00
|
|
|
/**
 * Converts every sheet of a table to an array of row objects
 * (see `sheetRawContentToObjects`).
 *
 * @param {Map<SheetName, SheetRawContent>} rawContentSheets
 * @returns {Map<SheetName, any[]>} a new map with the same sheet names
 */
export function tableRawContentToObjects(rawContentSheets){
    const objectsBySheet = new Map()

    for (const [sheetName, rawContent] of rawContentSheets) {
        objectsBySheet.set(sheetName, sheetRawContentToObjects(rawContent))
    }

    return objectsBySheet
}
|
|
|
|
|
|
|
|
|
|
|
2024-06-18 11:49:57 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Emptiness
|
|
|
|
|
*/
|
|
|
|
|
|
2024-06-18 11:06:41 +02:00
|
|
|
/**
 * Tells whether a raw cell holds a value, i.e. its `value` is neither
 * the empty string, nor `null`, nor `undefined`.
 *
 * @param {SheetCellRawContent} rawCellContent
 * @returns {boolean}
 */
export function isCellFilled({value}){
    const isBlank = value === '' || value === null || value === undefined
    return !isBlank
}
|
|
|
|
|
|
|
|
|
|
/**
 * Tells whether at least one cell of the row is filled (see `isCellFilled`).
 * A row with no cells is considered empty.
 *
 * @param {SheetRowRawContent} rawContentRow
 * @returns {boolean}
 */
export function isRowNotEmpty(rawContentRow){
    const hasFilledCell = rawContentRow.some(cell => isCellFilled(cell))
    return hasFilledCell
}
|
|
|
|
|
|
|
|
|
|
/**
 * Returns a new sheet containing only the rows that have at least one filled cell.
 * The input sheet is not modified; row order is kept.
 *
 * @param {SheetRawContent} sheet
 * @returns {SheetRawContent}
 */
export function removeEmptyRowsFromSheet(sheet){
    const nonEmptyRows = sheet.filter(row => isRowNotEmpty(row))
    return nonEmptyRows
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Builds a copy of the table in which every sheet has had its empty rows removed
 * (see `removeEmptyRowsFromSheet`). Sheets are kept even when they end up with no rows.
 *
 * @param {Map<SheetName, SheetRawContent>} rawContentTable
 * @returns {Map<SheetName, SheetRawContent>}
 */
export function tableWithoutEmptyRows(rawContentTable){
    const cleanedTable = new Map()

    for (const [sheetName, rawContent] of rawContentTable) {
        cleanedTable.set(sheetName, removeEmptyRowsFromSheet(rawContent))
    }

    return cleanedTable
}
|