expose odt text function (#1)
* Remove xlsx support * Restructure exports to avoid duplication of DOM-related code * browser DOM exports * Fixing exports field in package.json
This commit is contained in:
parent
c345323524
commit
5a539f333d
29
exports.js
Normal file
29
exports.js
Normal file
@ -0,0 +1,29 @@
|
||||
//@ts-check
|
||||
|
||||
export {default as fillOdtTemplate} from './scripts/odf/fillOdtTemplate.js'
|
||||
export {getOdtTextContent} from './scripts/odf/odt/getOdtTextContent.js'
|
||||
|
||||
export { createOdsFile } from './scripts/createOdsFile.js'
|
||||
|
||||
export {
|
||||
getODSTableRawContent,
|
||||
|
||||
// table-level exports
|
||||
tableWithoutEmptyRows,
|
||||
tableRawContentToValues,
|
||||
tableRawContentToStrings,
|
||||
tableRawContentToObjects,
|
||||
|
||||
// sheet-level exports
|
||||
sheetRawContentToObjects,
|
||||
sheetRawContentToStrings,
|
||||
|
||||
// row-level exports
|
||||
rowRawContentToStrings,
|
||||
isRowNotEmpty,
|
||||
|
||||
// cell-level exports
|
||||
cellRawContentToStrings,
|
||||
convertCellValue
|
||||
} from './scripts/shared.js'
|
||||
|
||||
@ -5,7 +5,7 @@
|
||||
<meta name="referrer" content="no-referrer">
|
||||
<link rel="icon" href="data:,">
|
||||
|
||||
<title>Upload ods/xlsx</title>
|
||||
<title>Upload ods file</title>
|
||||
|
||||
<meta name="description" content=" ">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
|
||||
@ -2,8 +2,13 @@
|
||||
"name": "@odfjs/odfjs",
|
||||
"version": "0.14.0",
|
||||
"type": "module",
|
||||
"main": "./scripts/node.js",
|
||||
"browser": "./scripts/browser.js",
|
||||
"exports": "./exports.js",
|
||||
"imports": {
|
||||
"#DOM": {
|
||||
"node": "./scripts/DOM/node.js",
|
||||
"browser": "./scripts/DOM/browser.js"
|
||||
}
|
||||
},
|
||||
"scripts": {
|
||||
"build": "rollup -c",
|
||||
"dev": "npm-run-all --parallel dev:* start",
|
||||
|
||||
10
readme.md
10
readme.md
@ -6,7 +6,7 @@ Small lib to parse/understand .odf files (.odt, .ods) in the browser and node.js
|
||||
## Rough roadmap
|
||||
|
||||
- [x] add odt templating
|
||||
- [ ] remove support for xlsx
|
||||
- [x] remove support for xlsx
|
||||
- [ ] add a .ods minifier
|
||||
- [ ] add a generic .ods visualizer
|
||||
- [ ] move to a dedicated odf docs org
|
||||
@ -22,7 +22,7 @@ npm i https://github.com/odfjs/odfjs.git#v0.14.0
|
||||
```
|
||||
|
||||
|
||||
### Basic - reading an ods/xlsx file
|
||||
### Basic - reading an ods file
|
||||
|
||||
```js
|
||||
import {tableRawContentToObjects, tableWithoutEmptyRows, getODSTableRawContent} from '@odfjs/odfjs'
|
||||
@ -40,14 +40,14 @@ async function getFileData(odsFile){
|
||||
|
||||
The return value is an array of objects where
|
||||
the **keys** are the column names in the first row and
|
||||
the **values** are automatically converted from the .ods or .xlsx files (which type numbers, strings, booleans and dates)
|
||||
the **values** are automatically converted from the .ods files (which have typed numbers, strings, booleans and dates)
|
||||
to the appropriate JavaScript value
|
||||
|
||||
|
||||
### Basic - creating an ods file
|
||||
|
||||
```js
|
||||
import {createOdsFile} from 'ods-xlsx'
|
||||
import {createOdsFile} from '@odfjs/odfjs'
|
||||
|
||||
const content = new Map([
|
||||
[
|
||||
@ -128,7 +128,7 @@ They can be used to generate lists or tables in .odt files from data and a templ
|
||||
|
||||
### Demo
|
||||
|
||||
https://davidbruant.github.io/ods-xlsx/
|
||||
https://odfjs.github.io/odfjs/
|
||||
|
||||
|
||||
## Local dev
|
||||
|
||||
@ -1,10 +1,11 @@
|
||||
<script>
|
||||
//@ts-check
|
||||
import {tableRawContentToObjects, tableWithoutEmptyRows, getODSTableRawContent, createOdsFile} from '../exports.js'
|
||||
|
||||
import {tableRawContentToObjects, tableWithoutEmptyRows, getODSTableRawContent, getXLSXTableRawContent, createOdsFile} from './browser.js'
|
||||
/** @import {SheetName, SheetRawContent} from './types.js' */
|
||||
|
||||
const ODS_TYPE = "application/vnd.oasis.opendocument.spreadsheet";
|
||||
const XLSX_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
@ -15,9 +16,6 @@
|
||||
if(file.type === ODS_TYPE)
|
||||
return getODSTableRawContent(await file.arrayBuffer())
|
||||
|
||||
if(file.type === XLSX_TYPE)
|
||||
return getXLSXTableRawContent(await file.arrayBuffer())
|
||||
|
||||
throw new TypeError(`Unsupported file type: ${file.type} (${file.name})`)
|
||||
}
|
||||
|
||||
@ -36,13 +34,13 @@
|
||||
|
||||
</script>
|
||||
|
||||
<h1>Import fichier .ods et .xslx</h1>
|
||||
<h1>Import fichier .ods</h1>
|
||||
|
||||
<section>
|
||||
<h2>Import</h2>
|
||||
<label>
|
||||
Fichier à importer:
|
||||
<input bind:files type="file" id="file-input" accept="{ ['.ods', '.xlsx', ODS_TYPE, XLSX_TYPE].join(',') }" />
|
||||
<input bind:files type="file" id="file-input" accept="{ ['.ods', ODS_TYPE].join(',') }" />
|
||||
</label>
|
||||
</section>
|
||||
|
||||
|
||||
12
scripts/DOM/browser.js
Normal file
12
scripts/DOM/browser.js
Normal file
@ -0,0 +1,12 @@
|
||||
|
||||
console.info('DOM implementation in browser')
|
||||
|
||||
/** @type { typeof DOMImplementation.prototype.createDocument } */
|
||||
export function createDocument(...args){
|
||||
// @ts-ignore
|
||||
return document.implementation.createDocument(...args)
|
||||
}
|
||||
|
||||
export const DOMParser = window.DOMParser
|
||||
export const XMLSerializer = window.XMLSerializer
|
||||
export const Node = window.Node
|
||||
17
scripts/DOM/node.js
Normal file
17
scripts/DOM/node.js
Normal file
@ -0,0 +1,17 @@
|
||||
import { DOMImplementation } from "@xmldom/xmldom"
|
||||
|
||||
console.info('DOM implementation in Node.js based on xmldom')
|
||||
|
||||
const implementation = new DOMImplementation()
|
||||
|
||||
/** @type { typeof DOMImplementation.prototype.createDocument } */
|
||||
export function createDocument(...args){
|
||||
// @ts-ignore
|
||||
return implementation.createDocument(...args)
|
||||
}
|
||||
|
||||
export {
|
||||
DOMParser,
|
||||
XMLSerializer,
|
||||
Node
|
||||
} from "@xmldom/xmldom"
|
||||
@ -1,8 +1,28 @@
|
||||
import {DOMParser, XMLSerializer} from '#DOM'
|
||||
|
||||
/*
|
||||
Since we're using xmldom in the Node.js context, the DOM API is not entirely implemented
|
||||
Functions here are helpers while xmldom becomes more complete
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {string} str
|
||||
* @returns {Document}
|
||||
*/
|
||||
export function parseXML(str){
|
||||
return (new DOMParser()).parseFromString(str, 'application/xml');
|
||||
}
|
||||
|
||||
const serializer = new XMLSerializer()
|
||||
|
||||
/** @type { typeof XMLSerializer.prototype.serializeToString } */
|
||||
export function serializeToString(node){
|
||||
return serializer.serializeToString(node)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Traverses a DOM tree starting from the given element and applies the visit function
|
||||
* to each Element node encountered in tree order (depth-first).
|
||||
@ -21,3 +41,10 @@ export function traverse(node, visit) {
|
||||
|
||||
visit(node);
|
||||
}
|
||||
|
||||
export {
|
||||
DOMParser,
|
||||
XMLSerializer,
|
||||
createDocument,
|
||||
Node
|
||||
} from '#DOM'
|
||||
@ -1,88 +0,0 @@
|
||||
//@ts-check
|
||||
|
||||
import {
|
||||
_getODSTableRawContent,
|
||||
_getXLSXTableRawContent
|
||||
} from './shared.js'
|
||||
|
||||
import {_createOdsFile} from './createOdsFile.js'
|
||||
|
||||
import _fillOdtTemplate from './odf/fillOdtTemplate.js'
|
||||
|
||||
|
||||
/** @import {SheetCellRawContent, SheetName, SheetRawContent} from './types.js' */
|
||||
/** @import {ODTFile} from './odf/fillOdtTemplate.js' */
|
||||
|
||||
|
||||
function parseXML(str){
|
||||
return (new DOMParser()).parseFromString(str, 'application/xml');
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {ArrayBuffer} odsArrBuff
|
||||
* @returns {ReturnType<_getODSTableRawContent>}
|
||||
*/
|
||||
export function getODSTableRawContent(odsArrBuff){
|
||||
return _getODSTableRawContent(odsArrBuff, parseXML)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {ArrayBuffer} xlsxArrBuff
|
||||
* @returns {ReturnType<_getXLSXTableRawContent>}
|
||||
*/
|
||||
export function getXLSXTableRawContent(xlsxArrBuff){
|
||||
return _getXLSXTableRawContent(xlsxArrBuff, parseXML)
|
||||
}
|
||||
|
||||
|
||||
/** @type { typeof DOMImplementation.prototype.createDocument } */
|
||||
const createDocument = function createDocument(...args){
|
||||
// @ts-ignore
|
||||
return document.implementation.createDocument(...args)
|
||||
}
|
||||
|
||||
const serializer = new XMLSerializer()
|
||||
|
||||
/** @type { typeof XMLSerializer.prototype.serializeToString } */
|
||||
const serializeToString = function serializeToString(node){
|
||||
return serializer.serializeToString(node)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {ODTFile} odtTemplate
|
||||
* @param {any} data
|
||||
* @returns {Promise<ODTFile>}
|
||||
*/
|
||||
export function fillOdtTemplate(odtTemplate, data){
|
||||
return _fillOdtTemplate(odtTemplate, data, parseXML, serializeToString, Node)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @param {Map<SheetName, SheetRawContent>} sheetsData
|
||||
*/
|
||||
export function createOdsFile(sheetsData){
|
||||
return _createOdsFile(sheetsData, createDocument, serializeToString)
|
||||
}
|
||||
|
||||
|
||||
export {
|
||||
// table-level exports
|
||||
tableWithoutEmptyRows,
|
||||
tableRawContentToValues,
|
||||
tableRawContentToStrings,
|
||||
tableRawContentToObjects,
|
||||
|
||||
// sheet-level exports
|
||||
sheetRawContentToObjects,
|
||||
sheetRawContentToStrings,
|
||||
|
||||
// row-level exports
|
||||
rowRawContentToStrings,
|
||||
isRowNotEmpty,
|
||||
|
||||
// cell-level exports
|
||||
cellRawContentToStrings,
|
||||
convertCellValue
|
||||
} from './shared.js'
|
||||
|
||||
@ -1,5 +1,8 @@
|
||||
import { ZipWriter, BlobWriter, TextReader } from '@zip.js/zip.js';
|
||||
|
||||
import {serializeToString, createDocument} from './DOMUtils.js'
|
||||
|
||||
|
||||
/** @import {SheetCellRawContent, SheetName, SheetRawContent} from './types.js' */
|
||||
|
||||
const stylesXml = `<?xml version="1.0" encoding="UTF-8"?>
|
||||
@ -22,11 +25,9 @@ const manifestXml = `<?xml version="1.0" encoding="UTF-8"?>
|
||||
/**
|
||||
* Crée un fichier .ods à partir d'un Map de feuilles de calcul
|
||||
* @param {Map<SheetName, SheetRawContent>} sheetsData
|
||||
* @param {typeof DOMImplementation.prototype.createDocument} createDocument
|
||||
* @param {typeof XMLSerializer.prototype.serializeToString} serializeToString
|
||||
* @returns {Promise<ArrayBuffer>}
|
||||
*/
|
||||
export async function _createOdsFile(sheetsData, createDocument, serializeToString) {
|
||||
export async function createOdsFile(sheetsData) {
|
||||
// Create a new zip writer
|
||||
const zipWriter = new ZipWriter(new BlobWriter('application/vnd.oasis.opendocument.spreadsheet'));
|
||||
|
||||
@ -44,7 +45,7 @@ export async function _createOdsFile(sheetsData, createDocument, serializeToStri
|
||||
}
|
||||
);
|
||||
|
||||
const contentXml = generateContentFileXMLString(sheetsData, createDocument, serializeToString);
|
||||
const contentXml = generateContentFileXMLString(sheetsData);
|
||||
zipWriter.add("content.xml", new TextReader(contentXml), {level: 9});
|
||||
|
||||
zipWriter.add("styles.xml", new TextReader(stylesXml));
|
||||
@ -60,11 +61,9 @@ export async function _createOdsFile(sheetsData, createDocument, serializeToStri
|
||||
/**
|
||||
* Generate the content.xml file with spreadsheet data
|
||||
* @param {Map<SheetName, SheetRawContent>} sheetsData
|
||||
* @param {typeof DOMImplementation.prototype.createDocument} createDocument
|
||||
* @param {typeof XMLSerializer.prototype.serializeToString} serializeToString
|
||||
* @returns {string}
|
||||
*/
|
||||
function generateContentFileXMLString(sheetsData, createDocument, serializeToString) {
|
||||
function generateContentFileXMLString(sheetsData) {
|
||||
const doc = createDocument('urn:oasis:names:tc:opendocument:xmlns:office:1.0', 'office:document-content');
|
||||
const root = doc.documentElement;
|
||||
|
||||
|
||||
@ -1,94 +0,0 @@
|
||||
//@ts-check
|
||||
|
||||
import {DOMParser, DOMImplementation, XMLSerializer, Node} from '@xmldom/xmldom'
|
||||
|
||||
import {
|
||||
_getODSTableRawContent,
|
||||
_getXLSXTableRawContent
|
||||
} from './shared.js'
|
||||
import { _createOdsFile } from './createOdsFile.js'
|
||||
|
||||
import _fillOdtTemplate from './odf/fillOdtTemplate.js'
|
||||
|
||||
/** @import {SheetCellRawContent, SheetName, SheetRawContent} from './types.js' */
|
||||
/** @import {ODTFile} from './odf/fillOdtTemplate.js' */
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {string} str
|
||||
* @returns {Document}
|
||||
*/
|
||||
function parseXML(str){
|
||||
return (new DOMParser()).parseFromString(str, 'application/xml');
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @param {ArrayBuffer} odsArrBuff
|
||||
* @returns {ReturnType<_getODSTableRawContent>}
|
||||
*/
|
||||
export function getODSTableRawContent(odsArrBuff){
|
||||
return _getODSTableRawContent(odsArrBuff, parseXML)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {ArrayBuffer} xlsxArrBuff
|
||||
* @returns {ReturnType<_getXLSXTableRawContent>}
|
||||
*/
|
||||
export function getXLSXTableRawContent(xlsxArrBuff){
|
||||
return _getXLSXTableRawContent(xlsxArrBuff, parseXML)
|
||||
}
|
||||
|
||||
const implementation = new DOMImplementation()
|
||||
|
||||
/** @type { typeof DOMImplementation.prototype.createDocument } */
|
||||
const createDocument = function createDocument(...args){
|
||||
// @ts-ignore
|
||||
return implementation.createDocument(...args)
|
||||
}
|
||||
|
||||
const serializer = new XMLSerializer()
|
||||
|
||||
/** @type { typeof XMLSerializer.prototype.serializeToString } */
|
||||
const serializeToString = function serializeToString(node){
|
||||
return serializer.serializeToString(node)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {ODTFile} odtTemplate
|
||||
* @param {any} data
|
||||
* @returns {Promise<ODTFile>}
|
||||
*/
|
||||
export function fillOdtTemplate(odtTemplate, data){
|
||||
return _fillOdtTemplate(odtTemplate, data, parseXML, serializeToString, Node)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @param {Map<SheetName, SheetRawContent>} sheetsData
|
||||
*/
|
||||
export function createOdsFile(sheetsData){
|
||||
return _createOdsFile(sheetsData, createDocument, serializeToString)
|
||||
}
|
||||
|
||||
export {
|
||||
// table-level exports
|
||||
tableWithoutEmptyRows,
|
||||
tableRawContentToValues,
|
||||
tableRawContentToStrings,
|
||||
tableRawContentToObjects,
|
||||
|
||||
// sheet-level exports
|
||||
sheetRawContentToObjects,
|
||||
sheetRawContentToStrings,
|
||||
|
||||
// row-level exports
|
||||
rowRawContentToStrings,
|
||||
isRowNotEmpty,
|
||||
|
||||
// cell-level exports
|
||||
cellRawContentToStrings,
|
||||
convertCellValue
|
||||
} from './shared.js'
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { ZipReader, ZipWriter, BlobReader, BlobWriter, TextReader, Uint8ArrayReader, TextWriter, Uint8ArrayWriter } from '@zip.js/zip.js';
|
||||
|
||||
import {traverse} from '../DOMUtils.js'
|
||||
import {traverse, parseXML, serializeToString, Node} from '../DOMUtils.js'
|
||||
import {makeManifestFile, getManifestFileData} from './manifest.js';
|
||||
|
||||
/** @import {Reader, ZipWriterAddDataOptions} from '@zip.js/zip.js' */
|
||||
@ -344,12 +344,9 @@ function keepFile(filename){
|
||||
/**
|
||||
* @param {ODTFile} odtTemplate
|
||||
* @param {any} data
|
||||
* @param {Function} parseXML
|
||||
* @param {typeof XMLSerializer.prototype.serializeToString} serializeToString
|
||||
* @param {typeof Node} Node
|
||||
* @returns {Promise<ODTFile>}
|
||||
*/
|
||||
export default async function _fillOdtTemplate(odtTemplate, data, parseXML, serializeToString, Node) {
|
||||
export default async function fillOdtTemplate(odtTemplate, data) {
|
||||
|
||||
const reader = new ZipReader(new Uint8ArrayReader(new Uint8Array(odtTemplate)));
|
||||
|
||||
|
||||
69
scripts/odf/odt/getOdtTextContent.js
Normal file
69
scripts/odf/odt/getOdtTextContent.js
Normal file
@ -0,0 +1,69 @@
|
||||
import { ZipReader, Uint8ArrayReader, TextWriter } from '@zip.js/zip.js';
|
||||
import {parseXML, Node} from '../../DOMUtils.js'
|
||||
|
||||
/** @import {ODTFile} from '../fillOdtTemplate.js' */
|
||||
|
||||
/**
|
||||
* @param {ODTFile} odtFile
|
||||
* @returns {Promise<Document>}
|
||||
*/
|
||||
async function getContentDocument(odtFile) {
|
||||
const reader = new ZipReader(new Uint8ArrayReader(new Uint8Array(odtFile)));
|
||||
|
||||
const entries = await reader.getEntries();
|
||||
|
||||
const contentEntry = entries.find(entry => entry.filename === 'content.xml');
|
||||
|
||||
if (!contentEntry) {
|
||||
throw new Error('No content.xml found in the ODT file');
|
||||
}
|
||||
|
||||
// @ts-ignore
|
||||
const contentText = await contentEntry.getData(new TextWriter());
|
||||
await reader.close();
|
||||
|
||||
return parseXML(contentText)
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {Document} odtDocument
|
||||
* @returns {Element}
|
||||
*/
|
||||
function getODTTextElement(odtDocument) {
|
||||
return odtDocument.getElementsByTagName('office:body')[0]
|
||||
.getElementsByTagName('office:text')[0]
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts plain text content from an ODT file, preserving line breaks
|
||||
* @param {ArrayBuffer} odtFile - The ODT file as an ArrayBuffer
|
||||
* @returns {Promise<string>} Extracted text content
|
||||
*/
|
||||
export async function getOdtTextContent(odtFile) {
|
||||
const contentDocument = await getContentDocument(odtFile)
|
||||
const odtTextElement = getODTTextElement(contentDocument)
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {Element} element
|
||||
* @returns {string}
|
||||
*/
|
||||
function getElementTextContent(element){
|
||||
//console.log('tagName', element.tagName)
|
||||
if(element.tagName === 'text:h' || element.tagName === 'text:p')
|
||||
return element.textContent + '\n'
|
||||
else{
|
||||
const descendantTexts = Array.from(element.childNodes)
|
||||
.filter(n => n.nodeType === Node.ELEMENT_NODE)
|
||||
.map(getElementTextContent)
|
||||
|
||||
if(element.tagName === 'text:list-item')
|
||||
return `- ${descendantTexts.join('')}`
|
||||
|
||||
return descendantTexts.join('')
|
||||
}
|
||||
}
|
||||
|
||||
return getElementTextContent(odtTextElement)
|
||||
}
|
||||
@ -1,23 +1,5 @@
|
||||
import { readFile } from 'node:fs/promises'
|
||||
|
||||
import { ZipReader, Uint8ArrayReader, TextWriter } from '@zip.js/zip.js';
|
||||
import {DOMParser, Node} from '@xmldom/xmldom'
|
||||
|
||||
|
||||
/** @import {ODTFile} from './fillOdtTemplate.js' */
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {Document} odtDocument
|
||||
* @returns {Element}
|
||||
*/
|
||||
function getODTTextElement(odtDocument) {
|
||||
return odtDocument.getElementsByTagName('office:body')[0]
|
||||
.getElementsByTagName('office:text')[0]
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {string} path
|
||||
@ -27,61 +9,3 @@ export async function getOdtTemplate(path) {
|
||||
const fileBuffer = await readFile(path)
|
||||
return fileBuffer.buffer
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts plain text content from an ODT file, preserving line breaks
|
||||
* @param {ArrayBuffer} odtFile - The ODT file as an ArrayBuffer
|
||||
* @returns {Promise<string>} Extracted text content
|
||||
*/
|
||||
export async function getOdtTextContent(odtFile) {
|
||||
const contentDocument = await getContentDocument(odtFile)
|
||||
const odtTextElement = getODTTextElement(contentDocument)
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {Element} element
|
||||
* @returns {string}
|
||||
*/
|
||||
function getElementTextContent(element){
|
||||
//console.log('tagName', element.tagName)
|
||||
if(element.tagName === 'text:h' || element.tagName === 'text:p')
|
||||
return element.textContent + '\n'
|
||||
else{
|
||||
const descendantTexts = Array.from(element.childNodes)
|
||||
.filter(n => n.nodeType === Node.ELEMENT_NODE)
|
||||
.map(getElementTextContent)
|
||||
|
||||
if(element.tagName === 'text:list-item')
|
||||
return `- ${descendantTexts.join('')}`
|
||||
|
||||
return descendantTexts.join('')
|
||||
}
|
||||
}
|
||||
|
||||
return getElementTextContent(odtTextElement)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @param {ODTFile} odtFile
|
||||
* @returns {Promise<Document>}
|
||||
*/
|
||||
async function getContentDocument(odtFile) {
|
||||
const reader = new ZipReader(new Uint8ArrayReader(new Uint8Array(odtFile)));
|
||||
|
||||
const entries = await reader.getEntries();
|
||||
|
||||
const contentEntry = entries.find(entry => entry.filename === 'content.xml');
|
||||
|
||||
if (!contentEntry) {
|
||||
throw new Error('No content.xml found in the ODT file');
|
||||
}
|
||||
|
||||
// @ts-ignore
|
||||
const contentText = await contentEntry.getData(new TextWriter());
|
||||
await reader.close();
|
||||
|
||||
const parser = new DOMParser();
|
||||
|
||||
return parser.parseFromString(contentText, 'text/xml');
|
||||
}
|
||||
@ -1,6 +1,8 @@
|
||||
//@ts-check
|
||||
import { Uint8ArrayReader, ZipReader, TextWriter } from '@zip.js/zip.js';
|
||||
|
||||
import {parseXML} from './DOMUtils.js'
|
||||
|
||||
/** @import {Entry} from '@zip.js/zip.js'*/
|
||||
/** @import {SheetName, SheetRawContent, SheetRowRawContent, SheetCellRawContent} from './types.js' */
|
||||
|
||||
@ -46,10 +48,9 @@ function extraxtODSCellText(cell) {
|
||||
/**
|
||||
* Extracts raw table content from an ODS file.
|
||||
* @param {ArrayBuffer} arrayBuffer - The ODS file.
|
||||
* @param {(str: string) => Document} parseXML - Function to parse XML content.
|
||||
* @returns {Promise<Map<SheetName, SheetRawContent>>}
|
||||
*/
|
||||
export async function _getODSTableRawContent(arrayBuffer, parseXML) {
|
||||
export async function getODSTableRawContent(arrayBuffer) {
|
||||
const zipDataReader = new Uint8ArrayReader(new Uint8Array(arrayBuffer));
|
||||
const zipReader = new ZipReader(zipDataReader);
|
||||
const zipEntries = await zipReader.getEntries()
|
||||
@ -123,101 +124,6 @@ export async function _getODSTableRawContent(arrayBuffer, parseXML) {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Extracts raw table content from an XLSX file.
|
||||
* @param {ArrayBuffer} arrayBuffer - The XLSX file.
|
||||
* @param {(str: string) => Document} parseXML - Function to parse XML content.
|
||||
* @returns {Promise<Map<SheetName, SheetRawContent>>}
|
||||
*/
|
||||
export async function _getXLSXTableRawContent(arrayBuffer, parseXML) {
|
||||
const zipDataReader = new Uint8ArrayReader(new Uint8Array(arrayBuffer));
|
||||
const zipReader = new ZipReader(zipDataReader);
|
||||
const zipEntries = await zipReader.getEntries()
|
||||
await zipReader.close();
|
||||
|
||||
/** @type {Map<Entry['filename'], Entry>} */
|
||||
const entryByFilename = new Map()
|
||||
for(const entry of zipEntries){
|
||||
const filename = entry.filename
|
||||
entryByFilename.set(filename, entry)
|
||||
}
|
||||
|
||||
const sharedStringsEntry = entryByFilename.get('xl/sharedStrings.xml')
|
||||
|
||||
if(!sharedStringsEntry){
|
||||
throw new TypeError(`entry 'xl/sharedStrings.xml' manquante dans le zip`)
|
||||
}
|
||||
|
||||
//@ts-ignore
|
||||
const sharedStringsXml = await sharedStringsEntry.getData(new TextWriter());
|
||||
|
||||
const sharedStringsDoc = parseXML(sharedStringsXml);
|
||||
const sharedStrings = Array.from(sharedStringsDoc.getElementsByTagName('sst')[0].getElementsByTagName('si')).map(si => si.textContent);
|
||||
|
||||
// Get sheet names and their corresponding XML files
|
||||
const workbookEntry = entryByFilename.get('xl/workbook.xml')
|
||||
|
||||
if(!workbookEntry){
|
||||
throw new TypeError(`entry 'xl/workbook.xml' manquante dans le zip`)
|
||||
}
|
||||
|
||||
//@ts-ignore
|
||||
const workbookXml = await workbookEntry.getData(new TextWriter());
|
||||
const workbookDoc = parseXML(workbookXml);
|
||||
const sheets = Array.from(workbookDoc.getElementsByTagName('sheets')[0].getElementsByTagName('sheet'));
|
||||
const sheetNames = sheets.map(sheet => sheet.getAttribute('name'));
|
||||
const sheetIds = sheets.map(sheet => sheet.getAttribute('r:id'));
|
||||
|
||||
// Read the relations to get the actual filenames for each sheet
|
||||
const workbookRelsEntry = entryByFilename.get('xl/_rels/workbook.xml.rels')
|
||||
|
||||
if(!workbookRelsEntry){
|
||||
throw new TypeError(`entry 'xl/_rels/workbook.xml.rels' manquante dans le zip`)
|
||||
}
|
||||
|
||||
//@ts-ignore
|
||||
const workbookRelsXml = await workbookRelsEntry.getData(new TextWriter());
|
||||
const workbookRelsDoc = parseXML(workbookRelsXml);
|
||||
const sheetRels = Array.from(workbookRelsDoc.getElementsByTagName('Relationship'));
|
||||
const sheetFiles = sheetIds.map(id => sheetRels.find(rel => rel.getAttribute('Id') === id).getAttribute('Target').replace('worksheets/', ''));
|
||||
|
||||
// Read each sheet's XML and extract data in parallel
|
||||
const sheetDataPs = sheetFiles.map((sheetFile, index) => (
|
||||
// @ts-ignore
|
||||
entryByFilename.get(`xl/worksheets/${sheetFile}`).getData(new TextWriter()).then(sheetXml => {
|
||||
const sheetDoc = parseXML(sheetXml);
|
||||
|
||||
const rows = sheetDoc.getElementsByTagName('sheetData')[0].getElementsByTagName('row');
|
||||
const sheetData = [];
|
||||
|
||||
for (let row of Array.from(rows)) {
|
||||
const cells = row.getElementsByTagName('c');
|
||||
const rowData = [];
|
||||
|
||||
for (let cell of Array.from(cells)) {
|
||||
const cellType = cell.getAttribute('t') || 'n';
|
||||
let cellValue = cell.getElementsByTagName('v')[0]?.textContent || '';
|
||||
|
||||
if (cellType === 's') {
|
||||
cellValue = sharedStrings[parseInt(cellValue, 10)];
|
||||
}
|
||||
|
||||
rowData.push({
|
||||
value: cellValue,
|
||||
type: cellType
|
||||
});
|
||||
}
|
||||
|
||||
sheetData.push(rowData);
|
||||
}
|
||||
|
||||
return [sheetNames[index], sheetData];
|
||||
})
|
||||
));
|
||||
|
||||
return new Map(await Promise.all(sheetDataPs));
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a cell value to the appropriate JavaScript type based on its cell type.
|
||||
* @param {SheetCellRawContent} _
|
||||
|
||||
@ -2,7 +2,7 @@ import {readFile} from 'node:fs/promises'
|
||||
|
||||
import test from 'ava';
|
||||
|
||||
import {getODSTableRawContent} from '../scripts/node.js'
|
||||
import {getODSTableRawContent} from '../exports.js'
|
||||
|
||||
const nomAgeContent = (await readFile('./tests/data/nom-age.ods')).buffer
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import test from 'ava';
|
||||
|
||||
import {getODSTableRawContent, createOdsFile} from '../scripts/node.js'
|
||||
import {getODSTableRawContent, createOdsFile} from '../exports.js'
|
||||
|
||||
/** @import {SheetName, SheetRawContent} from '../scripts/types.js' */
|
||||
|
||||
|
||||
@ -1,9 +1,9 @@
|
||||
import test from 'ava';
|
||||
import {join} from 'node:path';
|
||||
|
||||
import {getOdtTemplate, getOdtTextContent} from '../scripts/odf/odtTemplate-forNode.js'
|
||||
import {getOdtTemplate} from '../scripts/odf/odtTemplate-forNode.js'
|
||||
|
||||
import {fillOdtTemplate} from '../scripts/node.js'
|
||||
import {fillOdtTemplate, getOdtTextContent} from '../exports.js'
|
||||
import { listZipEntries } from './_helpers/zip-analysis.js';
|
||||
|
||||
|
||||
|
||||
@ -2,7 +2,7 @@ import {readFile} from 'node:fs/promises'
|
||||
|
||||
import test from 'ava';
|
||||
|
||||
import {getODSTableRawContent} from '../scripts/node.js'
|
||||
import {getODSTableRawContent} from '../exports.js'
|
||||
|
||||
test('.ods file with table:number-columns-repeated attribute in cell', async t => {
|
||||
const repeatedCellFileContent = (await readFile('./tests/data/cellules-répétées.ods')).buffer
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import test from 'ava';
|
||||
import { sheetRawContentToObjects } from "../scripts/shared.js"
|
||||
import { sheetRawContentToObjects } from "../exports.js"
|
||||
|
||||
test("Empty header value should be kept", t => {
|
||||
const rawContent = [
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user