采纳答案成功!
向帮助你的同学说点啥吧!感谢那些助人为乐的人
老师,您好,我现在将一本epub电子书解析成对象了,然后我想从这个对象中提取信息,放入到数据库book表中的每个字段中,但是信息有点杂乱,希望老师能帮帮我。
这是我解析epub电子书得到的信息:
我希望从中提取信息放入到book表中:
希望老师能给一个较详细的路径,谢谢老师
同学你好,你可以看一下小慕课读书中电子书提取代码,
const Epub = require('../utils/epub')
const fs = require('fs')
const xml2js = require('xml2js').parseString
const {
  MIME_TYPE_EPUB,
  UPLOAD_URL,
  UPLOAD_PATH,
  UPDATE_TYPE_FROM_WEB,
  OLD_UPLOAD_URL
} = require('../utils/constant')

/**
 * Model of an uploaded e-book.
 *
 * An instance is built either from an uploaded file descriptor (`file`) or
 * from an already-populated plain object (`data`). `parse()` then fills in the
 * metadata, cover and table of contents from the epub itself, and `toDb()`
 * returns the flat record used for the `book` table.
 */
class Book {
  /**
   * @param {object} [file] upload descriptor (destination, filename, mimetype, originalname)
   * @param {object} [data] plain object carrying book fields; only used when `file` is falsy
   */
  constructor(file, data) {
    if (file) {
      this.createBookFromFile(file)
    } else if (data) {
      this.createBookFromData(data)
    }
  }

  /**
   * Initialise the book from an uploaded file: makes sure the unzip directory
   * exists, renames the stored file so it carries the `.epub` suffix, and
   * resets every metadata field to its empty default.
   *
   * @param {object} file upload descriptor
   */
  createBookFromFile(file) {
    const {
      destination: des, // local directory the upload was stored in
      filename, // stored file name
      mimetype = MIME_TYPE_EPUB // resource type of the upload
    } = file
    const suffix = mimetype === MIME_TYPE_EPUB ? '.epub' : ''
    const oldBookPath = `${des}/${filename}`
    const bookPath = `${des}/${filename}${suffix}`
    const url = `${UPLOAD_URL}/book/${filename}${suffix}`
    const unzipPath = `${UPLOAD_PATH}/unzip/${filename}`
    const unzipUrl = `${UPLOAD_URL}/unzip/${filename}`

    if (!fs.existsSync(unzipPath)) {
      // create the directory the epub will be extracted into
      fs.mkdirSync(unzipPath, { recursive: true })
    }
    if (fs.existsSync(oldBookPath) && !fs.existsSync(bookPath)) {
      // give the stored file its suffix
      fs.renameSync(oldBookPath, bookPath)
    }

    this.fileName = filename // file name
    this.path = `/book/${filename}${suffix}` // epub path relative to UPLOAD_PATH
    this.filePath = this.path // same value as `path`
    this.url = url // epub download URL
    this.title = '' // title
    this.author = '' // author
    this.publisher = '' // publisher
    this.contents = [] // table of contents (flat)
    this.cover = '' // cover image URL
    this.category = -1 // category id
    this.categoryText = '' // category name
    this.language = '' // language
    this.unzipPath = `/unzip/${filename}` // extracted directory relative to UPLOAD_PATH
    this.unzipUrl = unzipUrl // URL of the extracted directory
    this.originalName = file.originalname
  }

  /**
   * Initialise the book from a plain object.
   *
   * @param {object} data object carrying the book fields
   */
  createBookFromData(data) {
    this.fileName = data.fileName
    this.cover = data.coverPath
    this.title = data.title
    this.author = data.author
    this.publisher = data.publisher
    this.bookId = data.fileName
    this.language = data.language
    this.rootFile = data.rootFile
    this.originalName = data.originalName
    this.path = data.path || data.filePath
    this.filePath = data.path || data.filePath
    this.unzipPath = data.unzipPath
    this.coverPath = data.coverPath
    this.createUser = data.username
    this.createDt = new Date().getTime()
    this.updateDt = new Date().getTime()
    // an explicit 0 is kept as-is; anything else is treated as a web upload
    this.updateType = data.updateType === 0 ? data.updateType : UPDATE_TYPE_FROM_WEB
    this.contents = data.contents
  }

  /**
   * Parse the epub at `this.path`: read its metadata, extract the archive,
   * build the table of contents and write the cover image to disk.
   *
   * @returns {Promise<Book>} resolves with this instance once the cover is written;
   *   rejects when the file is missing, the title is empty, or any step fails
   */
  parse() {
    return new Promise((resolve, reject) => {
      const bookPath = `${UPLOAD_PATH}${this.path}`
      if (!this.path || !fs.existsSync(bookPath)) {
        reject(new Error('电子书路径不存在'))
        // stop here: without this return the parser was still started on a missing file
        return
      }

      const epub = new Epub(bookPath)
      epub.on('error', err => {
        reject(err)
      })
      epub.on('end', err => {
        if (err) {
          reject(err)
          return
        }
        const {
          title,
          language,
          creator,
          creatorFileAs,
          publisher,
          cover
        } = epub.metadata
        if (!title) {
          reject(new Error('图书标题为空'))
          return
        }

        this.title = title
        this.language = language || 'en'
        this.author = creator || creatorFileAs || 'unknown'
        this.publisher = publisher || 'unknown'
        this.rootFile = epub.rootFile

        const handleGetImage = (error, imgBuffer, mimeType) => {
          if (error) {
            reject(error)
            return
          }
          // this runs in a library callback, so a throw here would otherwise
          // leave the promise pending forever
          try {
            const suffix = mimeType.split('/')[1]
            const coverPath = `${UPLOAD_PATH}/img/${this.fileName}.${suffix}`
            const coverUrl = `${UPLOAD_URL}/img/${this.fileName}.${suffix}`
            fs.writeFileSync(coverPath, imgBuffer, 'binary')
            this.coverPath = `/img/${this.fileName}.${suffix}`
            this.cover = coverUrl
            resolve(this)
          } catch (e) {
            reject(e)
          }
        }

        try {
          this.unzip() // extract the epub
          this.parseContents(epub) // build the table of contents
            .then(({ chapters, chapterTree }) => {
              this.contents = chapters
              this.contentsTree = chapterTree
              epub.getImage(cover, handleGetImage) // fetch the cover image
            })
            .catch(e => reject(e))
        } catch (e) {
          reject(e)
        }
      })
      epub.parse()
      this.epub = epub
    })
  }

  /**
   * Extract the epub archive at `this.path` into `this.unzipPath`,
   * overwriting existing files.
   */
  unzip() {
    const AdmZip = require('adm-zip')
    const zip = new AdmZip(Book.genPath(this.path))
    zip.extractAllTo(Book.genPath(this.unzipPath), /* overwrite */ true)
  }

  /**
   * Build the table of contents from the book's NCX file.
   *
   * @param {object} epub parsed epub instance (its manifest, spine and flow are read)
   * @returns {Promise<{chapters: object[], chapterTree: object[]}>} flat chapter
   *   list and the same chapters nested by parent
   * @throws {Error} synchronously when `this.rootFile` is not set
   */
  parseContents(epub) {
    /**
     * The NCX is parsed with `explicitArray: false`, so a lone child element
     * arrives as a plain object instead of a one-element array. Normalise both
     * shapes (and a missing value) to an array.
     */
    function toArray(value) {
      if (value === undefined || value === null) {
        return []
      }
      return Array.isArray(value) ? value : [value]
    }

    /** Locate the NCX file through the manifest, falling back to the spine toc. */
    function getNcxFilePath() {
      const manifest = epub && epub.manifest
      const spine = epub && epub.spine
      const ncx = manifest && manifest.ncx
      const toc = spine && spine.toc
      return (ncx && ncx.href) || (toc && toc.href)
    }

    /**
     * Flatten the nested navPoint tree into a one-dimensional array,
     * each parent immediately followed by its descendants.
     *
     * @param array navPoint or list of navPoints
     * @returns {object[]}
     */
    function flatten(array) {
      return [].concat(...toArray(array).map(item => {
        if (item.navPoint) {
          return [item, ...flatten(item.navPoint)]
        }
        return item
      }))
    }

    /**
     * Annotate every navPoint with its nesting level and its parent's id.
     * Nested navPoints are normalised to arrays along the way.
     *
     * @param array navPoint or list of navPoints
     * @param level nesting depth of `array`
     * @param pid id of the parent navPoint ('' at the top level)
     * @returns {object[]}
     */
    function findParent(array, level = 0, pid = '') {
      return toArray(array).map(item => {
        item.level = level
        item.pid = pid
        if (item.navPoint) {
          item.navPoint = findParent(item.navPoint, level + 1, item['$'].id)
        }
        return item
      })
    }

    if (!this.rootFile) {
      throw new Error('目录解析失败')
    }

    const fileName = this.fileName
    return new Promise((resolve, reject) => {
      // resolve and read the ncx file
      const ncxFilePath = Book.genPath(`${this.unzipPath}/${getNcxFilePath()}`)
      const xml = fs.readFileSync(ncxFilePath, 'utf-8')
      // convert the ncx from xml to json
      xml2js(xml, {
        explicitArray: false, // do not wrap single children in arrays
        ignoreAttrs: false // keep attributes (they carry the navPoint ids)
      }, (err, json) => {
        if (err) {
          reject(err)
          return
        }
        // anything thrown in this callback must reach the promise
        try {
          const navMap = json.ncx.navMap
          if (!navMap.navPoint) {
            // no navPoint under navMap means the book has no usable toc
            reject(new Error('目录解析失败,navMap.navPoint error'))
            return
          }
          navMap.navPoint = findParent(navMap.navPoint)
          const newNavMap = flatten(navMap.navPoint)

          const chapters = []
          epub.flow.forEach((chapter, index) => {
            // flow entries beyond the number of ncx entries are skipped
            if (index + 1 > newNavMap.length) {
              return
            }
            const nav = newNavMap[index] // ncx entry at the same position
            chapter.text = `${UPLOAD_URL}/unzip/${fileName}/${chapter.href}` // chapter URL
            if (nav && nav.navLabel) {
              chapter.label = nav.navLabel.text || ''
            } else {
              chapter.label = ''
            }
            chapter.level = nav.level
            chapter.pid = nav.pid
            chapter.navId = nav['$'].id
            chapter.fileName = fileName
            chapter.order = index + 1
            chapters.push(chapter)
          })

          // index chapters by navId once (first occurrence wins) instead of
          // scanning the whole list for every chapter
          const byNavId = new Map()
          chapters.forEach(c => {
            c.children = []
            if (!byNavId.has(c.navId)) {
              byNavId.set(c.navId, c)
            }
          })

          // turn the flat list into a tree
          const chapterTree = []
          chapters.forEach(c => {
            const parent = c.pid === '' ? undefined : byNavId.get(c.pid)
            if (parent) {
              parent.children.push(c)
            } else {
              // top-level chapter, or one whose parent is not in the list:
              // keep it at the root rather than failing the whole parse
              chapterTree.push(c)
            }
          })
          resolve({ chapters, chapterTree })
        } catch (e) {
          reject(e)
        }
      })
    })
  }

  /**
   * @returns {object} plain-object view of the book, including the table of contents
   */
  toJson() {
    return {
      path: this.path,
      url: this.url,
      title: this.title,
      language: this.language,
      author: this.author,
      publisher: this.publisher,
      cover: this.cover,
      coverPath: this.coverPath,
      unzipPath: this.unzipPath,
      unzipUrl: this.unzipUrl,
      category: this.category,
      categoryText: this.categoryText,
      contents: this.contents,
      contentsTree: this.contentsTree,
      originalName: this.originalName,
      rootFile: this.rootFile,
      fileName: this.fileName,
      filePath: this.filePath
    }
  }

  /**
   * @returns {object} flat record of the book fields for persistence;
   *   a falsy category falls back to 99 / '自定义'
   */
  toDb() {
    return {
      fileName: this.fileName,
      cover: this.cover,
      title: this.title,
      author: this.author,
      publisher: this.publisher,
      bookId: this.bookId,
      updateType: this.updateType,
      language: this.language,
      rootFile: this.rootFile,
      originalName: this.originalName,
      filePath: this.path,
      unzipPath: this.unzipPath,
      coverPath: this.coverPath,
      createUser: this.createUser,
      createDt: this.createDt,
      updateDt: this.updateDt,
      category: this.category || 99,
      categoryText: this.categoryText || '自定义'
    }
  }

  /**
   * @returns {object[]} the flat table of contents
   */
  getContents() {
    return this.contents
  }

  /**
   * Delete everything this book left on disk: the epub file, the cover image
   * and the extracted directory. Paths that do not exist are skipped.
   */
  reset() {
    if (this.path && Book.pathExists(this.path)) {
      fs.unlinkSync(Book.genPath(this.path))
    }
    if (this.filePath && Book.pathExists(this.filePath)) {
      fs.unlinkSync(Book.genPath(this.filePath))
    }
    if (this.coverPath && Book.pathExists(this.coverPath)) {
      fs.unlinkSync(Book.genPath(this.coverPath))
    }
    if (this.unzipPath && Book.pathExists(this.unzipPath)) {
      // note: older Node versions do not support the second argument
      fs.rmdirSync(Book.genPath(this.unzipPath), { recursive: true })
    }
  }

  /**
   * Join a relative path onto UPLOAD_PATH.
   *
   * @param {string} path path with or without a leading '/'
   * @returns {string}
   */
  static genPath(path) {
    if (path.startsWith('/')) {
      return `${UPLOAD_PATH}${path}`
    }
    return `${UPLOAD_PATH}/${path}`
  }

  /**
   * Check whether a path exists; paths not already under UPLOAD_PATH are
   * resolved against it first.
   *
   * @param {string} path
   * @returns {boolean}
   */
  static pathExists(path) {
    if (path.startsWith(UPLOAD_PATH)) {
      return fs.existsSync(path)
    }
    return fs.existsSync(Book.genPath(path))
  }

  /**
   * Build the cover URL for a book record. Records whose updateType is 0 are
   * served from OLD_UPLOAD_URL, all others from UPLOAD_URL.
   *
   * @param {object} book record with `cover` and `updateType`
   * @returns {string|null} cover URL, or null when the record has no cover
   */
  static genCoverUrl(book) {
    const base = Number(book.updateType) === 0 ? OLD_UPLOAD_URL : UPLOAD_URL
    const { cover } = book
    if (!cover) {
      return null
    }
    return cover.startsWith('/') ? `${base}${cover}` : `${base}/${cover}`
  }
}

module.exports = Book
登录后可查看更多问答,登录/注册