diff --git a/package-lock.json b/package-lock.json index b4e2de1f..b82d8ed9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "jw-epub-parser", - "version": "3.19.0", + "version": "3.20.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "jw-epub-parser", - "version": "3.19.0", + "version": "3.20.0", "license": "MIT", "dependencies": { "jszip": "^3.9.1", diff --git a/src/browser/utils.browser.ts b/src/browser/utils.browser.ts index 64e04674..484a9c75 100644 --- a/src/browser/utils.browser.ts +++ b/src/browser/utils.browser.ts @@ -14,6 +14,7 @@ import TG from '../locales/tl-PH/text.json'; import TK from '../locales/tr-TR/text.json'; import TND from '../locales/mg-TND/text.json'; import TNK from '../locales/mg-TNK/text.json'; +import TPO from '../locales/pt-PT/text.json'; import TTM from '../locales/mg-TTM/text.json'; import TW from '../locales/tw-TW/text.json'; import U from '../locales/ru-RU/text.json'; @@ -21,12 +22,12 @@ import VZ from '../locales/mg-VZ/text.json'; import X from '../locales/de-DE/text.json'; declare global { - interface Window { - jw_epub_parser: any; - } + interface Window { + jw_epub_parser: any; + } } window.jw_epub_parser = { - languages: { E, F, I, J, K, M, MG, P, S, T, TG, TK, TND, TNK, TTM, TW, U, VZ, X }, - path: path, + languages: { E, F, I, J, K, M, MG, P, S, T, TG, TK, TND, TNK, TPO, TTM, TW, U, VZ, X }, + path: path, }; diff --git a/src/common/parsing_rules.ts b/src/common/parsing_rules.ts index e56d6a1d..b3448e7b 100644 --- a/src/common/parsing_rules.ts +++ b/src/common/parsing_rules.ts @@ -2,187 +2,189 @@ import { JWEPUBParserError } from '../classes/error.js'; import { getMonthNames, getPartMinutesSeparatorVariations, getStudyArticleDateVariations } from './language_rules.js'; export const extractMonthName = (src: string, lang: string) => { - let varDay; - let monthIndex; - - const text = src.toLowerCase(); - const separators = ['bis', '–', '-', '—']; - const regex = new RegExp(separators.join('|'), 'gi'); - const split = text.split(regex); - const monthNames = getMonthNames(lang); - - outerLoop: for (const splitted of split) { - for (const month of monthNames) { - const monthLang = month.name.toLowerCase(); - let searchKey = `(${monthLang})`; - - if (lang === 'J') { - searchKey = `\\b${searchKey}\\b`; - } - - const regex = new RegExp(searchKey); - const array = regex.exec(splitted); - - if (Array.isArray(array)) { - const regex = /\d+/g; - const match = text.match(regex); - - if (lang === 'J') { - varDay = +match![1]; - } - - if (lang !== 'J') { - varDay = +match![0]; - } - - monthIndex = month.index; - break outerLoop; - } - } - } - - if (typeof varDay === 'number' && typeof monthIndex === 'number') { - return { varDay, monthIndex }; - } - - throw new JWEPUBParserError('week-date', `Parsing failed when extracting the week date. The input was: ${src}`); + let varDay; + let monthIndex; + + const text = src.toLowerCase(); + const separators = ['bis', '–', '-', '—']; + const regex = new RegExp(separators.join('|'), 'gi'); + const split = text.split(regex); + const monthNames = getMonthNames(lang); + + outerLoop: for (const splitted of split) { + for (const month of monthNames) { + const monthLang = month.name.toLowerCase(); + let searchKey = `(${monthLang})`; + + if (lang === 'J') { + searchKey = `\\b${searchKey}\\b`; + } + + const regex = new RegExp(searchKey); + const array = regex.exec(splitted); + + if (Array.isArray(array)) { + const regex = /\d+/g; + const match = text.match(regex); + + if (lang === 'J') { + varDay = +match![1]; + } + + if (lang !== 'J') { + varDay = +match![0]; + } + + monthIndex = month.index; + break outerLoop; + } + } + } + + if (typeof varDay === 'number' && typeof monthIndex === 'number') { + return { varDay, monthIndex }; + } + + throw new JWEPUBParserError('week-date', `Parsing failed when extracting the week date. The input was: ${src}`); }; export const extractSongNumber = (src: string) => { - const parseNum = src.match(/(\d+)/); + const parseNum = src.match(/(\d+)/); - if (parseNum && parseNum.length > 0) { - const firstNumber = +parseNum[0]; + if (parseNum && parseNum.length > 0) { + const firstNumber = +parseNum[0]; - if (firstNumber <= 158) { - return firstNumber; - } - } + if (firstNumber <= 158) { + return firstNumber; + } + } - return src; + return src; }; export const extractSourceEnhanced = (src: string, lang: string) => { - const variations = getPartMinutesSeparatorVariations(lang).split('|'); + const variations = getPartMinutesSeparatorVariations(lang).split('|'); - let result; + let result; - for (const variation of variations) { - let textSearch = variation.replace('{{ duration }}', '\\d+'); - textSearch = textSearch.replace('(', '\\('); - textSearch = textSearch.replace(')', '\\)'); - textSearch = textSearch.replace(') ', ') ?'); - textSearch = textSearch.replace('??', '?'); + for (const variation of variations) { + let textSearch = variation.replace('{{ duration }}', '\\d+'); + textSearch = textSearch.replace('(', '\\('); + textSearch = textSearch.replace(')', '\\)'); + textSearch = textSearch.replace(') ', ') ?'); + textSearch = textSearch.replace('??', '?'); - const regex = new RegExp(textSearch.trim()); - const match = src.match(regex); + const regex = new RegExp(textSearch.trim()); + const match = src.match(regex); - if (match) { - const splits = src.split(regex); - const duration = +match[0].match(/\d+/)![0]; - const regexStartColumn = /^[:.「]/; - const regexEndColumn = /[:」]$/; + if (match) { + const splits = src.split(regex); + const duration = +match[0].match(/\d+/)![0]; + const regexStartColumn = /^[:.「]/; + const regexEndColumn = /[:」]$/; - const tmpAssignment = splits[0].trim(); - const source = splits[1].trim().replace(regexStartColumn, '').replace(regexEndColumn, '').trim(); + const tmpAssignment = splits[0].trim(); + const source = splits[1].trim().replace(regexStartColumn, '').replace(regexEndColumn, '').trim(); - const indexSep = /\d{1,2}[-.] /g; - const index = tmpAssignment.match(indexSep); - const assignmentSplits = tmpAssignment.split(indexSep); - let assignment; + const indexSep = /\d{1,2}[-.] /g; + const index = tmpAssignment.match(indexSep); + const assignmentSplits = tmpAssignment.split(indexSep); + let assignment; - if (index) { - assignment = assignmentSplits[1].trim(); - } else { - assignment = tmpAssignment; - } + if (index) { + assignment = assignmentSplits[1].trim(); + } else { + assignment = tmpAssignment; + } - assignment = assignment.replace(regexStartColumn, '').replace(regexEndColumn, '').trim(); + assignment = assignment.replace(regexStartColumn, '').replace(regexEndColumn, '').trim(); - result = { type: assignment, time: duration, src: source, fulltitle: tmpAssignment }; - } - } + result = { type: assignment, time: duration, src: source, fulltitle: tmpAssignment }; + } + } - if (result) return result; + if (result) return result; - throw new JWEPUBParserError('jw-epub-parser', `Parsing failed. The input was: ${src}`); + throw new JWEPUBParserError('jw-epub-parser', `Parsing failed. The input was: ${src}`); }; export const extractWTStudyDate = (src: string, lang: string) => { - let varDay; - let monthIndex; - let varYear; + let varDay; + let monthIndex; + let varYear; - const variations = getStudyArticleDateVariations(lang).split('|'); + const variations = getStudyArticleDateVariations(lang).split('|'); - const patternNumber = '{{ number }}'; - const patternDate = '{{ date }}'; + const patternNumber = '{{ number }}'; + const patternDate = '{{ date }}'; - src = src.toLowerCase(); + src = src.toLowerCase(); - outerLoop: for (const variation of variations) { - let textSearch = variation.toLowerCase().replace(patternDate, ''); - textSearch = textSearch.replace(patternNumber, '\\d+'); + outerLoop: for (const variation of variations) { + let textSearch = variation.toLowerCase().replace(patternDate, ''); + textSearch = textSearch.replace(patternNumber, '\\d+'); - const regex = new RegExp(textSearch.trim()); - const array = regex.exec(src); + const regex = new RegExp(textSearch.trim()); + const array = regex.exec(src); - if (array !== null) { - const dateStartIndex = array[0].length; - if (dateStartIndex > 0) { - const dateValue = src.substring(dateStartIndex); + if (array !== null) { + const dateStartIndex = array[0].length; + if (dateStartIndex > 0) { + const dateValue = src.substring(dateStartIndex); - textSearch = dateValue.trim(); + textSearch = dateValue.trim(); - const text = textSearch.toLowerCase(); - const separators = ['bis', '–', '-', '—', 'do']; - const regex = new RegExp(separators.join('|'), 'gi'); - const split = text.split(regex); - const monthNames = getMonthNames(lang); + const text = textSearch.toLowerCase(); + const separators = ['bis', '–', '-', '—', 'do']; + const regex = new RegExp(separators.join('|'), 'gi'); + const split = text.split(regex); + const monthNames = getMonthNames(lang); - for (const splitted of split) { - for (const month of monthNames) { - const monthLang = month.name.toLowerCase(); - let searchKey = `(${monthLang})`; + for (const splitted of split) { + for (const month of monthNames) { + const monthLang = month.name.toLowerCase(); + let searchKey = `(${monthLang})`; - if (lang === 'J') { - searchKey = `\\b${searchKey}\\b`; - } + if (lang === 'J') { + searchKey = `\\b${searchKey}\\b`; + } - const regex = new RegExp(searchKey); - const array2 = regex.exec(splitted); + const regex = new RegExp(searchKey); + const array2 = regex.exec(splitted); - if (Array.isArray(array2)) { - const regex = /\d+/g; - const match = textSearch.match(regex); + if (Array.isArray(array2)) { + if (array2.index < 29) { + const regex = /\d+/g; + const match = textSearch.match(regex); - if (lang === 'J') { - varDay = +match![2]; - } + if (lang === 'J') { + varDay = +match![2]; + } - if (lang !== 'J') { - varDay = +match![0]; - } + if (lang !== 'J') { + varDay = +match![0]; + } - monthIndex = month.index; + monthIndex = month.index; - const findYear = /\b\d{4}\b/; - const array3 = findYear.exec(dateValue); - if (array3 !== null) { - varYear = +array3[0]; - } + const findYear = /\b\d{4}\b/; + const array3 = findYear.exec(dateValue); + if (array3 !== null) { + varYear = +array3[0]; + } - break outerLoop; - } - } - } - } - } - } + break outerLoop; + } + } + } + } + } + } + } - if (typeof varDay === 'number' && typeof monthIndex === 'number' && typeof varYear === 'number') { - return { varDay, monthIndex, varYear }; - } + if (typeof varDay === 'number' && typeof monthIndex === 'number' && typeof varYear === 'number') { + return { varDay, monthIndex, varYear }; + } - throw new JWEPUBParserError('wtstudy', `Parsing failed for Watchtower Study Date. The input was: ${src}`); + throw new JWEPUBParserError('wtstudy', `Parsing failed for Watchtower Study Date. The input was: ${src}`); }; diff --git a/src/node/utils.node.ts b/src/node/utils.node.ts index 749d1190..8d9c8d7e 100644 --- a/src/node/utils.node.ts +++ b/src/node/utils.node.ts @@ -15,6 +15,7 @@ import TG from '../locales/tl-PH/text.json' assert { type: 'json' }; import TK from '../locales/tr-TR/text.json' assert { type: 'json' }; import TND from '../locales/mg-TND/text.json' assert { type: 'json' }; import TNK from '../locales/mg-TNK/text.json' assert { type: 'json' }; +import TPO from '../locales/pt-PT/text.json' assert { type: 'json' }; import TTM from '../locales/mg-TTM/text.json' assert { type: 'json' }; import TW from '../locales/tw-TW/text.json' assert { type: 'json' }; import U from '../locales/ru-RU/text.json' assert { type: 'json' }; @@ -22,11 +23,11 @@ import VZ from '../locales/mg-VZ/text.json' assert { type: 'json' }; import X from '../locales/de-DE/text.json' assert { type: 'json' }; declare global { - var jw_epub_parser: any; + var jw_epub_parser: any; } global.jw_epub_parser = { - languages: { E, F, I, J, K, M, MG, P, S, T, TG, TK, TND, TNK, TTM, TW, U, VZ, X }, - path: path, - readFile: readFile, + languages: { E, F, I, J, K, M, MG, P, S, T, TG, TK, TND, TNK, TPO, TTM, TW, U, VZ, X }, + path: path, + readFile: readFile, };