Skip to content

Commit

Permalink
fix(rules): update month extraction rule
Browse files Browse the repository at this point in the history
  • Loading branch information
rhahao committed Aug 6, 2024
1 parent f7cdd1a commit c34604c
Show file tree
Hide file tree
Showing 4 changed files with 156 additions and 152 deletions.
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 6 additions & 5 deletions src/browser/utils.browser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,20 @@ import TG from '../locales/tl-PH/text.json';
import TK from '../locales/tr-TR/text.json';
import TND from '../locales/mg-TND/text.json';
import TNK from '../locales/mg-TNK/text.json';
import TPO from '../locales/pt-PT/text.json';
import TTM from '../locales/mg-TTM/text.json';
import TW from '../locales/tw-TW/text.json';
import U from '../locales/ru-RU/text.json';
import VZ from '../locales/mg-VZ/text.json';
import X from '../locales/de-DE/text.json';

/**
 * Global augmentation for the browser build: declares the `jw_epub_parser`
 * slot on `Window` so the assignment that follows type-checks.
 */
declare global {
  interface Window {
    // Typed as `any` in the original source; the object stored here holds the
    // locale tables and helper modules assigned below.
    jw_epub_parser: any;
  }
}

// Publish the bundled locale text tables (keyed by language code) and the
// `path` helper on `window.jw_epub_parser`.
// NOTE(review): presumably read by the shared parsing code at runtime — confirm against callers.
window.jw_epub_parser = {
  languages: { E, F, I, J, K, M, MG, P, S, T, TG, TK, TND, TNK, TPO, TTM, TW, U, VZ, X },
  path: path,
};
284 changes: 143 additions & 141 deletions src/common/parsing_rules.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,187 +2,189 @@ import { JWEPUBParserError } from '../classes/error.js';
import { getMonthNames, getPartMinutesSeparatorVariations, getStudyArticleDateVariations } from './language_rules.js';

/**
 * Extracts the day number and the month from a week-date heading.
 *
 * The lower-cased input is split on range separators ("bis", en dash, hyphen,
 * em dash) and every segment is searched for one of the month names returned
 * by `getMonthNames(lang)`. The first segment/month pair that matches wins.
 *
 * @param src - Raw heading text.
 * @param lang - Language code. For `'J'` the month name is matched with word
 *   boundaries and the second number in the heading is taken as the day; for
 *   every other language the first number is used.
 * @returns An object with `varDay` and `monthIndex` (the `index` of the
 *   matched month entry).
 * @throws JWEPUBParserError when no month name matches or when the heading
 *   does not contain the expected day number.
 */
export const extractMonthName = (src: string, lang: string) => {
  let varDay: number | undefined;
  let monthIndex;

  const text = src.toLowerCase();
  const separators = ['bis', '–', '-', '—'];
  const segments = text.split(new RegExp(separators.join('|'), 'gi'));
  const monthNames = getMonthNames(lang);

  // The day is read from the whole heading, not from the matched segment.
  // Looking it up without a non-null assertion means a heading that lacks the
  // expected number ends in the JWEPUBParserError below instead of a
  // TypeError or a NaN day.
  const numbers = text.match(/\d+/g);
  const dayText = numbers?.[lang === 'J' ? 1 : 0];

  outerLoop: for (const segment of segments) {
    for (const month of monthNames) {
      const monthLang = month.name.toLowerCase();
      let searchKey = `(${monthLang})`;

      if (lang === 'J') {
        searchKey = `\\b${searchKey}\\b`;
      }

      if (new RegExp(searchKey).test(segment)) {
        if (dayText !== undefined) {
          varDay = +dayText;
        }

        monthIndex = month.index;
        break outerLoop;
      }
    }
  }

  if (typeof varDay === 'number' && typeof monthIndex === 'number') {
    return { varDay, monthIndex };
  }

  throw new JWEPUBParserError('week-date', `Parsing failed when extracting the week date. The input was: ${src}`);
};

/**
 * Pulls the song number out of a song heading.
 *
 * @param src - Heading text.
 * @returns The first run of digits in `src` as a number when it is at most
 *   158; otherwise the unmodified `src` string (also when `src` has no digits).
 */
export const extractSongNumber = (src: string) => {
  const firstNumberMatch = src.match(/(\d+)/);

  if (firstNumberMatch) {
    const firstNumber = +firstNumberMatch[0];

    // 158 is the upper bound used by the original code
    // NOTE(review): presumably the highest valid song number — confirm.
    if (firstNumber <= 158) {
      return firstNumber;
    }
  }

  return src;
};

/**
 * Splits a meeting-part line into its title, duration and source text.
 *
 * Each `|`-separated variation from `getPartMinutesSeparatorVariations(lang)`
 * is turned into a regular expression (the `{{ duration }}` placeholder
 * becomes `\d+`, the first pair of parentheses is escaped, and the space after
 * the closing parenthesis is made optional). The text before the match is the
 * assignment title, the text after it is the source.
 *
 * The loop does not stop at the first hit: when several variations match, the
 * result of the last matching one is returned.
 *
 * @param src - Raw part line.
 * @param lang - Language code passed to `getPartMinutesSeparatorVariations`.
 * @returns `{ type, time, src, fulltitle }` where `type` is the title without
 *   its leading "N. " / "N- " index, `time` is the duration as a number, `src`
 *   is the trailing source text and `fulltitle` is the title including the index.
 * @throws JWEPUBParserError when no variation matches `src`.
 */
export const extractSourceEnhanced = (src: string, lang: string) => {
  const variations = getPartMinutesSeparatorVariations(lang).split('|');

  // Punctuation stripped from the start / end of the title and source.
  const regexStartColumn = /^[:.「]/;
  const regexEndColumn = /[:」]$/;
  // Leading part index such as "3. " or "12- ".
  const indexSep = /\d{1,2}[-.] /g;

  let result;

  for (const variation of variations) {
    let textSearch = variation.replace('{{ duration }}', '\\d+');
    textSearch = textSearch.replace('(', '\\(');
    textSearch = textSearch.replace(')', '\\)');
    textSearch = textSearch.replace(') ', ') ?');
    textSearch = textSearch.replace('??', '?');

    const separator = new RegExp(textSearch.trim());
    const match = src.match(separator);

    if (match) {
      const splits = src.split(separator);
      const duration = +match[0].match(/\d+/)![0];

      const tmpAssignment = splits[0].trim();
      const source = splits[1].trim().replace(regexStartColumn, '').replace(regexEndColumn, '').trim();

      const index = tmpAssignment.match(indexSep);
      const assignmentSplits = tmpAssignment.split(indexSep);
      let assignment;

      if (index) {
        assignment = assignmentSplits[1].trim();
      } else {
        assignment = tmpAssignment;
      }

      assignment = assignment.replace(regexStartColumn, '').replace(regexEndColumn, '').trim();

      result = { type: assignment, time: duration, src: source, fulltitle: tmpAssignment };
    }
  }

  if (result) return result;

  throw new JWEPUBParserError('jw-epub-parser', `Parsing failed. The input was: ${src}`);
};

/**
 * Extracts day, month and year from a Watchtower study article heading.
 *
 * Each `|`-separated variation from `getStudyArticleDateVariations(lang)` is
 * turned into a regular expression for the part of the heading that precedes
 * the date (`{{ date }}` is removed, `{{ number }}` becomes `\d+`). The text
 * after that prefix is split on range separators and searched for a month
 * name from `getMonthNames(lang)`.
 *
 * @param src - Raw heading text; it is lower-cased before matching, and the
 *   lower-cased form is what appears in the error message.
 * @param lang - Language code. For `'J'` the month name is matched with word
 *   boundaries and the third number of the date text is taken as the day; for
 *   every other language the first number is used.
 * @returns `{ varDay, monthIndex, varYear }`.
 * @throws JWEPUBParserError when no variation yields a day, a month and a
 *   four-digit year.
 */
export const extractWTStudyDate = (src: string, lang: string) => {
  let varDay: number | undefined;
  let monthIndex;
  let varYear: number | undefined;

  const variations = getStudyArticleDateVariations(lang).split('|');

  const patternNumber = '{{ number }}';
  const patternDate = '{{ date }}';

  // A month name is only accepted when it starts before this offset within
  // its segment.
  // NOTE(review): the reason for the value 29 is not visible here — confirm.
  const maxMonthOffset = 29;

  src = src.toLowerCase();

  outerLoop: for (const variation of variations) {
    let textSearch = variation.toLowerCase().replace(patternDate, '');
    textSearch = textSearch.replace(patternNumber, '\\d+');

    const prefixMatch = new RegExp(textSearch.trim()).exec(src);
    if (prefixMatch === null) continue;

    // NOTE(review): the match length is used as the slice start, which assumes
    // the prefix matched at position 0 of `src` — confirm.
    const dateStartIndex = prefixMatch[0].length;
    if (dateStartIndex === 0) continue;

    const dateValue = src.substring(dateStartIndex);
    const dateText = dateValue.trim();

    const separators = ['bis', '–', '-', '—', 'do'];
    const segments = dateText.toLowerCase().split(new RegExp(separators.join('|'), 'gi'));
    const monthNames = getMonthNames(lang);

    for (const segment of segments) {
      for (const month of monthNames) {
        const monthLang = month.name.toLowerCase();
        let searchKey = `(${monthLang})`;

        if (lang === 'J') {
          searchKey = `\\b${searchKey}\\b`;
        }

        const monthMatch = new RegExp(searchKey).exec(segment);
        if (monthMatch === null || monthMatch.index >= maxMonthOffset) continue;

        // Read the day without a non-null assertion so a date text that lacks
        // the expected number ends in the JWEPUBParserError below instead of a
        // TypeError or a NaN day.
        const numbers = dateText.match(/\d+/g);
        const dayText = numbers?.[lang === 'J' ? 2 : 0];
        if (dayText !== undefined) {
          varDay = +dayText;
        }

        monthIndex = month.index;

        const yearMatch = /\b\d{4}\b/.exec(dateValue);
        if (yearMatch !== null) {
          varYear = +yearMatch[0];
        }

        break outerLoop;
      }
    }
  }

  if (typeof varDay === 'number' && typeof monthIndex === 'number' && typeof varYear === 'number') {
    return { varDay, monthIndex, varYear };
  }

  throw new JWEPUBParserError('wtstudy', `Parsing failed for Watchtower Study Date. The input was: ${src}`);
};
9 changes: 5 additions & 4 deletions src/node/utils.node.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,19 @@ import TG from '../locales/tl-PH/text.json' assert { type: 'json' };
import TK from '../locales/tr-TR/text.json' assert { type: 'json' };
import TND from '../locales/mg-TND/text.json' assert { type: 'json' };
import TNK from '../locales/mg-TNK/text.json' assert { type: 'json' };
import TPO from '../locales/pt-PT/text.json' assert { type: 'json' };
import TTM from '../locales/mg-TTM/text.json' assert { type: 'json' };
import TW from '../locales/tw-TW/text.json' assert { type: 'json' };
import U from '../locales/ru-RU/text.json' assert { type: 'json' };
import VZ from '../locales/mg-VZ/text.json' assert { type: 'json' };
import X from '../locales/de-DE/text.json' assert { type: 'json' };

/**
 * Global augmentation for the Node build: declares the `jw_epub_parser`
 * global so the assignment that follows type-checks.
 */
declare global {
  // Typed as `any` in the original source.
  var jw_epub_parser: any;
}

// Publish the bundled locale text tables (keyed by language code) together
// with the `path` and `readFile` helpers on the Node global object.
// NOTE(review): presumably read by the shared parsing code at runtime — confirm against callers.
global.jw_epub_parser = {
  languages: { E, F, I, J, K, M, MG, P, S, T, TG, TK, TND, TNK, TPO, TTM, TW, U, VZ, X },
  path: path,
  readFile: readFile,
};

0 comments on commit c34604c

Please sign in to comment.