Skip to content

Commit

Permalink
Minor updates
Browse files Browse the repository at this point in the history
  • Loading branch information
Balearica committed Aug 16, 2024
1 parent 30552e7 commit 4e7859f
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 3 deletions.
2 changes: 1 addition & 1 deletion js/import/convertPageStext.js
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ export async function convertPageStext({ ocrStr, n }) {

// Label as `smallCapsAlt` rather than `smallCaps`, as we confirm the word is all caps before marking as `smallCaps`.
smallCapsCurrentAlt = smallCapsCurrentAlt ?? smallCapsAltArr[smallCapsAltArr.length - 1];
smallCapsCurrent = /(small\W?cap)|sc$/i.test(fontNameStrI);
smallCapsCurrent = /(small\W?cap)|(sc$)|(caps$)/i.test(fontNameStrI);
smallCapsWord = smallCapsCurrent;

if (/italic/i.test(fontNameStrI) || /-\w*ital/i.test(fontNameStrI)) {
Expand Down
24 changes: 22 additions & 2 deletions js/objects/ocrObjects.js
Original file line number Diff line number Diff line change
Expand Up @@ -403,8 +403,28 @@ function calcWordAngleAdj(word) {
* @param {string} text
*/
function replaceLigatures(text) {
  // Expands single-code-point ligatures/digraphs into their component letters.
  // Every source character is written as a `\u` escape so the mapping cannot be
  // silently corrupted by editors, normalizers, or fonts that lack these glyphs.
  return text
    // Latin Extended-A
    .replace(/\u0132/g, 'IJ') // IJ ligature (capital)
    .replace(/\u0133/g, 'ij') // ij ligature (small)
    .replace(/\u0149/g, '\u02BCn') // n preceded by apostrophe -> modifier apostrophe + n
    // Latin Extended-B digraphs
    .replace(/\u01F1/g, 'DZ')
    .replace(/\u01F2/g, 'Dz')
    .replace(/\u01F3/g, 'dz')
    .replace(/\u01C4/g, 'D\u017D') // DZ with caron -> D + Z-caron
    .replace(/\u01C5/g, 'D\u017E') // Dz with caron -> D + z-caron
    .replace(/\u01C6/g, 'd\u017E') // dz with caron -> d + z-caron
    .replace(/\u01C7/g, 'LJ')
    .replace(/\u01C8/g, 'Lj')
    .replace(/\u01C9/g, 'lj')
    .replace(/\u01CA/g, 'NJ')
    .replace(/\u01CB/g, 'Nj')
    .replace(/\u01CC/g, 'nj')
    // Alphabetic Presentation Forms (typographic ligatures)
    .replace(/\uFB00/g, 'ff')
    .replace(/\uFB01/g, 'fi')
    .replace(/\uFB02/g, 'fl')
    .replace(/\uFB03/g, 'ffi')
    .replace(/\uFB04/g, 'ffl')
    .replace(/\uFB05/g, '\u017Ft') // long-s + t ligature keeps the long s
    .replace(/\uFB06/g, 'st');
}

/**
Expand Down

0 comments on commit 4e7859f

Please sign in to comment.