From 8d0f3eb818f140963ed9f37dc9f13b484484b962 Mon Sep 17 00:00:00 2001 From: Gwen Lg Date: Sat, 6 Jan 2024 15:42:06 +0100 Subject: [PATCH] Add OcrOpt to transfer ocr options It avoids ocr depending on the command line struct. This helps the crate be used as a library. --- src/lib.rs | 5 +++-- src/ocr.rs | 32 ++++++++++++++++++++++++++++---- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 21a4141..dc5d355 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,7 +4,7 @@ mod ocr; mod opt; mod preprocessor; -pub use crate::ocr::process as ocr_process; +pub use crate::ocr::{process as ocr_process, OcrOpt}; pub use crate::opt::Opt; pub use crate::preprocessor::{preprocess_subtitles, ImagePreprocessOpt}; @@ -55,7 +55,8 @@ pub fn run(opt: &Opt) -> anyhow::Result<()> { dump_images(&vobsubs)?; } - let subtitles = ocr::process(vobsubs, opt)?; + let ocr_opt = OcrOpt::new(&opt.tessdata_dir, opt.lang.as_str(), &opt.config, opt.dpi); + let subtitles = ocr::process(vobsubs, &ocr_opt)?; let subtitles = check_subtitles(subtitles)?; // Create subtitle file. 
diff --git a/src/ocr.rs b/src/ocr.rs index 2f4bfc9..2dcd170 100644 --- a/src/ocr.rs +++ b/src/ocr.rs @@ -1,6 +1,6 @@ use std::{io::Cursor, str::Utf8Error}; -use crate::{opt::Opt, preprocessor::PreprocessedVobSubtitle}; +use crate::preprocessor::PreprocessedVobSubtitle; use image::{ codecs::pnm::{PnmSubtype, SampleEncoding}, DynamicImage, GrayImage, @@ -17,6 +17,30 @@ use thiserror::Error; scoped_thread_local!(static mut TESSERACT: Option); +/// Options for OCR with Tesseract +pub struct OcrOpt<'a> { + tessdata_dir: &'a Option, + lang: &'a str, + config: &'a Vec<(Variable, String)>, + dpi: i32, +} + +impl<'a> OcrOpt<'a> { + pub fn new( + tessdata_dir: &'a Option, + lang: &'a str, + config: &'a Vec<(Variable, String)>, + dpi: i32, + ) -> Self { + Self { + tessdata_dir, + lang, + config, + dpi, + } + } +} + #[derive(Error, Debug)] pub enum Error { #[error("Could not build tesseract thread pool")] @@ -42,7 +66,7 @@ pub type Result = std::result::Result; pub fn process( vobsubs: Vec, - opt: &Opt, + opt: &OcrOpt, ) -> Result>> { std::env::set_var("OMP_THREAD_LIMIT", "1"); let subs = rayon::ThreadPoolBuilder::new().build_scoped( @@ -65,8 +89,8 @@ pub fn process( None => { let tesseract = TesseractWrapper::new( opt.tessdata_dir.as_deref(), - &opt.lang, - &opt.config, + opt.lang, + opt.config, )?; maybe_tesseract.insert(tesseract) }