Skip to content

Commit

Permalink
Add OcrOpt to transfer OCR options
Browse files Browse the repository at this point in the history
This avoids making the OCR code depend on the command-line struct.
This makes the crate easier to use as a library.
  • Loading branch information
gwen-lg committed Feb 3, 2024
1 parent ba7c71b commit 8d0f3eb
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 6 deletions.
5 changes: 3 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ mod ocr;
mod opt;
mod preprocessor;

pub use crate::ocr::process as ocr_process;
pub use crate::ocr::{process as ocr_process, OcrOpt};
pub use crate::opt::Opt;
pub use crate::preprocessor::{preprocess_subtitles, ImagePreprocessOpt};

Expand Down Expand Up @@ -55,7 +55,8 @@ pub fn run(opt: &Opt) -> anyhow::Result<()> {
dump_images(&vobsubs)?;
}

let subtitles = ocr::process(vobsubs, opt)?;
let ocr_opt = OcrOpt::new(&opt.tessdata_dir, opt.lang.as_str(), &opt.config, opt.dpi);
let subtitles = ocr::process(vobsubs, &ocr_opt)?;
let subtitles = check_subtitles(subtitles)?;

// Create subtitle file.
Expand Down
32 changes: 28 additions & 4 deletions src/ocr.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::{io::Cursor, str::Utf8Error};

use crate::{opt::Opt, preprocessor::PreprocessedVobSubtitle};
use crate::preprocessor::PreprocessedVobSubtitle;
use image::{
codecs::pnm::{PnmSubtype, SampleEncoding},
DynamicImage, GrayImage,
Expand All @@ -17,6 +17,30 @@ use thiserror::Error;

// Scoped thread-local slot holding an optional `TesseractWrapper`.
// NOTE(review): presumably each worker thread fills this lazily on first use so
// the Tesseract instance is created once per thread — confirm against `process`.
scoped_thread_local!(static mut TESSERACT: Option<TesseractWrapper>);

/// Options for OCR with Tesseract.
///
/// All fields except `dpi` are borrowed for `'a`, so building an `OcrOpt`
/// does not clone the caller's data.
pub struct OcrOpt<'a> {
/// Optional tessdata directory; passed to `TesseractWrapper::new` via `as_deref()`.
tessdata_dir: &'a Option<String>,
/// Language string passed to `TesseractWrapper::new`.
lang: &'a str,
/// `(Variable, String)` pairs passed to `TesseractWrapper::new`.
config: &'a Vec<(Variable, String)>,
/// DPI value.
/// NOTE(review): presumably the image resolution reported to Tesseract — confirm at the use site.
dpi: i32,
}

impl<'a> OcrOpt<'a> {
pub fn new(
tessdata_dir: &'a Option<String>,
lang: &'a str,
config: &'a Vec<(Variable, String)>,
dpi: i32,
) -> Self {
Self {
tessdata_dir,
lang,
config,
dpi,
}
}
}

#[derive(Error, Debug)]
pub enum Error {
#[error("Could not build tesseract thread pool")]
Expand All @@ -42,7 +66,7 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;

pub fn process(
vobsubs: Vec<PreprocessedVobSubtitle>,
opt: &Opt,
opt: &OcrOpt,
) -> Result<Vec<Result<(TimeSpan, String)>>> {
std::env::set_var("OMP_THREAD_LIMIT", "1");
let subs = rayon::ThreadPoolBuilder::new().build_scoped(
Expand All @@ -65,8 +89,8 @@ pub fn process(
None => {
let tesseract = TesseractWrapper::new(
opt.tessdata_dir.as_deref(),
&opt.lang,
&opt.config,
opt.lang,
opt.config,
)?;
maybe_tesseract.insert(tesseract)
}
Expand Down

0 comments on commit 8d0f3eb

Please sign in to comment.