
Commit

Merge pull request #124 from outbrain/fwq
Range-based 16b quantization
SkBlaz authored Jan 15, 2024
2 parents 98e0238 + 13ad276 commit 067554a
Showing 13 changed files with 274 additions and 38 deletions.
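The commit threads a use_quantization / quantize_weights flag through every weight read/write path below and adds the half crate; the quantization routines themselves live in a new src/quantization.rs (declared in src/lib.rs and called from src/block_ffm.rs), whose diff is not expanded on this page. For orientation only, here is a minimal sketch of one way a range-based 16-bit scheme can work. The function names mirror quantization::quantize_ffm_weights and quantization::dequantize_ffm_weights as used in block_ffm.rs, but the bodies, the u16 bucket encoding, and the f16 range header are assumptions of this sketch, not the commit's implementation.

// Sketch only (hypothetical implementation, simplified to plain f32 weight vectors):
// map each weight linearly into a u16 bucket spanning the buffer's [min, max] range,
// and prepend that range as two half::f16 values so the reader can invert the mapping.
use half::f16;
use std::io::Read;

const BUCKETS: f32 = u16::MAX as f32; // 65_535 levels across the observed range

pub fn quantize_ffm_weights(weights: &[f32]) -> Vec<u16> {
    let min = weights.iter().copied().fold(f32::INFINITY, f32::min);
    let max = weights.iter().copied().fold(f32::NEG_INFINITY, f32::max);
    let span = (max - min).max(f32::EPSILON); // guard against constant buffers
    let mut out = Vec::with_capacity(weights.len() + 2);
    out.push(f16::from_f32(min).to_bits()); // 2 x 16-bit range header (loses a little precision)
    out.push(f16::from_f32(max).to_bits());
    out.extend(weights.iter().map(|&w| (((w - min) / span) * BUCKETS).round() as u16));
    out
}

pub fn dequantize_ffm_weights(input: &mut dyn Read, weights: &mut [f32]) {
    // Reads the layout produced above: 2 x f16 range header, then one u16 bucket per weight.
    // Assumes the writer dumped the Vec<u16> as raw native-endian bytes.
    fn read_u16(input: &mut dyn Read) -> u16 {
        let mut buf = [0u8; 2];
        input.read_exact(&mut buf).expect("truncated quantized weight stream");
        u16::from_ne_bytes(buf)
    }
    let min = f16::from_bits(read_u16(input)).to_f32();
    let max = f16::from_bits(read_u16(input)).to_f32();
    let span = (max - min).max(f32::EPSILON);
    for w in weights.iter_mut() {
        *w = min + (read_u16(input) as f32 / BUCKETS) * span;
    }
}

A round-trip check using these two sketch functions follows the src/block_ffm.rs diff below.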
1 change: 1 addition & 0 deletions Cargo.toml
@@ -30,6 +30,7 @@ intel-mkl-src = {version= "0.8.1", default-features = false, features=["mkl-stat
log = "0.4.18"
env_logger = "0.10.0"
rustc-hash = "1.1.0"
half = "2.3.1"

[build-dependencies]
cbindgen = "0.23.0"
32 changes: 27 additions & 5 deletions src/block_ffm.rs
@@ -23,6 +23,7 @@ use crate::optimizer;
use crate::port_buffer;
use crate::port_buffer::PortBuffer;
use crate::regressor;
use crate::quantization;
use crate::regressor::{BlockCache, FFM_CONTRA_BUF_LEN};

const FFM_STACK_BUF_LEN: usize = 131072;
@@ -828,18 +829,33 @@ impl<L: OptimizerTrait + 'static> BlockTrait for BlockFFM<L> {
fn write_weights_to_buf(
&self,
output_bufwriter: &mut dyn io::Write,
use_quantization: bool
) -> Result<(), Box<dyn Error>> {
block_helpers::write_weights_to_buf(&self.weights, output_bufwriter)?;
block_helpers::write_weights_to_buf(&self.optimizer, output_bufwriter)?;

if use_quantization {

let quantized_weights = quantization::quantize_ffm_weights(&self.weights);
block_helpers::write_weights_to_buf(&quantized_weights, output_bufwriter, false)?;
} else {
block_helpers::write_weights_to_buf(&self.weights, output_bufwriter, false)?;
}
block_helpers::write_weights_to_buf(&self.optimizer, output_bufwriter, false)?;
Ok(())
}

fn read_weights_from_buf(
&mut self,
input_bufreader: &mut dyn io::Read,
use_quantization: bool
) -> Result<(), Box<dyn Error>> {
block_helpers::read_weights_from_buf(&mut self.weights, input_bufreader)?;
block_helpers::read_weights_from_buf(&mut self.optimizer, input_bufreader)?;

if use_quantization {
quantization::dequantize_ffm_weights(input_bufreader, &mut self.weights);
} else {
block_helpers::read_weights_from_buf(&mut self.weights, input_bufreader, false)?;
}

block_helpers::read_weights_from_buf(&mut self.optimizer, input_bufreader, false)?;
Ok(())
}

@@ -861,12 +877,18 @@ impl<L: OptimizerTrait + 'static> BlockTrait for BlockFFM<L> {
&self,
input_bufreader: &mut dyn io::Read,
forward: &mut Box<dyn BlockTrait>,
use_quantization: bool
) -> Result<(), Box<dyn Error>> {
let forward = forward
.as_any()
.downcast_mut::<BlockFFM<optimizer::OptimizerSGD>>()
.unwrap();
block_helpers::read_weights_from_buf(&mut forward.weights, input_bufreader)?;

if use_quantization {
quantization::dequantize_ffm_weights(input_bufreader, &mut forward.weights);
} else {
block_helpers::read_weights_from_buf(&mut forward.weights, input_bufreader, false)?;
}
block_helpers::skip_weights_from_buf::<OptimizerData<L>>(
self.ffm_weights_len as usize,
input_bufreader,
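As a quick sanity check of the write/read symmetry the two branches above aim for, here is a round-trip test against the sketch functions shown before the Cargo.toml diff (hypothetical helpers, not the commit's API): each weight should come back within roughly (max - min) / 65535 of its original value, plus a small error from the f16 range header.

#[test]
fn quantize_roundtrip_sketch() {
    let weights = vec![0.25f32, -1.5, 3.0, 0.0];
    let quantized = quantize_ffm_weights(&weights); // range header + one u16 bucket per weight
    // Serialize the u16s the way a raw weight writer would (native-endian), then read them back.
    let bytes: Vec<u8> = quantized.iter().flat_map(|q| q.to_ne_bytes()).collect();
    let mut restored = vec![0.0f32; weights.len()];
    dequantize_ffm_weights(&mut bytes.as_slice(), &mut restored);
    for (original, recovered) in weights.iter().zip(&restored) {
        assert!((original - recovered).abs() < 1e-2);
    }
}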
2 changes: 2 additions & 0 deletions src/block_helpers.rs
@@ -43,6 +43,7 @@ macro_rules! assert_epsilon {
pub fn read_weights_from_buf<L>(
weights: &mut Vec<L>,
input_bufreader: &mut dyn io::Read,
_use_quantization: bool
) -> Result<(), Box<dyn Error>> {
if weights.is_empty() {
return Err("Loading weights to unallocated weights buffer".to_string())?;
@@ -74,6 +75,7 @@ pub fn skip_weights_from_buf<L>(
pub fn write_weights_to_buf<L>(
weights: &Vec<L>,
output_bufwriter: &mut dyn io::Write,
_use_quantization: bool
) -> Result<(), Box<dyn Error>> {
if weights.is_empty() {
assert!(false);
7 changes: 5 additions & 2 deletions src/block_lr.rs
@@ -263,21 +263,24 @@ impl<L: OptimizerTrait + 'static> BlockTrait for BlockLR<L> {
fn read_weights_from_buf(
&mut self,
input_bufreader: &mut dyn io::Read,
_use_quantization: bool
) -> Result<(), Box<dyn Error>> {
block_helpers::read_weights_from_buf(&mut self.weights, input_bufreader)
block_helpers::read_weights_from_buf(&mut self.weights, input_bufreader, false)
}

fn write_weights_to_buf(
&self,
output_bufwriter: &mut dyn io::Write,
_use_quantization: bool
) -> Result<(), Box<dyn Error>> {
block_helpers::write_weights_to_buf(&self.weights, output_bufwriter)
block_helpers::write_weights_to_buf(&self.weights, output_bufwriter, false)
}

fn read_weights_from_buf_into_forward_only(
&self,
input_bufreader: &mut dyn io::Read,
forward: &mut Box<dyn BlockTrait>,
_use_quantization: bool
) -> Result<(), Box<dyn Error>> {
let forward = forward
.as_any()
13 changes: 8 additions & 5 deletions src/block_neural.rs
@@ -430,18 +430,20 @@ impl<L: OptimizerTrait + 'static> BlockTrait for BlockNeuronLayer<L> {
fn write_weights_to_buf(
&self,
output_bufwriter: &mut dyn io::Write,
_use_quantization: bool
) -> Result<(), Box<dyn Error>> {
block_helpers::write_weights_to_buf(&self.weights, output_bufwriter)?;
block_helpers::write_weights_to_buf(&self.weights_optimizer, output_bufwriter)?;
block_helpers::write_weights_to_buf(&self.weights, output_bufwriter, false)?;
block_helpers::write_weights_to_buf(&self.weights_optimizer, output_bufwriter, false)?;
Ok(())
}

fn read_weights_from_buf(
&mut self,
input_bufreader: &mut dyn io::Read,
_use_quantization: bool
) -> Result<(), Box<dyn Error>> {
block_helpers::read_weights_from_buf(&mut self.weights, input_bufreader)?;
block_helpers::read_weights_from_buf(&mut self.weights_optimizer, input_bufreader)?;
block_helpers::read_weights_from_buf(&mut self.weights, input_bufreader, false)?;
block_helpers::read_weights_from_buf(&mut self.weights_optimizer, input_bufreader, false)?;
Ok(())
}

@@ -464,12 +466,13 @@
&self,
input_bufreader: &mut dyn io::Read,
forward: &mut Box<dyn BlockTrait>,
_use_quantization: bool
) -> Result<(), Box<dyn Error>> {
let forward = forward
.as_any()
.downcast_mut::<BlockNeuronLayer<optimizer::OptimizerSGD>>()
.unwrap();
block_helpers::read_weights_from_buf(&mut forward.weights, input_bufreader)?;
block_helpers::read_weights_from_buf(&mut forward.weights, input_bufreader, false)?;
block_helpers::skip_weights_from_buf::<OptimizerData<L>>(
self.weights_len as usize,
input_bufreader,
5 changes: 5 additions & 0 deletions src/cmdline.rs
@@ -309,6 +309,11 @@ pub fn create_expected_args<'a>() -> App<'a, 'a> {
.value_name("num_threads")
.help("Number of threads to use with hogwild training")
.takes_value(true))
.arg(Arg::with_name("weight_quantization")
.long("weight_quantization")
.value_name("Whether to consider weight quantization when reading/writing weights.")
.help("Half-float quantization trigger (inference only is the suggested use).")
.takes_value(false))
.arg(Arg::with_name("predictions_stdout")
.long("predictions_stdout")
.value_name("Output predictions to stdout")
2 changes: 2 additions & 0 deletions src/lib.rs
@@ -1,3 +1,4 @@
pub mod quantization;
pub mod block_ffm;
pub mod block_helpers;
pub mod block_loss_functions;
@@ -29,6 +30,7 @@ pub mod vwmap;

extern crate blas;
extern crate intel_mkl_src;
extern crate half;

use crate::feature_buffer::FeatureBufferTranslator;
use crate::multithread_helpers::BoxedRegressorTrait;
10 changes: 7 additions & 3 deletions src/main.rs
@@ -18,6 +18,7 @@ use std::time::Instant;

extern crate blas;
extern crate intel_mkl_src;
extern crate half;

#[macro_use]
extern crate nom;
@@ -114,7 +115,7 @@ fn main2() -> Result<(), Box<dyn Error>> {
};

let testonly = cl.is_present("testonly");

let quantize_weights = cl.is_present("weight_quantization");
let final_regressor_filename = cl.value_of("final_regressor");
let output_pred_sto: bool = cl.is_present("predictions_stdout");
if let Some(filename) = final_regressor_filename {
@@ -148,8 +149,11 @@ fn main2() -> Result<(), Box<dyn Error>> {
let (mut mi2, vw2, re_fixed) =
new_regressor_from_filename(filename, true, Option::Some(&cl))?;
mi2.optimizer = Optimizer::SGD;
if cl.is_present("weight_quantization") {
mi2.dequantize_weights = Some(true);
}
if let Some(filename1) = inference_regressor_filename {
save_regressor_to_filename(filename1, &mi2, &vw2, re_fixed).unwrap()
save_regressor_to_filename(filename1, &mi2, &vw2, re_fixed, quantize_weights).unwrap()
}
} else {
let vw: VwNamespaceMap;
@@ -296,7 +300,7 @@ fn main2() -> Result<(), Box<dyn Error>> {
log::info!("Elapsed: {:.2?} rows: {}", elapsed, example_num);

if let Some(filename) = final_regressor_filename {
save_sharable_regressor_to_filename(filename, &mi, &vw, sharable_regressor)
save_sharable_regressor_to_filename(filename, &mi, &vw, sharable_regressor, quantize_weights)
.unwrap()
}
}
4 changes: 4 additions & 0 deletions src/model_instance.rs
@@ -92,6 +92,9 @@ pub struct ModelInstance {
pub optimizer: Optimizer,

pub transform_namespaces: feature_transform_parser::NamespaceTransforms,

pub dequantize_weights: Option<bool>,

}

fn default_u32_zero() -> u32 {
@@ -142,6 +145,7 @@ impl ModelInstance {
optimizer: Optimizer::SGD,
transform_namespaces: feature_transform_parser::NamespaceTransforms::new(),
nn_config: NNConfig::new(),
dequantize_weights: Some(false),
};
Ok(mi)
}
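A practical aside on the new field: model files written before this commit contain no dequantize_weights entry. If ModelInstance is deserialized with serde from a self-describing format (the default_u32_zero helper above suggests serde field defaults are already in use), keeping older files loadable requires a default for the new field. Whether the commit handles this outside the expanded context is not visible here; the usual idiom, shown purely as an assumption, is:

// Assumption: illustrating the standard serde idiom, not code from this commit.
use serde::Deserialize;

#[derive(Deserialize)]
struct ModelInstanceSketch {
    #[serde(default)] // a missing field in an older model file deserializes as None
    dequantize_weights: Option<bool>,
}

#[test]
fn old_model_without_the_field_still_parses() {
    let parsed: ModelInstanceSketch = serde_json::from_str("{}").unwrap();
    assert_eq!(parsed.dequantize_weights, None);
}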
41 changes: 28 additions & 13 deletions src/persistence.rs
@@ -57,6 +57,7 @@ pub fn save_sharable_regressor_to_filename(
mi: &model_instance::ModelInstance,
vwmap: &vwmap::VwNamespaceMap,
re: BoxedRegressorTrait,
quantize_weights: bool
) -> Result<(), Box<dyn Error>> {
let output_bufwriter = &mut io::BufWriter::new(
fs::File::create(filename)
@@ -65,7 +66,7 @@
write_regressor_header(output_bufwriter)?;
vwmap.save_to_buf(output_bufwriter)?;
mi.save_to_buf(output_bufwriter)?;
re.write_weights_to_buf(output_bufwriter)?;
re.write_weights_to_buf(output_bufwriter, quantize_weights)?;
Ok(())
}

@@ -74,6 +75,7 @@ pub fn save_regressor_to_filename(
mi: &model_instance::ModelInstance,
vwmap: &vwmap::VwNamespaceMap,
re: Regressor,
quantize_weights: bool,
) -> Result<(), Box<dyn Error>> {
let output_bufwriter = &mut io::BufWriter::new(
fs::File::create(filename)
@@ -82,7 +84,7 @@
write_regressor_header(output_bufwriter)?;
vwmap.save_to_buf(output_bufwriter)?;
mi.save_to_buf(output_bufwriter)?;
re.write_weights_to_buf(output_bufwriter)?;
re.write_weights_to_buf(output_bufwriter, quantize_weights)?;
Ok(())
}

@@ -136,15 +138,28 @@ pub fn new_regressor_from_filename(
> {
let mut input_bufreader = io::BufReader::new(fs::File::open(filename).unwrap());
let (mut mi, vw, mut re) = load_regressor_without_weights(&mut input_bufreader, cmd_arguments)?;

// reading logic is for some reason different, so doing this again here ..

let mut quantization_flag = false;
let mut conversion_flag = false;

if cmd_arguments.is_some(){
quantization_flag = mi.dequantize_weights.unwrap_or(false);
conversion_flag = cmd_arguments.unwrap().is_present("convert_inference_regressor");
}

let weight_quantization = quantization_flag && !conversion_flag;
log::info!("Reading weights, dequantization enabled: {}", weight_quantization);
if !immutable {
re.allocate_and_init_weights(&mi);
re.overwrite_weights_from_buf(&mut input_bufreader)?;
re.overwrite_weights_from_buf(&mut input_bufreader, weight_quantization)?;
Ok((mi, vw, re))
} else {
mi.optimizer = model_instance::Optimizer::SGD;
let mut immutable_re = re.immutable_regressor_without_weights(&mi)?;
immutable_re.allocate_and_init_weights(&mi);
re.into_immutable_regressor_from_buf(&mut immutable_re, &mut input_bufreader)?;
re.into_immutable_regressor_from_buf(&mut immutable_re, &mut input_bufreader, weight_quantization)?;
Ok((mi, vw, immutable_re))
}
}
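The flag combination above is easy to misread, so here is a compact paraphrase of the decision as the hunk implements it (illustration only, not code from the commit): dequantize on read only when command-line arguments are available, the stored ModelInstance reports quantized weights, and this is not the --convert_inference_regressor pass, which in the main.rs change above reads the full-precision training model and quantizes only when writing the inference model.

// Paraphrase of the weight_quantization decision in new_regressor_from_filename above.
fn should_dequantize_on_read(
    has_cmd_arguments: bool,          // cmd_arguments.is_some()
    dequantize_weights: Option<bool>, // mi.dequantize_weights from the loaded model header
    converting: bool,                 // --convert_inference_regressor present
) -> bool {
    has_cmd_arguments && dequantize_weights.unwrap_or(false) && !converting
}

#[test]
fn dequantize_decision() {
    assert!(should_dequantize_on_read(true, Some(true), false));   // inference load of a quantized model
    assert!(!should_dequantize_on_read(true, Some(true), true));   // conversion pass reads raw f32 weights
    assert!(!should_dequantize_on_read(true, None, false));        // model saved without quantization
    assert!(!should_dequantize_on_read(false, Some(true), false)); // no cmd_arguments available: plain read
}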
@@ -154,9 +169,9 @@ pub fn hogwild_load(re: &mut regressor::Regressor, filename: &str) -> Result<(),
let (_, _, mut re_hw) = load_regressor_without_weights(&mut input_bufreader, None)?;
// TODO: Here we should do safety comparison that the regressor is really the same;
if !re.immutable {
re.overwrite_weights_from_buf(&mut input_bufreader)?;
re.overwrite_weights_from_buf(&mut input_bufreader, false)?;
} else {
re_hw.into_immutable_regressor_from_buf(re, &mut input_bufreader)?;
re_hw.into_immutable_regressor_from_buf(re, &mut input_bufreader, false)?;
}
Ok(())
}
@@ -209,7 +224,7 @@ B,featureB
let rr = regressor::get_regressor_with_weights(&mi);
let dir = tempfile::tempdir().unwrap();
let regressor_filepath = dir.path().join("test_regressor.fw");
save_regressor_to_filename(regressor_filepath.to_str().unwrap(), &mi, &vw, rr).unwrap();
save_regressor_to_filename(regressor_filepath.to_str().unwrap(), &mi, &vw, rr, false).unwrap();
}

fn lr_vec(v: Vec<feature_buffer::HashAndValue>) -> feature_buffer::FeatureBuffer {
@@ -260,7 +275,7 @@ B,featureB
// Now we test conversion to fixed regressor
{
mi.optimizer = model_instance::Optimizer::SGD;
let re_fixed = re.immutable_regressor(&mi).unwrap();
let re_fixed = re.immutable_regressor(&mi, false).unwrap();
// predict with the same feature vector
assert_eq!(re_fixed.predict(fbuf, &mut pb), expected_result);
mi.optimizer = model_instance::Optimizer::AdagradFlex;
@@ -269,7 +284,7 @@
{
let dir = tempdir().unwrap();
let regressor_filepath = dir.path().join("test_regressor2.fw");
save_regressor_to_filename(regressor_filepath.to_str().unwrap(), &mi, &vw, re).unwrap();
save_regressor_to_filename(regressor_filepath.to_str().unwrap(), &mi, &vw, re, false).unwrap();

// a) load as regular regressor
let (_mi2, _vw2, mut re2) =
@@ -364,7 +379,7 @@ B,featureB
// Now we test conversion to fixed regressor
{
mi.optimizer = Optimizer::SGD;
let re_fixed = re.immutable_regressor(&mi).unwrap();
let re_fixed = re.immutable_regressor(&mi, false).unwrap();
// predict with the same feature vector
mi.optimizer = Optimizer::AdagradFlex;
assert_epsilon!(re_fixed.predict(fbuf, &mut pb), expected_result);
@@ -373,7 +388,7 @@
{
let dir = tempdir().unwrap();
let regressor_filepath = dir.path().join("test_regressor2.fw");
save_regressor_to_filename(regressor_filepath.to_str().unwrap(), &mi, &vw, re).unwrap();
save_regressor_to_filename(regressor_filepath.to_str().unwrap(), &mi, &vw, re, false).unwrap();

// a) load as regular regressor
let (_mi2, _vw2, mut re2) =
@@ -537,14 +552,14 @@ B,featureB
.to_str()
.unwrap()
.to_owned();
save_regressor_to_filename(&regressor_filepath_1, &mi, &vw, re_1).unwrap();
save_regressor_to_filename(&regressor_filepath_1, &mi, &vw, re_1, false).unwrap();
let regressor_filepath_2 = dir
.path()
.join("test_regressor2.fw")
.to_str()
.unwrap()
.to_owned();
save_regressor_to_filename(&regressor_filepath_2, &mi, &vw, re_2).unwrap();
save_regressor_to_filename(&regressor_filepath_2, &mi, &vw, re_2, false).unwrap();

// The mutable path
let (_mi1, _vw1, mut new_re_1) =
(remaining 3 changed files not shown, including the new src/quantization.rs)
