diff --git a/configs/neuralangelo-colmap_dense.yaml b/configs/neuralangelo-colmap_dense.yaml
index efa1853..839bc5f 100644
--- a/configs/neuralangelo-colmap_dense.yaml
+++ b/configs/neuralangelo-colmap_dense.yaml
@@ -5,7 +5,7 @@ seed: 42
 dataset:
   name: colmap
   root_dir: ???
-  img_downscale: 1 # specify training image size by either img_wh or img_downscale
+  img_downscale: 2 # specify training image size by either img_wh or img_downscale
   up_est_method: ground # if true, use estimated ground plane normal direction as up direction
   center_est_method: lookat
   n_test_traj_steps: 30
@@ -15,7 +15,7 @@ dataset:
 
 model:
   name: neus
-  radius: 2.5
+  radius: 1.5
   num_samples_per_ray: 1024
   train_num_rays: 128
   max_train_num_rays: 8192
diff --git a/configs/neuralangelo-colmap_sparse-wmask.yaml b/configs/neuralangelo-colmap_sparse-wmask.yaml
index 5f9b7f5..e99f1d0 100644
--- a/configs/neuralangelo-colmap_sparse-wmask.yaml
+++ b/configs/neuralangelo-colmap_sparse-wmask.yaml
@@ -134,8 +134,8 @@ trainer:
   max_steps: 20000
   log_every_n_steps: 100
   num_sanity_val_steps: 0
-  val_check_interval: 5000
+  val_check_interval: 10000
   limit_train_batches: 1.0
-  limit_val_batches: 2
+  limit_val_batches: 1
   enable_progress_bar: true
   precision: 16
\ No newline at end of file
diff --git a/configs/neuralangelo-colmap_sparse-wreflection.yaml b/configs/neuralangelo-colmap_sparse-wreflection.yaml
new file mode 100644
index 0000000..aac5d75
--- /dev/null
+++ b/configs/neuralangelo-colmap_sparse-wreflection.yaml
@@ -0,0 +1,183 @@
+name: neuralangelo-colmap_sparse-wreflection-${basename:${dataset.root_dir}}
+tag: ""
+seed: 42
+
+dataset:
+  name: colmap
+  root_dir: ???
+  img_downscale: 4 # specify training image size by either img_wh or img_downscale
+  up_est_method: ground # how the up direction is estimated; "ground" uses the estimated ground plane normal direction
+  center_est_method: lookat
+  n_test_traj_steps: 30
+  apply_mask: false
+  load_data_on_gpu: false
+  dense_pcd_path: null
+
+model:
+  name: neus
+  radius: 1.5
+  num_samples_per_ray: 1024
+  train_num_rays: 128
+  max_train_num_rays: 8192
+  grid_prune: true
+  grid_prune_occ_thre: 0.001
+  dynamic_ray_sampling: true
+  batch_image_sampling: true
+  randomized: true
+  ray_chunk: 2048
+  cos_anneal_end: 20000
+  learned_background: true
+  background_color: random
+  variance:
+    init_val: 0.3
+    modulate: false
+  geometry:
+    name: volume-sdf
+    radius: ${model.radius}
+    feature_dim: 65
+    grad_type: analytic
+    finite_difference_eps: progressive
+    isosurface:
+      method: mc
+      resolution: 512
+      chunk: 2097152
+      threshold: 0.001
+    xyz_encoding_config:
+      otype: ProgressiveBandHashGrid
+      n_levels: 16
+      n_features_per_level: 2
+      log2_hashmap_size: 19
+      base_resolution: 32
+      per_level_scale: 1.3195079107728942
+      include_xyz: true
+      start_level: 4
+      start_step: 5000
+      update_steps: 1000
+    mlp_network_config:
+      otype: VanillaMLP
+      activation: ReLU
+      output_activation: none
+      n_neurons: 64
+      n_hidden_layers: 2
+      sphere_init: true
+      sphere_init_radius: 0.5
+      weight_norm: true
+  texture:
+    name: volume-dual-colorV2
+    input_feature_dim: ${add:${model.geometry.feature_dim},3} # surface normal as additional input
+    diffuse_warmup_steps: 5000 
+    dir_encoding_config:
+      otype: SphericalHarmonics
+      degree: 4
+    mlp_network_config:
+      otype: VanillaMLP
+      activation: ReLU
+      output_activation: none
+      n_neurons: 256
+      n_hidden_layers: 4
+    color_activation: sigmoid
+  # background model configurations
+  num_samples_per_ray_bg: 256
+  geometry_bg:
+    name: volume-density
+    radius: ${model.radius}
+    feature_dim: 8
+    density_activation: trunc_exp
+    density_bias: -1
+    isosurface: null
+    xyz_encoding_config:
+      otype: ProgressiveBandHashGrid
+      n_levels: 16
+      n_features_per_level: 2
+      log2_hashmap_size: 19
+      base_resolution: 32
+      per_level_scale: 1.3195079107728942
+      include_xyz: true
+      start_level: 4
+      start_step: 5000
+      update_steps: 1000
+    mlp_network_config:
+      otype: VanillaMLP
+      activation: ReLU
+      output_activation: none
+      n_neurons: 64
+      n_hidden_layers: 1
+  texture_bg:
+    name: volume-radiance
+    input_feature_dim: ${model.geometry_bg.feature_dim}
+    dir_encoding_config:
+      otype: SphericalHarmonics
+      degree: 4
+    mlp_network_config:
+      otype: VanillaMLP
+      activation: ReLU
+      output_activation: none
+      n_neurons: 64
+      n_hidden_layers: 2
+    color_activation: sigmoid
+
+system:
+  name: neus-system
+  loss:
+    lambda_sdf_l1: 0
+    lambda_normal: 0.
+    lambda_rgb_mse: 5.
+    lambda_rgb_l1: 0.
+    lambda_mask: 0.0
+    lambda_eikonal: 0.1
+    lambda_curvature: [0, 0, 5.e-2, 5000]
+    lambda_sparsity: 0.0
+    lambda_distortion: 0.0
+    lambda_distortion_bg: 0.0
+    lambda_opaque: 0.0
+    sparsity_scale: 1.
+  optimizer:
+    name: AdamW
+    args:
+      lr: 0.01
+      betas: [0.9, 0.99]
+      eps: 1.e-15
+    params:
+      geometry:
+        lr: 0.01
+      texture:
+        lr: 0.01
+      geometry_bg:
+        lr: 0.01
+      texture_bg:
+        lr: 0.01
+      variance:
+        lr: 0.001
+  warmup_steps: 500
+  scheduler:
+    name: SequentialLR
+    interval: step
+    milestones:
+      - ${system.warmup_steps}
+    schedulers:
+      - name: LinearLR # linear warm-up in the first system.warmup_steps steps
+        args:
+          start_factor: 0.01
+          end_factor: 1.0
+          total_iters: ${system.warmup_steps}
+      - name: ExponentialLR
+        args:
+          gamma: ${calc_exp_lr_decay_rate:0.1,${sub:${trainer.max_steps},${system.warmup_steps}}}
+
+checkpoint:
+  save_top_k: -1
+  every_n_train_steps: ${trainer.max_steps}
+
+export:
+  chunk_size: 2097152
+  export_vertex_color: True
+
+trainer:
+  max_steps: 20000
+  log_every_n_steps: 100
+  num_sanity_val_steps: 0
+  val_check_interval: 5000
+  limit_train_batches: 1.0
+  limit_val_batches: 1
+  enable_progress_bar: true
+  precision: 16
\ No newline at end of file
diff --git a/configs/neuralangelo-colmap_sparse.yaml b/configs/neuralangelo-colmap_sparse.yaml
index f44fe2f..d6f3e75 100644
--- a/configs/neuralangelo-colmap_sparse.yaml
+++ b/configs/neuralangelo-colmap_sparse.yaml
@@ -8,7 +8,7 @@ dataset:
   img_downscale: 2 # specify training image size by either img_wh or img_downscale
   up_est_method: ground # if true, use estimated ground plane normal direction as up direction
   center_est_method: lookat
-  n_test_traj_steps: 30
+  n_test_traj_steps: 6
   apply_mask: false
   load_data_on_gpu: false
   dense_pcd_path: null
@@ -176,8 +176,8 @@ trainer:
   max_steps: 20000
   log_every_n_steps: 100
   num_sanity_val_steps: 0
-  val_check_interval: 5000
+  val_check_interval: 10000
   limit_train_batches: 1.0
-  limit_val_batches: 2
+  limit_val_batches: 1
   enable_progress_bar: true
   precision: 16
\ No newline at end of file
diff --git a/datasets/colmap.py b/datasets/colmap.py
index 806a1c2..d112b45 100644
--- a/datasets/colmap.py
+++ b/datasets/colmap.py
@@ -314,7 +314,6 @@ def setup(self, config, split):
         self.all_points = self.all_points.float()
         self.pts3d_normal = self.pts3d_normal.float()
         self.all_points_ = contract_to_unisphere(self.all_points, 1.0, ContractionType.AABB) # points normalized to (0, 1)
-        self.all_fg_indexs, self.all_bg_indexs = self.all_fg_indexs.to(self.rank), self.all_bg_indexs.to(self.rank)
 
     def query_radius_occ(self, query_points, radius=0.01):
         
diff --git a/export.py b/export.py
index f914651..a864078 100644
--- a/export.py
+++ b/export.py
@@ -5,33 +5,19 @@
 import logging
 from datetime import datetime
 import trimesh
+import numpy as np
 
 logging.basicConfig(level=logging.INFO)
 
-def decimate_mesh(mesh: str, decimation_factor: float):
-    logging.info(f"Original mesh with {len(mesh.faces)} faces.")
-
-    # Decimate the mesh
-    if decimation_factor < 1:
-        decimation_factor = int(len(mesh.faces) * decimation_factor)
-    else:
-        decimation_factor = int(decimation_factor)
-
-    mesh = mesh.simplify_quadratic_decimation(decimation_factor)
-    logging.info(f"Decimated mesh to {len(mesh.faces)} faces.")
-
-    return mesh
     
 def main():
     logging.info("Start exporting.")
     parser = argparse.ArgumentParser()
     parser.add_argument('--gpu', default='0', help='GPU(s) to be used')
     parser.add_argument('--exp_dir', required=True)
-    parser.add_argument('--output-dir', required=True)
-    
-    parser.add_argument('--decimate', type=float, help='Specifies the desired final size of the mesh. \
-                        If the number is less than 1, it represents the final size as a percentage of the initial size. \
-                        If the number is greater than 1, it represents the desired number of faces.')
+    parser.add_argument('--flip', action='store_true')
+    parser.add_argument('--res', default=1024)
+    parser.add_argument('--output-dir', default='results')
     args, extras = parser.parse_known_args()
 
     # set CUDA_VISIBLE_DEVICES then import pytorch-lightning
@@ -39,15 +25,14 @@ def main():
     os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
     n_gpus = len(args.gpu.split(','))
 
-    code_dir = os.path.join(args.exp_dir, 'code')
+    # code_dir = os.path.join(args.exp_dir, 'code')
     ckpt_dir = os.path.join(args.exp_dir, 'ckpt')
     latest_ckpt = sorted(os.listdir(ckpt_dir), key=lambda s: int(s.split('-')[0].split('=')[1]), reverse=True)[0]
     latest_ckpt = os.path.join(ckpt_dir, latest_ckpt)
     config_path = os.path.join(args.exp_dir, 'config', 'parsed.yaml')
     
-    logging.info(f"Importing modules from cached code: {code_dir}")
-    sys.path.append(code_dir)
-    import datasets
+    # logging.info(f"Importing modules from cached code: {code_dir}")
+    # sys.path.append(code_dir)
     import systems
     import pytorch_lightning as pl
     from utils.misc import load_config    
@@ -55,6 +40,12 @@ def main():
     # parse YAML config to OmegaConf
     logging.info(f"Loading configuration: {config_path}")
     config = load_config(config_path, cli_args=extras)
+    
+    # Update level of ProgressiveBandHashGrid
+    if  config.model.geometry.xyz_encoding_config.otype == 'ProgressiveBandHashGrid':
+        config.model.geometry.xyz_encoding_config.start_level = config.model.geometry.xyz_encoding_config.n_levels
+    config.model.geometry.isosurface.resolution = args.res
+    config.export.export_vertex_color = True
     config.cmd_args = vars(args)
     
     if 'seed' not in config:
@@ -63,19 +54,29 @@ def main():
     logging.info(f"Creating system: {config.system.name}")
     system = systems.make(config.system.name, config, load_from_checkpoint=latest_ckpt)
     system.model.cuda()
-    mesh = system.model.isosurface()
+    mesh = system.model.export(config.export)
+    
+    mesh['v_pos'] = mesh['v_pos'][:, [0, 2, 1]].numpy()
+    if args.flip:
+        mesh['t_pos_idx'] = mesh['t_pos_idx'].numpy()[:, [0, 2, 1]]
+    else:
+        mesh['t_pos_idx'] = np.fliplr(mesh['t_pos_idx'].numpy())[:, [0, 2, 1]]
+    
     mesh = trimesh.Trimesh(
-        vertices=mesh['v_pos'].numpy(),
-        faces=mesh['t_pos_idx'].numpy()
+            vertices=mesh['v_pos'],
+            faces=mesh['t_pos_idx'],
+            vertex_colors=mesh['v_rgb'].numpy(),
+            vertex_normals=mesh['v_norm'].numpy()
+        )
+    mesh.visual.material = trimesh.visual.material.PBRMaterial(
+        metallicFactor=0.25,
+        roughnessFactor=0.25
     )
     
-    if args.decimate > 0:
-        logging.info("Decimating mesh.")
-        mesh = decimate_mesh(mesh, args.decimate)
-    
     os.makedirs(args.output_dir, exist_ok=True)
     logging.info("Exporting mesh.")
-    mesh.export(os.path.join(args.output_dir, 'iso_mesh.ply'))
+    mesh.export(os.path.join(args.output_dir, f'{config.name}.glb'))
+    mesh.export(os.path.join(args.output_dir, f'{config.name}.obj'))
     logging.info("Export finished successfully.")
     
 if __name__ == '__main__':
diff --git a/models/geometry.py b/models/geometry.py
index 300962e..972c6d5 100644
--- a/models/geometry.py
+++ b/models/geometry.py
@@ -14,6 +14,7 @@
 from systems.utils import update_module_step
 from nerfacc import ContractionType
 
+import trimesh
 
 def contract_to_unisphere(x, radius, contraction_type):
     if contraction_type == ContractionType.AABB:
@@ -29,21 +30,44 @@ def contract_to_unisphere(x, radius, contraction_type):
         raise NotImplementedError
     return x
 
-
+'''
+Modified from https://github.com/NVlabs/neuralangelo/blob/main/projects/neuralangelo/scripts/extract_mesh.py
+'''
 class MarchingCubeHelper(nn.Module):
-    def __init__(self, resolution, use_torch=True):
+    def __init__(self, sdf_func, bounds, resolution, block_res=256, method='mc'):
         super().__init__()
+        self.sdf_func = sdf_func
+        self.bounds = bounds
         self.resolution = resolution
-        self.use_torch = use_torch
+        self.intv = 2.0 / self.resolution  # lattice step consumed by _create_lattice_grid
+        self.block_res = block_res
         self.points_range = (0, 1)
-        if self.use_torch:
-            import torchmcubes
-            self.mc_func = torchmcubes.marching_cubes
+        self.method = method
+        try:
+            import cumcubes
+        except Exception:  # keep the best-effort CPU fallback, but let KeyboardInterrupt/SystemExit propagate
+            print("Cannot find cuda accelerated marching cube, downgraded to cpu version!")
+            self.method = 'mc'
+ 
+        if self.method == 'CuMCubes':
+            self.mc_func = cumcubes.marching_cubes
         else:
             import mcubes
             self.mc_func = mcubes.marching_cubes
         self.verts = None
+        self._create_lattice_grid()
 
+    def _create_lattice_grid(self):
+        ((x_min, x_max), (y_min, y_max), (z_min, z_max)) = self.bounds
+        self.x_grid = torch.arange(x_min, x_max, self.intv)
+        self.y_grid = torch.arange(y_min, y_max, self.intv)
+        self.z_grid = torch.arange(z_min, z_max, self.intv)
+        res_x, res_y, res_z = len(self.x_grid), len(self.y_grid), len(self.z_grid)
+        print("Extracting surface at resolution", res_x, res_y, res_z)
+        self.num_blocks_x = int(np.ceil(res_x / self.block_res))
+        self.num_blocks_y = int(np.ceil(res_y / self.block_res))
+        self.num_blocks_z = int(np.ceil(res_z / self.block_res))
+        
     def grid_vertices(self):
         if self.verts is None:
             x, y, z = torch.linspace(*self.points_range, self.resolution), torch.linspace(*self.points_range, self.resolution), torch.linspace(*self.points_range, self.resolution)
@@ -52,31 +76,51 @@ def grid_vertices(self):
             self.verts = verts
         return self.verts
 
-    def forward(self, level, threshold=0.):
-        level = level.float().view(self.resolution, self.resolution, self.resolution)
-        if self.use_torch:
-            verts, faces = self.mc_func(level.to(get_rank()), threshold)
+    def forward_(self, level, threshold=0.):
+        if self.method == 'CuMCubes':
+            verts, faces = self.mc_func(-level.to(get_rank()), threshold)
             verts, faces = verts.cpu(), faces.cpu().long()
         else:
-            verts, faces = self.mc_func(-level.numpy(), threshold) # transform to numpy
+            verts, faces = self.mc_func(-level.cpu().numpy(), threshold) # transform to numpy
             verts, faces = torch.from_numpy(verts.astype(np.float32)), torch.from_numpy(faces.astype(np.int64)) # transform back to pytorch
-        verts = verts / (self.resolution - 1.)
+        return verts, faces
+    
+    def forward(self, threshold=0.):
+        mesh_blocks = []
+        for idx in range(self.num_blocks_x * self.num_blocks_y * self.num_blocks_z):
+            block_idx_x = idx // (self.num_blocks_y * self.num_blocks_z)
+            block_idx_y = (idx // self.num_blocks_z) % self.num_blocks_y
+            block_idx_z = idx % self.num_blocks_z
+            xi = block_idx_x * self.block_res
+            yi = block_idx_y * self.block_res
+            zi = block_idx_z * self.block_res
+            x, y, z = torch.meshgrid(self.x_grid[xi:xi+self.block_res+1],
+                                    self.y_grid[yi:yi+self.block_res+1],
+                                    self.z_grid[zi:zi+self.block_res+1], indexing="ij")
+            xyz = torch.stack([x, y, z], dim=-1)
+            sdf = self.sdf_func(xyz.cuda())
+            verts, faces = self.forward_(sdf, threshold)
+            if verts.shape[0] > 0:
+                verts = verts * self.intv + xyz[0, 0, 0]
+                mesh = trimesh.Trimesh(verts.cpu().numpy(), faces.cpu().numpy())
+            else:
+                mesh = trimesh.Trimesh()
+            mesh_blocks.append(mesh)
+        mesh = trimesh.util.concatenate(mesh_blocks)
         return {
-            'v_pos': verts,
-            't_pos_idx': faces
+            'v_pos': torch.from_numpy(np.array(mesh.vertices)),
+            't_pos_idx': torch.from_numpy(np.array(mesh.faces))
         }
-
-
 class BaseImplicitGeometry(BaseModel):
     def __init__(self, config):
         super().__init__(config)
-        if self.config.isosurface is not None:
-            assert self.config.isosurface.method in ['mc', 'mc-torch']
-            if self.config.isosurface.method == 'mc-torch':
-                raise NotImplementedError("Please do not use mc-torch. It currently has some scaling issues I haven't fixed yet.")
-            self.helper = MarchingCubeHelper(self.config.isosurface.resolution, use_torch=self.config.isosurface.method=='mc-torch')
         self.radius = self.config.radius
         self.contraction_type = None # assigned in system
+        self.sdf_func = lambda x: -self.forward_level(x)
+        self.bounds = np.array([[-self.radius, self.radius], [-self.radius, self.radius], [-self.radius, self.radius]])
+        if self.config.isosurface is not None:
+            assert self.config.isosurface.method in ['mc', 'CuMCubes']
+            self.helper = MarchingCubeHelper(self.sdf_func, self.bounds, int(self.config.isosurface.resolution), method=self.config.isosurface.method)
 
     def forward_level(self, points):
         raise NotImplementedError
@@ -91,26 +135,18 @@ def batch_func(x):
             rv = self.forward_level(x).cpu()
             cleanup()
             return rv
-    
-        level = chunk_batch(batch_func, self.config.isosurface.chunk, True, self.helper.grid_vertices())
-        mesh = self.helper(level, threshold=self.config.isosurface.threshold)
-        mesh['v_pos'] = torch.stack([
-            scale_anything(mesh['v_pos'][...,0], (0, 1), (vmin[0], vmax[0])),
-            scale_anything(mesh['v_pos'][...,1], (0, 1), (vmin[1], vmax[1])),
-            scale_anything(mesh['v_pos'][...,2], (0, 1), (vmin[2], vmax[2]))
-        ], dim=-1)
+
+        bounds = np.array([[vmin[0], vmax[0]], [vmin[1], vmax[1]], [vmin[2], vmax[2]]])
+        sdf_func = lambda x: -self.forward_level(x)
+
         return mesh
 
     @torch.no_grad()
     def isosurface(self):
         if self.config.isosurface is None:
             raise NotImplementedError
-        mesh_coarse = self.isosurface_((-self.radius, -self.radius, -self.radius), (self.radius, self.radius, self.radius))
-        vmin, vmax = mesh_coarse['v_pos'].amin(dim=0), mesh_coarse['v_pos'].amax(dim=0)
-        vmin_ = (vmin - (vmax - vmin) * 0.1).clamp(-self.radius, self.radius)
-        vmax_ = (vmax + (vmax - vmin) * 0.1).clamp(-self.radius, self.radius)
-        mesh_fine = self.isosurface_(vmin_, vmax_)
-        return mesh_fine 
+        mesh = self.helper(threshold=self.config.isosurface.get('threshold', 0.001))  # honor the configured iso-level; 0.001 stays the default
+        return mesh
 
 
 @models.register('volume-density')
diff --git a/models/network_utils.py b/models/network_utils.py
index bf1c4ab..54c66f8 100644
--- a/models/network_utils.py
+++ b/models/network_utils.py
@@ -51,6 +51,7 @@ def __init__(self, in_channels, config):
         self.start_level, self.start_step, self.update_steps = config['start_level'], config['start_step'], config['update_steps']
         self.current_level = self.start_level
         self.mask = torch.zeros(self.n_level * self.n_features_per_level, dtype=torch.float32, device=get_rank())
+        self.mask[:self.current_level * self.n_features_per_level] = 1.
 
     def forward(self, x):
         enc = self.encoding(x)
diff --git a/models/neus.py b/models/neus.py
index 4b45f61..3fa1ba0 100644
--- a/models/neus.py
+++ b/models/neus.py
@@ -310,10 +310,11 @@ def regularizations(self, out):
     def export(self, export_config):
         mesh = self.isosurface()
         if export_config.export_vertex_color:
-            _, sdf_grad, feature = chunk_batch(self.geometry, export_config.chunk_size, False, mesh['v_pos'].to(self.rank), with_grad=True, with_feature=True)
+            _, sdf_grad, features = chunk_batch(self.geometry, export_config.chunk_size, False, mesh['v_pos'].to(self.rank), with_grad=True, with_feature=True)
             normal = F.normalize(sdf_grad, p=2, dim=-1)
-            rgb = self.texture(feature, -normal, normal) # set the viewing directions to the normal to get "albedo"
-            mesh['v_rgb'] = rgb.cpu()
+            base_color = torch.sigmoid(features[..., 1:4])
+            mesh['v_rgb'] = base_color.cpu()
+            mesh['v_norm'] = normal.cpu()
         return mesh
 
 @models.register('sh-neus')
diff --git a/models/texture.py b/models/texture.py
index e477303..1b356dc 100644
--- a/models/texture.py
+++ b/models/texture.py
@@ -2,7 +2,7 @@
 import torch.nn as nn
 
 import models
-from models.utils import get_activation
+from models.utils import get_activation, reflect, generate_ide_fn
 from models.network_utils import get_encoding, get_mlp
 from systems.utils import update_module_step
 from pytorch_lightning.utilities.rank_zero import rank_zero_info
@@ -47,7 +47,6 @@ def __init__(self, config):
         network = get_mlp(self.n_input_dims, self.n_output_dims, self.config.mlp_network_config)    
         self.encoding = encoding
         self.network = network
-        self.diffuse_only = False
     def forward(self, features, dirs, *args):
         dirs = (dirs + 1.) / 2. # (-1, 1) => (0, 1)
         dirs_embd = self.encoding(dirs.view(-1, self.n_dir_dims))
@@ -59,12 +58,58 @@ def forward(self, features, dirs, *args):
         return color
 
     def update_step(self, epoch, global_step):
-        self.diffuse_only = global_step < self.config.get('diffuse_warmup_steps', 0)
         update_module_step(self.encoding, epoch, global_step)
 
     def regularizations(self, out):
         return {}
 
+
+@models.register('volume-dual-colorV2')
+class VolumeDualColorV2(nn.Module):
+    def __init__(self, config):
+        super(VolumeDualColorV2, self).__init__()
+        self.config = config
+        self.n_dir_dims = self.config.get('n_dir_dims', 3)
+        self.n_output_dims = 3
+        
+        self.use_ide = False
+        if self.use_ide:
+            import numpy as np
+            self.encoding = generate_ide_fn(5)
+            num_sh = (2 ** np.arange(5) + 1).sum() * 2
+            self.n_input_dims = self.config.input_feature_dim + num_sh
+        else:
+            self.encoding = get_encoding(self.n_dir_dims, self.config.dir_encoding_config)
+            self.n_input_dims = self.config.input_feature_dim + self.encoding.n_output_dims
+        network = get_mlp(self.n_input_dims, self.n_output_dims, self.config.mlp_network_config)    
+        self.network = network
+
+    def forward(self, features, viewdirs, normals):
+        
+        VdotN = (-viewdirs * normals).sum(-1, keepdim=True)
+        refdirs = 2 * VdotN * normals + viewdirs
+        
+        if self.use_ide:
+            tint = get_activation(self.config.color_activation)(features[..., 4:5])
+            roughness = get_activation(self.config.color_activation)(features[..., 5:6])
+            
+            refdirs = (refdirs + 1.) / 2. # (-1, 1) => (0, 1)
+            refdirs_embd = self.encoding(refdirs, roughness)
+        else:
+            refdirs = (refdirs + 1.) / 2. # (-1, 1) => (0, 1)
+            refdirs_embd = self.encoding(refdirs.view(-1, self.n_dir_dims))
+            
+        network_inp = torch.cat([features.view(-1, features.shape[-1]), refdirs_embd] + [normals.view(-1, normals.shape[-1])] , dim=-1)
+        color = self.network(network_inp).view(*features.shape[:-1], self.n_output_dims).float()
+        if 'color_activation' in self.config:
+            basecolor = get_activation(self.config.color_activation)(features[..., 1:4])
+            color = get_activation(self.config.color_activation)(color) + basecolor
+        return color
+
+
+    def regularizations(self, out):
+        return {}
+
 @models.register('volume-color')
 class VolumeColor(nn.Module):
     def __init__(self, config):
diff --git a/models/utils.py b/models/utils.py
index 1d5c3cf..4e03c41 100644
--- a/models/utils.py
+++ b/models/utils.py
@@ -6,6 +6,8 @@
 import torch.nn.functional as F
 from torch.autograd import Function
 from torch.cuda.amp import custom_bwd, custom_fwd
+import math
+import numpy as np
 
 import tinycudann as tcnn
 
@@ -117,3 +118,156 @@ def cleanup():
     gc.collect()
     torch.cuda.empty_cache()
     tcnn.free_temporary_memory()
+
+
+# Copyright 2022 Google LLC
+def reflect(viewdirs, normals):
+    """Reflect view directions about normals.
+
+    The reflection of a vector v about a unit vector n is a vector u such that
+    dot(v, n) = dot(u, n), and dot(u, u) = dot(v, v). The solution to these two
+    equations is u = 2 dot(n, v) n - v.
+
+    Args:
+      viewdirs: [..., 3] array of view directions.
+      normals: [..., 3] array of normal directions (assumed to be unit vectors).
+
+    Returns:
+      [..., 3] array of reflection directions.
+    """
+    return 2.0 * torch.sum(
+        normals * viewdirs, dim=-1, keepdims=True) * normals - viewdirs
+
+
+def l2_normalize(x, eps=torch.finfo(torch.float32).eps):
+    """Normalize x to unit length along last axis."""
+    eps = torch.tensor(eps, device=x.device)
+    return x / torch.sqrt(torch.maximum(torch.sum(x**2, dim=-1, keepdims=True), eps))
+
+
+def compute_weighted_mae(weights, normals, normals_gt):
+    """Compute weighted mean angular error, assuming normals are unit length."""
+    one_eps = torch.tensor(1 - torch.finfo(torch.float32).eps,
+                           device=weights.device)
+    return (weights * torch.arccos(
+        torch.clip((normals * normals_gt).sum(-1), -one_eps,
+                   one_eps))).sum() / weights.sum() * 180.0 / torch.pi
+
+
+def compute_weighted_normal_loss(weights, normals, normals_gt):
+    N = len(weights)
+    n1 = torch.abs(normals - normals_gt).sum(axis=-1)
+    n2 = torch.abs(1 - (normals * normals_gt).sum(axis=-1))
+    return (weights * (n1 + n2)).sum() / weights.sum() / N
+
+
+def generalized_binomial_coeff(a, k):
+    """Compute generalized binomial coefficients: prod(a - i for i in range(k)) / k!."""
+    return np.prod(a - np.arange(k)) / math.factorial(k)  # stdlib factorial: the np.math alias was removed in NumPy 2.0
+
+
+def assoc_legendre_coeff(l, m, k):
+    """Compute associated Legendre polynomial coefficients.
+
+    Returns the coefficient of the cos^k(theta)*sin^m(theta) term in the
+    (l, m)th associated Legendre polynomial, P_l^m(cos(theta)).
+
+    Args:
+      l: associated Legendre polynomial degree.
+      m: associated Legendre polynomial order.
+      k: power of cos(theta).
+
+    Returns:
+      A float, the coefficient of the term corresponding to the inputs.
+    """
+    return ((-1)**m * 2**l * math.factorial(l) / math.factorial(k) /
+            math.factorial(l - k - m) *
+            generalized_binomial_coeff(0.5 * (l + k + m - 1.0), l))
+
+
+def sph_harm_coeff(l, m, k):
+    """Compute spherical harmonic coefficients."""
+    return (np.sqrt(
+        (2.0 * l + 1.0) * math.factorial(l - m) /
+        (4.0 * np.pi * math.factorial(l + m))) * assoc_legendre_coeff(l, m, k))
+
+
+def get_ml_array(deg_view):
+    """Create a list with all pairs of (l, m) values to use in the encoding."""
+    ml_list = []
+    for i in range(deg_view):
+        l = 2**i
+        # Only use nonnegative m values, later splitting real and imaginary parts.
+        for m in range(l + 1):
+            ml_list.append((m, l))
+
+    # Convert list into a numpy array.
+    ml_array = np.array(ml_list).T
+    return ml_array
+
+
+def generate_ide_fn(deg_view):
+    """Generate integrated directional encoding (IDE) function.
+
+    This function returns a function that computes the integrated directional
+    encoding from Equations 6-8 of arxiv.org/abs/2112.03907.
+
+    Args:
+      deg_view: number of spherical harmonics degrees to use.
+
+    Returns:
+      A function for evaluating integrated directional encoding.
+
+    Raises:
+      ValueError: if deg_view is larger than 5.
+    """
+    if deg_view > 5:
+        print('WARNING: Only deg_view of at most 5 is numerically stable.')
+    #   raise ValueError('Only deg_view of at most 5 is numerically stable.')
+
+    ml_array = get_ml_array(deg_view)
+    l_max = 2**(deg_view - 1)
+
+    # Create a matrix corresponding to ml_array holding all coefficients, which,
+    # when multiplied (from the right) by the z coordinate Vandermonde matrix,
+    # results in the z component of the encoding.
+    mat = torch.zeros((l_max + 1, ml_array.shape[1]))
+    for i, (m, l) in enumerate(ml_array.T):
+        for k in range(l - m + 1):
+            mat[k, i] = sph_harm_coeff(l, m, k)
+
+    def integrated_dir_enc_fn(xyz, kappa_inv):
+        """Function returning integrated directional encoding (IDE).
+
+        Args:
+          xyz: [..., 3] array of Cartesian coordinates of directions to evaluate at.
+          kappa_inv: [..., 1] reciprocal of the concentration parameter of the von
+            Mises-Fisher distribution.
+
+        Returns:
+          An array with the resulting IDE.
+        """
+        x = xyz[..., 0:1]
+        y = xyz[..., 1:2]
+        z = xyz[..., 2:3]
+
+        # Compute z Vandermonde matrix.
+        vmz = torch.cat([z**i for i in range(mat.shape[0])], dim=-1)
+
+        # Compute x+iy Vandermonde matrix.
+        vmxy = torch.cat(
+            [(x + 1j * y)**m for m in ml_array[0, :]], dim=-1)
+
+        # Get spherical harmonics.
+        sph_harms = vmxy * torch.matmul(vmz, mat.to(vmz.device))
+
+        # Apply attenuation function using the von Mises-Fisher distribution
+        # concentration parameter, kappa.
+        sigma = torch.tensor(
+            0.5 * ml_array[1, :] * (ml_array[1, :] + 1), dtype=torch.float32)
+        ide = sph_harms * torch.exp(-sigma.to(kappa_inv.device) * kappa_inv)
+
+        # Split into real and imaginary parts and return
+        return torch.cat([torch.real(ide), torch.imag(ide)], dim=-1)
+
+    return integrated_dir_enc_fn
diff --git a/requirements.txt b/requirements.txt
index ae83522..fbeaa29 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,8 @@ opencv-python
 imageio
 imageio-ffmpeg
 scipy
 PyMCubes
+CuMCubes
 pyransac3d
 torch_efficient_distloss
 tensorboard
diff --git a/utils/mixins.py b/utils/mixins.py
index 672b1b4..a3955b6 100644
--- a/utils/mixins.py
+++ b/utils/mixins.py
@@ -208,16 +208,18 @@ def save_img_sequence(self, filename, img_dir, matcher, save_format='gif', fps=3
             imgs = [cv2.cvtColor(i, cv2.COLOR_BGR2RGB) for i in imgs]
             imageio.mimsave(self.get_save_path(filename), imgs, fps=fps)
     
-    def save_mesh(self, filename, v_pos, t_pos_idx, v_tex=None, t_tex_idx=None, v_rgb=None):
+    def save_mesh(self, filename, v_pos, t_pos_idx, v_tex=None, t_tex_idx=None, v_rgb=None, v_norm=None):
         v_pos, t_pos_idx = self.convert_data(v_pos), self.convert_data(t_pos_idx)
         if v_rgb is not None:
             v_rgb = self.convert_data(v_rgb)
-
+        if v_norm is not None:
+            v_norm = self.convert_data(v_norm)  # convert the normals themselves (previously v_rgb was converted here by mistake)
         import trimesh
         mesh = trimesh.Trimesh(
             vertices=v_pos,
             faces=t_pos_idx,
-            vertex_colors=v_rgb
+            vertex_colors=v_rgb,
+            vertex_normals=v_norm
         )
         mesh.export(self.get_save_path(filename))