Skip to content

Commit

Permalink
Add compression_level arg to VCF ingestion (#458)
Browse files (browse the repository at this point in the history)
  • Loading branch information
gspowley authored Sep 7, 2023
1 parent 5bc791d commit de0f19f
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions src/tiledb/cloud/vcf/ingestion.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -165,6 +165,7 @@ def create_dataset_udf(
extra_attrs: Optional[Union[Sequence[str], str]] = None,
vcf_attrs: Optional[str] = None,
anchor_gap: Optional[int] = None,
compression_level: Optional[int] = None,
verbose: bool = False,
) -> str:
"""
Expand All @@ -175,6 +176,8 @@ def create_dataset_udf(
:param extra_attrs: INFO/FORMAT fields to materialize, defaults to None
:param vcf_attrs: VCF with all INFO/FORMAT fields to materialize, defaults to None
:param anchor_gap: anchor gap for VCF dataset, defaults to None
:param compression_level: zstd compression level for the VCF dataset,
defaults to None (uses the default level in TileDB-VCF)
:param verbose: verbose logging, defaults to False
:return: dataset URI
"""
Expand Down Expand Up @@ -203,6 +206,7 @@ def create_dataset_udf(
extra_attrs=extra_attrs,
vcf_attrs=vcf_attrs,
anchor_gap=anchor_gap,
compression_level=compression_level,
)

# Create log array and add it to the dataset group
Expand Down Expand Up @@ -1016,6 +1020,7 @@ def ingest_manifest_dag(
extra_attrs: Optional[Union[Sequence[str], str]] = None,
vcf_attrs: Optional[str] = None,
anchor_gap: Optional[int] = None,
compression_level: Optional[int] = None,
verbose: bool = False,
batch_mode: bool = True,
access_credentials_name: Optional[str] = None,
Expand All @@ -1039,6 +1044,8 @@ def ingest_manifest_dag(
:param extra_attrs: INFO/FORMAT fields to materialize, defaults to None
:param vcf_attrs: VCF with all INFO/FORMAT fields to materialize, defaults to None
:param anchor_gap: anchor gap for VCF dataset, defaults to None
:param compression_level: zstd compression level for the VCF dataset,
defaults to None (uses the default level in TileDB-VCF)
:param verbose: verbose logging, defaults to False
:param batch_mode: run all DAGs in batch mode, defaults to True
:param access_credentials_name: name of role in TileDB Cloud to use in tasks
Expand Down Expand Up @@ -1067,6 +1074,7 @@ def ingest_manifest_dag(
extra_attrs=extra_attrs,
vcf_attrs=vcf_attrs,
anchor_gap=anchor_gap,
compression_level=compression_level,
verbose=verbose,
name="Create VCF dataset ",
**kwargs,
Expand Down Expand Up @@ -1446,6 +1454,7 @@ def ingest(
extra_attrs: Optional[Union[Sequence[str], str]] = DEFAULT_ATTRIBUTES,
vcf_attrs: Optional[str] = None,
anchor_gap: Optional[int] = None,
compression_level: Optional[int] = None,
manifest_batch_size: int = MANIFEST_BATCH_SIZE,
manifest_workers: int = MANIFEST_WORKERS,
vcf_batch_size: int = VCF_BATCH_SIZE,
Expand Down Expand Up @@ -1489,6 +1498,8 @@ def ingest(
:param vcf_attrs: VCF with all INFO/FORMAT fields to materialize,
defaults to None
:param anchor_gap: anchor gap for VCF dataset, defaults to None
:param compression_level: zstd compression level for the VCF dataset,
defaults to None (uses the default level in TileDB-VCF)
:param manifest_batch_size: batch size for manifest ingestion,
defaults to MANIFEST_BATCH_SIZE
:param manifest_workers: number of workers for manifest ingestion,
Expand Down Expand Up @@ -1548,6 +1559,7 @@ def ingest(
extra_attrs=extra_attrs,
vcf_attrs=vcf_attrs,
anchor_gap=anchor_gap,
compression_level=compression_level,
verbose=verbose,
batch_mode=batch_mode,
access_credentials_name=access_credentials_name,
Expand Down

0 comments on commit de0f19f

Please sign in to comment.