Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Align Glue component with Iberia deployment #25

Merged
13 commits merged on Oct 14, 2024
4 changes: 4 additions & 0 deletions devenv.nix
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@
};
};

# Java is required for PySpark
languages.java.enable = true;
languages.java.jdk.package = pkgs.jdk8; # Java version running on AWS Glue

enterShell = ''
hello
pdm install
Expand Down
12 changes: 3 additions & 9 deletions examples/sparkle/Pulumi.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
name: object_storage
name: simple-spark-application
runtime:
name: python
options:
toolchain: pip
virtualenv: venv
description: A minimal Azure Native Python Pulumi program
config:
pulumi:tags:
value:
pulumi:template: azure-python
description: A minimal spark application that uses Sparkle
region: eu-west-1
8 changes: 3 additions & 5 deletions examples/sparkle/__main__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
from damavand.cloud.provider import AwsProvider
from damavand.factories import SparkControllerFactory

Expand All @@ -10,7 +9,7 @@ def main() -> None:
spark_factory = SparkControllerFactory(
provider=AwsProvider(
app_name="my-app",
region="us-west-2",
region="eu-west-1",
),
tags={"env": "dev"},
)
Expand All @@ -22,10 +21,9 @@ def main() -> None:
CustomerOrders(),
],
)
# app_name = os.getenv("APP_NAME", "products-app") # Get app name on runtime

app_name = os.getenv("APP_NAME", "default_app") # Get app name on runtime

spark_controller.run_application(app_name)
# spark_controller.run_application(app_name)
spark_controller.provision()


Expand Down
15 changes: 14 additions & 1 deletion examples/sparkle/applications/orders.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from sparkle.config import Config
from sparkle.config import Config, IcebergConfig, KafkaReaderConfig
from sparkle.config.kafka_config import KafkaConfig, Credentials
from sparkle.writer.iceberg_writer import IcebergWriter
from sparkle.application import Sparkle
from sparkle.reader.kafka_reader import KafkaReader
Expand All @@ -15,6 +16,18 @@ def __init__(self):
version="0.0.1",
database_bucket="s3://test-bucket",
checkpoints_bucket="s3://test-checkpoints",
iceberg_output=IcebergConfig(
database_name="all_products",
database_path="",
table_name="orders_v1",
),
kafka_input=KafkaReaderConfig(
KafkaConfig(
bootstrap_servers="localhost:9119",
credentials=Credentials("test", "test"),
),
kafka_topic="src_orders_v1",
),
),
readers={"orders": KafkaReader},
writers=[IcebergWriter],
Expand Down
14 changes: 11 additions & 3 deletions examples/sparkle/applications/products.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from sparkle.application import Sparkle
from sparkle.config import Config
from sparkle.config import Config, IcebergConfig, TableConfig
from sparkle.writer.iceberg_writer import IcebergWriter
from sparkle.writer.kafka_writer import KafkaStreamPublisher
from sparkle.reader.table_reader import TableReader

from pyspark.sql import DataFrame
Expand All @@ -16,11 +15,20 @@ def __init__(self):
version="0.0.1",
database_bucket="s3://test-bucket",
checkpoints_bucket="s3://test-checkpoints",
iceberg_output=IcebergConfig(
database_name="all_products",
database_path="",
table_name="products_v1",
),
hive_table_input=TableConfig(
database="source_database",
table="products_v1",
bucket="",
),
),
readers={"products": TableReader},
writers=[
IcebergWriter,
KafkaStreamPublisher,
],
)

Expand Down
42 changes: 28 additions & 14 deletions pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ dependencies = [
"pulumi-azure-native>=2.51.0",
"pulumi-random>=4.16.3",
"sparkle @ git+https://github.com/DataChefHQ/[email protected]",
"damavand @ file:///${PROJECT_ROOT}/",
]
requires-python = ">=3.11.0"
readme = "README.md"
Expand Down
1 change: 0 additions & 1 deletion src/damavand/base/controllers/spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ def application_with_id(self, app_id: str) -> Sparkle:
Returns:
Sparkle: The Spark application.
"""

for app in self.applications:
if app.config.app_id == app_id:
return app
Expand Down
Loading
Loading