Commit b4bce66

fix linting issues

melodywang060 committed Oct 11, 2024
1 parent 6993b09 commit b4bce66
Showing 34 changed files with 552 additions and 182 deletions.
8 changes: 6 additions & 2 deletions extensions/rapids_notebook_files.py
@@ -16,7 +16,9 @@ def walk_files(app, dir, outdir):
     related_notebook_files = {}
     for page in dir.glob("*"):
         if page.is_dir():
-            related_notebook_files[page.name] = walk_files(app, page, outdir / page.name)
+            related_notebook_files[page.name] = walk_files(
+                app, page, outdir / page.name
+            )
         else:
             with contextlib.suppress(OSError):
                 os.remove(str(outdir / page.name))
@@ -57,7 +59,9 @@ def find_notebook_related_files(app, pagename, templatename, context, doctree):
     path_to_output_parent = output_root / rel_page_parent
 
     # Copy all related files to output and apply templating
-    related_notebook_files = walk_files(app, path_to_page_parent, path_to_output_parent)
+    related_notebook_files = walk_files(
+        app, path_to_page_parent, path_to_output_parent
+    )
 
     # Make archive of related files
     if related_notebook_files and len(related_notebook_files) > 1:
18 changes: 14 additions & 4 deletions extensions/rapids_related_examples.py
@@ -22,7 +22,9 @@ def read_notebook_tags(path: str) -> list[str]:
         return []
 
 
-def generate_notebook_grid_myst(notebooks: list[str], env: BuildEnvironment) -> list[str]:
+def generate_notebook_grid_myst(
+    notebooks: list[str], env: BuildEnvironment
+) -> list[str]:
     """Generate sphinx-design grid of notebooks in MyST markdown.
 
     Take a list of notebook documents and render out some MyST markdown displaying those
@@ -73,7 +75,11 @@ def get_title_for_notebook(path: str) -> str:
         if i == len(cell_source) - 1:  # no next_token
             continue
         next_token = cell_source[i + 1]
-        if token.type == "heading_open" and token.tag == "h1" and next_token.type == "inline":
+        if (
+            token.type == "heading_open"
+            and token.tag == "h1"
+            and next_token.type == "inline"
+        ):
             return next_token.content
     raise ValueError("No top-level heading found")
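
Note: the h1 scan in this hunk walks a markdown-it token stream, where a title arrives as a "heading_open" token (tag "h1") followed by an "inline" token carrying the text. A minimal standalone sketch of that pattern, assuming markdown-it-py is the tokenizer in play (not part of the diff):

from markdown_it import MarkdownIt

tokens = MarkdownIt().parse("# My Notebook\n\nSome text")
for i, token in enumerate(tokens):
    if (
        token.type == "heading_open"
        and token.tag == "h1"
        and i + 1 < len(tokens)
        and tokens[i + 1].type == "inline"
    ):
        print(tokens[i + 1].content)  # -> "My Notebook"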

@@ -140,7 +146,9 @@ def add_notebook_tag_map_to_context(app, pagename, templatename, context, doctre
         except KeyError:
             tag_tree[root] = [suffix]
     context["notebook_tag_tree"] = tag_tree
-    context["notebook_tags"] = [tag for tag, pages in app.env.notebook_tag_map.items() if pagename in pages]
+    context["notebook_tags"] = [
+        tag for tag, pages in app.env.notebook_tag_map.items() if pagename in pages
+    ]
 
 
 class NotebookGalleryTocTree(TocTree):
@@ -154,7 +162,9 @@ def run(self) -> list[nodes.Node]:
         output += toctree
 
         # Generate the card grid for all items in the toctree
-        notebooks = [notebook for _, notebook in toctree[0].children[0].attributes["entries"]]
+        notebooks = [
+            notebook for _, notebook in toctree[0].children[0].attributes["entries"]
+        ]
         grid_markdown = generate_notebook_grid_myst(notebooks=notebooks, env=self.env)
         for node in parse_markdown(markdown=grid_markdown, state=self.state):
             gallery += node
12 changes: 9 additions & 3 deletions extensions/rapids_version_templating.py
@@ -49,7 +49,9 @@ def visit_reference(self, node: nodes.reference) -> None:
         uri_str = re.sub(r"~~~(.*?)~~~", r"{{ \1 }}", uri_str)
 
         # fill in appropriate values based on app context
-        node.attributes["refuri"] = re.sub(r"(?<!\$)\{\{.*?\}\}", self.template_func, uri_str)
+        node.attributes["refuri"] = re.sub(
+            r"(?<!\$)\{\{.*?\}\}", self.template_func, uri_str
+        )
 
         # update the document
         node.parent.replace(node, node)
@@ -59,15 +61,19 @@ def visit_Text(self, node: nodes.Text) -> None:
         Replace template strings in generic text.
 
         This roughly corresponds to HTML ``<p>``, ``<pre>``, and similar elements.
         """
-        new_node = nodes.Text(re.sub(r"(?<!\$)\{\{.*?\}\}", self.template_func, node.astext()))
+        new_node = nodes.Text(
+            re.sub(r"(?<!\$)\{\{.*?\}\}", self.template_func, node.astext())
+        )
         node.parent.replace(node, new_node)
 
     def template_func(self, match: re.Match) -> str:
         """
         Replace template strings like ``{{ rapids_version }}`` with real
         values like ``24.10``.
         """
-        return self.app.builder.templates.render_string(source=match.group(), context=self.app.config.rapids_version)
+        return self.app.builder.templates.render_string(
+            source=match.group(), context=self.app.config.rapids_version
+        )
 
 
 def version_template(
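
Note: the "(?<!\$)" negative lookbehind in both substitutions above is what keeps GitHub-Actions-style "${{ ... }}" expressions intact while plain "{{ ... }}" placeholders get templated. A standalone sketch of that behaviour (the dict lookup stands in for the real Jinja rendering, and the context values here are made-up examples):

import re

TEMPLATE_RE = re.compile(r"(?<!\$)\{\{.*?\}\}")
context = {"rapids_version": "24.10"}  # hypothetical stand-in for app config

def render(match: re.Match) -> str:
    key = match.group().strip("{} ")  # "{{ rapids_version }}" -> "rapids_version"
    return context.get(key, match.group())

text = "Install RAPIDS {{ rapids_version }}; CI uses ${{ github.ref }}."
print(TEMPLATE_RE.sub(render, text))
# -> Install RAPIDS 24.10; CI uses ${{ github.ref }}.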
2 changes: 1 addition & 1 deletion package-lock.json

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
@@ -1,5 +1,5 @@
 {
   "devDependencies": {
-    "prettier": "3.3.3"
+    "prettier": "^3.3.3"
   }
 }
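
Note: the caret range "^3.3.3" lets npm resolve any compatible 3.x release at or above 3.3.3 (for example 3.4.0, but never 4.0.0), instead of pinning Prettier to exactly 3.3.3.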
20 changes: 15 additions & 5 deletions source/conf.py
@@ -43,12 +43,18 @@
     },
 }
 rapids_version = (
-    versions["stable"] if os.environ.get("DEPLOYMENT_DOCS_BUILD_STABLE", "false") == "true" else versions["nightly"]
+    versions["stable"]
+    if os.environ.get("DEPLOYMENT_DOCS_BUILD_STABLE", "false") == "true"
+    else versions["nightly"]
 )
 rapids_version["rapids_conda_channels_list"] = [
-    channel for channel in rapids_version["rapids_conda_channels"].split(" ") if channel != "-c"
+    channel
+    for channel in rapids_version["rapids_conda_channels"].split(" ")
+    if channel != "-c"
 ]
-rapids_version["rapids_conda_packages_list"] = rapids_version["rapids_conda_packages"].split(" ")
+rapids_version["rapids_conda_packages_list"] = rapids_version[
+    "rapids_conda_packages"
+].split(" ")
 
 # -- General configuration ---------------------------------------------------
 
@@ -88,7 +94,9 @@
 # -- Options for notebooks -------------------------------------------------
 
 nb_execution_mode = "off"
-rapids_deployment_notebooks_base_url = "https://github.com/rapidsai/deployment/blob/main/source/"
+rapids_deployment_notebooks_base_url = (
+    "https://github.com/rapidsai/deployment/blob/main/source/"
+)
 
 # -- Options for HTML output -------------------------------------------------
 
@@ -138,6 +146,8 @@
 def setup(app):
     app.add_css_file("https://docs.rapids.ai/assets/css/custom.css")
     app.add_css_file("css/custom.css")
-    app.add_js_file("https://docs.rapids.ai/assets/js/custom.js", loading_method="defer")
+    app.add_js_file(
+        "https://docs.rapids.ai/assets/js/custom.js", loading_method="defer"
+    )
     app.add_js_file("js/nav.js", loading_method="defer")
     app.add_js_file("js/notebook-gallery.js", loading_method="defer")
4 changes: 3 additions & 1 deletion source/examples/rapids-1brc-single-node/notebook.ipynb
@@ -200,7 +200,9 @@
    "source": [
     "n = 1_000_000_000  # Number of rows of data to generate\n",
     "\n",
-    "lookup_df = cudf.read_csv(\"lookup.csv\")  # Load our lookup table of stations and their mean temperatures\n",
+    "lookup_df = cudf.read_csv(\n",
+    "    \"lookup.csv\"\n",
+    ")  # Load our lookup table of stations and their mean temperatures\n",
     "std = 10.0  # We assume temperatures are normally distributed with a standard deviation of 10\n",
     "chunksize = 2e8  # Set the number of rows to generate in one go (reduce this if you run into GPU RAM limits)\n",
     "filename = Path(\"measurements.txt\")  # Choose where to write to\n",
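
Note: this cell sets up the 1BRC-style generator: station means come from lookup.csv, temperatures are drawn from a normal distribution with standard deviation 10, and data is produced chunksize rows at a time. A rough standalone sketch of one chunk under those assumptions (the column name and the use of CuPy are illustrative, not from the diff):

import cupy as cp

means = cp.asarray(lookup_df["mean_temp"].values)  # hypothetical column name
stations = cp.random.randint(0, len(means), size=int(chunksize))  # random station per row
temps = means[stations] + std * cp.random.standard_normal(int(chunksize))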
@@ -995,8 +995,12 @@
     "\n",
     "\n",
     "def map_haversine(part):\n",
-    "    pickup = cuspatial.GeoSeries.from_points_xy(part[[\"pickup_longitude\", \"pickup_latitude\"]].interleave_columns())\n",
-    "    dropoff = cuspatial.GeoSeries.from_points_xy(part[[\"dropoff_longitude\", \"dropoff_latitude\"]].interleave_columns())\n",
+    "    pickup = cuspatial.GeoSeries.from_points_xy(\n",
+    "        part[[\"pickup_longitude\", \"pickup_latitude\"]].interleave_columns()\n",
+    "    )\n",
+    "    dropoff = cuspatial.GeoSeries.from_points_xy(\n",
+    "        part[[\"dropoff_longitude\", \"dropoff_latitude\"]].interleave_columns()\n",
+    "    )\n",
     "    return cuspatial.haversine_distance(pickup, dropoff)\n",
     "\n",
     "\n",
@@ -1502,7 +1506,9 @@
     "from random import randrange\n",
     "\n",
     "\n",
-    "def generate_workload(stages=3, min_width=1, max_width=3, variation=1, input_workload=None):\n",
+    "def generate_workload(\n",
+    "    stages=3, min_width=1, max_width=3, variation=1, input_workload=None\n",
+    "):\n",
     "    graph = [input_workload] if input_workload is not None else [run_haversine()]\n",
     "    last_width = min_width\n",
     "    for _ in range(stages):\n",
@@ -1640,25 +1646,35 @@
    ],
    "source": [
     "%%time\n",
-    "start_time = (datetime.datetime.now() - datetime.timedelta(minutes=15)).strftime(\"%Y-%m-%dT%H:%M:%SZ\")\n",
+    "start_time = (datetime.datetime.now() - datetime.timedelta(minutes=15)).strftime(\n",
+    "    \"%Y-%m-%dT%H:%M:%SZ\"\n",
+    ")\n",
     "try:\n",
     "    # Start with a couple of concurrent workloads\n",
     "    workload = generate_workload(stages=10, max_width=2)\n",
     "    # Then increase demand as more users appear\n",
-    "    workload = generate_workload(stages=5, max_width=5, min_width=3, variation=5, input_workload=workload)\n",
+    "    workload = generate_workload(\n",
+    "        stages=5, max_width=5, min_width=3, variation=5, input_workload=workload\n",
+    "    )\n",
     "    # Now reduce the workload for a longer period of time, this could be over a lunchbreak or something\n",
     "    workload = generate_workload(stages=30, max_width=2, input_workload=workload)\n",
     "    # Everyone is back from lunch and it hitting the cluster hard\n",
-    "    workload = generate_workload(stages=10, max_width=10, min_width=3, variation=5, input_workload=workload)\n",
+    "    workload = generate_workload(\n",
+    "        stages=10, max_width=10, min_width=3, variation=5, input_workload=workload\n",
+    "    )\n",
     "    # The after lunch rush is easing\n",
-    "    workload = generate_workload(stages=5, max_width=5, min_width=3, variation=5, input_workload=workload)\n",
+    "    workload = generate_workload(\n",
+    "        stages=5, max_width=5, min_width=3, variation=5, input_workload=workload\n",
+    "    )\n",
     "    # As we get towards the end of the day demand slows off again\n",
     "    workload = generate_workload(stages=10, max_width=2, input_workload=workload)\n",
     "    workload.compute()\n",
     "finally:\n",
     "    client.close()\n",
     "    cluster.close()\n",
-    "    end_time = (datetime.datetime.now() + datetime.timedelta(minutes=15)).strftime(\"%Y-%m-%dT%H:%M:%SZ\")"
+    "    end_time = (datetime.datetime.now() + datetime.timedelta(minutes=15)).strftime(\n",
+    "        \"%Y-%m-%dT%H:%M:%SZ\"\n",
+    "    )"
    ]
   },
   {
@@ -2021,10 +2037,14 @@
     "    end_time,\n",
     "    \"1s\",\n",
     ")\n",
-    "running_pods = running_pods[running_pods.columns.drop(list(running_pods.filter(regex=\"prepull\")))]\n",
+    "running_pods = running_pods[\n",
+    "    running_pods.columns.drop(list(running_pods.filter(regex=\"prepull\")))\n",
+    "]\n",
     "nodes = p.query_range(\"count(kube_node_info)\", start_time, end_time, \"1s\")\n",
     "nodes.columns = [\"Available GPUs\"]\n",
-    "nodes[\"Available GPUs\"] = nodes[\"Available GPUs\"] * 2  # We know our nodes each had 2 GPUs\n",
+    "nodes[\"Available GPUs\"] = (\n",
+    "    nodes[\"Available GPUs\"] * 2\n",
+    ")  # We know our nodes each had 2 GPUs\n",
     "nodes[\"Utilized GPUs\"] = running_pods.sum(axis=1)"
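
Note: the running_pods line in the last hunk drops every column whose name matches "prepull", so image pre-pull pods are not counted as GPU work. The same pandas pattern on a toy frame, with made-up pod names:

import pandas as pd

df = pd.DataFrame({"pod-a": [1], "pod-b": [2], "prepull-xyz": [3]})
df = df[df.columns.drop(list(df.filter(regex="prepull")))]
print(list(df.columns))  # ['pod-a', 'pod-b']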
8 changes: 6 additions & 2 deletions source/examples/rapids-azureml-hpo/notebook.ipynb
@@ -218,7 +218,9 @@
     "    )\n",
     "    ml_client.compute.begin_create_or_update(gpu_target).result()\n",
     "\n",
-    "    print(f\"AMLCompute with name {gpu_target.name} is created, the compute size is {gpu_target.size}\")"
+    "    print(\n",
+    "        f\"AMLCompute with name {gpu_target.name} is created, the compute size is {gpu_target.size}\"\n",
+    "    )"
    ]
   },
   {
@@ -485,7 +487,9 @@
     "\n",
     "\n",
     "# Define the limits for this sweep\n",
-    "sweep_job.set_limits(max_total_trials=10, max_concurrent_trials=2, timeout=18000, trial_timeout=3600)\n",
+    "sweep_job.set_limits(\n",
+    "    max_total_trials=10, max_concurrent_trials=2, timeout=18000, trial_timeout=3600\n",
+    ")\n",
     "\n",
     "\n",
     "# Specify your experiment details\n",
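
Note: for scale here, set_limits caps the sweep at 10 trials total with 2 running concurrently, and the timeout values are in seconds, so 18000 is a 5-hour budget for the whole sweep and 3600 is 1 hour per trial.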
48 changes: 36 additions & 12 deletions source/examples/rapids-azureml-hpo/rapids_csp_azure.py
@@ -132,7 +132,9 @@ def load_hyperparams(self, model_name="XGBoost"):
             self.log_to_file(str(error))
             return
 
-    def load_data(self, filename="dataset.orc", col_labels=None, y_label="ArrDelayBinary"):
+    def load_data(
+        self, filename="dataset.orc", col_labels=None, y_label="ArrDelayBinary"
+    ):
         """
         Loading the data into the object from the filename and based on the columns that we are
         interested in. Also, generates y_label from 'ArrDelay' column to convert this into a binary
@@ -183,7 +185,9 @@ def load_data(self, filename="dataset.orc", col_labels=None, y_label="ArrDelayBi
 
                 elif "multi" in self.compute_type:
                     self.log_to_file("\n\tReading using dask dataframe")
-                    dataset = dask.dataframe.read_parquet(target_filename, columns=col_labels)
+                    dataset = dask.dataframe.read_parquet(
+                        target_filename, columns=col_labels
+                    )
 
             elif "GPU" in self.compute_type:
                 # GPU Reading Option
@@ -201,7 +205,9 @@ def load_data(self, filename="dataset.orc", col_labels=None, y_label="ArrDelayBi
 
                 elif "multi" in self.compute_type:
                     self.log_to_file("\n\tReading using dask_cudf")
-                    dataset = dask_cudf.read_parquet(target_filename, columns=col_labels)
+                    dataset = dask_cudf.read_parquet(
+                        target_filename, columns=col_labels
+                    )
 
             # cast all columns to float32
             for col in dataset.columns:
@@ -216,10 +222,14 @@ def load_data(self, filename="dataset.orc", col_labels=None, y_label="ArrDelayBi
             dataset = dataset.fillna(0.0)  # Filling the null values. Needed for dask-cudf
 
         self.log_to_file(f"\n\tIngestion completed in {ingestion_timer.duration}")
-        self.log_to_file(f"\n\tDataset descriptors: {dataset.shape}\n\t{dataset.dtypes}")
+        self.log_to_file(
+            f"\n\tDataset descriptors: {dataset.shape}\n\t{dataset.dtypes}"
+        )
         return dataset, col_labels, y_label, ingestion_timer.duration
 
-    def split_data(self, dataset, y_label, train_size=0.8, random_state=0, shuffle=True):
+    def split_data(
+        self, dataset, y_label, train_size=0.8, random_state=0, shuffle=True
+    ):
         """
         Splitting data into train and test split, has appropriate imports for different compute modes.
         CPU compute - Uses sklearn, we manually filter y_label column in the split call
@@ -311,9 +321,13 @@ def train_model(self, X_train, y_train, model_params):
 
         try:
             if self.model_type == "XGBoost":
-                trained_model, training_time = self.fit_xgboost(X_train, y_train, model_params)
+                trained_model, training_time = self.fit_xgboost(
+                    X_train, y_train, model_params
+                )
             elif self.model_type == "RandomForest":
-                trained_model, training_time = self.fit_random_forest(X_train, y_train, model_params)
+                trained_model, training_time = self.fit_random_forest(
+                    X_train, y_train, model_params
+                )
         except Exception as error:
             self.log_to_file("\n\n!error during model training: " + str(error))
         self.log_to_file(f"\n\tFinished training in {training_time:.4f} s")
@@ -340,7 +354,9 @@ def fit_xgboost(self, X_train, y_train, model_params):
             )
         elif "multi" in self.compute_type:
             self.log_to_file("\n\tTraining multi-GPU XGBoost")
-            train_DMatrix = xgboost.dask.DaskDMatrix(self.client, data=X_train, label=y_train)
+            train_DMatrix = xgboost.dask.DaskDMatrix(
+                self.client, data=X_train, label=y_train
+            )
             trained_model = xgboost.dask.train(
                 self.client,
                 dtrain=train_DMatrix,
@@ -425,8 +441,12 @@ def evaluate_test_perf(self, trained_model, X_test, y_test, threshold=0.5):
         try:
             if self.model_type == "XGBoost":
                 if "multi" in self.compute_type:
-                    test_DMatrix = xgboost.dask.DaskDMatrix(self.client, data=X_test, label=y_test)
-                    xgb_pred = xgboost.dask.predict(self.client, trained_model, test_DMatrix).compute()
+                    test_DMatrix = xgboost.dask.DaskDMatrix(
+                        self.client, data=X_test, label=y_test
+                    )
+                    xgb_pred = xgboost.dask.predict(
+                        self.client, trained_model, test_DMatrix
+                    ).compute()
                     xgb_pred = (xgb_pred > threshold) * 1.0
                     test_accuracy = accuracy_score(y_test.compute(), xgb_pred)
                 elif "single" in self.compute_type:
@@ -439,9 +459,13 @@ def evaluate_test_perf(self, trained_model, X_test, y_test, threshold=0.5):
                 if "multi" in self.compute_type:
                     cuml_pred = trained_model.predict(X_test).compute()
                     self.log_to_file("\n\tPrediction complete")
-                    test_accuracy = accuracy_score(y_test.compute(), cuml_pred, convert_dtype=True)
+                    test_accuracy = accuracy_score(
+                        y_test.compute(), cuml_pred, convert_dtype=True
+                    )
                 elif "single" in self.compute_type:
-                    test_accuracy = trained_model.score(X_test, y_test.astype("int32"))
+                    test_accuracy = trained_model.score(
+                        X_test, y_test.astype("int32")
+                    )
 
         except Exception as error:
             self.log_to_file("\n\n!error during inference: " + str(error))