From 5fcedb8dcbbeb4a0d5f647021afe37e3ddfc2329 Mon Sep 17 00:00:00 2001 From: Lachlan Perrier Date: Fri, 7 Jun 2024 18:20:07 -0400 Subject: [PATCH] connecting to 5s --- lasso/build_connectors_mtc.py | 46 ++++++++++++------ lasso/emme.py | 90 ++++++++++++++++++++++++++--------- lasso/parameters.py | 2 +- 3 files changed, 101 insertions(+), 37 deletions(-) diff --git a/lasso/build_connectors_mtc.py b/lasso/build_connectors_mtc.py index 1ebb1a9..a221479 100644 --- a/lasso/build_connectors_mtc.py +++ b/lasso/build_connectors_mtc.py @@ -45,6 +45,13 @@ def get_linestring_from_row(row): closest_nodes["geometry"] = closest_nodes.apply(get_linestring_from_row, axis=1) return gpd.GeoDataFrame(closest_nodes, geometry='geometry', crs="EPSG:2875") +def get_two_way(links_df): + links_df = links_df[["A", "B"]] + + ab = links_df["A"].astype(str) + "-" + links_df["B"].astype(str) + ba = links_df["B"].astype(str) + "-" + links_df["A"].astype(str) + + return ab.isin(ba) def connect_centroids( @@ -52,19 +59,22 @@ def connect_centroids( maz_or_taz ): # taz_nodes = nodes_df[nodes_df["N"].isin(parameters.taz_N_list)] - + links_df["link_is_two_way"] = get_two_way(links_df) * 1 + print("proportion of links 2 way", links_df["link_is_two_way"].mean()) + links_df["link_is_two_way"] = links_df["link_is_two_way"] *-1 + #exclude nodes that are in intersections and already a centroid, non_centroid_nodes = nodes_df[~nodes_df["N"].isin(taz_centroid["N"])] nodes_df_not_intersections, _ = get_non_intersection_drive_nodes(links_df, non_centroid_nodes) # we want to attatch to the lowest rank nodes, - nodes_df_not_intersections = attache_highest_ft_to_node(links_df, nodes_df_not_intersections, clip_upper=7) - non_centroid_nodes = attache_highest_ft_to_node(links_df, non_centroid_nodes, clip_upper=7) + nodes_df_not_intersections = join_ft_and_two_way_to_node(links_df, nodes_df_not_intersections, clip_upper=7) + non_centroid_nodes = join_ft_and_two_way_to_node(links_df, non_centroid_nodes, clip_upper=7) #collect candidate nodes for building connector, change this to TAZ # get the x, y coordinates of the centroid of the shape for building the connector taz_area = taz_zones[[maz_or_taz, "geometry"]].merge(taz_centroid[["N", "X", "Y"]], left_on=maz_or_taz, right_on="N", how="inner") - candidate_points = gpd.sjoin(taz_area, nodes_df_not_intersections[["N", "geometry", "X", "Y", "ft", "county"]].rename(columns={"N":"N_Joined", "X": "X_net", "Y": "Y_net"})) + candidate_points = gpd.sjoin(taz_area, nodes_df_not_intersections[["N", "geometry", "X", "Y", "ft", "link_is_two_way", "county"]].rename(columns={"N":"N_Joined", "X": "X_net", "Y": "Y_net"})) # reverse ft so that when we sort ascending the ft=6 is first # after being sorted, it find the the closest ft6, if there are no ft6 then ft5 ect until @@ -82,7 +92,7 @@ def connect_centroids( # if there were None, we will try again using all non centroid nodes # ---------------------------------------------------------------------------------------------------------------------------------------------------------- taz_with_no_links = taz_area[~taz_area["N"].isin(candidate_points["N"])] - candidate_points = gpd.sjoin(taz_with_no_links, non_centroid_nodes[["N", "geometry", "X", "Y", "ft", "county"]].rename(columns={"N":"N_Joined", "X": "X_net", "Y": "Y_net"})) + candidate_points = gpd.sjoin(taz_with_no_links, non_centroid_nodes[["N", "geometry", "X", "Y", "ft", "link_is_two_way", "county"]].rename(columns={"N":"N_Joined", "X": "X_net", "Y": "Y_net"})) candidate_points["ft"] = -1 * candidate_points["ft"] for taz_node_N, taz_node_candidate_links in candidate_points.groupby("N"): @@ -91,14 +101,14 @@ def connect_centroids( links_to_be_added.append(links) taz_with_no_links = taz_with_no_links[~taz_with_no_links["N"].isin(candidate_points["N"])] - + # ---------------------------------------------------------------------------------------------------------------------------------------------------------- - # if there is is still none we will match to the closest node + # if there is is still none we will match to the closest two-way node # ---------------------------------------------------------------------------------------------------------------------------------------------------------- taz_with_no_links["geometry"] = taz_with_no_links.apply(lambda row: Point(row["X"], row["Y"]), axis=1) # cant do sjoin nearest due to dependency issues, will change and check in future # connections_outside_taz_area = gpd.sjoin_nearest(centroids_with_no_connections, nodes_df_not_intersections, max_distance=parameters.max_length_centroid_connector_when_none_in_taz) - non_centroid_nodes = non_centroid_nodes[non_centroid_nodes["ft"] != 1] + non_centroid_nodes = non_centroid_nodes[(non_centroid_nodes["ft"] != 1) & (non_centroid_nodes["link_is_two_way"] != 0)] join_nodes_tree = cKDTree(non_centroid_nodes[["X", "Y"]].to_numpy()) join_taz_nodes = [] @@ -157,7 +167,8 @@ def connect_centroids( ) ].copy() -def create_links(taz_node_N: int, taz_node_candidate_links: gpd.GeoDataFrame, sort_columns = ("ft", "squared_distance")): +def create_links(taz_node_N: int, taz_node_candidate_links: gpd.GeoDataFrame, sort_columns = ("link_is_two_way", "ft", "squared_distance")): + taz_node_candidate_links.reset_index() @@ -197,18 +208,25 @@ def create_links(taz_node_N: int, taz_node_candidate_links: gpd.GeoDataFrame, so links["taz_node_id"] = taz_node_N return links -def attache_highest_ft_to_node(links_df, nodes_df, clip_upper=6): +def join_ft_and_two_way_to_node(links_df, nodes_df, clip_upper=6): all_ft_into_nodes = pd.concat( [ - links_df[["A", "ft"]].rename(columns={"A":"N"}), - links_df[["B", "ft"]].rename(columns={"A":"N"}), + links_df[["A", "ft", "link_is_two_way"]].rename(columns={"A":"N"}), + links_df[["B", "ft", "link_is_two_way"]].rename(columns={"A":"N"}), ] ) - highest_ft_attached_to_node = all_ft_into_nodes.groupby("N").agg({"ft":"max"}) + highest_ft_attached_to_node = all_ft_into_nodes.groupby("N").agg({"ft":"max", "link_is_two_way":"max"}) + + # highest_ft_attached_to_node["ft"] = highest_ft_attached_to_node[highest_ft_attached_to_node["ft"] <= 6] + highest_ft_attached_to_node["ft"] = highest_ft_attached_to_node["ft"].clip(upper=clip_upper) - return pd.merge(nodes_df, highest_ft_attached_to_node, left_on="N", right_index=True) + + return_nodes = pd.merge(nodes_df, highest_ft_attached_to_node, left_on="N", right_index=True) # should check this is inner merge + + return_nodes = return_nodes[return_nodes["ft"] <= 5] + return return_nodes def get_non_intersection_drive_nodes(links_df, nodes_df): """ diff --git a/lasso/emme.py b/lasso/emme.py index fe881b6..ca14fdc 100644 --- a/lasso/emme.py +++ b/lasso/emme.py @@ -35,6 +35,7 @@ from lasso import StandardTransit from lasso import mtc +from importlib import reload _join = _os.path.join _dir = _os.path.dirname @@ -301,15 +302,13 @@ def extract_gtfs_from_dir(path: str): return bus_shapes - - - def prepare_table_for_tazmaz_drive_network( nodes_df, links_df, input_dir, parameters, taz_or_maz:str, + validate_network_connectivity: bool = False, ): """ @@ -374,6 +373,10 @@ def prepare_table_for_tazmaz_drive_network( # rebuild connectors for all TAZ #TODO test 6 is less links then previous implementatoin + # we want to include managed lanes connectors as well + managed_nodes = list(set(links_df[links_df["managed"] == 1]["A"]) | set(links_df[links_df["managed"] == 1]["B"])) + links_df["managed_lane_connector"] = (links_df["ft"] == 8) & (links_df["A"].isin(managed_nodes) | links_df["B"].isin(managed_nodes)) + drive_links_df = links_df[ ( ~(links_df.A.isin(parameters.taz_N_list + parameters.maz_N_list)) & @@ -388,10 +391,18 @@ def prepare_table_for_tazmaz_drive_network( (links_df.tollbooth != 0) ) ) - ) | links_df["has_bus_on_link"] # if the link has a bus on it we want to keep it no matter what + ) | links_df["has_bus_on_link"] | links_df["managed_lane_connector"] # special cases we would like tp keep ].copy() - from importlib import reload - reload(build_connectors_mtc) + import networkx as nx + + # ----------------------------------------- get largest sub graph ------------------------------------------------- + print("finding largest sub graph...") + G = nx.from_pandas_edgelist(drive_links_df, "A", "B") + sets_of_subgaph_nodes = [list(G.subgraph(c).copy().nodes) for c in nx.connected_components(G)] + sets_of_subgaph_nodes.sort(key=lambda list_of_nodes: len(list_of_nodes)) + largest_sub_graph_nodes = sets_of_subgaph_nodes[-1] + drive_links_df = drive_links_df[drive_links_df["A"].isin(largest_sub_graph_nodes) & drive_links_df["B"].isin(largest_sub_graph_nodes)] + print("done") centroid_connector_links = build_connectors_mtc.connect_centroids(nodes_df, drive_links_df, taz_centroid, taz_areas, parameters, taz_or_maz) # return centroid_connector_links @@ -403,36 +414,76 @@ def prepare_table_for_tazmaz_drive_network( for col_value, default_value in centroid_connector_defaults.items(): centroid_connector_links[col_value] = default_value + centroid_connector_links['distance'] = centroid_connector_links.to_crs(epsg=26915).geometry.length / 1609.34 # centroid_connector_links["_links"] = centroid_connector_links["geometry"].to_wkt() # centroid_connector_links["links"] = centroid_connector_links["_links"] # print(centroid_connector_links["links"]) - drive_links_df["taz_node_id"] = 0 + # drive_links_df["taz_node_id"] = 0 if taz_or_maz == "taz": # if we are doing taz we also have external centroid connectors, we need to include them - external_connectors_slicer = ((links_df.A > 900_000) & (links_df.A < 1_000_000)) | ((links_df.A > 900_000) & (links_df.A < 1_000_000)) + external_connectors_slicer = ((links_df.A > 900_000) & (links_df.A < 1_000_000)) | ((links_df.B > 900_000) & (links_df.B < 1_000_000)) print("model is taz, adding external links:", sum(external_connectors_slicer)) external_connectors = links_df[external_connectors_slicer] centroid_connector_links = pd.concat([centroid_connector_links, external_connectors]) - print(links_df.iloc[10]) centroid_connector_links["geometry_wkt"] = centroid_connector_links["geometry"].apply(lambda x: x.wkt) # bad link Ides - bad_externeral = [5117530, 5118443, 4239334, 4241510, 1143651, 1142182, 5118662, 5117749, 2518654, 2522313, 7135835, - 7137048, 2522059, 2518400, 2522058, 2518399, 2519408,2523067, 5118661, 5117748, 3320292, 3316432] - bad_externeral_connectos = centroid_connector_links["model_link_id"].isin(bad_externeral) - print("Number of External Connectors Removed: ", bad_externeral_connectos.sum()) - centroid_connector_links = centroid_connector_links[~bad_externeral_connectos] - - centroid_connector_links - print(centroid_connector_links.iloc[10]) + # bad_externeral = [5117530, 5118443, 4239334, 4241510, 1143651, 1142182, 5118662, 5117749, 2518654, 2522313, 7135835, + # 7137048, 2522059, 2518400, 2522058, 2518399, 2519408,2523067, 5118661, 5117748, 3320292, 3316432] + # bad_externeral_connectos = centroid_connector_links["model_link_id"].isin(bad_externeral) + + # centroid_connector_links = centroid_connector_links[~bad_externeral_connectos] + # drive_links_df = pd.concat([drive_links_df, centroid_connector_links]) # return drive_links_df + # import networkx as nx + + # subgraph_folder = Path(r"D:\subgraphs_test") + + # for number, subgraph in enumerate([G.subgraph(c).copy() for c in nx.connected_components(G)]): + # file_name = f"graph_number_{number}.gpkg" + # print(file_name) + # subgraph_nodes = list(subgraph.nodes) + # subgraph_df = drive_links_df[drive_links_df["A"].isin(subgraph_nodes) & drive_links_df["B"].isin(subgraph_nodes)] + # subgraph_df[["A", "B", "geometry", "model_link_id", "has_bus_on_link", "ft"]].to_file(subgraph_folder / file_name) + + #assert network is fully connected + # ---------------------------------------------------------------------------- + # ---------------------------------------------------------------------------- + # ---------------------------------------------------------------------------- + if validate_network_connectivity: + print("starting checks") + test_network = pd.concat([drive_links_df, centroid_connector_links]) + # foo = gpd.GeoDataFrame(test_network, geometry="geometry") + # print(foo.columns) + # foo[["A", "B", "segment_id", "geometry"]].to_file(r"D:\data_dump\test.gpkg") + + # return test_network + test_graph = nx.from_pandas_edgelist(test_network, "A", "B", create_using=nx.DiGraph()) + from itertools import product + bad_nodes = set() + print(taz_centroid.shape) + x = 0 + for start_node, end_node in product(taz_centroid["N"], taz_centroid["N"][50:51]): + print(x) + x = x+1 + if not nx.has_path(test_graph, start_node, end_node): + bad_nodes.add(start_node) + if not nx.has_path(test_graph, end_node, start_node): + bad_nodes.add(start_node) + + print("ending checks") + + if len(bad_nodes) > 0: + raise Exception(f"Graph was not fully connected, the following nodes are missing a pasth either to or from them {bad_nodes}") + print("passed") + model_tables = dict() @@ -1993,9 +2044,7 @@ def calc_extra_attribute_values(network, dims=None): else: print("path does not exist") - print("using _eb for first time...") emmebank = _eb.create(emmebank_path, dimensions) - print("emebank ran") emmebank.title = self._NAME emmebank.coord_unit_length = 0.0001 # Meters to kilometers emmebank.unit_of_length = "km" @@ -2003,7 +2052,6 @@ def calc_extra_attribute_values(network, dims=None): emmebank.unit_of_energy = "MJ" emmebank.node_number_digits = 6 emmebank.use_engineering_notation = True - print("network time") self._emmebank = emmebank def save_networks(self): @@ -2144,7 +2192,6 @@ def mode_map(row): except KeyError: index_errors.append("-".join([str(row["A"]), str(row["B"])])) continue - print(row["geometry_wkt"]) # if row["geometry_wkt"] == link = network.create_link(i_node, j_node, mode_map(row)) for attr in connector_attrs: @@ -2176,7 +2223,6 @@ def mode_map(row): # Copy link verticies to correct attribute name, if they are present if "_vertices" in network.attributes("LINK"): for link in network.links(): - print() link.vertices = link._vertices network.delete_attribute("LINK", "_vertices") diff --git a/lasso/parameters.py b/lasso/parameters.py index e70825d..4e70817 100644 --- a/lasso/parameters.py +++ b/lasso/parameters.py @@ -347,7 +347,7 @@ def __init__(self, **kwargs): self.taz_net_max_ft = 6 self.maz_net_max_ft = 7 # Potentially make a named tuple - self.taz_node_join_tolerance = (0, "US survey foot") + self.taz_node_join_tolerance = (100, "US survey foot") self.max_length_centroid_connector_when_none_in_taz = 999999999999999999999999999999999999 #TODO make this relative