Skip to content

Point Clusterization

Clusterization groups nearby points into polygons based on spatial proximity and minimum cluster size.
It can be used for identifying dense urban areas, service hubs, or catchment zones.


Cluster Generation

Clusters are generated using spatial rules:

  • Minimum distance between points to be included in the same cluster.
  • Minimum number of points required to form a valid cluster.
objectnat.get_clusters_polygon(points, min_dist=100, min_point=5, method='HDBSCAN', service_code_column='service_code')

Generate cluster polygons for given points based on a specified minimum distance and minimum points per cluster. Optionally, calculate the relative ratio between types of points within the clusters.

Parameters:

Name Type Description Default
points GeoDataFrame

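GeoDataFrame containing the points to be clustered. Include the column named by service_code_column if service ratio calculations are needed; when that column is missing, a warning is logged and the cluster polygons carry no meaningful relative ratio.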

required
min_dist float | int

Minimum distance between points to be considered part of the same cluster. Defaults to 100.

100
min_point int

Minimum number of points required to form a cluster. Defaults to 5.

5
method Literal['DBSCAN', 'HDBSCAN']

The clustering method to use. Must be either "DBSCAN" or "HDBSCAN". Defaults to "HDBSCAN".

'HDBSCAN'
service_code_column str

Column containing the service type used to compute the relative ratio within clustered polygons. Defaults to "service_code".

'service_code'

Returns:

Type Description
tuple[GeoDataFrame, GeoDataFrame]

A tuple containing the clustered polygons GeoDataFrame and the original points GeoDataFrame with cluster labels.

Source code in src\objectnat\methods\point_clustering\cluster_points_in_polygons.py
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
def get_clusters_polygon(
    points: gpd.GeoDataFrame,
    min_dist: float | int = 100,
    min_point: int = 5,
    method: Literal["DBSCAN", "HDBSCAN"] = "HDBSCAN",
    service_code_column: str = "service_code",
) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    Generate cluster polygons for given points based on a specified minimum distance and minimum points per cluster.
    Optionally, calculate the relative ratio between types of points within the clusters.

    Points labelled ``-1`` by the clustering step are treated as outliers: each one becomes its own
    single-point cluster, numbered after the largest regular cluster label, and is flagged with
    ``outlier=True``. Every resulting geometry is buffered by ``min_dist / 2``.

    Parameters:
        points (gpd.GeoDataFrame):
            GeoDataFrame containing the points to be clustered.
            Should include the column named by `service_code_column` if service ratios are needed;
            when the column is missing a warning is logged and a constant placeholder column is used instead.
        min_dist (float | int, optional):
            Minimum distance between points to be considered part of the same cluster. Defaults to 100.
        min_point (int, optional):
            Minimum number of points required to form a cluster. Defaults to 5.
        method (Literal["DBSCAN", "HDBSCAN"], optional):
            The clustering method to use. Must be either "DBSCAN" or "HDBSCAN". Defaults to "HDBSCAN".
        service_code_column (str, optional):
            Column containing the service type used for the relative ratio in clustered polygons.
            Defaults to "service_code".

    Returns:
        (tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]):
            A tuple containing the clustered polygons GeoDataFrame and the points GeoDataFrame with
            "cluster" labels and an "outlier" flag, both reprojected back to the CRS of the input.

    Raises:
        ValueError: If `method` is neither "DBSCAN" nor "HDBSCAN".
    """
    if method not in ["DBSCAN", "HDBSCAN"]:
        raise ValueError("Method must be either 'DBSCAN' or 'HDBSCAN'")

    # Work in an estimated UTM CRS and restore the caller's CRS on return.
    original_crs = points.crs
    local_crs = points.estimate_utm_crs()
    points = points.to_crs(local_crs)
    services_select = _get_cluster(points, min_dist, min_point, method)

    # Fill the placeholder on the frame that is actually grouped below, so the ratio step does not
    # depend on `_get_cluster` returning the very same object it was given.
    if service_code_column not in services_select.columns:
        logger.warning(
            f"No {service_code_column} column in provided GeoDataFrame, cluster polygons will be without relative ratio"
        )
        services_select[service_code_column] = service_code_column

    # A label of -1 marks points that were not assigned to any cluster.
    points_normal = services_select[services_select["cluster"] != -1].copy()
    points_outlier = services_select[services_select["cluster"] == -1].copy()

    if len(points_normal) > 0:
        cluster_service = points_normal.groupby("cluster", group_keys=True).apply(
            _get_service_ratio, service_code_column=service_code_column
        )
        # groupby-apply may yield a MultiIndex Series; pivot it to one column per service type.
        if isinstance(cluster_service, pd.Series):
            cluster_service = cluster_service.unstack(level=1, fill_value=0)

        polygons_normal = points_normal.dissolve("cluster").concave_hull(ratio=0.1, allow_holes=True)
        df_clusters_normal = pd.concat([cluster_service, polygons_normal.rename("geometry")], axis=1)
        cluster_normal = df_clusters_normal.index.max()
        points_normal["outlier"] = False
        df_clusters_normal["outlier"] = False
    else:
        df_clusters_normal = None
        cluster_normal = 0

    if len(points_outlier) > 0:
        # Give every outlier its own cluster id, continuing after the largest regular label.
        clusters_outlier = cluster_normal + 1
        new_clusters = list(range(clusters_outlier, clusters_outlier + len(points_outlier)))
        points_outlier.loc[:, "cluster"] = new_clusters

        cluster_service = points_outlier.groupby("cluster", group_keys=True).apply(
            _get_service_ratio, service_code_column=service_code_column
        )
        if isinstance(cluster_service, pd.Series):
            cluster_service = cluster_service.unstack(level=1, fill_value=0)

        df_clusters_outlier = cluster_service.join(points_outlier.set_index("cluster")["geometry"])
        points_outlier["outlier"] = True
        df_clusters_outlier["outlier"] = True
    else:
        points_outlier = None
        df_clusters_outlier = None

    # pd.concat skips None entries; service types absent from a cluster get a ratio of 0.
    df_clusters = pd.concat([df_clusters_normal, df_clusters_outlier]).fillna(0).set_geometry("geometry")
    df_clusters["geometry"] = df_clusters["geometry"].buffer(min_dist / 2)
    df_clusters = df_clusters.reset_index().rename(columns={"index": "cluster"})

    points = pd.concat([points_normal, points_outlier])

    return df_clusters.to_crs(original_crs), points.to_crs(original_crs)

building_clusters