Skip to content

api_tools

Methods for linking the data from one polygon layer to the SOM input data of another polygon layer.

Links MPAs to subbasin data

Source code in src/api_tools.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
def link_areas(config: dict, data: dict[str, pd.DataFrame]):
    """
    Links MPAs to subbasin data
    """
    mpa_id = config['layers']['mpa']['id_attr']
    subbasin_id = config['layers']['subbasin']['id_attr']

    #
    # Get subbasin-country combinations
    #

    if 'path' in config['layers']['subbasin'] and config['layers']['subbasin']['path'] != "":
        path = config['layers']['subbasin']['path']
        if not os.path.isfile(path): path = os.path.join(os.path.dirname(os.path.realpath(__file__)), path)
        if not os.path.exists(path): path = config['layers']['subbasin']['url']
    else:
        path = config['layers']['subbasin']['url']

    # read from path
    subbasins = gpd.read_file(path)

    # fix geometries if needed
    subbasins['geometry'] = subbasins.geometry.make_valid()

    #
    # Get areas from MPA layer
    #

    if 'path' in config['layers']['mpa'] and config['layers']['mpa']['path'] != "":
        path = config['layers']['mpa']['path']
        if not os.path.isfile(path): path = os.path.join(os.path.dirname(os.path.realpath(__file__)), path)
        if not os.path.exists(path): path = config['layers']['mpa']['url']
    else:
        path = config['layers']['mpa']['url']

    # read from path
    mpa = gpd.read_file(path)

    # fix geometries if needed
    mpa['geometry'] = mpa.geometry.make_valid()

    #
    # Get measures in MPAs
    #

    # explode so there's only one measure per row
    mpa[config['layers']['mpa']['measure_attr']] = mpa[config['layers']['mpa']['measure_attr']].apply(lambda x: x.split(config['layers']['mpa']['measure_delimiter']))
    measures = mpa.explode(column=config['layers']['mpa']['measure_attr'])

    #
    # identify links between mpas and subbasins
    #

    # ensure both layers have the same crs
    mpa = mpa.to_crs(subbasins.crs)

    mpa = mpa.reset_index(drop=True)
    subbasins = subbasins.reset_index(drop=True)

    # perform spatial join on intersections
    intersects = gpd.sjoin(
        mpa[[mpa_id, 'geometry']], 
        subbasins[[subbasin_id, 'geometry']], 
        how='left', 
        predicate='intersects'
    )
    # remove rows without intersect
    intersects = intersects.dropna(subset=[subbasin_id])
    # create new columns for mpa and subbasin geometries
    merged = intersects.merge(
        mpa[[mpa_id, 'geometry']].rename(columns={'geometry': 'geom_mpa'}), 
        on=mpa_id
    ).merge(
        subbasins[[subbasin_id, 'geometry']].rename(columns={'geometry': 'geom_subbasins'}), 
        on=subbasin_id
    )
    # calculate intersect area between mpa and subbasin for each intersection (row)
    merged['intersect_area'] = merged.apply(
        lambda x: x['geom_mpa'].intersection(x['geom_subbasins']).area, axis=1
    )
    # find subbasin with the highest intersect area for each mpa
    max_idx = (merged.loc[merged.groupby(mpa_id)['intersect_area'].idxmax()][[mpa_id, subbasin_id]].set_index(mpa_id))
    # map the highest intersect area subbasin to the mpa geodataframe
    mpa[subbasin_id] = mpa[mpa_id].map(max_idx[subbasin_id])
    links = mpa.loc[:, [mpa_id, subbasin_id]]

    #
    # change area ids to match MPAs
    #

    # create the cases dataframe for the input data
    cases = {
        'ID': np.arange(len(measures)), 
        'measure': measures[config['layers']['mpa']['measure_attr']], 
        'activity': np.zeros(len(measures)), 
        'pressure': np.zeros(len(measures)), 
        'state': np.zeros(len(measures)), 
        'coverage': np.ones(len(measures)), 
        'implementation': np.ones(len(measures)), 
        'area_id': measures[config['layers']['mpa']['id_attr']]
    }

    data['cases'] = pd.DataFrame(cases)

    # create the area dataframe
    areas = pd.DataFrame({
        'ID': mpa[mpa_id].unique()
    })
    areas['area'] = areas['ID'].apply(lambda x: mpa.loc[(mpa[mpa_id] == x), config['layers']['mpa']['name_attr']].values[0])
    data['area'] = areas

    for key in ['activity_contributions', 'pressure_contributions', 'thresholds']:
        df = data[key]
        df = df.rename(columns={'area_id': subbasin_id})
        merged = df.merge(links, on=subbasin_id, how='inner')
        merged = merged.rename(columns={mpa_id: 'area_id'})
        merged = merged.drop(columns=[subbasin_id])
        data[key] = merged

    return data