som_app

Main SOM calculation methods.

build_cases(data)

Builds cases: expands each case row into individual measure-activity-pressure-state combinations per area.

Parameters:

    data (dict): dict of dataframes containing all links and ids relevant to SOM calculations. Required.

Returns:

    data (dict): updated links and ids relevant to SOM calculations.

Source code in src/som_app.py
def build_cases(data: dict[str, pd.DataFrame]) -> dict[str, pd.DataFrame]:
    """
    Builds cases: expands each case row into individual measure-activity-pressure-state combinations per area.

    Arguments:
        data (dict): dict of dataframes containing all links and ids relevant to SOM calculations.

    Returns:
        data (dict): updated links and ids relevant to SOM calculations.
    """
    cases = data['cases']
    links = data['measure_effects']
    # replace all zeros (0) in activity / pressure / state columns with full list of values
    # filter those lists to only include relevant IDs (from links)
    # finally explode to only have single IDs per row
    cols = ['activity', 'pressure', 'state']
    for col in cols:
        cases[col] = cases[col].astype(object)
    for i, row in cases.iterrows():
        maps_links = links.loc[links['measure'] == row['measure'], cols]    # select relevant measure/activity/pressure/state links
        if len(maps_links) == 0:
            cases.drop(i, inplace=True) # drop rows where measure has no effect
            continue
        for col in cols:
            cases.at[i, col] = maps_links[col].unique().tolist() if row[col] == 0 else row[col]
    for col in cols:
        cases = cases.explode(col)

    cases = cases.reset_index(drop=True)

    # filter out links that don't have associated reduction
    m = cases['measure'].isin(links['measure'])
    a = cases['activity'].isin(links['activity'])
    p = cases['pressure'].isin(links['pressure'])
    s = cases['state'].isin(links['state'])
    existing_links = (m & a & p & s)
    cases = cases.loc[existing_links, :]

    cases = cases.reset_index(drop=True)

    # remove duplicate measures in areas, measure with highest coverage and implementation is chosen
    cases = cases.sort_values(by=['coverage', 'implementation'], ascending=[False, False])
    cases = cases.drop_duplicates(subset=['measure', 'activity', 'pressure', 'state', 'area_id'], keep='first')
    cases = cases.reset_index(drop=True)

    data['cases'] = cases

    return data
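
A minimal usage sketch (illustrative only: the ids, the single-row frames and the import path are assumptions, not real input data):

import pandas as pd
import som_app  # assuming src/ is on the import path

# one known effect link: measure 1 acts on activity 7 / pressure 3 / state 5
links = pd.DataFrame({'measure': [1], 'activity': [7], 'pressure': [3], 'state': [5]})
# one case applying measure 1 in area 100; the zeros expand to every linked id
cases = pd.DataFrame({'measure': [1], 'activity': [0], 'pressure': [0], 'state': [0],
                      'coverage': [1.0], 'implementation': [1.0], 'area_id': [100]})
data = som_app.build_cases({'cases': cases, 'measure_effects': links})
print(data['cases'])  # one row: measure 1, activity 7, pressure 3, state 5, area 100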

build_changes(data, time_steps=1, warnings=False)

Main calculation method. Simulate the reduction in activities and pressures caused by measures and return the change observed in state.

Parameters:

    data (dict): dict of dataframes containing all links and ids relevant to SOM calculations. Required.
    time_steps (int): NOT IMPLEMENTED. Number of time steps to simulate, i.e. repeated applications of the measures. Default: 1.
    warnings (bool): toggle for showing warnings related to calculations. Default: False.

Returns:

    data (dict): updated links and ids relevant to SOM calculations.

Source code in src/som_app.py
def build_changes(data: dict[str, pd.DataFrame], time_steps: int = 1, warnings: bool = False) -> dict[str, pd.DataFrame]:
    """
    Main calculation method. Simulate the reduction in activities and pressures caused by measures and 
    return the change observed in state. 

    Arguments:
        data (dict): dict of dataframes containing all links and ids relevant to SOM calculations.
        time_steps (int): NOT IMPLEMENTED. Number of time steps to simulate, i.e. repeated applications of the measures.
        warnings (bool): toggle for showing warnings related to calculations.

    Returns:
        data (dict): updated links and ids relevant to SOM calculations.
    """
    # this variable is used in assertions where float number error might affect comparisons
    allowed_error = 0.00001     

    cases = data['cases']
    links = data['measure_effects']
    areas = data['area']['ID']

    # create dataframes to store changes in pressure and state, one column per area_id
    # NOTE: the DataFrames are created on one line to avoid PerformanceWarning

    # represents the amount of the pressure ('ID' column) that is left
    # 1 = unchanged pressure, 0 = no pressure left
    pressure_levels = pd.DataFrame(data['pressure']['ID']).reindex(columns=['ID']+areas.tolist()).fillna(1.0)
    # represents the amount of the total pressure load that is left affecting the given state ('ID' column)
    # 1 = unchanged pressure load, 0 = no pressure load left affecting the state
    total_pressure_load_levels = pd.DataFrame(data['state']['ID']).reindex(columns=['ID']+areas.tolist()).fillna(1.0)

    # represents the reduction observed in the total pressure load ('ID' column)
    total_pressure_load_reductions = pd.DataFrame(data['state']['ID']).reindex(columns=['ID']+areas.tolist()).fillna(0.0)

    # same as pressure_levels, but one dataframe for each separate state, so that state specific reductions on the pressures are captured
    state_pressure_levels = {s: pd.DataFrame(data['pressure']['ID']).reindex(columns=['ID']+areas.tolist()).fillna(1.0) for s in data['state']['ID']}

    # make sure activity contributions don't exceed 100 %
    for area in areas:
        for p_i, p in pressure_levels.iterrows():
            mask = (data['activity_contributions']['area_id'] == area) & (data['activity_contributions']['pressure'] == p['ID'])
            relevant_contributions = data['activity_contributions'].loc[mask, :]
            if len(relevant_contributions) > 0:
                contribution_sum = relevant_contributions['contribution'].sum()
                if contribution_sum > 1:
                    data['activity_contributions'].loc[mask, 'contribution'] = relevant_contributions['contribution'] / contribution_sum
            # re-check the (possibly normalized) contribution sum
            contribution_sum = data['activity_contributions'].loc[mask, 'contribution'].sum()
            try: assert contribution_sum <= 1 + allowed_error
            except Exception as e: fail_with_message(f'Failed to verify that activity contributions do not exceed 100 % for area {area}, pressure {p["ID"]} with contribution sum {contribution_sum}', e)

    # make sure pressure contributions don't exceed 100 %
    for area in areas:
        for s_i, s in total_pressure_load_levels.iterrows():
            mask = (data['pressure_contributions']['area_id'] == area) & (data['pressure_contributions']['state'] == s['ID'])
            relevant_contributions = data['pressure_contributions'].loc[mask, :]
            if len(relevant_contributions) > 0:
                contribution_sum = relevant_contributions['contribution'].sum()
                if contribution_sum > 1:
                    data['pressure_contributions'].loc[mask, 'contribution'] = relevant_contributions['contribution'] / contribution_sum
            # re-check the (possibly normalized) contribution sum
            contribution_sum = data['pressure_contributions'].loc[mask, 'contribution'].sum()
            try: assert contribution_sum <= 1 + allowed_error
            except Exception as e: fail_with_message(f'Failed to verify that pressure contributions do not exceed 100 % for area {area}, state {s["ID"]} with contribution sum {contribution_sum}', e)

    #
    # simulation loop
    #

    for time_step in range(time_steps):

        #
        # pressure reductions
        #

        # activity contributions
        for area in areas:
            c = cases.loc[cases['area_id'] == area, :]  # select cases for current area
            for p_i, p in pressure_levels.iterrows():
                relevant_measures = c.loc[c['pressure'] == p['ID'], :]  # select all measures affecting the current pressure in the current area
                relevant_overlaps = data['overlaps'].loc[data['overlaps']['pressure'] == p['ID'], :]    # select all overlaps affecting current pressure
                for m_i, m in relevant_measures.iterrows():
                    #
                    # get measure effect (= reduction), and apply modifiers
                    #
                    mask = (links['measure'] == m['measure']) & (links['activity'] == m['activity']) & (links['pressure'] == m['pressure']) & (links['state'] == m['state'])
                    row = links.loc[mask, :]    # find the reduction of the current measure implementation
                    if len(row) == 0:
                        if warnings: print(f'WARNING! Effect of measure {m["measure"]} on activity {m["activity"]} and pressure {m["pressure"]} not known! Measure {m["measure"]} will be skipped in area {area}.')
                        continue    # skip measure if data on the effect is not known
                    try: assert len(row) == 1
                    except Exception as e: fail_with_message(f'ERROR! Multiple instances of measure {m["measure"]} effect on activity {m["activity"]} and pressure {m["pressure"]} given in input data!', e)
                    reduction = row['reduction'].values[0]
                    for mod in ['coverage', 'implementation']:
                        reduction = reduction * m[mod]
                    #
                    # overlaps (measure-measure interaction)
                    #
                    for o_i, o in relevant_overlaps.loc[(relevant_overlaps['overlapped'] == m['measure']) & (relevant_overlaps['activity'] == m['activity']), :].iterrows():
                        if o['overlapping'] in relevant_measures.loc[relevant_measures['activity'] == m['activity'], 'measure'].values: # ensure the overlapping measure is also for the current activity
                            reduction = reduction * o['multiplier']
                    #
                    # contribution
                    #
                    if m['activity'] == 0:
                        contribution = 1    # if activity is 0 (= straight to pressure), contribution will be 1
                    else:
                        cont_mask = (data['activity_contributions']['activity'] == m['activity']) & (data['activity_contributions']['pressure'] == m['pressure']) & (data['activity_contributions']['area_id'] == area)
                        contribution = data['activity_contributions'].loc[cont_mask, 'contribution']
                        if len(contribution) == 0:
                            if warnings: print(f'WARNING! Contribution of activity {m["activity"]} to pressure {m["pressure"]} not known! Measure {m["measure"]} will be skipped in area {area}.')
                            continue    # skip measure if activity is not in contribution list
                        else:
                            try: assert len(contribution) == 1
                            except Exception as e: fail_with_message(f'ERROR! Multiple instances of activity {m["activity"]} contribution on pressure {m["pressure"]} given in input data!', e)
                            contribution = contribution.values[0]
                    #
                    # reduce pressure
                    #
                    pressure_levels.at[p_i, area] = pressure_levels.at[p_i, area] * (1 - reduction * contribution)
                    # diagnostic output: pressure levels should never drop below zero
                    if pressure_levels.at[p_i, area] < 0:
                        print(f'area {area}, pressure {p["ID"]} => level = {pressure_levels.at[p_i, area]}, red = {reduction}, cont = {contribution}')
                    #
                    # normalize activity contributions to reflect pressure reduction
                    #
                    if abs(1 - contribution) > allowed_error and contribution != 0:     # only normalize if there is change in contributions
                        data['activity_contributions'].loc[cont_mask, 'contribution'] = contribution * (1 - reduction)   # reduce the current contribution before normalizing
                        norm_mask = (data['activity_contributions']['area_id'] == area) & (data['activity_contributions']['pressure'] == p['ID'])
                        relevant_contributions = data['activity_contributions'].loc[norm_mask, 'contribution']
                        data['activity_contributions'].loc[norm_mask, 'contribution'] = relevant_contributions / (1 - reduction * contribution)

        #
        # total pressure load reductions
        #

        # straight to state measures
        for area in areas:
            c = cases.loc[cases['area_id'] == area, :]  # select cases for current area
            for s_i, s in total_pressure_load_levels.iterrows():
                relevant_measures = c.loc[c['state'] == s['ID'], :] # select all measures affecting the current state in the current area
                for m_i, m in relevant_measures.iterrows():
                    #
                    # get measure effect (= reduction), and apply modifiers
                    #
                    mask = (links['measure'] == m['measure']) & (links['activity'] == m['activity']) & (links['pressure'] == m['pressure']) & (links['state'] == m['state'])
                    row = links.loc[mask, :]
                    if len(row) == 0:
                        continue
                    reduction = row['reduction'].values[0]
                    for mod in ['coverage', 'implementation']:
                        reduction = reduction * m[mod]
                    #
                    # overlaps (measure-measure interaction)
                    #
                    for o_i, o in data['overlaps'].loc[(data['overlaps']['overlapped'] == m['measure']) & (data['overlaps']['activity'] == m['activity']) & (data['overlaps']['pressure'] == m['pressure']), :].iterrows():
                        if o['overlapping'] in relevant_measures['measure'].values:
                            reduction = reduction * o['multiplier']
                    #
                    # reduce pressure
                    #
                    total_pressure_load_levels.at[s_i, area] = total_pressure_load_levels.at[s_i, area] * (1 - reduction)

        # update state pressures from pressure levels
        for s_i, s in total_pressure_load_levels.iterrows():
            state_pressure_levels[s['ID']].loc[:, :] = pressure_levels.loc[:, :]

        # pressure contributions
        for area in areas:
            for s_i, s in total_pressure_load_levels.iterrows():    # for each state
                a_i = pressure_levels.columns.get_loc(area)     # column index of current area column
                relevant_pressures = data['pressure_contributions'].loc[(data['pressure_contributions']['area_id'] == area) & (data['pressure_contributions']['state'] == s['ID']), :]  # select contributions of pressures affecting current state in current area
                for p_i, p in relevant_pressures.iterrows():
                    #
                    # main pressure reduction
                    #
                    row_i = pressure_levels.loc[pressure_levels['ID'] == p['pressure']].index[0]
                    reduction = 1 - pressure_levels.iloc[row_i, a_i]    # reduction = 100 % - the part that is left of the pressure
                    contribution = data['pressure_contributions'].loc[(data['pressure_contributions']['area_id'] == area) & (data['pressure_contributions']['state'] == s['ID']) & (data['pressure_contributions']['pressure'] == p['pressure']), 'contribution'].values[0]
                    #
                    # subpressures
                    #
                    relevant_subpressures = data['subpressures'].loc[(data['subpressures']['state'] == s['ID']) & (data['subpressures']['state pressure'] == p['pressure']), :]     # find all rows where the current pressure acts as a state pressure for the current state
                    for sp_i, sp in relevant_subpressures.iterrows():   # for each subpressure of the current pressure
                        sp_row_i = pressure_levels.loc[pressure_levels['ID'] == sp['reduced pressure']].index[0]
                        multiplier = sp['multiplier']   # by how much does the subpressure affect the current pressure
                        red = 1 - pressure_levels.iloc[sp_row_i, a_i]    # subpressure reduction = 100 % - the part that is left of the subpressure
                        reduction = reduction + multiplier * red    # the new current pressure reduction is increased by the calculated subpressure reduction
                    try: assert reduction <= 1 + allowed_error
                    except Exception as e: fail_with_message(f'Failed on area {area}, state {s["ID"]}, pressure {p["pressure"]} with reduction {reduction}', e)
                    state_pressure_levels[s['ID']].iloc[row_i, a_i] = state_pressure_levels[s['ID']].iloc[row_i, a_i] * (1 - reduction)
                    #
                    # reduce total pressure load
                    #
                    total_pressure_load_levels.at[s_i, area] = total_pressure_load_levels.at[s_i, area] * (1 - reduction * contribution)
                    #
                    # normalize pressure contributions to reflect pressure reduction
                    #
                    if abs(1 - contribution) > allowed_error and contribution != 0:     # only normalize if there is change in contributions
                        data['pressure_contributions'].loc[(data['pressure_contributions']['area_id'] == area) & (data['pressure_contributions']['state'] == s['ID']) & (data['pressure_contributions']['pressure'] == p['pressure']), 'contribution'] = contribution * (1 - reduction)   # reduce the current contribution before normalizing
                        norm_mask = (data['pressure_contributions']['area_id'] == area) & (data['pressure_contributions']['state'] == s['ID'])
                        relevant_contributions = data['pressure_contributions'].loc[norm_mask, 'contribution']
                        data['pressure_contributions'].loc[norm_mask, 'contribution'] = relevant_contributions / (1 - reduction * contribution)
                        try: assert abs(1 - data['pressure_contributions'].loc[norm_mask, 'contribution'].sum()) <= allowed_error
                        except Exception as e: fail_with_message(f'Failed on area {area}, state {s["ID"]}, pressure {p["pressure"]} with pressure contribution sum not equal to 1', e)

    # total reduction observed in total pressure loads
    for area in areas:
        for s_i, s in total_pressure_load_levels.iterrows():
            total_pressure_load_reductions.at[s_i, area] = 1 - total_pressure_load_levels.at[s_i, area]

    # GES thresholds
    cols = ['PR', '10', '25', '50']
    thresholds = {}
    for col in cols:
        thresholds[col] = pd.DataFrame(data['state']['ID']).reindex(columns=['ID']+areas.tolist())
    for area in areas:
        a_i = total_pressure_load_levels.columns.get_loc(area)
        for s_i, s in total_pressure_load_levels.iterrows():
            row = data['thresholds'].loc[(data['thresholds']['state'] == s['ID']) & (data['thresholds']['area_id'] == area), cols]
            if len(row) == 0:
                continue
            for col in cols:
                thresholds[col].iloc[s_i, a_i] = row.loc[:, col].values[0]

    data.update({
        'pressure_levels': pressure_levels, 
        'state_pressure_levels': state_pressure_levels, 
        'total_pressure_load_levels': total_pressure_load_levels, 
        'total_pressure_load_reductions': total_pressure_load_reductions, 
        'thresholds': thresholds
    })

    return data
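
A hedged sketch of a single simulation run, assuming data has already been prepared by build_input, build_links, build_scenario and build_cases:

import som_app  # assuming src/ is on the import path

data = som_app.build_changes(data, warnings=True)

# pressure levels per area: 1.0 = unchanged, 0.0 = pressure fully removed
levels = data['pressure_levels'].set_index('ID')
# observed reduction in the total pressure load on each state, per area
tpl_red = data['total_pressure_load_reductions'].set_index('ID')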

build_input(config)

Loads input data. If loading already processed data, probability distributions need to be converted back to arrays.

Parameters:

    config (dict): configuration settings. Required.

Returns:

    input_data (dict): SOM input data.

Source code in src/som_app.py
def build_input(config: dict) -> dict[str, pd.DataFrame]:
    """
    Loads input data. If loading already processed data, probability distributions need to be converted back to arrays. 

    Arguments:
        config (dict): configuration settings.

    Returns:
        input_data (dict): SOM input data.
    """
    # process legacy input data to be usable by the tool
    if config['use_legacy_input_data']:
        # process input data
        input_data = process_input_data(config)

        # load areas from layers and adjust area ids
        if config['link_mpas_to_subbasins']:
            print('Linking areas in input data...')
            input_data = api_tools.link_areas(config, input_data)

        # export input data to excel
        path = os.path.realpath(config['input_data_legacy']['general_input'])
        if not os.path.isfile(path): path = os.path.join(os.path.dirname(os.path.realpath(__file__)), config['input_data_legacy']['general_input'])
        path = os.path.join(os.path.dirname(path), 'input_data.xlsx')
        config['input_data']['path'] = path
        with pd.ExcelWriter(path) as writer:
            for key in input_data:
                input_data[key].to_excel(writer, sheet_name=key, index=False)

    # load processed input data used by the tool
    path = os.path.realpath(config['input_data']['path'])
    if not os.path.isfile(path): path = os.path.join(os.path.dirname(os.path.realpath(__file__)), config['input_data']['path'])
    input_data = pd.read_excel(io=path, sheet_name=None)
    conversion_sheet = [
        ('measure_effects', 'reduction'), 
        ('activity_contributions', 'contribution'), 
        ('pressure_contributions', 'contribution'), 
        ('thresholds', 'PR'), 
        ('thresholds', '10'), 
        ('thresholds', '25'), 
        ('thresholds', '50')
    ]
    def str_to_arr(s):
        if type(s) is float: return s
        arr = []
        for a in [x for x in s.replace('[', '').replace(']', '').split(' ')]:
            if a != '':
                arr.append(a)
        arr = np.array(arr)
        arr = arr.astype(float)
        arr = arr / np.sum(arr)
        return arr
    for sheet in conversion_sheet:
        input_data[sheet[0]][sheet[1]] = input_data[sheet[0]][sheet[1]].apply(str_to_arr)

    # load areas from layers and adjust area ids (if not using legacy data)
    # if done this way, input data file is not updated
    if config['link_mpas_to_subbasins'] and not config['use_legacy_input_data']:
        print('Linking areas in input data...')
        input_data = api_tools.link_areas(config, input_data)

    # make sure areas do not go over 32 characters
    # input_data['area']['area'] = input_data['area']['area'].apply(lambda x: x if len(x) <= 32 else x[:32])
    # validate IDs
    for key in ['measure', 'activity', 'pressure', 'state', 'area']:
        input_data[key][key] = input_data[key][key].apply(sanitize_string)

    return input_data
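
A minimal configuration sketch; the keys are the ones read above, but the concrete path is a placeholder:

import som_app  # assuming src/ is on the import path

config = {
    'use_legacy_input_data': False,             # skip the legacy preprocessing branch
    'link_mpas_to_subbasins': False,            # skip linking areas via layers
    'input_data': {'path': 'input_data.xlsx'},  # placeholder path to the processed workbook
}
input_data = som_app.build_input(config)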

build_links(data)

Build links by picking random samples using probability distributions.

Parameters:

    data (dict): dict of dataframes containing all links and ids relevant to SOM calculations. Required.

Returns:

    data (dict): updated links and ids relevant to SOM calculations.

Source code in src/som_app.py
def build_links(data: dict[str, pd.DataFrame]) -> dict[str, pd.DataFrame]:
    """
    Build links by picking random samples using probability distributions.

    Arguments:
        data (dict): dict of dataframes containing all links and ids relevant to SOM calculations.

    Returns:
        data (dict): updated links and ids relevant to SOM calculations.
    """
    #
    # measure effects
    #

    # verify that there are no duplicate links
    try: assert len(data['measure_effects'][data['measure_effects'].duplicated(['measure', 'activity', 'pressure', 'state'])]) == 0
    except Exception as e: fail_with_message(f'Duplicate measure effects in input data!', e)

    # get picks from cumulative distribution
    data['measure_effects']['reduction'] = data['measure_effects']['reduction'].apply(get_pick)

    #
    # activity contributions
    #

    data['activity_contributions']['contribution'] = data['activity_contributions']['contribution'].apply(get_pick)

    #
    # pressure contributions
    #

    # get picks from cumulative distribution
    data['pressure_contributions']['contribution'] = data['pressure_contributions']['contribution'].apply(lambda x: get_pick(x) if not np.any(np.isnan(x)) else np.nan)

    data['pressure_contributions'] = data['pressure_contributions'].drop_duplicates(subset=['state', 'pressure', 'area_id'], keep='first').reset_index(drop=True)

    # verify that there are no duplicate links
    try: assert len(data['pressure_contributions'][data['pressure_contributions'].duplicated(['state', 'pressure', 'area_id'])]) == 0
    except Exception as e: fail_with_message(f'Duplicate pressure contributions in input data!', e)

    # make sure pressure contributions for each state / area are 100 %
    for area in data['area']['ID']:
        for state in data['state']['ID']:
            mask = (data['pressure_contributions']['area_id'] == area) & (data['pressure_contributions']['state'] == state)
            relevant_contributions = data['pressure_contributions'].loc[mask, :]
            if len(relevant_contributions) > 0:
                data['pressure_contributions'].loc[mask, 'contribution'] = relevant_contributions['contribution'] / relevant_contributions['contribution'].sum()

    #
    # thresholds
    #

    threshold_cols = ['PR', '10', '25', '50']   # target thresholds (PR=GES)

    # get picks from cumulative distribution
    for col in threshold_cols:
        data['thresholds'][col] = data['thresholds'][col].apply(lambda x: get_pick(x) if not np.any(np.isnan(x)) else np.nan)

    data['thresholds'] = data['thresholds'].drop_duplicates(subset=['state', 'area_id'], keep='first').reset_index(drop=True)

    # verify that there are no duplicate links
    try: assert len(data['thresholds'][data['thresholds'].duplicated(['state', 'area_id'])]) == 0
    except Exception as e: fail_with_message(f'Duplicate GES targets in input data!', e)

    return data
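
Each call draws one random sample per distribution, so repeated calls on fresh copies of the input yield different realizations; this is what drives the Monte Carlo loop whose outputs build_results aggregates. A sketch, assuming input_data comes from build_input:

import copy
import som_app  # assuming src/ is on the import path

draw = som_app.build_links(copy.deepcopy(input_data))
# 'reduction' and 'contribution' now hold single sampled floats instead of
# distributions, and pressure contributions sum to 1 per state/area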

build_results(sim_res, input_data)

Process the simulated results to calculate uncertainties. Uncertainty is determined as standard error of the mean.

Parameters:

    sim_res (str): path to directory holding individual simulation run results. Required.
    input_data (dict): SOM input data. Required.

Returns:

    res (dict): SOM calculation results.

Source code in src/som_app.py
def build_results(sim_res: str, input_data: dict[str, pd.DataFrame]) -> dict[str, pd.DataFrame]:
    """
    Process the simulated results to calculate uncertainties. 
    Uncertainty is determined as standard error of the mean.

    Arguments:
        sim_res (str): path to directory holding individual simulation run results.
        input_data (dict): SOM input data.

    Returns:
        res (dict): SOM calculation results.
    """
    files = [os.path.join(sim_res, x) for x in os.listdir(sim_res) if x.endswith('.pickle') and 'sim_res' in x]

    areas = input_data['area']['ID']
    pressures = input_data['pressure']['ID']
    states = input_data['state']['ID']

    res = {}

    for key, val, ids in [
        ('Pressure', 'pressure_levels', pressures), 
        ('TPL', 'total_pressure_load_levels', states), 
        ('TPLRed', 'total_pressure_load_reductions', states), 
        ('Thresholds', ('thresholds', 'PR'), states)
    ]:
        res[key] = {
            'Mean': pd.DataFrame(ids).reindex(columns=['ID']+areas.tolist()).fillna(1.0), 
            'Error': pd.DataFrame(ids).reindex(columns=['ID']+areas.tolist()).fillna(1.0)
        }
        arr = np.empty(shape=(len(ids.tolist()), len(areas.tolist()), len(files)))
        for i in range(len(files)):
            with open(files[i], 'rb') as f:
                data = pickle.load(f)
            if type(val) == str:
                arr[:, :, i] = data[val].values[:, 1:]
            else:
                arr[:, :, i] = data[val[0]][val[1]].values[:, 1:]
        res[key]['Mean'].iloc[:, 1:] = np.mean(arr, axis=2)
        res[key]['Error'].iloc[:, 1:] = np.std(arr, axis=2, ddof=1) / np.sqrt(arr.shape[2])    # calculate standard error

    res['StatePressure'] = {
        s: {
            'Mean': pd.DataFrame(pressures).reindex(columns=['ID']+areas.tolist()).fillna(1.0), 
            'Error': pd.DataFrame(pressures).reindex(columns=['ID']+areas.tolist()).fillna(1.0)
        } for s in states
    }
    for s in res['StatePressure']:
        arr = np.empty(shape=(len(pressures.tolist()), len(areas.tolist()), len(files)))
        for i in range(len(files)):
            with open(files[i], 'rb') as f:
                data = pickle.load(f)
            arr[:, :, i] = data['state_pressure_levels'][s].values[:, 1:]
        res['StatePressure'][s]['Mean'].iloc[:, 1:] = np.mean(arr, axis=2)
        res['StatePressure'][s]['Error'].iloc[:, 1:] = np.std(arr, axis=2, ddof=1) / np.sqrt(arr.shape[2])

    for key, val, col in [
        ('MeasureEffects', 'measure_effects', 'reduction'), 
        ('ActivityContributions', 'activity_contributions', 'contribution'), 
        ('PressureContributions', 'pressure_contributions', 'contribution')
    ]:
        res[key] = {
            'Mean': pd.DataFrame(input_data[val]), 
            'Error': pd.DataFrame(input_data[val])
        }
        arr = np.empty(shape=([x for x in input_data[val].values.shape]+[len(files)]))
        for i in range(len(files)):
            with open(files[i], 'rb') as f:
                data = pickle.load(f)
            arr[:, :, i] = data[val].values
        res[key]['Mean'][col] = np.mean(arr[:, -1, :], axis=1)
        res[key]['Error'][col] = np.std(arr[:, -1, :], axis=1, ddof=1) / np.sqrt(arr.shape[2])

    return res
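
An illustrative Monte Carlo driver matching the file naming build_results expects ('sim_res' in the name, '.pickle' suffix); the run count and directory are arbitrary, and the input workbook is assumed to provide the cases sheet used by build_cases:

import copy
import os
import pickle

import som_app  # assuming src/ is on the import path

os.makedirs('sim_res', exist_ok=True)
for run in range(30):
    d = copy.deepcopy(input_data)   # fresh copy so each run samples independently
    d = som_app.build_links(d)
    d = som_app.build_cases(d)
    d = som_app.build_changes(d)
    with open(os.path.join('sim_res', f'sim_res_{run}.pickle'), 'wb') as f:
        pickle.dump(d, f)

res = som_app.build_results('sim_res', input_data)
mean = res['TPLRed']['Mean']    # mean total pressure load reduction per state and area
error = res['TPLRed']['Error']  # standard error of the mean over the 30 runs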

build_scenario(data, scenario)

Build scenario. Updates activity contributions to pressures to reflect changes in the activities.

Parameters:

    data (dict): dict of dataframes containing all links and ids relevant to SOM calculations. Required.
    scenario (str): name of the scenario to be accessed from data. Required.

Returns:

    act_to_press (DataFrame): updated activity contributions to pressures.

Source code in src/som_app.py
def build_scenario(data: dict[str, pd.DataFrame], scenario: str) -> pd.DataFrame:
    """
    Build scenario. Updates activity contributions to pressures to reflect changes in the activities.

    Arguments:
        data (dict): dict of dataframes containing all links and ids relevant to SOM calculations.
        scenario (str): name of scenario to be accessed from data.

    Returns:
        act_to_press (DataFrame): updated activity contributions to pressures.
    """
    act_to_press = data['activity_contributions']
    dev_scen = data['development_scenarios']

    # for each pressure, save the total contribution of activities for later normalization
    actual_sum = {}
    for pressure_id in act_to_press['pressure'].unique():
        actual_sum[pressure_id] = {}
        activities = act_to_press.loc[act_to_press['pressure'] == pressure_id, :]
        for area in activities['area_id'].unique():
            actual_sum[pressure_id][area] = activities.loc[activities['area_id'] == area, 'contribution'].sum()

    # multiply activities by scenario multiplier
    def get_scenario(activity_id):
        multiplier = dev_scen.loc[dev_scen['activity'] == activity_id, scenario]
        if len(multiplier) == 0:
            return 1
        multiplier = multiplier.values[0]
        return multiplier
    act_to_press['contribution'] = act_to_press['contribution'] * act_to_press['activity'].apply(get_scenario)

    # normalize
    normalize_factor = {}
    for pressure_id in act_to_press['pressure'].unique():
        normalize_factor[pressure_id] = {}
        activities = act_to_press.loc[act_to_press['pressure'] == pressure_id, :]
        for area in activities['area_id'].unique():
            scenario_sum = activities.loc[activities['area_id'] == area, 'contribution'].sum()
            normalize_factor[pressure_id][area] = 1 + scenario_sum - actual_sum[pressure_id][area]

    def normalize(value, pressure_id, area_id):
        return value * normalize_factor[pressure_id][area_id]

    act_to_press['contribution'] = act_to_press.apply(lambda x: normalize(x['contribution'], x['pressure'], x['area_id']), axis=1)

    return act_to_press
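
Note that build_scenario, unlike the other build steps, returns only the updated activity contribution table, so the caller reassigns it; the scenario column name below is hypothetical:

data['activity_contributions'] = som_app.build_scenario(data, scenario='BAU2030')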

export_results_to_excel(res, input_data, export_path)

Exports simulation results as an Excel file.

Parameters:

    res (dict): SOM calculation results. Required.
    input_data (dict): SOM input data. Required.
    export_path (str): output path for exported results. Required.

Source code in src/som_app.py
def export_results_to_excel(res: dict[str, pd.DataFrame], input_data: dict[str, pd.DataFrame], export_path: str):
    """
    Exports simulation results as an Excel file.

    Arguments:
        res (dict): SOM calculation results.
        input_data (dict): SOM input data.
        export_path (str): output path for exported results.
    """
    with pd.ExcelWriter(export_path) as writer:
        new_res = set_id_columns(res, input_data)
        for key in new_res:
            if key != 'StatePressure':
                for r in ['Mean', 'Error']:
                    new_res[key][r].to_excel(writer, sheet_name=key+r, index=False)
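
Typical use (the output path is a placeholder):

som_app.export_results_to_excel(res, input_data, 'som_results.xlsx')
# writes sheets such as PressureMean / PressureError, TPLMean / TPLError, ...
# (the nested StatePressure results are skipped, as the loop above shows)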

set_id_columns(res, data)

Replaces id column values with the name of the corresponding measure/activity/pressure/state in the result dataframes.

Parameters:

    res (dict): SOM calculation results. Required.
    data (dict): SOM input data. Required.

Returns:

    res (dict): updated results.

Source code in src/som_app.py
def set_id_columns(res: dict[str, pd.DataFrame], data: dict[str, pd.DataFrame]) -> dict[str, pd.DataFrame]:
    """
    Replaces id column values with the name of the corresponding measure/activity/pressure/state in the result dataframes.

    Arguments:
        res (dict): SOM calculation results.
        data (dict): SOM input data.

    Returns:
        res (dict): updated results.
    """
    res = copy.deepcopy(res)
    relations = {
        'Pressure': 'pressure', 
        'StatePressure': 'pressure', 
        'TPL': 'state', 
        'TPLRed': 'state', 
        'Thresholds': 'state', 
    }
    def replace_ids(id, k):
        return data[k].loc[data[k]['ID'] == id, k].values[0]
    for key in relations:
        if key == 'StatePressure':
            for s in data['state']['ID']:
                for r in ['Mean', 'Error']:
                    res[key][s][r]['ID'] = res[key][s][r]['ID'].apply(lambda x: replace_ids(x, relations[key]))
                    res[key][s][r] = res[key][s][r].rename(columns={col: data['area'].loc[data['area']['ID'] == col, 'area'].values[0] for col in [c for c in res[key][s][r].columns if c != 'ID']})
        else:
            for r in ['Mean', 'Error']:
                res[key][r]['ID'] = res[key][r]['ID'].apply(lambda x: replace_ids(x, relations[key]))
                res[key][r] = res[key][r].rename(columns={col: data['area'].loc[data['area']['ID'] == col, 'area'].values[0] for col in [c for c in res[key][r].columns if c != 'ID']})
    relations = {
        'MeasureEffects': ['measure', 'activity', 'pressure', 'state'], 
        'ActivityContributions': ['activity', 'pressure', 'area_id'], 
        'PressureContributions': ['state', 'pressure', 'area_id']
    }
    conversions = {
        'activity': 'activity', 
        'pressure': 'pressure', 
        'state': 'state', 
        'area_id': 'area'
    }
    for key in relations:
        for r in ['Mean', 'Error']:
            for col in relations[key]:
                k = conversions[col] if col in conversions else col
                res[key][r][col] = res[key][r][col].apply(lambda id: data[k].loc[data[k]['ID'] == id, k].values[0] if id != 0 else '-')

    return res
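
Since set_id_columns works on a deep copy, the numeric-id results stay intact; a short sketch:

named = som_app.set_id_columns(res, input_data)
named['TPL']['Mean']  # 'ID' column now holds state names; area columns carry area names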