Skip to content

utilities

Small utility methods for convenience.

Timer

Simple timer class for determining execution time.

Source code in src/utilities.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
class Timer:
    """
    Simple timer class for determining execution time.

    The timer starts when the instance is created and can be restarted with
    reset(). All measurements are differences of time.perf_counter() readings.
    """

    # Result type of get_time(). Built once here instead of on every call, so
    # all results share a single class.
    PassedTime = namedtuple('PassedTime', 'hours minutes seconds')

    def __init__(self) -> None:
        # reference point that all elapsed-time calculations are measured from
        self.start = time.perf_counter()

    def time_passed(self) -> float:
        """Return time passed since start, in (fractional) seconds."""
        return time.perf_counter() - self.start

    def get_time(self) -> tuple:
        """
        Returns durations in hours, minutes, seconds as a named tuple.

        Hours and minutes are whole numbers (stored as floats, because they
        come from floor division of a float); seconds keep their fraction.
        """
        duration = self.time_passed()
        hours = duration // 3600
        minutes = (duration % 3600) // 60
        seconds = duration % 60
        return self.PassedTime(hours, minutes, seconds)

    def get_duration(self) -> str:
        """Returns a string of duration in hours, minutes, seconds."""
        t = self.get_time()
        # %d truncates the float components to whole numbers
        return '%d h %d min %d sec' % (t.hours, t.minutes, t.seconds)

    def get_hhmmss(self) -> str:
        """Returns a string of duration in bracketed hh:mm:ss format, e.g. [01:02:05]."""
        t = self.get_time()
        return '[' + ':'.join(f'{int(value):02d}' for value in [t.hours, t.minutes, t.seconds]) + ']'

    def reset(self) -> None:
        """Reset timer to zero."""
        self.start = time.perf_counter()

get_duration()

Returns a string of duration in hours, minutes, seconds.

Source code in src/utilities.py
30
31
32
33
def get_duration(self) -> str:
    """Format the elapsed time as '<hours> h <minutes> min <seconds> sec'.

    Each component comes from get_time() and is truncated to a whole number
    by the %d conversion.
    """
    elapsed = self.get_time()
    components = (elapsed.hours, elapsed.minutes, elapsed.seconds)
    return '%d h %d min %d sec' % components

get_hhmmss()

Returns a string of duration in hh:mm:ss format.

Source code in src/utilities.py
34
35
36
37
def get_hhmmss(self) -> str:
    """Format the elapsed time as a bracketed clock string such as [01:02:05].

    Each component from get_time() is truncated to an integer and
    zero-padded to at least two digits.
    """
    elapsed = self.get_time()
    fields = []
    for value in (elapsed.hours, elapsed.minutes, elapsed.seconds):
        fields.append(f'{int(value):02d}')
    return '[' + ':'.join(fields) + ']'

get_time()

Returns durations in hours, minutes, seconds as a named tuple.

Source code in src/utilities.py
22
23
24
25
26
27
28
29
def get_time(self) -> tuple:
    """Split the elapsed time into hours, minutes and seconds.

    Returns a 'PassedTime' named tuple with fields hours, minutes, seconds.
    Hours and minutes are floored; seconds is the remainder modulo 60 and
    keeps any fractional part of the value returned by time_passed().
    """
    elapsed = self.time_passed()
    # divmod yields the same floor quotient / remainder as the // and % operators
    whole_hours, within_hour = divmod(elapsed, 3600)
    PassedTime = namedtuple('PassedTime', 'hours minutes seconds')
    return PassedTime(hours=whole_hours, minutes=within_hour // 60, seconds=elapsed % 60)

reset()

Reset timer to zero.

Source code in src/utilities.py
38
39
40
def reset(self) -> None:
    """Restart the timer by moving its start reference to the current clock reading."""
    now = time.perf_counter()
    self.start = now

time_passed()

Return time passed since start, in seconds.

Source code in src/utilities.py
19
20
21
def time_passed(self) -> float:
    """Return time passed since start, in (fractional) seconds.

    The value is the difference between the current time.perf_counter()
    reading and self.start, so it is a float rather than an integer.
    """
    return time.perf_counter() - self.start

display_progress(completion, size=20, text='Progress: ')

Shows the current simulation progress as a percentage with a progress bar.

Parameters:

Name Type Description Default
completion float

fraction representing completion.

required
size int

width of the progress bar in characters.

20
text str

optional text to display before the progress bar.

'Progress: '
Source code in src/utilities.py
74
75
76
77
78
79
80
81
82
83
84
85
def display_progress(completion: float, size: int = 20, text: str = 'Progress: '):
    """
    Shows the current simulation progress as a percentage with a progress bar.

    The output is written to sys.stdout and ends with a carriage return
    instead of a newline, so a following call typically overwrites the same
    terminal line.

    Arguments:
        completion (float): fraction representing completion, nominally in [0, 1].
        size (int): width of the progress bar in characters.
        text (str): optional text to display before the progress bar.
    """
    # Clamp the number of filled cells so the bar keeps its width even when
    # completion falls outside [0, 1]; the printed percentage is left as given.
    filled = min(max(int(size * completion), 0), size)
    sys.stdout.write("%s[%s%s] %02d %%\r" % (text, "#" * filled, "." * (size - filled), completion * 100))
    sys.stdout.flush()

exception_traceback(e, file=None)

Format exception traceback and print it.

Parameters:

Name Type Description Default
e Exception

exception from which to print traceback.

required
file str

optional writable text stream (file-like object) to print to; defaults to sys.stdout when None.

None
Source code in src/utilities.py
43
44
45
46
47
48
49
50
51
52
def exception_traceback(e: Exception, file = None):
    """
    Render the full traceback of an exception and print it.

    Arguments:
        e (Exception): exception whose traceback (e.__traceback__) is printed.
        file: handed unchanged to print() as its file argument, so it has to
            be a writable text stream rather than a path; None makes print()
            fall back to sys.stdout.
    """
    lines = traceback.format_exception(type(e), e, e.__traceback__)
    report = ''.join(lines)
    print(report, file=file)

fail_with_message(m=None, e=None, file=None, do_not_exit=False)

Prints the given exception traceback along with given message, and exits.

Parameters:

Name Type Description Default
m str

optional message to print along with traceback.

None
e Exception

exception from which to print traceback.

None
file str

optional writable text stream (file-like object) to print to; defaults to sys.stdout when None.

None
do_not_exit bool

optional flag to not exit.

False
Source code in src/utilities.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def fail_with_message(m: str = None, e: Exception = None, file = None, do_not_exit: bool = False):
    """
    Prints the given exception traceback along with given message, and exits.

    Arguments:
        m (str): optional message to print along with traceback.
        e (Exception): exception from which to print traceback.
        file: optional writable text stream handed to print(); None means
            sys.stdout. A path string is not accepted by print().
        do_not_exit (bool): optional flag to not exit.

    Raises:
        SystemExit: with exit status 1, unless do_not_exit is true.
    """
    if e is not None:
        exception_traceback(e, file)
    if m is not None:
        print(m, file=file)
    print('Terminating.', file=file)
    if not do_not_exit:
        # Raise SystemExit directly: the exit() builtin is only injected by the
        # site module, and calling it without arguments reports success
        # (status 0) even though this is the failure path.
        raise SystemExit(1)

get_dist_from_picks(picks)

Takes an array of picks and returns the probability distribution for each percentage unit. Picks need to be fractions in [0, 1].

Parameters:

Name Type Description Default
picks ndarray

array of random samples.

required

Returns:

Name Type Description
dist ndarray

probability distribution.

Source code in src/utilities.py
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
def get_dist_from_picks(picks: np.ndarray) -> np.ndarray:
    """
    Takes an array of picks and returns the probability distribution for each percentage unit. Picks need to be fractions in [0, 1].

    Each pick is assigned to its nearest percentage unit (i.e. rounded to two
    decimals). Picks that land outside 0 - 100 %, as well as NaN picks, are
    ignored. If no pick is counted at all, the normalisation divides zero by
    zero and the result contains only NaN.

    Arguments:
        picks (ndarray): array of random samples.

    Returns:
        dist (ndarray): probability distribution with 101 elements, element i
            being the relative frequency of picks at i %.
    """
    # index of the nearest percentage unit for every pick
    units = np.rint(np.asarray(picks, dtype=float).ravel() * 100)
    # keep only units inside 0..100; NaN fails both comparisons and is dropped
    with np.errstate(invalid='ignore'):
        in_range = (units >= 0) & (units <= 100)
    # count occurrences per percentage unit in one vectorised pass
    dist = np.bincount(units[in_range].astype(int), minlength=101).astype(float)
    dist = dist / dist.sum()    # normalize frequencies to sum up to 1
    return dist

get_pick(dist)

Makes a random pick within [0, 1] weighted by the given discrete distribution.

Parameters:

Name Type Description Default
dist ndarray

probability distribution.

required

Returns:

Name Type Description
pick float

sample from the distribution.

Source code in src/utilities.py
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
def get_pick(dist: np.ndarray) -> float:
    """
    Makes a random pick within [0, 1] weighted by the given discrete distribution.

    Element i of dist is the probability of picking the i-th point of an
    evenly spaced grid from 0 to 1 that has dist.size points.

    Arguments:
        dist (ndarray): probability distribution; None (or a scalar NaN, which
            get_prob_dist returns when there are no answers) means "no distribution".

    Returns:
        pick (float): sample from the distribution, or NaN when there is no distribution.
    """
    if dist is None or (isinstance(dist, float) and np.isnan(dist)):
        return np.nan
    # linspace always yields exactly dist.size points ending at 1, whereas
    # arange with a fractional step can come out one element too long and
    # divides by zero for a single-element distribution
    grid = np.linspace(0, 1, dist.size)
    return np.random.choice(grid, p=dist)

get_prob_dist(expecteds, lower_boundaries, upper_boundaries, weights)

Returns an aggregated probability distribution from all the individual expert answers provided. Each value in the argument arrays corresponds to a PERT distribution characteristic (peak, high, low). Each individual distribution has a weight which impacts its contribution to the final aggregated distribution. All arguments should be 1D arrays with percentage as unit.

Parameters:

Name Type Description Default
expecteds ndarray

individual distribution peaks.

required
lower_boundaries ndarray

individual distribution lows.

required
upper_boundaries ndarray

individual distribution highs.

required
weights ndarray

individual distribution weights.

required

Returns:

Type Description
ndarray

numpy array

Source code in src/utilities.py
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
def get_prob_dist(expecteds: np.ndarray,
                  lower_boundaries: np.ndarray,
                  upper_boundaries: np.ndarray,
                  weights: np.ndarray) -> np.ndarray:
    '''
    Returns an aggregated probability distribution from all the individual expert answers provided.
    Each value in the argument arrays corresponds to a PERT distribution characteristic (peak, high, low).
    Each individual distribution has a weight which impacts its contribution to the final aggregated distribution.
    All arguments should be 1D arrays with percentage as unit.

    Arguments:
        expecteds (ndarray): individual distribution peaks.
        lower_boundaries (ndarray): individual distribution lows.
        upper_boundaries (ndarray): individual distribution highs.
        weights (ndarray): individual distribution weights.

    Returns:
        numpy array with the aggregated distribution as produced by
        get_dist_from_picks, or NaN if there is no usable answer (empty input,
        every answer contains a NaN, or no picks were drawn).
    '''
    # verify that all arrays have the same size
    assert expecteds.size == lower_boundaries.size == upper_boundaries.size == weights.size

    # TODO: in future, reject faulty data instead of silently repairing it:
    #   assert np.sum(lower_boundaries > upper_boundaries) == 0
    #   assert np.sum((expecteds < lower_boundaries) | (expecteds > upper_boundaries)) == 0
    # For now, sort every (low, peak, high) triple into ascending order.
    # Sorting along axis 1 keeps the (n, 3) shape even for n == 0, so empty
    # input reaches the NaN return below instead of failing on indexing.
    # NaN values sort to the end of their row.
    triples = np.column_stack((lower_boundaries, expecteds, upper_boundaries)).astype(float)
    triples = np.sort(triples, axis=1)
    lows, peaks, highs = triples[:, 0], triples[:, 1], triples[:, 2]

    # an answer is usable only if none of its three values is NaN
    usable = ~np.isnan(triples).any(axis=1)

    # create a PERT distribution for each usable expert answer,
    # draw a number of picks proportional to its weight,
    # and pool the picks together
    number_of_picks = 5000
    samples = [
        pert_dist(peak, low, high, weight * number_of_picks)
        for peak, low, high, weight in zip(peaks[usable], lows[usable], highs[usable], np.asarray(weights)[usable])
    ]

    # return nan if no distributions (= no expert answers) or no picks at all
    if not samples:
        return np.nan
    picks = np.concatenate(samples)
    if picks.size == 0:
        return np.nan

    # create final probability distribution; convert percentages to fractions first
    return get_dist_from_picks(picks / 100.0)

pert_dist(peak, low, high, size)

Returns a set of random picks from a PERT distribution.

Parameters:

Name Type Description Default
peak float

distribution peak.

required
low float

distribution lower tail.

required
high float

distribution higher tail.

required
size int

number of picks to return.

required

Returns:

Type Description
ndarray

numpy array

Source code in src/utilities.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
def pert_dist(peak: float, low: float, high: float, size: int,
              gamma: float = 4, rng: np.random.Generator = None) -> np.ndarray:
    '''
    Returns a set of random picks from a PERT distribution.

    The PERT distribution is a beta distribution rescaled to [low, high] with
    shape parameters derived from the peak; its mean is
    (low + gamma * peak + high) / (gamma + 2).

    Arguments:
        peak (float): distribution peak.
        low (float): distribution lower tail.
        high (float): distribution higher tail.
        size (int): number of picks to return (truncated to an integer).
        gamma (float): weight of the peak; higher puts more emphasis on the most
            likely value, lower makes extreme values more probable. 4 is the
            standard used in unmodified PERT distributions.
        rng (numpy.random.Generator): optional generator to draw from, e.g. a
            seeded one for reproducible picks. A fresh default_rng() is used when None.

    Returns:
        numpy array
    '''
    count = int(size)
    # degenerate distribution: all mass on a single value, nothing to sample
    if low == high and low == peak:
        return np.full(count, peak)
    if rng is None:
        rng = np.random.default_rng()
    spread = high - low
    # beta shape parameters of the PERT distribution on the unit interval
    alpha = 1 + gamma * (peak - low) / spread
    beta = 1 + gamma * (high - peak) / spread
    # rescale the unit-interval samples to [low, high]
    return low + rng.beta(alpha, beta, size=count) * spread

plot_dist(dist)

Plot the given distribution

Parameters:

Name Type Description Default
dist ndarray

probability distribution.

required
Source code in src/utilities.py
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
def plot_dist(dist: np.ndarray):
    """
    Plot the given distribution, overlaid with an empirical distribution
    resampled from it through get_pick, and show the figure.

    Arguments:
        dist (ndarray): probability distribution.
    """
    # One x position per element on an evenly spaced grid from 0 to 1, so the
    # last element is drawn at 1. linspace also guarantees that x and y have
    # the same length, which arange with a fractional step does not.
    x_vals = np.linspace(0, 1, dist.size)
    plt.plot(x_vals, dist)
    # verify that get_pick works: resample and overlay the resulting distribution
    picks = np.array([get_pick(dist) for _ in range(5000)])
    resampled = get_dist_from_picks(picks)
    plt.plot(np.linspace(0, 1, resampled.size), resampled)
    plt.show()

sanitize_string(s)

Makes a string valid for file and directory names.

Parameters:

Name Type Description Default
s str

string to sanitize.

required
Source code in src/utilities.py
262
263
264
265
266
267
268
269
270
271
272
273
def sanitize_string(s: str):
    """
    Turn a string into one that can be used as a file or directory name.

    Every occurrence of * " / \\ < > : | ? is replaced by an underscore, then
    leading and trailing whitespace is removed.

    Arguments:
        s (str): string to sanitize.

    Returns:
        the sanitized string.
    """
    substitute = '_'
    # translation table mapping every invalid character to the substitute
    mapping = {ord(ch): substitute for ch in ['*', '"', '/', '\\', '<', '>', ':', '|', '?']}
    return s.translate(mapping).strip()