Skip to content

Documentation for Utils Module

check_field_filled(factsheet_dict, factsheet_path, value, empty='')

Check if the field in the factsheet file is filled or not.

Parameters:

Name Type Description Default
factsheet_dict dict

The factsheet dict.

required
factsheet_path list

The factsheet field to check.

required
value float

The value to add in the field.

required
empty string

If the value could not be appended, the empty string is returned.

''

Returns:

Name Type Description
float

The value added in the factsheet, or empty if the value could not be appended.

Source code in nebula/addons/trustworthiness/utils.py
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
def check_field_filled(factsheet_dict, factsheet_path, value, empty=""):
    """
    Return the value that should be stored in a factsheet field.

    A field that already holds a truthy value is never overwritten: its current
    content is returned. Otherwise ``value`` is returned, except that a missing
    value (``None``, ``""`` or the string ``"nan"``) yields ``empty`` and a
    numeric NaN yields ``0``.

    Args:
        factsheet_dict (dict): The factsheet dict.
        factsheet_path (list): The factsheet field to check, as ``[section, field]`` keys.
        value (float): The value to add in the field.
        empty (string): Returned when ``value`` is missing.

    Returns:
        float: The existing field content, the value to add, or ``empty`` if the
        value could not be appended.

    Raises:
        KeyError: If the section or field is not present in ``factsheet_dict``.

    """
    section, field = factsheet_path[0], factsheet_path[1]
    current = factsheet_dict[section][field]
    if current:
        return current

    if value is None or value == "" or value == "nan":
        return empty

    # Strings and lists can never be NaN; hand them back untouched.
    if isinstance(value, (str, list)):
        return value

    try:
        is_nan = math.isnan(value)
    except (TypeError, OverflowError):
        # Not a real number (e.g. dict, tuple) or an int too large for a float:
        # it cannot be NaN, so keep the value instead of crashing.
        is_nan = False

    return 0 if is_nan else value

count_class_samples(scenario_name, dataloaders_files, class_counter=None)

Counts the number of samples by class.

Parameters:

Name Type Description Default
scenario_name string

Name of the scenario.

required
dataloaders_files list

Files that contain the dataloaders.

required
Source code in nebula/addons/trustworthiness/utils.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
def count_class_samples(scenario_name, dataloaders_files, class_counter: Counter = None):
    """
    Counts the number of samples by class and writes the result to ``count_class.json``.

    The counts are keyed by the hashid-encoded class label and written to
    ``<logs dir>/<scenario_name>/trustworthiness/count_class.json``, where the
    logs directory is the ``NEBULA_LOGS_DIR`` environment variable or, when it
    is not set, ``nebula/app/logs``. The output directory is created if needed.

    Args:
        scenario_name (string): Name of the scenario.
        dataloaders_files (list): Files that contain the pickled dataloaders. Only
            read when ``class_counter`` is empty or ``None``.
        class_counter (Counter): Optional precomputed mapping of class id to sample
            count. When non-empty it is used instead of reading the dataloaders.

    """
    if class_counter:
        result = {hashids.encode(int(class_id)): count for class_id, count in class_counter.items()}
    else:
        result = Counter()
        for file in dataloaders_files:
            # NOTE(security): pickle.load can execute arbitrary code; these files must be trusted.
            with open(file, "rb") as f:
                dataloader = pickle.load(f)

            # Count each dataloader as soon as it is loaded instead of holding all of them in memory.
            for batch, labels in dataloader:
                for _, label in zip(batch, labels):
                    result[hashids.encode(label.item())] += 1

    # Explicit fallback instead of relying on os.path.join(None, ...) raising inside a bare except.
    logs_dir = os.environ.get("NEBULA_LOGS_DIR")
    if logs_dir is None:
        logs_dir = os.path.join("nebula", "app", "logs")

    out_dir = os.path.join(logs_dir, scenario_name, "trustworthiness")
    os.makedirs(out_dir, exist_ok=True)
    name_file = os.path.join(out_dir, "count_class.json")

    with open(name_file, "w") as f:
        json.dump(result, f)

get_entropy(client_id, scenario_name, dataloader)

Get the entropy of each client in the scenario.

Parameters:

Name Type Description Default
client_id int

The client id.

required
scenario_name string

Name of the scenario.

required
dataloader iterable

The client's dataloader, iterated as (batch, labels) pairs.

required
Source code in nebula/addons/trustworthiness/utils.py
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
def get_entropy(client_id, scenario_name, dataloader):
    """
    Compute the label entropy of one client and store it in ``entropy.json``.

    The class distribution of the labels yielded by ``dataloader`` is turned into
    a base-2 entropy and saved under the hashid-encoded client id in
    ``<logs dir>/<scenario_name>/trustworthiness/entropy.json``. Entries already
    present in that file are preserved. The logs directory is the
    ``NEBULA_LOGS_DIR`` environment variable or, when it is not set,
    ``nebula/app/logs``.

    Args:
        client_id (int): The client id.
        scenario_name (string): Name of the scenario.
        dataloader (iterable): Yields ``(batch, labels)`` pairs; each label must
            support ``.item()``.

    """
    # Fall back to the default logs directory instead of crashing on os.path.join(None, ...).
    logs_dir = os.environ.get("NEBULA_LOGS_DIR")
    if logs_dir is None:
        logs_dir = os.path.join("nebula", "app", "logs")

    out_dir = os.path.join(logs_dir, scenario_name, "trustworthiness")
    name_file = os.path.join(out_dir, "entropy.json")

    client_entropy = {}
    if os.path.exists(name_file):
        logging.info("entropy fiel already exists.. loading.")
        with open(name_file, "r") as f:
            client_entropy = json.load(f)

    client_id_hash = hashids.encode(client_id)

    result = {}
    for batch, labels in dataloader:
        for _, label in zip(batch, labels):
            key = hashids.encode(label.item())
            result[key] = result.get(key, 0) + 1

    # Normalise by the number of counted samples. len(dataloader) is the number of
    # batches, which is not a valid denominator and is undefined for some iterables.
    total = sum(result.values())
    entropy_value = entropy([count / total for count in result.values()], base=2)
    client_entropy[client_id_hash] = entropy_value

    os.makedirs(out_dir, exist_ok=True)
    with open(name_file, "w") as f:
        json.dump(client_entropy, f)

get_input_value(input_docs, inputs, operation)

Gets the input value from input document and apply the metric operation on the value.

Parameters:

Name Type Description Default
input_docs map

The input document map.

required
inputs list

All the inputs.

required
operation string

The metric operation.

required

Returns:

Name Type Description
float

The metric value

Source code in nebula/addons/trustworthiness/utils.py
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
def get_input_value(input_docs, inputs, operation):
    """
    Gets the input values from the input documents and applies the metric operation to them.

    Each entry of ``inputs`` names a ``source`` document and a ``field_path``
    inside it. The resolved values are passed, in order, as positional arguments
    to the function called ``operation`` in the ``calculation`` module. Inputs
    whose source document is missing are skipped with a warning.

    Args:
        input_docs (map): The input document map, keyed by source name.
        inputs (list): All the inputs, each a mapping with ``source`` and ``field_path``.
        operation (string): The metric operation (name of a function in ``calculation``).

    Returns:
        float: The metric value, or ``None`` if the operation does not exist or
        cannot be applied to the collected arguments.

    """
    args = []
    for spec in inputs:
        source = spec.get("source", "")
        field = spec.get("field_path", "")
        input_doc = input_docs.get(source, None)
        if input_doc is None:
            logger.warning(f"{source} is null")
            continue
        args.append(get_value_from_path(input_doc, field))

    invalid_msg = f"{operation} is not valid"

    # An unknown operation name raises AttributeError; report it like any other
    # invalid operation instead of letting it escape.
    try:
        operation_fn = getattr(calculation, operation)
    except (AttributeError, TypeError):
        logger.warning(invalid_msg)
        return None

    try:
        return operation_fn(*args)
    except TypeError:
        logger.warning(invalid_msg)
        return None

get_value_from_path(input_doc, path)

Gets the input value from input document by path.

Parameters:

Name Type Description Default
input_doc map

The input document map.

required
path string

The field name of the input value of interest.

required

Returns:

Name Type Description
float

The input value from the input document

Source code in nebula/addons/trustworthiness/utils.py
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
def get_value_from_path(input_doc, path):
    """
    Look up a value in a nested document by a ``/``-separated path.

    The path is walked key by key. The first lookup that does not yield a dict
    ends the walk and its result is returned, which is ``None`` for a missing key.

    Args:
        input_doc (map): The input document map.
        path (string): Slash-separated field names leading to the value of interest.

    Returns:
        float: The value found in the input document, or ``None`` when a key is
        missing or the whole path resolves to a dict.

    """
    node = input_doc
    keys = path.split("/")
    for key in keys:
        child = node.get(key)
        if not isinstance(child, dict):
            # Leaf value (or missing key): stop descending here.
            return child
        node = child
    # Every key led to another dict, so there is no leaf value to return.
    return None

read_csv(filename)

Read a CSV file.

Parameters:

Name Type Description Default
filename string

Name of the file.

required

Returns:

Name Type Description
object

The contents of the CSV file, or None if the file does not exist.

Source code in nebula/addons/trustworthiness/utils.py
155
156
157
158
159
160
161
162
163
164
165
166
167
def read_csv(filename):
    """
    Load a CSV file with pandas if it exists.

    Args:
        filename (string): Path of the CSV file.

    Returns:
        object: The result of ``pd.read_csv(filename)``, or ``None`` when the
        file does not exist.

    """
    if not exists(filename):
        return None
    return pd.read_csv(filename)

write_results_json(out_file, dict)

Writes the result to JSON.

Parameters:

Name Type Description Default
out_file string

The output file.

required
dict dict

The object to be written into JSON.

required

Returns:

Name Type Description
None

Nothing is returned; the object is appended to the output file.

Source code in nebula/addons/trustworthiness/utils.py
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
def write_results_json(out_file, dict):
    """
    Append an object to a file as JSON indented by four spaces.

    Args:
        out_file (string): Path of the output file.
        dict (dict): The object to serialise.

    Returns:
        None

    """
    # NOTE(review): the file is opened in append mode, so repeated calls concatenate
    # JSON documents in the same file - confirm this is intended.
    with open(out_file, "a") as handle:
        json.dump(dict, handle, indent=4)