Source code for mindmeld.active_learning.classifiers

# -*- coding: utf-8 -*-
#
# Copyright (c) 2015 Cisco Systems, Inc. and others.  All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This module contains classifiers for the Active Learning Pipeline.
"""

import os
import logging
from abc import ABC, abstractmethod
from collections import defaultdict
from typing import List, Dict
from sklearn.model_selection import StratifiedKFold
import numpy as np

from .data_loading import LabelMap, DataBucket
from .heuristics import (
    Heuristic,
    KLDivergenceSampling,
    DisagreementSampling,
    EnsembleSampling,
)

from ..components.classifier import Classifier
from ..components.nlp import NaturalLanguageProcessor
from ..constants import (
    TuneLevel,
    TuningType,
    ACTIVE_LEARNING_RANDOM_SEED,
    AL_DEFAULT_AGGREGATE_STATISTIC,
    AL_DEFAULT_CLASS_LEVEL_STATISTIC,
    AL_SUPPORTED_AGGREGATE_STATISTICS,
    AL_SUPPORTED_CLASS_LEVEL_STATISTICS,
)
from ..resource_loader import ProcessedQueryList

logger = logging.getLogger(__name__)

MULTI_MODEL_HEURISTICS = (KLDivergenceSampling, DisagreementSampling, EnsembleSampling)


class ALClassifier(ABC):
    """Abstract class for Active Learning classifiers."""

    def __init__(self, app_path: str, tuning_level: list):
        """
        Args:
            app_path (str): Path to MindMeld application
            tuning_level (list): The hierarchy levels to tune ("domain", "intent" or "entity")
        """
        self.app_path = app_path
        self.tuning_level = tuning_level
        self.intent2idx, self.idx2intent, self.domain_indices = self._get_mappings()

    def _get_mappings(self):
        """Get mappings of intents to indices and the indices that map to each domain.

        Returns:
            intent2idx (Dict): Maps intents to indices
            idx2intent (Dict): Maps indices to intents
            domain_indices (Dict): Maps domains to a tuple containing the start and end
                indices of intents within the given domain.
        """
        idx = 0
        intent2idx, idx2intent, domain_indices = {}, {}, {}
        for domain in sorted(os.listdir(os.path.join(self.app_path, "domains"))):
            start_idx = idx
            for intent in sorted(
                os.listdir(os.path.join(self.app_path, "domains", domain))
            ):
                intent2idx[f"{domain}.{intent}"] = idx
                idx2intent[idx] = f"{domain}.{intent}"
                idx += 1
            end_idx = idx - 1
            domain_indices[domain] = (start_idx, end_idx)
        return intent2idx, idx2intent, domain_indices

    @abstractmethod
    def train(self):
        raise NotImplementedError(
            "Subclasses must implement their classifier's fit method."
        )
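A minimal sketch of the index mapping that _get_mappings builds, assuming a hypothetical
app whose domains/ directory contains a "banking" domain (intents balance, transfer) and
a "smalltalk" domain (intent greet); the directory names here are illustrative only:

# Hypothetical stand-in for the on-disk "domains/<domain>/<intent>" layout.
hypothetical_layout = {"banking": ["balance", "transfer"], "smalltalk": ["greet"]}
idx, intent2idx, domain_indices = 0, {}, {}
for domain in sorted(hypothetical_layout):
    start_idx = idx
    for intent in sorted(hypothetical_layout[domain]):
        intent2idx[f"{domain}.{intent}"] = idx
        idx += 1
    domain_indices[domain] = (start_idx, idx - 1)
assert intent2idx == {"banking.balance": 0, "banking.transfer": 1, "smalltalk.greet": 2}
assert domain_indices == {"banking": (0, 1), "smalltalk": (2, 2)}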
class MindMeldALClassifier(ALClassifier):
    """Active Learning classifier that uses MindMeld classifiers internally.
    Handles the training of MindMeld components (Domain or Intent classifiers)
    and collecting performance statistics (eval_stats)."""

    def __init__(
        self,
        app_path: str,
        tuning_level: list,
        n_classifiers: int,
        aggregate_statistic: str = None,
        class_level_statistic: str = None,
    ):
        """
        Args:
            app_path (str): Path to MindMeld application
            tuning_level (list): The hierarchy levels to tune ("domain", "intent" or "entity")
            n_classifiers (int): Number of classifiers to be used by multi-model strategies.
            aggregate_statistic (str): Aggregate statistic to record.
            class_level_statistic (str): Class-level statistic to record.
        """
        super().__init__(app_path=app_path, tuning_level=tuning_level)
        self.nlp = NaturalLanguageProcessor(self.app_path)
        self.n_classifiers = n_classifiers
        self.aggregate_statistic = MindMeldALClassifier._validate_aggregate_statistic(
            aggregate_statistic
        )
        self.class_level_statistic = (
            MindMeldALClassifier._validate_class_level_statistic(class_level_statistic)
        )

    @staticmethod
    def _validate_aggregate_statistic(aggregate_statistic):
        """Method to validate the aggregate statistic. If an aggregate statistic is not
        provided, the default is used. (Options: "accuracy", "f1_weighted", "f1_macro",
        "f1_micro".)

        Args:
            aggregate_statistic (str): Aggregate statistic to record.
        Returns:
            aggregate_statistic (str): Aggregate statistic to record.
        Raises:
            ValueError: If an invalid value is provided.
        """
        if not aggregate_statistic:
            logger.info(
                "Aggregate statistic not defined, using default: %r.",
                AL_DEFAULT_AGGREGATE_STATISTIC,
            )
            return AL_DEFAULT_AGGREGATE_STATISTIC
        if aggregate_statistic not in AL_SUPPORTED_AGGREGATE_STATISTICS:
            raise ValueError(
                "Not a valid aggregate statistic: {!r}.".format(aggregate_statistic)
            )
        return aggregate_statistic

    @staticmethod
    def _validate_class_level_statistic(class_level_statistic):
        """Method to validate the class-level statistic. If a class-level statistic is
        not provided, the default is used. (Options: "f_beta", "precision", "recall".)

        Args:
            class_level_statistic (str): Class-level statistic to record.
        Returns:
            class_level_statistic (str): Class-level statistic to record.
        Raises:
            ValueError: If an invalid value is provided.
        """
        if not class_level_statistic:
            logger.info(
                "Class-level statistic not defined, using default: %r.",
                AL_DEFAULT_CLASS_LEVEL_STATISTIC,
            )
            return AL_DEFAULT_CLASS_LEVEL_STATISTIC
        if class_level_statistic not in AL_SUPPORTED_CLASS_LEVEL_STATISTICS:
            raise ValueError(
                "Not a valid class-level statistic: {!r}.".format(class_level_statistic)
            )
        return class_level_statistic

    @staticmethod
    def _get_tagger_probs(
        classifier: Classifier,
        queries: ProcessedQueryList,
        entity_tag_to_id: Dict,
    ):
        """Get the probability distribution for a query across entities. For each token
        within a query, this function obtains the probability distribution over entity
        tags as predicted by the entity recognition model. The output dimension is:
        [# queries] * [# tokens] * [# tags].

        Args:
            classifier (MindMeld Classifier): Domain or Intent Classifier
            queries (ProcessedQueryList): List of MindMeld queries
            entity_tag_to_id (Dict): Dictionary mapping entity tags to vector index
                positions.
        Returns:
            queries_prob_vectors (List[List[List]]): Probability distribution vectors
                for given queries.
        """
        queries_prob_vectors = []
        if not queries:
            return queries_prob_vectors
        classifier_eval = classifier.evaluate(queries=queries, fetch_distribution=True)
        domain = classifier.domain
        intent = classifier.intent
        # The default is set to 1. If there are no entities, this token/query will not
        # get preference; setting it to 0 would cause active learning to select these
        # tokens/queries first.
        default_prob = 1.0
        default_tag = "O|"
        default_key = f"{domain}.{intent}.{default_tag}"
        default_idx = entity_tag_to_id[default_key]
        if not classifier_eval:
            # If no classifier is fit, then the evaluation object cannot be created.
            # This case is the default.
            for _ in range(len(queries)):
                query_prob_vector_2d = np.zeros((1, len(entity_tag_to_id)))
                query_prob_vector_2d[0][default_idx] = default_prob
                queries_prob_vectors.append(query_prob_vector_2d)
            return queries_prob_vectors
        # Else, there is a classifier eval object
        for query in classifier_eval.results:
            if not (query.predicted and query.probas):
                query_prob_vector_2d = np.zeros((1, len(entity_tag_to_id)))
                query_prob_vector_2d[0][default_idx] = default_prob
            else:
                # Create and populate a 2D vector (# tokens * # tags)
                query_prob_vector_2d = np.zeros(
                    (len(query.probas), len(entity_tag_to_id))
                )
                for token_idx, tags_probas_pair in enumerate(query.probas):
                    tags, probas = tags_probas_pair
                    for i, tag in enumerate(tags):
                        key = f"{domain}.{intent}.{tag}"
                        tag_index = entity_tag_to_id.get(key, default_idx)
                        # To-do: check default idx to default value map, whether needed.
                        query_prob_vector_2d[token_idx][tag_index] = probas[i]
            queries_prob_vectors.append(query_prob_vector_2d)
        return queries_prob_vectors

    @staticmethod
    def _get_classifier_probs(
        classifier: Classifier,
        queries: ProcessedQueryList,
        nlp_component_to_id: Dict,
    ):
        """Get the probability distribution for a query across domains or intents.

        Args:
            classifier (MindMeld Classifier): Domain or Intent Classifier
            queries (ProcessedQueryList): List of MindMeld queries
            nlp_component_to_id (Dict): Dictionary mapping domain or intent names to
                vector index positions.
        Returns:
            queries_prob_vectors (List[List]): Probability distribution vectors for
                given queries.
        """
        queries_prob_vectors = []
        if queries:
            classifier_eval = classifier.evaluate(queries=queries)
            for result in classifier_eval.results:
                query_prob_vector = np.zeros(len(nlp_component_to_id))
                for nlp_component, proba in result.probas.items():
                    query_prob_vector[nlp_component_to_id[nlp_component]] = proba
                queries_prob_vectors.append(query_prob_vector)
            assert len(queries_prob_vectors) == len(queries)
        return queries_prob_vectors

    @staticmethod
    def _get_probs(
        classifier: Classifier,
        queries: ProcessedQueryList,
        nlp_component_to_id: Dict,
        nlp_component_type=None,
    ):
        """Get the probability distribution for a query across domains, intents or
        entities.

        Args:
            classifier (MindMeld Classifier): Domain or Intent Classifier
            queries (ProcessedQueryList): List of MindMeld queries
            nlp_component_to_id (Dict): Dictionary mapping domain or intent names to
                vector index positions.
            nlp_component_type (str): Domain/Intent/Entity
        Returns:
            queries_prob_vectors (List[List]): Probability distribution vectors for
                given queries.
        """
        # If the type is entity, get recognizer probabilities
        if nlp_component_type == TuneLevel.ENTITY.value:
            return MindMeldALClassifier._get_tagger_probs(
                classifier=classifier,
                queries=queries,
                entity_tag_to_id=nlp_component_to_id,
            )
        # Else, obtain classifier probabilities
        return MindMeldALClassifier._get_classifier_probs(
            classifier=classifier,
            queries=queries,
            nlp_component_to_id=nlp_component_to_id,
        )

    def _pad_intent_probs(
        self, ic_queries_prob_vectors: List[List[float]], intents: List
    ):
        """Pads the intent probability array with zeroes for out-of-domain intents.

        Args:
            ic_queries_prob_vectors (List[List[float]]): 2D array containing the
                probability distribution of each query across the intents in the
                query's domain.
            intents (List): List of intents in the order corresponding to the intent
                probabilities for the queries. Intents are in the form "domain.intent".
        Returns:
            padded_ic_queries_prob_vectors (List[List[float]]): 2D array containing the
                probability distribution of each query across all intents (including
                out-of-domain intents).
        """
        padded_ic_queries_prob_vectors = []
        for unordered_ic_query_prob_vector in ic_queries_prob_vectors:
            ordered_ic_query_prob_vector = np.zeros(len(self.intent2idx))
            for i, intent in enumerate(intents):
                ordered_ic_query_prob_vector[
                    self.intent2idx[intent]
                ] = unordered_ic_query_prob_vector[i]
            padded_ic_queries_prob_vectors.append(ordered_ic_query_prob_vector)
        return padded_ic_queries_prob_vectors
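To make the padding concrete, here is a small standalone sketch of the
_pad_intent_probs reordering, reusing the hypothetical three-intent mapping from the
earlier example. A domain's intent classifier only scores in-domain intents, so its
probabilities are scattered into a zero-padded vector over all intents:

import numpy as np

# Hypothetical global index mapping (see the sketch after ALClassifier above).
intent2idx = {"banking.balance": 0, "banking.transfer": 1, "smalltalk.greet": 2}

# A smalltalk-domain intent classifier returns one probability per in-domain intent.
domain_probs = [[1.0]]          # one query, one in-domain intent
intents = ["smalltalk.greet"]   # order matches the columns of domain_probs

padded = []
for row in domain_probs:
    vec = np.zeros(len(intent2idx))
    for i, intent in enumerate(intents):
        vec[intent2idx[intent]] = row[i]
    padded.append(vec)
assert padded[0].tolist() == [0.0, 0.0, 1.0]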
    def train(
        self,
        data_bucket: DataBucket,
        heuristic: Heuristic,
        tuning_type: TuningType = TuningType.CLASSIFIER,
    ):
        """Main training function.

        Args:
            data_bucket (DataBucket): DataBucket for current iteration
            heuristic (Heuristic): Current Heuristic.
            tuning_type (TuningType): Component to be tuned ("classifier" or "tagger")
        Returns:
            eval_stats (defaultdict): Evaluation metrics to be included in accuracies.json
            confidences_2d (List[List]): 2D array with probability vectors for unsampled
                queries (a 3D output for tagger tuning).
            confidences_3d (List[List[List]]): 3D array with probability vectors for
                unsampled queries from multiple classifiers
            domain_indices (Dict): Maps domains to a tuple containing the start and end
                indices of intents within the given domain.
        """
        self.tuning_type = tuning_type
        eval_stats = defaultdict(dict)
        eval_stats["num_sampled"] = len(data_bucket.sampled_queries)
        confidences_2d, eval_stats = self.train_single(data_bucket, eval_stats)
        return_confidences_3d = isinstance(heuristic, MULTI_MODEL_HEURISTICS)
        confidences_3d = (
            self.train_multi(data_bucket) if return_confidences_3d else None
        )
        domain_indices = (
            self.domain_indices
            if isinstance(heuristic, KLDivergenceSampling)
            else None
        )
        return (
            eval_stats,
            confidences_2d,
            confidences_3d,
            domain_indices,
        )
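A minimal usage sketch for train, assuming a DataBucket has already been prepared by
the surrounding active learning pipeline; the "my_app" path and the
LeastConfidenceSampling import below are illustrative assumptions, not prescribed by
this module:

# Sketch only: data_bucket is assumed to be built elsewhere in the pipeline.
from mindmeld.active_learning.heuristics import LeastConfidenceSampling

classifier = MindMeldALClassifier(
    app_path="my_app",       # illustrative application path
    tuning_level=["intent"],
    n_classifiers=3,
)
eval_stats, confidences_2d, confidences_3d, domain_indices = classifier.train(
    data_bucket=data_bucket,
    heuristic=LeastConfidenceSampling(),
)
# confidences_3d and domain_indices come back as None here, since
# LeastConfidenceSampling is a single-model heuristic and not KLDivergenceSampling.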
    def train_single(
        self,
        data_bucket: DataBucket,
        eval_stats: defaultdict = None,
    ):
        """Trains a single model to get a 2D probability array for single-model
        selection strategies.

        Args:
            data_bucket (DataBucket): DataBucket for current iteration
            eval_stats (defaultdict): Evaluation metrics to be included in accuracies.json
        Returns:
            confidences_2d (List): 2D array with probability vectors for unsampled
                queries (a 3D output for tagger tuning).
        """
        return self._train_single(
            sampled_queries=data_bucket.sampled_queries,
            unsampled_queries=data_bucket.unsampled_queries,
            test_queries=data_bucket.test_queries,
            label_map=data_bucket.label_map,
            eval_stats=eval_stats,
        )
    def _train_single(
        self,
        sampled_queries: ProcessedQueryList,
        unsampled_queries: ProcessedQueryList,
        test_queries: ProcessedQueryList,
        label_map: LabelMap,
        eval_stats: Dict = None,
    ):
        """Helper function to train a single model and obtain a 2D probability array.

        Args:
            sampled_queries (ProcessedQueryList): Current set of sampled queries in DataBucket.
            unsampled_queries (ProcessedQueryList): Current set of unsampled queries in DataBucket.
            test_queries (ProcessedQueryList): Set of test queries in DataBucket.
            label_map (LabelMap): Class that stores index mappings for a MindMeld app.
                (E.g. domain2id, domain_to_intent2id)
            eval_stats (Dict): Evaluation metrics to be included in accuracies.json
        Returns:
            confidences_2d (List): 2D array with probability vectors for unsampled
                queries (a 3D output for tagger tuning).
        """
        if self.tuning_type == TuningType.CLASSIFIER:
            # Domain Level
            dc_queries_prob_vectors, dc_eval_test = self.domain_classifier_fit_eval(
                sampled_queries=sampled_queries,
                unsampled_queries=unsampled_queries,
                test_queries=test_queries,
                domain2id=label_map.domain2id,
            )
            if eval_stats:
                self._update_eval_stats_domain_level(eval_stats, dc_eval_test)
            confidences_2d = dc_queries_prob_vectors
            # Intent Level
            if TuneLevel.INTENT.value in self.tuning_level:
                (
                    ic_queries_prob_vectors,
                    ic_eval_test_dict,
                ) = self.intent_classifiers_fit_eval(
                    sampled_queries=sampled_queries,
                    unsampled_queries=unsampled_queries,
                    test_queries=test_queries,
                    domain_list=list(label_map.domain2id),
                    domain_to_intent2id=label_map.domain_to_intent2id,
                )
                if eval_stats:
                    self._update_eval_stats_intent_level(eval_stats, ic_eval_test_dict)
                confidences_2d = ic_queries_prob_vectors
        else:
            # Entity Level
            if TuneLevel.ENTITY.value in self.tuning_level:
                (
                    er_queries_prob_vectors,
                    er_eval_test_dict,
                ) = self.entity_recognizers_fit_eval(
                    sampled_queries=sampled_queries,
                    unsampled_queries=unsampled_queries,
                    test_queries=test_queries,
                    domain_to_intents=label_map.domain_to_intents,
                    entity2id=label_map.entity2id,
                )
                if eval_stats:
                    self._update_eval_stats_entity_level(eval_stats, er_eval_test_dict)
                confidences_2d = er_queries_prob_vectors
        return confidences_2d, eval_stats
    def train_multi(self, data_bucket: DataBucket):
        """Trains multiple models to get a 3D probability array for multi-model
        selection strategies.

        Args:
            data_bucket (DataBucket): DataBucket for current iteration
        Returns:
            confidences_3d (List[List[List]]): 3D array with probability vectors for
                unsampled queries from multiple classifiers
        """
        return self._train_multi(
            sampled_queries=data_bucket.sampled_queries,
            unsampled_queries=data_bucket.unsampled_queries,
            test_queries=data_bucket.test_queries,
            label_map=data_bucket.label_map,
        )
    def _train_multi(
        self,
        sampled_queries: ProcessedQueryList,
        unsampled_queries: ProcessedQueryList,
        test_queries: ProcessedQueryList,
        label_map: LabelMap,
    ):
        """Helper function to train multiple models and obtain a 3D probability array.

        Args:
            sampled_queries (ProcessedQueryList): Current set of sampled queries in DataBucket.
            unsampled_queries (ProcessedQueryList): Current set of unsampled queries in DataBucket.
            test_queries (ProcessedQueryList): Set of test queries in DataBucket.
            label_map (LabelMap): Class that stores index mappings for a MindMeld app.
                (E.g. domain2id, domain_to_intent2id)
        Returns:
            confidences_3d (List[List[List]]): 3D array with probability vectors for
                unsampled queries from multiple classifiers
        """
        sampled_queries_ids = sampled_queries.elements
        skf = StratifiedKFold(
            n_splits=self.n_classifiers,
            shuffle=True,
            random_state=ACTIVE_LEARNING_RANDOM_SEED,
        )
        y = [
            f"{domain}.{intent}"
            for domain, intent in zip(
                sampled_queries.domains(), sampled_queries.intents()
            )
        ]
        fold_sampled_queries_ids = [
            [sampled_queries_ids[i] for i in fold]
            for _, fold in skf.split(sampled_queries_ids, y)
        ]
        fold_sampled_queries_lists = [
            ProcessedQueryList(sampled_queries.cache, fold)
            for fold in fold_sampled_queries_ids
        ]
        confidences_3d = []
        for fold_sample_queries in fold_sampled_queries_lists:
            confidences_2d, _ = self._train_single(
                fold_sample_queries,
                unsampled_queries,
                test_queries,
                label_map,
            )
            confidences_3d.append(confidences_2d)
        return confidences_3d
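To illustrate the fold construction in _train_multi: StratifiedKFold partitions the
sampled query ids into n_classifiers folds that each preserve the overall
"domain.intent" label distribution, and one model is trained per fold. A toy
standalone sketch with made-up ids and labels:

from sklearn.model_selection import StratifiedKFold

# Toy stand-ins for sampled query ids and their "domain.intent" labels.
ids = list(range(8))
labels = ["banking.balance"] * 4 + ["smalltalk.greet"] * 4

skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)
folds = [[ids[i] for i in fold] for _, fold in skf.split(ids, labels)]

# Each fold holds half of the queries with both labels represented, mirroring
# how _train_multi builds one ProcessedQueryList per fold.
assert len(folds) == 2 and all(len(fold) == 4 for fold in folds)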
    def domain_classifier_fit_eval(
        self,
        sampled_queries: ProcessedQueryList,
        unsampled_queries: ProcessedQueryList,
        test_queries: ProcessedQueryList,
        domain2id: Dict,
    ):
        """Fit and evaluate the domain classifier.

        Args:
            sampled_queries (ProcessedQueryList): List of Sampled Queries
            unsampled_queries (ProcessedQueryList): List of Unsampled Queries
            test_queries (ProcessedQueryList): List of Test Queries
            domain2id (Dict): Dictionary mapping domains to IDs
        Returns:
            dc_queries_prob_vectors (List[List]): List of probability distributions
                for unsampled queries.
            dc_eval_test (mindmeld.models.model.StandardModelEvaluation): MindMeld
                evaluation object for the domain classifier.
        """
        # Check for the domain classifier edge case
        if len(domain2id) == 1:
            raise ValueError(
                "Only one domain present, use intent level tuning instead.",
            )
        dc = self.nlp.domain_classifier
        dc.fit(queries=sampled_queries)
        dc_eval_test = dc.evaluate(queries=test_queries)
        dc_queries_prob_vectors = MindMeldALClassifier._get_probs(
            dc, unsampled_queries, domain2id
        )
        return dc_queries_prob_vectors, dc_eval_test
    def _update_eval_stats_domain_level(self, eval_stats: Dict, dc_eval_test):
        """Update the eval_stats dictionary with evaluation metrics from the domain
        classifier.

        Args:
            eval_stats (Dict): Evaluation metrics to be included in accuracies.json
            dc_eval_test (mindmeld.models.model.StandardModelEvaluation): MindMeld
                evaluation object for the domain classifier.
        """
        eval_stats["accuracies"]["overall"] = dc_eval_test.get_stats()["stats_overall"][
            self.aggregate_statistic
        ]
        logger.info(
            "Overall Domain-level Accuracy: %s", eval_stats["accuracies"]["overall"]
        )
    def intent_classifiers_fit_eval(
        self,
        sampled_queries: ProcessedQueryList,
        unsampled_queries: ProcessedQueryList,
        test_queries: ProcessedQueryList,
        domain_list: List,
        domain_to_intent2id: Dict,
    ):
        """Fit and evaluate the intent classifiers.

        Args:
            sampled_queries (ProcessedQueryList): List of Sampled Queries.
            unsampled_queries (ProcessedQueryList): List of Unsampled Queries.
            test_queries (ProcessedQueryList): List of Test Queries.
            domain_list (List[str]): List of domains used by the application.
            domain_to_intent2id (Dict): Dictionary mapping intents to IDs.
        Returns:
            ic_queries_prob_vectors (List[List]): List of probability distributions
                for unsampled queries.
            ic_eval_test_dict (Dict): Dictionary mapping a domain (str) to the
                associated ic_eval_test object.
        """
        ic_eval_test_dict = {}
        unsampled_idx_preds_pairs = []
        for domain in domain_list:
            # Filter Queries
            _, filtered_sampled_queries = DataBucket.filter_queries_by_nlp_component(
                query_list=sampled_queries,
                component_type="domain",
                component_name=domain,
            )
            (
                filtered_unsampled_queries_indices,
                filtered_unsampled_queries,
            ) = DataBucket.filter_queries_by_nlp_component(
                query_list=unsampled_queries,
                component_type="domain",
                component_name=domain,
            )
            _, filtered_test_queries = DataBucket.filter_queries_by_nlp_component(
                query_list=test_queries, component_type="domain", component_name=domain
            )
            # Train
            ic = self.nlp.domains[domain].intent_classifier
            ic.fit(queries=filtered_sampled_queries)
            # Evaluate Test Queries
            ic_eval_test = ic.evaluate(queries=filtered_test_queries)
            if not ic_eval_test:
                # Check for intent classifier edge cases
                if len(domain_to_intent2id[domain]) == 1:
                    raise ValueError(
                        "Only one intent in domain '{!s}', use domain level tuning instead.".format(
                            domain
                        )
                    )
                else:
                    # In case of missing test files, the ic_eval_test object is a
                    # NoneType, leaving no predictions with which to evaluate the
                    # intent-level classifiers. The domain classifier needs only
                    # at least one test file across all intents, so it is better
                    # suited for such applications.
                    raise ValueError(
                        "Missing test files in domain '{!s}', use domain level tuning "
                        "instead.".format(domain)
                    )
            ic_eval_test_dict[domain] = ic_eval_test
            # Get Probability Vectors
            ic_queries_prob_vectors = MindMeldALClassifier._get_probs(
                classifier=ic,
                queries=filtered_unsampled_queries,
                nlp_component_to_id=domain_to_intent2id[domain],
            )
            intents = [
                f"{domain}.{intent}"
                for intent in ic_eval_test.get_stats()["class_labels"]
            ]
            padded_ic_queries_prob_vectors = self._pad_intent_probs(
                ic_queries_prob_vectors, intents
            )
            for i in range(len(filtered_unsampled_queries)):
                unsampled_idx_preds_pairs.append(
                    (
                        filtered_unsampled_queries_indices[i],
                        padded_ic_queries_prob_vectors[i],
                    )
                )
        # Restore the original query order across domains
        unsampled_idx_preds_pairs.sort(key=lambda x: x[0])
        padded_ic_queries_prob_vectors = [x[1] for x in unsampled_idx_preds_pairs]
        return padded_ic_queries_prob_vectors, ic_eval_test_dict
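Because queries are filtered and scored one domain at a time, the per-domain
probability vectors come back grouped by domain; the final sort on the original
indices restores the global unsampled-query order. A tiny standalone sketch of that
reassembly, with placeholder prediction values:

# Predictions arrive grouped by domain, each tagged with its original query index.
pairs = [(3, "p3"), (0, "p0"), (2, "p2"), (1, "p1")]  # (original index, prob vector)
pairs.sort(key=lambda x: x[0])
probs_in_original_order = [p for _, p in pairs]
assert probs_in_original_order == ["p0", "p1", "p2", "p3"]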
    def _update_eval_stats_intent_level(
        self, eval_stats: defaultdict, ic_eval_test_dict: Dict
    ):
        """Update the eval_stats dictionary with evaluation metrics from intent
        classifiers.

        Args:
            eval_stats (defaultdict): Evaluation metrics to be included in accuracies.json.
            ic_eval_test_dict (Dict): Dictionary mapping a domain (str) to the
                associated ic_eval_test object.
        """
        for domain, ic_eval_test in ic_eval_test_dict.items():
            eval_stats["accuracies"][domain] = {
                "overall": ic_eval_test.get_stats()["stats_overall"][
                    self.aggregate_statistic
                ]
            }
            for i, intent in enumerate(ic_eval_test.get_stats()["class_labels"]):
                eval_stats["accuracies"][domain][intent] = {
                    "overall": ic_eval_test.get_stats()["class_stats"][
                        self.class_level_statistic
                    ][i]
                }
    def entity_recognizers_fit_eval(
        self,
        sampled_queries: ProcessedQueryList,
        unsampled_queries: ProcessedQueryList,
        test_queries: ProcessedQueryList,
        domain_to_intents: Dict,
        entity2id: Dict,
    ):
        """Fit and evaluate the entity recognizers.

        Args:
            sampled_queries (ProcessedQueryList): List of Sampled Queries.
            unsampled_queries (ProcessedQueryList): List of Unsampled Queries.
            test_queries (ProcessedQueryList): List of Test Queries.
            domain_to_intents (Dict): Dictionary mapping domain to list of intents.
            entity2id (Dict): Dictionary mapping entities to IDs.
        Returns:
            er_queries_prob_vectors (List[List]): List of probability distributions
                for unsampled queries.
            er_eval_test_dict (Dict): Dictionary mapping a domain.intent (str) to the
                associated er_eval_test object.
        """
        er_eval_test_dict = {}
        unsampled_idx_preds_pairs = {}
        for domain, intents in domain_to_intents.items():
            for intent in intents:
                # Filter Queries
                (
                    _,
                    filtered_sampled_queries,
                ) = DataBucket.filter_queries_by_nlp_component(
                    query_list=sampled_queries,
                    component_type=TuneLevel.INTENT.value,
                    component_name=intent,
                )
                (
                    filtered_unsampled_queries_indices,
                    filtered_unsampled_queries,
                ) = DataBucket.filter_queries_by_nlp_component(
                    query_list=unsampled_queries,
                    component_type=TuneLevel.INTENT.value,
                    component_name=intent,
                )
                _, filtered_test_queries = DataBucket.filter_queries_by_nlp_component(
                    query_list=test_queries,
                    component_type=TuneLevel.INTENT.value,
                    component_name=intent,
                )
                # Train
                er = self.nlp.domains[domain].intents[intent].entity_recognizer
                try:
                    er.fit(queries=filtered_sampled_queries)
                except ValueError:
                    # single class, cannot fit with solver
                    logger.info(
                        "Skipped fitting entity recognizer for domain `%s` and intent `%s`. "
                        "Cannot fit with solver.",
                        domain,
                        intent,
                    )
                # Evaluate Test Queries
                er_eval_test = er.evaluate(queries=filtered_test_queries)
                er_eval_test_dict[f"{domain}.{intent}"] = er_eval_test
                # Get Probability Vectors
                er_queries_prob_vectors = MindMeldALClassifier._get_probs(
                    classifier=er,
                    queries=filtered_unsampled_queries,
                    nlp_component_to_id=entity2id,
                    nlp_component_type=TuneLevel.ENTITY.value,
                )
                for i, index in enumerate(filtered_unsampled_queries_indices):
                    unsampled_idx_preds_pairs[index] = er_queries_prob_vectors[i]
        # Restore the original query order across intents
        indices = sorted(unsampled_idx_preds_pairs)
        er_queries_prob_vectors = [
            unsampled_idx_preds_pairs[index] for index in indices
        ]
        return er_queries_prob_vectors, er_eval_test_dict
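At the entity level, each query's confidence entry is a 2D matrix (tokens x tags)
rather than a single vector, so the overall output for unsampled queries is
effectively 3D. A small sketch of one per-query matrix; the tag names and the
"domain.intent.tag" keys below are hypothetical, following the key format used in
_get_tagger_probs:

import numpy as np

# Hypothetical tag-to-index mapping for one domain.intent.
entity_tag_to_id = {
    "banking.transfer.O|": 0,
    "banking.transfer.B|amount": 1,  # assumed tag name, for illustration only
}

# One three-token query: a per-token probability row over the tag inventory.
query_matrix = np.zeros((3, len(entity_tag_to_id)))
query_matrix[0][0] = 1.0   # token 0: non-entity ("O|") tag
query_matrix[1][1] = 0.9   # token 1: likely the start of an entity
query_matrix[1][0] = 0.1
query_matrix[2][0] = 1.0
assert query_matrix.shape == (3, 2)  # (# tokens, # tags)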
    def _update_eval_stats_entity_level(
        self,
        eval_stats: defaultdict,
        er_eval_test_dict: Dict,
        verbose: bool = False,
    ):
        """Update the eval_stats dictionary with evaluation metrics from entity
        recognizers.

        Args:
            eval_stats (defaultdict): Evaluation metrics to be included in accuracies.json.
            er_eval_test_dict (Dict): Dictionary mapping a domain.intent (str) to the
                associated er_eval_test object.
            verbose (bool): Whether to also record statistics at the sub-entity
                (tag) level.
        """
        for domain_intent, er_eval_test in er_eval_test_dict.items():
            domain, intent = domain_intent.split(".")
            if er_eval_test:
                if domain not in eval_stats["accuracies"]:
                    eval_stats["accuracies"].update({domain: {}})
                if intent not in eval_stats["accuracies"][domain]:
                    eval_stats["accuracies"][domain].update({intent: {}})
                eval_stats["accuracies"][domain][intent]["entities"] = {
                    "overall": er_eval_test.get_stats()["stats_overall"][
                        self.aggregate_statistic
                    ]
                }
                if verbose:
                    # To generate plots at a sub-entity level (B, I, O, E, S tags)
                    for e, entity in enumerate(
                        er_eval_test.get_stats()["class_labels"]
                    ):
                        eval_stats["accuracies"][domain][intent]["entities"][
                            entity
                        ] = er_eval_test.get_stats()["class_stats"][
                            self.class_level_statistic
                        ][e]
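Taken together, the three _update_eval_stats_* helpers populate the nested structure
that is serialized to accuracies.json. A sketch of its shape for a hypothetical
single-domain app; all names and numbers below are illustrative:

# Illustrative shape only; keys follow the update helpers above.
eval_stats = {
    "num_sampled": 120,
    "accuracies": {
        "overall": 0.94,                       # domain-level aggregate statistic
        "banking": {
            "overall": 0.91,                   # intent-level aggregate, per domain
            "transfer": {
                "overall": 0.89,               # class-level statistic, per intent
                "entities": {"overall": 0.87}  # entity-level aggregate, per intent
            },
        },
    },
}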