Source code for mindmeld.active_learning.classifiers
# -*- coding: utf-8 -*-## Copyright (c) 2015 Cisco Systems, Inc. and others. All rights reserved.# Licensed under the Apache License, Version 2.0 (the "License");# you may not use this file except in compliance with the License.# You may obtain a copy of the License at# http://www.apache.org/licenses/LICENSE-2.0# Unless required by applicable law or agreed to in writing, software# distributed under the License is distributed on an "AS IS" BASIS,# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.# See the License for the specific language governing permissions and# limitations under the License."""This module contains classifiers for the Active Learning Pipeline."""importosimportloggingfromabcimportABC,abstractmethodfromcollectionsimportdefaultdictfromtypingimportList,Dictfromsklearn.model_selectionimportStratifiedKFoldimportnumpyasnpfrom.data_loadingimportLabelMap,DataBucketfrom.heuristicsimport(Heuristic,KLDivergenceSampling,DisagreementSampling,EnsembleSampling,)from..components.classifierimportClassifierfrom..components.nlpimportNaturalLanguageProcessorfrom..constantsimport(TuneLevel,TuningType,ACTIVE_LEARNING_RANDOM_SEED,AL_DEFAULT_AGGREGATE_STATISTIC,AL_DEFAULT_CLASS_LEVEL_STATISTIC,AL_SUPPORTED_AGGREGATE_STATISTICS,AL_SUPPORTED_CLASS_LEVEL_STATISTICS,)from..resource_loaderimportProcessedQueryListlogger=logging.getLogger(__name__)MULTI_MODEL_HEURISTICS=(KLDivergenceSampling,DisagreementSampling,EnsembleSampling)
class ALClassifier(ABC):
    """Abstract class for Active Learning Classifiers."""

    def __init__(self, app_path: str, tuning_level: list):
        """
        Args:
            app_path (str): Path to MindMeld application
            tuning_level (list): The hierarchy levels to tune ("domain", "intent" or "entity")
        """
        self.app_path = app_path
        self.tuning_level = tuning_level
        self.intent2idx, self.idx2intent, self.domain_indices = self._get_mappings()

    @staticmethod
    def _sorted_subdirectories(path: str) -> List[str]:
        """Return the names of the directories directly under ``path``, sorted.

        Plain files (e.g. ``.DS_Store`` or stray notes) are skipped so that they are
        neither listed as if they were folders nor assigned an index.

        Args:
            path (str): Directory to scan.

        Returns:
            names (List[str]): Sorted names of the sub-directories of ``path``.
        """
        return sorted(
            name for name in os.listdir(path) if os.path.isdir(os.path.join(path, name))
        )

    def _get_mappings(self):
        """Get mappings of intents to indices and the indices that map to each domain.

        Domains are the sub-directories of ``<app_path>/domains`` and intents are the
        sub-directories of each domain folder. Both are visited in sorted order, so the
        intents of one domain occupy a contiguous range of indices.

        Returns:
            intent2idx (Dict): Maps intents (as "domain.intent") to indices
            idx2intent (Dict): Maps indices to intents (as "domain.intent")
            domain_indices (Dict): Maps domains to a tuple containing the start and
                ending indexes (inclusive) of intents with the given domain. A domain
                without intents gets ``(start, start - 1)``.
        """
        idx = 0
        intent2idx, idx2intent, domain_indices = {}, {}, {}
        domains_root = os.path.join(self.app_path, "domains")
        for domain in self._sorted_subdirectories(domains_root):
            start_idx = idx
            for intent in self._sorted_subdirectories(os.path.join(domains_root, domain)):
                label = f"{domain}.{intent}"
                intent2idx[label] = idx
                idx2intent[idx] = label
                idx += 1
            # The end index is inclusive, hence the -1.
            domain_indices[domain] = (start_idx, idx - 1)
        return intent2idx, idx2intent, domain_indices

    @abstractmethod
    def train(self):
        """Train the classifier. Concrete subclasses must override this method."""
        raise NotImplementedError(
            "Subclasses must implement their classifier's fit method."
        )
class MindMeldALClassifier(ALClassifier):
    """Active Learning classifier that uses MindMeld classifiers internally.

    Handles the training of MindMeld components (domain classifier, intent classifiers
    or entity recognizers) and collecting performance statistics (eval_stats).
    """

    def __init__(
        self,
        app_path: str,
        tuning_level: list,
        n_classifiers: int,
        aggregate_statistic: str = None,
        class_level_statistic: str = None,
    ):
        """
        Args:
            app_path (str): Path to MindMeld application
            tuning_level (list): The hierarchy levels to tune ("domain", "intent" or "entity")
            n_classifiers (int): Number of classifiers to be used by multi-model strategies.
            aggregate_statistic (str): Aggregate statistic to record. The default is used
                when not provided.
            class_level_statistic (str): Class-level statistic to record. The default is
                used when not provided.
        """
        super().__init__(app_path=app_path, tuning_level=tuning_level)
        self.nlp = NaturalLanguageProcessor(self.app_path)
        self.n_classifiers = n_classifiers
        self.aggregate_statistic = MindMeldALClassifier._validate_aggregate_statistic(
            aggregate_statistic
        )
        self.class_level_statistic = (
            MindMeldALClassifier._validate_class_level_statistic(class_level_statistic)
        )
        # Same default as train(); set here so that train_single()/train_multi() can be
        # called before train() without hitting a missing attribute.
        self.tuning_type = TuningType.CLASSIFIER

    @staticmethod
    def _validate_aggregate_statistic(aggregate_statistic):
        """Method to validate the aggregate statistic. If an aggregate statistic is not
        provided the default is used. (Options: "accuracy", "f1_weighted", "f1_macro",
        "f1_micro".)

        Args:
            aggregate_statistic (str): Aggregate statistic to record.
        Returns:
            aggregate_statistic (str): Aggregate statistic to record.
        Raises:
            ValueError: If an invalid value is provided.
        """
        if not aggregate_statistic:
            logger.info(
                "Aggregate statistic not defined, using default: %r.",
                AL_DEFAULT_AGGREGATE_STATISTIC,
            )
            return AL_DEFAULT_AGGREGATE_STATISTIC
        if aggregate_statistic not in AL_SUPPORTED_AGGREGATE_STATISTICS:
            raise ValueError(
                f"Not a valid aggregate statistic: {aggregate_statistic!r}."
            )
        return aggregate_statistic

    @staticmethod
    def _validate_class_level_statistic(class_level_statistic):
        """Method to validate the class-level statistic. If a class-level statistic is
        not provided the default is used. (Options: "f_beta", "precision", "recall")

        Args:
            class_level_statistic (str): Class-level statistic to record.
        Returns:
            class_level_statistic (str): Class-level statistic to record.
        Raises:
            ValueError: If an invalid value is provided.
        """
        if not class_level_statistic:
            logger.info(
                "Class-level statistic not defined, using default: %r.",
                AL_DEFAULT_CLASS_LEVEL_STATISTIC,
            )
            return AL_DEFAULT_CLASS_LEVEL_STATISTIC
        if class_level_statistic not in AL_SUPPORTED_CLASS_LEVEL_STATISTICS:
            raise ValueError(
                f"Not a valid class-level statistic: {class_level_statistic!r}."
            )
        return class_level_statistic

    @staticmethod
    def _get_tagger_probs(
        classifier: Classifier,
        queries: ProcessedQueryList,
        entity_tag_to_id: Dict,
    ):
        """Get the probability distribution for a query across entities.

        For each token within a query, this function will obtain the probability
        distribution for entity tags as predicted by the entity recognition model.
        Output dimension will be: [# queries] * [# tokens] * [# tags]. Queries without
        a usable prediction get a single row with all of the mass on the "O|" tag.

        Args:
            classifier (MindMeld Classifier): Entity recognizer; its ``domain`` and
                ``intent`` attributes are used to build the tag keys.
            queries (ProcessedQueryList): List of MindMeld queries
            entity_tag_to_id (Dict): Dictionary mapping "domain.intent.tag" keys to
                vector index positions.
        Returns:
            prob_vector (List[List[List]]]): Probability distribution vectors for given
                queries.
        """
        queries_prob_vectors = []
        if not queries:
            return queries_prob_vectors

        classifier_eval = classifier.evaluate(queries=queries, fetch_distribution=True)
        domain = classifier.domain
        intent = classifier.intent
        n_tags = len(entity_tag_to_id)

        # default is set to 1. If there are no entities, this token/query will not get
        # preference; setting it to 0 would cause active learning to select these
        # tokens/queries first.
        default_prob = 1.0
        default_tag = "O|"
        default_key = f"{domain}.{intent}.{default_tag}"
        default_idx = entity_tag_to_id[default_key]

        def default_vector():
            # One pseudo-token whose whole probability mass sits on the default tag.
            vector = np.zeros((1, n_tags))
            vector[0][default_idx] = default_prob
            return vector

        if not classifier_eval:
            # if no classifier is fit, then the evaluation object cannot be created.
            # This case is the default.
            return [default_vector() for _ in range(len(queries))]

        # Else, if there is classifier eval object
        for query in classifier_eval.results:
            if not (query.predicted and query.probas):
                query_prob_vector_2d = default_vector()
            else:
                # Create and populate a 2D vector (# tokens * # tags)
                query_prob_vector_2d = np.zeros((len(query.probas), n_tags))
                for token_idx, (tags, probas) in enumerate(query.probas):
                    for i, tag in enumerate(tags):
                        key = f"{domain}.{intent}.{tag}"
                        tag_index = entity_tag_to_id.get(key, default_idx)
                        # To-do: check default idx to default value map, whether needed.
                        query_prob_vector_2d[token_idx][tag_index] = probas[i]
            queries_prob_vectors.append(query_prob_vector_2d)
        return queries_prob_vectors

    @staticmethod
    def _get_classifier_probs(
        classifier: Classifier,
        queries: ProcessedQueryList,
        nlp_component_to_id: Dict,
    ):
        """Get the probability distribution for a query across domains or intents

        Args:
            classifier (MindMeld Classifier): Domain or Intent Classifier
            queries (ProcessedQueryList): List of MindMeld queries
            nlp_component_to_id (Dict): Dictionary mapping domain or intent names to
                vector index positions.
        Returns:
            prob_vector (List[List]]): Probability distribution vectors for given
                queries. Empty when there are no queries.
        """
        queries_prob_vectors = []
        if not queries:
            # Nothing to score; also avoids calling len() on a missing query list.
            return queries_prob_vectors

        classifier_eval = classifier.evaluate(queries=queries)
        for result in classifier_eval.results:
            query_prob_vector = np.zeros(len(nlp_component_to_id))
            for nlp_component, proba in result.probas.items():
                query_prob_vector[nlp_component_to_id[nlp_component]] = proba
            queries_prob_vectors.append(query_prob_vector)
        # Internal invariant: exactly one probability vector per query.
        assert len(queries_prob_vectors) == len(queries)
        return queries_prob_vectors

    @staticmethod
    def _get_probs(
        classifier: Classifier,
        queries: ProcessedQueryList,
        nlp_component_to_id: Dict,
        nlp_component_type=None,
    ):
        """Get the probability distribution for a query across domains, intents or
        entities.

        Args:
            classifier (MindMeld Classifier): Domain/Intent classifier or entity recognizer
            queries (ProcessedQueryList): List of MindMeld queries
            nlp_component_to_id (Dict): Dictionary mapping domain, intent or entity tag
                names to vector index positions.
            nlp_component_type (str): Domain/Intent/Entity
        Returns:
            prob_vector (List[List]]): Probability distribution vectors for given queries.
        """
        # If type is entity, get recognizer probabilities
        if nlp_component_type == TuneLevel.ENTITY.value:
            return MindMeldALClassifier._get_tagger_probs(
                classifier=classifier,
                queries=queries,
                entity_tag_to_id=nlp_component_to_id,
            )
        # Else obtain classifier probabilities
        return MindMeldALClassifier._get_classifier_probs(
            classifier=classifier,
            queries=queries,
            nlp_component_to_id=nlp_component_to_id,
        )

    def _pad_intent_probs(
        self, ic_queries_prob_vectors: List[List[float]], intents: List
    ):
        """Pads the intent probability array with zeroes for out-of-domain intents.

        Args:
            ic_queries_prob_vectors (List[List[float]]]): 2D Array containing the
                probability distribution for a single query across intents in the
                query's domain.
            intents (List): List intents in the order that corresponds with the intent
                probabilities for the queries. Intents are in the form "domain.intent".
        Returns:
            padded_ic_queries_prob_vectors (List[List[float]]]): 2D Array containing the
                probability distribution for a single query across all intents
                (including out-of-domain intents).
        """
        padded_ic_queries_prob_vectors = []
        for unordered_ic_query_prob_vector in ic_queries_prob_vectors:
            ordered_ic_query_prob_vector = np.zeros(len(self.intent2idx))
            for i, intent in enumerate(intents):
                # Move the i-th in-domain probability to the intent's global position.
                ordered_ic_query_prob_vector[
                    self.intent2idx[intent]
                ] = unordered_ic_query_prob_vector[i]
            padded_ic_queries_prob_vectors.append(ordered_ic_query_prob_vector)
        return padded_ic_queries_prob_vectors

    def train(
        self,
        data_bucket: DataBucket,
        heuristic: Heuristic,
        tuning_type: TuningType = TuningType.CLASSIFIER,
    ):
        """Main training function.

        Args:
            data_bucket (DataBucket): DataBucket for current iteration
            heuristic (Heuristic): Current Heuristic.
            tuning_type (TuningType): Component to be tuned ("classifier" or "tagger")
        Returns:
            eval_stats (defaultdict): Evaluation metrics to be included in accuracies.json
            confidences_2d (List[List]): 2D array with probability vectors for unsampled
                queries (returns a 3d output for tagger tuning).
            confidences_3d (List[List[List]]]): 3D array with probability vectors for
                unsampled queries from multiple classifiers. None unless the heuristic
                is a multi-model heuristic.
            domain_indices (Dict): Maps domains to a tuple containing the start and
                ending indexes of intents with the given domain. None unless the
                heuristic is KLDivergenceSampling.
        """
        self.tuning_type = tuning_type
        eval_stats = defaultdict(dict)
        eval_stats["num_sampled"] = len(data_bucket.sampled_queries)
        confidences_2d, eval_stats = self.train_single(data_bucket, eval_stats)

        # Only multi-model heuristics need the (more expensive) fold-wise training.
        return_confidences_3d = isinstance(heuristic, MULTI_MODEL_HEURISTICS)
        confidences_3d = (
            self.train_multi(data_bucket) if return_confidences_3d else None
        )
        domain_indices = (
            self.domain_indices if isinstance(heuristic, KLDivergenceSampling) else None
        )
        return (
            eval_stats,
            confidences_2d,
            confidences_3d,
            domain_indices,
        )

    def train_single(
        self,
        data_bucket: DataBucket,
        eval_stats: defaultdict = None,
    ):
        """Trains a single model to get a 2D probability array for single-model
        selection strategies.

        Args:
            data_bucket (DataBucket): Databucket for current iteration
            eval_stats (defaultdict): Evaluation metrics to be included in
                accuracies.json. Updated in place when provided.
        Returns:
            confidences_2d (List): 2D array with probability vectors for unsampled
                queries (returns a 3d output for tagger tuning).
            eval_stats (defaultdict): The ``eval_stats`` argument (None if none was given).
        """
        return self._train_single(
            sampled_queries=data_bucket.sampled_queries,
            unsampled_queries=data_bucket.unsampled_queries,
            test_queries=data_bucket.test_queries,
            label_map=data_bucket.label_map,
            eval_stats=eval_stats,
        )

    def _train_single(
        self,
        sampled_queries: ProcessedQueryList,
        unsampled_queries: ProcessedQueryList,
        test_queries: ProcessedQueryList,
        label_map: LabelMap,
        eval_stats: Dict = None,
    ):
        """Helper function to train a single model and obtain a 2D probability array.

        Args:
            sampled_queries (ProcessedQueryList): Current set of sampled queries in
                DataBucket.
            unsampled_queries (ProcessedQueryList): Current set of unsampled queries in
                DataBucket.
            test_queries (ProcessedQueryList): Set of test queries in DataBucket.
            label_map LabelMap: Class that stores index mappings for a MindMeld app.
                (Eg. domain2id, domain_to_intent2id)
            eval_stats (Dict): Evaluation metrics to be included in accuracies.json.
                Updated in place when not None.
        Returns:
            confidences_2d (List): 2D array with probability vectors for unsampled
                queries (returns a 3d output for tagger tuning).
            eval_stats (Dict): The ``eval_stats`` argument (None if none was given).
        Raises:
            ValueError: If tagger tuning is requested but "entity" is not part of the
                tuning level, since no confidences can be produced in that case.
        """
        # `is not None` rather than truthiness: an empty stats dict must still be filled.
        record_stats = eval_stats is not None

        if self.tuning_type == TuningType.CLASSIFIER:
            # Domain Level
            dc_queries_prob_vectors, dc_eval_test = self.domain_classifier_fit_eval(
                sampled_queries=sampled_queries,
                unsampled_queries=unsampled_queries,
                test_queries=test_queries,
                domain2id=label_map.domain2id,
            )
            if record_stats:
                self._update_eval_stats_domain_level(eval_stats, dc_eval_test)
            confidences_2d = dc_queries_prob_vectors

            # Intent Level
            if TuneLevel.INTENT.value in self.tuning_level:
                (
                    ic_queries_prob_vectors,
                    ic_eval_test_dict,
                ) = self.intent_classifiers_fit_eval(
                    sampled_queries=sampled_queries,
                    unsampled_queries=unsampled_queries,
                    test_queries=test_queries,
                    domain_list=list(label_map.domain2id),
                    domain_to_intent2id=label_map.domain_to_intent2id,
                )
                if record_stats:
                    self._update_eval_stats_intent_level(eval_stats, ic_eval_test_dict)
                confidences_2d = ic_queries_prob_vectors
        else:
            # Entity Level
            if TuneLevel.ENTITY.value not in self.tuning_level:
                raise ValueError(
                    "Tagger tuning requires the 'entity' level in tuning_level, "
                    f"got {self.tuning_level!r}."
                )
            (
                er_queries_prob_vectors,
                er_eval_test_dict,
            ) = self.entity_recognizers_fit_eval(
                sampled_queries=sampled_queries,
                unsampled_queries=unsampled_queries,
                test_queries=test_queries,
                domain_to_intents=label_map.domain_to_intents,
                entity2id=label_map.entity2id,
            )
            if record_stats:
                self._update_eval_stats_entity_level(eval_stats, er_eval_test_dict)
            confidences_2d = er_queries_prob_vectors

        return confidences_2d, eval_stats

    def train_multi(self, data_bucket: DataBucket):
        """Trains multiple models to get a 3D probability array for multi-model
        selection strategies.

        Args:
            data_bucket (DataBucket): Databucket for current iteration
        Returns:
            confidences_3d (List[List[List]]]): 3D array with probability vectors for
                unsampled queries from multiple classifiers
        """
        return self._train_multi(
            sampled_queries=data_bucket.sampled_queries,
            unsampled_queries=data_bucket.unsampled_queries,
            test_queries=data_bucket.test_queries,
            label_map=data_bucket.label_map,
        )

    def _train_multi(
        self,
        sampled_queries: ProcessedQueryList,
        unsampled_queries: ProcessedQueryList,
        test_queries: ProcessedQueryList,
        label_map: LabelMap,
    ):
        """Helper function to train multiple models and obtain a 3D probability array.

        The sampled queries are split into ``n_classifiers`` stratified folds (stratified
        on "domain.intent") and one model is trained per fold.

        Args:
            sampled_queries (ProcessedQueryList): Current set of sampled queries in
                DataBucket.
            unsampled_queries (ProcessedQueryList): Current set of unsampled queries in
                DataBucket.
            test_queries (ProcessedQueryList): Set of test queries in DataBucket.
            label_map LabelMap: Class that stores index mappings for a MindMeld app.
                (Eg. domain2id, domain_to_intent2id)
        Returns:
            confidences_3d (List[List[List]]]): 3D array with probability vectors for
                unsampled queries from multiple classifiers
        """
        sampled_queries_ids = sampled_queries.elements
        skf = StratifiedKFold(
            n_splits=self.n_classifiers,
            shuffle=True,
            random_state=ACTIVE_LEARNING_RANDOM_SEED,
        )
        y = [
            f"{domain}.{intent}"
            for domain, intent in zip(
                sampled_queries.domains(), sampled_queries.intents()
            )
        ]
        # Each model is trained on the held-out part of a split (the second element),
        # so the folds are disjoint subsets of the sampled queries.
        fold_sampled_queries_ids = [
            [sampled_queries_ids[i] for i in fold]
            for _, fold in skf.split(sampled_queries_ids, y)
        ]
        fold_sampled_queries_lists = [
            ProcessedQueryList(sampled_queries.cache, fold)
            for fold in fold_sampled_queries_ids
        ]

        confidences_3d = []
        for fold_sample_queries in fold_sampled_queries_lists:
            # No eval_stats here: only the confidences of each fold model are needed.
            confidences_2d, _ = self._train_single(
                fold_sample_queries,
                unsampled_queries,
                test_queries,
                label_map,
            )
            confidences_3d.append(confidences_2d)
        return confidences_3d

    def domain_classifier_fit_eval(
        self,
        sampled_queries: ProcessedQueryList,
        unsampled_queries: ProcessedQueryList,
        test_queries: ProcessedQueryList,
        domain2id: Dict,
    ):
        """Fit and evaluate the domain classifier.

        Args:
            sampled_queries (ProcessedQueryList): List of Sampled Queries
            unsampled_queries (ProcessedQueryList): List of Unsampled Queries
            test_queries (ProcessedQueryList): List of Test Queries
            domain2id (Dict): Dictionary mapping domains to IDs
        Returns:
            dc_queries_prob_vectors (List[List]): List of probability distributions
                for unsampled queries.
            dc_eval_test (mindmeld.models.model.StandardModelEvaluation): Mindmeld
                evaluation object for the domain classifier.
        Raises:
            ValueError: If the application has a single domain.
        """
        # Check for domain classifier edge case
        if len(domain2id) == 1:
            raise ValueError(
                "Only one domain present, use intent level tuning instead.",
            )
        dc = self.nlp.domain_classifier
        dc.fit(queries=sampled_queries)
        dc_eval_test = dc.evaluate(queries=test_queries)
        dc_queries_prob_vectors = MindMeldALClassifier._get_probs(
            dc, unsampled_queries, domain2id
        )
        return dc_queries_prob_vectors, dc_eval_test

    def _update_eval_stats_domain_level(self, eval_stats: Dict, dc_eval_test):
        """Update the eval_stats dictionary with evaluation metrics from the domain
        classifier.

        Args:
            eval_stats (Dict): Evaluation metrics to be included in accuracies.json
            dc_eval_test (mindmeld.models.model.StandardModelEvaluation): Mindmeld
                evaluation object for the domain classifier.
        """
        # setdefault keeps this working for plain dicts as well as defaultdict(dict).
        accuracies = eval_stats.setdefault("accuracies", {})
        accuracies["overall"] = dc_eval_test.get_stats()["stats_overall"][
            self.aggregate_statistic
        ]
        logger.info("Overall Domain-level Accuracy: %s", accuracies["overall"])

    def intent_classifiers_fit_eval(
        self,
        sampled_queries: ProcessedQueryList,
        unsampled_queries: ProcessedQueryList,
        test_queries: ProcessedQueryList,
        domain_list: List[str],
        domain_to_intent2id: Dict,
    ):
        """Fit and evaluate the intent classifier of every domain.

        Args:
            sampled_queries (ProcessedQueryList): List of Sampled Queries.
            unsampled_queries (ProcessedQueryList): List of Unsampled Queries.
            test_queries (ProcessedQueryList): List of Test Queries.
            domain_list (List[str]): List of domains used by the application.
            domain_to_intent2id (Dict): Dictionary mapping each domain to its
                intent-to-ID dictionary.
        Returns:
            ic_queries_prob_vectors (List[List]): List of probability distributions for
                unsampled queries, ordered by the queries' position in
                ``unsampled_queries`` and padded to cover all intents.
            ic_eval_test_dict (Dict): Dictionary mapping a domain (str) to the
                associated ic_eval_test object.
        Raises:
            ValueError: If a domain's intent classifier cannot be evaluated (single
                intent in the domain, or missing test files).
        """
        ic_eval_test_dict = {}
        unsampled_idx_preds_pairs = []

        for domain in domain_list:
            # Filter Queries
            _, filtered_sampled_queries = DataBucket.filter_queries_by_nlp_component(
                query_list=sampled_queries,
                component_type="domain",
                component_name=domain,
            )
            (
                filtered_unsampled_queries_indices,
                filtered_unsampled_queries,
            ) = DataBucket.filter_queries_by_nlp_component(
                query_list=unsampled_queries,
                component_type="domain",
                component_name=domain,
            )
            _, filtered_test_queries = DataBucket.filter_queries_by_nlp_component(
                query_list=test_queries,
                component_type="domain",
                component_name=domain,
            )

            # Train
            ic = self.nlp.domains[domain].intent_classifier
            ic.fit(queries=filtered_sampled_queries)

            # Evaluate Test Queries
            ic_eval_test = ic.evaluate(queries=filtered_test_queries)
            if not ic_eval_test:
                # Check for intent classifier edge cases
                if len(domain_to_intent2id[domain]) == 1:
                    raise ValueError(
                        f"Only one intent in domain '{domain!s}', use domain level "
                        "tuning instead."
                    )
                # In case of missing test files, ic_eval_test object is a NoneType. In
                # that case we have no predictions to evaluate the intent level
                # classifiers. Domain classifier can have at least one test file across
                # intents, hence is better suited for such applications.
                raise ValueError(
                    f"Missing test files in domain '{domain!s}', use domain level "
                    "tuning instead."
                )
            ic_eval_test_dict[domain] = ic_eval_test

            # Get Probability Vectors
            ic_queries_prob_vectors = MindMeldALClassifier._get_probs(
                classifier=ic,
                queries=filtered_unsampled_queries,
                nlp_component_to_id=domain_to_intent2id[domain],
            )
            class_labels = ic_eval_test.get_stats()["class_labels"]
            intents = [f"{domain}.{intent}" for intent in class_labels]
            padded_ic_queries_prob_vectors = self._pad_intent_probs(
                ic_queries_prob_vectors, intents
            )
            # Remember each vector's position in the original unsampled list.
            unsampled_idx_preds_pairs.extend(
                zip(filtered_unsampled_queries_indices, padded_ic_queries_prob_vectors)
            )

        # Restore the original order of the unsampled queries across domains.
        unsampled_idx_preds_pairs.sort(key=lambda x: x[0])
        padded_ic_queries_prob_vectors = [x[1] for x in unsampled_idx_preds_pairs]
        return padded_ic_queries_prob_vectors, ic_eval_test_dict

    def _update_eval_stats_intent_level(
        self, eval_stats: defaultdict, ic_eval_test_dict: Dict
    ):
        """Update the eval_stats dictionary with evaluation metrics from intent
        classifiers.

        Args:
            eval_stats (defaultdict): Evaluation metrics to be included in
                accuracies.json.
            ic_eval_test_dict (Dict): Dictionary mapping a domain (str) to the
                associated ic_eval_test object.
        """
        accuracies = eval_stats.setdefault("accuracies", {})
        for domain, ic_eval_test in ic_eval_test_dict.items():
            # Fetch the statistics once per domain instead of once per intent.
            stats = ic_eval_test.get_stats()
            domain_stats = {
                "overall": stats["stats_overall"][self.aggregate_statistic]
            }
            accuracies[domain] = domain_stats
            for i, intent in enumerate(stats["class_labels"]):
                domain_stats[intent] = {
                    "overall": stats["class_stats"][self.class_level_statistic][i]
                }

    def entity_recognizers_fit_eval(
        self,
        sampled_queries: ProcessedQueryList,
        unsampled_queries: ProcessedQueryList,
        test_queries: ProcessedQueryList,
        domain_to_intents: Dict,
        entity2id: Dict,
    ):
        """Fit and evaluate the entity recognizer of every intent.

        Args:
            sampled_queries (ProcessedQueryList): List of Sampled Queries.
            unsampled_queries (ProcessedQueryList): List of Unsampled Queries.
            test_queries (ProcessedQueryList): List of Test Queries.
            domain_to_intents (Dict): Dictionary mapping domain to list of intents.
            entity2id (Dict): Dictionary mapping entities to IDs.
        Returns:
            er_queries_prob_vectors (List[List[List]]): List of per-token probability
                distributions for unsampled queries, ordered by the queries' position
                in ``unsampled_queries``.
            er_eval_test_dict (Dict): Dictionary mapping a "domain.intent" (str) to the
                associated er_eval_test object.
        """
        er_eval_test_dict = {}
        # Maps the position of a query in `unsampled_queries` to its probability vectors.
        unsampled_idx_preds_pairs = {}

        for domain, intents in domain_to_intents.items():
            for intent in intents:
                # Filter Queries
                (
                    _,
                    filtered_sampled_queries,
                ) = DataBucket.filter_queries_by_nlp_component(
                    query_list=sampled_queries,
                    component_type=TuneLevel.INTENT.value,
                    component_name=intent,
                )
                (
                    filtered_unsampled_queries_indices,
                    filtered_unsampled_queries,
                ) = DataBucket.filter_queries_by_nlp_component(
                    query_list=unsampled_queries,
                    component_type=TuneLevel.INTENT.value,
                    component_name=intent,
                )
                _, filtered_test_queries = DataBucket.filter_queries_by_nlp_component(
                    query_list=test_queries,
                    component_type=TuneLevel.INTENT.value,
                    component_name=intent,
                )

                # Train
                er = self.nlp.domains[domain].intents[intent].entity_recognizer
                try:
                    er.fit(queries=filtered_sampled_queries)
                except ValueError:
                    # single class, cannot fit with solver
                    logger.info(
                        "Skipped fitting entity recognizer for domain `%s` and intent `%s`."
                        "Cannot fit with solver.",
                        domain,
                        intent,
                    )

                # Evaluate Test Queries
                er_eval_test = er.evaluate(queries=filtered_test_queries)
                er_eval_test_dict[f"{domain}.{intent}"] = er_eval_test

                # Get Probability Vectors
                er_queries_prob_vectors = MindMeldALClassifier._get_probs(
                    classifier=er,
                    queries=filtered_unsampled_queries,
                    nlp_component_to_id=entity2id,
                    nlp_component_type=TuneLevel.ENTITY.value,
                )
                for i, index in enumerate(filtered_unsampled_queries_indices):
                    unsampled_idx_preds_pairs[index] = er_queries_prob_vectors[i]

        # Restore the original order of the unsampled queries across intents.
        er_queries_prob_vectors = [
            unsampled_idx_preds_pairs[index]
            for index in sorted(unsampled_idx_preds_pairs)
        ]
        return er_queries_prob_vectors, er_eval_test_dict

    def _update_eval_stats_entity_level(
        self,
        eval_stats: defaultdict,
        er_eval_test_dict: Dict,
        verbose: bool = False,
    ):
        """Update the eval_stats dictionary with evaluation metrics from entity
        recognizers.

        Args:
            eval_stats (defaultdict): Evaluation metrics to be included in
                accuracies.json.
            er_eval_test_dict (Dict): Dictionary mapping a domain.intent (str) to the
                associated er_eval_test object. Entries without an evaluation object
                are skipped.
            verbose (bool): Whether to also record the class-level statistic of every
                class label (entity tag).
        """
        for domain_intent, er_eval_test in er_eval_test_dict.items():
            domain, intent = domain_intent.split(".")
            if not er_eval_test:
                continue
            accuracies = eval_stats.setdefault("accuracies", {})
            intent_stats = accuracies.setdefault(domain, {}).setdefault(intent, {})
            # Fetch the statistics once instead of once per class label.
            stats = er_eval_test.get_stats()
            entity_stats = {
                "overall": stats["stats_overall"][self.aggregate_statistic]
            }
            intent_stats["entities"] = entity_stats
            if verbose:
                # To generate plots at a sub-entity level (B, I, O, E, S tags)
                for e, entity in enumerate(stats["class_labels"]):
                    entity_stats[entity] = stats["class_stats"][
                        self.class_level_statistic
                    ][e]