Source code for mindmeld.components.intent_classifier

# -*- coding: utf-8 -*-
#
# Copyright (c) 2015 Cisco Systems, Inc. and others.  All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module contains the intent classifier component of the MindMeld natural language processor.
"""
import logging

from ..constants import DEFAULT_TRAIN_SET_REGEX
from ..markup import mark_down
from ..models import CLASS_LABEL_TYPE, QUERY_EXAMPLE_TYPE
from ._config import get_classifier_config
from .classifier import Classifier

logger = logging.getLogger(__name__)


class IntentClassifier(Classifier):
    """Classifier that picks the target intent for a query within one domain.

    Training uses the labeled queries of every intent in the domain; the label
    attached to each training query is the name of its intent.

    Attributes:
        domain (str): The domain that this intent classifier belongs to.
    """

    CLF_TYPE = "intent"
    """Identifier for this kind of classifier."""

    def __init__(self, resource_loader, domain):
        """Creates an intent classifier bound to a single domain.

        Args:
            resource_loader (ResourceLoader): An object which can load resources
                for the classifier.
            domain (str): The domain that this intent classifier belongs to.
        """
        super().__init__(resource_loader)
        self.domain = domain

    def _get_model_config(self, **kwargs):
        """Builds the machine learning model configuration for this classifier.

        The example and label types are always forced to the query/class
        values, overriding anything the caller supplied under those keys.

        Returns:
            ModelConfig: The model configuration produced by the base classifier
                from the loaded classifier config and the keyword overrides.
        """
        kwargs.update(example_type=QUERY_EXAMPLE_TYPE, label_type=CLASS_LABEL_TYPE)
        app_path = self._resource_loader.app_path
        base_config = get_classifier_config(
            self.CLF_TYPE, app_path, domain=self.domain
        )
        return super()._get_model_config(base_config, **kwargs)

    def fit(self, *args, **kwargs):  # pylint: disable=signature-differs
        """Trains the intent classification model on the provided queries.

        All arguments are forwarded unchanged to the base classifier's ``fit``.

        Args:
            model_type (str): The type of machine learning model to use. If
                omitted, the default model type will be used.
            features (dict): Features to extract from each example instance to
                form the feature vector used for model training. If omitted,
                the default feature set for the model type will be used.
            params_grid (dict): The grid of hyper-parameters to search, for
                finding the optimal hyper-parameter settings for the model. If
                omitted, the default hyper-parameter search grid will be used.
            queries (list[ProcessedQuery]): The labeled queries to use as
                training data.
            cv (optional): Cross-validation settings.

        Returns:
            Whatever the base classifier's ``fit`` returns.
        """
        logger.info("Fitting intent classifier: domain=%r", self.domain)
        outcome = super().fit(*args, **kwargs)
        return outcome

    def dump(self, *args, **kwargs):  # pylint: disable=signature-differs
        """Writes the trained intent classification model to disk.

        All arguments are forwarded unchanged to the base classifier's ``dump``.

        Args:
            model_path (str): The location on disk where the model should be
                stored.
        """
        logger.info("Saving intent classifier: domain=%r", self.domain)
        super().dump(*args, **kwargs)

    def unload(self):
        """Logs the unload and delegates to the base classifier's ``unload``."""
        logger.info("Unloading intent classifier: domain=%r", self.domain)
        super().unload()

    def load(self, *args, **kwargs):
        """Reads the trained intent classification model from disk.

        All arguments are forwarded unchanged to the base classifier's ``load``.

        Args:
            model_path (str): The location on disk where the model is stored.
        """
        logger.info("Loading intent classifier: domain=%r", self.domain)
        super().load(*args, **kwargs)

    def inspect(self, query, intent=None, dynamic_resource=None):
        """Inspects the query by delegating to the underlying model.

        Args:
            query (Query): The query to be predicted.
            intent (str): The expected intent label for this query.
            dynamic_resource (dict, optional): A dynamic resource to aid NLP
                inference.

        Returns:
            (list of lists): 2D list that includes every feature, their value,
                weight and probability.
        """
        model = self._model
        return model.inspect(
            example=query, gold_label=intent, dynamic_resource=dynamic_resource
        )

    def _get_queries_from_label_set(self, label_set=DEFAULT_TRAIN_SET_REGEX):
        """Asks the resource loader for this domain's flattened label set."""
        loader = self._resource_loader
        return loader.get_flattened_label_set(
            domain=self.domain, label_set=label_set
        )

    def _get_examples_and_labels(self, queries):
        """Returns the ``(examples, labels)`` pair: the queries and their intents."""
        examples = queries.queries()
        labels = queries.intents()
        return (examples, labels)

    def _get_examples_and_labels_hash(self, queries):
        """Hashes the training data as sorted ``domain###intent###query`` keys.

        Each raw query is passed through ``mark_down`` before being joined with
        the domain and its intent; the keys are sorted so the hash does not
        depend on the order of the input queries.
        """
        labeled_pairs = zip(queries.intents(), queries.raw_queries())
        keys = sorted(
            self.domain + "###" + intent + "###" + mark_down(raw_query)
            for intent, raw_query in labeled_pairs
        )
        return self._resource_loader.hash_list(keys)