# Source code for mindmeld.components.intent_classifier

# -*- coding: utf-8 -*-
#
# Copyright (c) 2015 Cisco Systems, Inc. and others.  All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module contains the intent classifier component of the MindMeld natural language processor.
"""
import logging

from ..constants import DEFAULT_TRAIN_SET_REGEX
from ..markup import mark_down
from ..models import CLASS_LABEL_TYPE, QUERY_EXAMPLE_TYPE
from ._config import get_classifier_config
from .classifier import Classifier

logger = logging.getLogger(__name__)


class IntentClassifier(Classifier):
    """An intent classifier is used to determine the target intent for a given query. It is trained
    using all of the labeled queries across all intents for a domain in an application. The
    labels for the training data are the intent names associated with each query.

    Attributes:
        domain (str): The domain that this intent classifier belongs to.
    """

    CLF_TYPE = "intent"
    """The classifier type."""

    def __init__(self, resource_loader, domain):
        """Initializes an intent classifier.

        Args:
            resource_loader (ResourceLoader): An object which can load resources for the \
                classifier.
            domain (str): The domain that this intent classifier belongs to.
        """
        super().__init__(resource_loader)
        self.domain = domain

    def _get_model_config(self, **kwargs):
        """Gets a machine learning model configuration.

        The ``example_type`` and ``label_type`` entries of ``kwargs`` are always set to
        ``QUERY_EXAMPLE_TYPE`` and ``CLASS_LABEL_TYPE`` respectively, overriding any values
        supplied by the caller. The classifier config is looked up with
        ``get_classifier_config`` for this classifier type, the resource loader's app path
        and this classifier's domain, and is then handed to the base class together with
        the keyword arguments.

        Args:
            **kwargs: Model configuration settings forwarded to the base class.

        Returns:
            ModelConfig: The model configuration corresponding to the provided config name
        """
        kwargs["example_type"] = QUERY_EXAMPLE_TYPE
        kwargs["label_type"] = CLASS_LABEL_TYPE
        loaded_config = get_classifier_config(
            self.CLF_TYPE, self._resource_loader.app_path, domain=self.domain
        )
        return super()._get_model_config(loaded_config, **kwargs)

    def fit(self, *args, **kwargs):  # pylint: disable=signature-differs
        """Trains the intent classification model using the provided training queries.

        Logs the domain being fitted and delegates to the base class ``fit``.

        Args:
            model_type (str): The type of machine learning model to use. If omitted, the default
                model type will be used.
            features (dict): Features to extract from each example instance to form the feature
                vector used for model training. If omitted, the default feature set for the model
                type will be used.
            params_grid (dict): The grid of hyper-parameters to search, for finding the optimal
                hyper-parameter settings for the model. If omitted, the default hyper-parameter
                search grid will be used.
            queries (list[ProcessedQuery]): The labeled queries to use as training data.
            cv (optional): Cross-validation settings.

        Returns:
            The value returned by the base class ``fit``.
        """
        logger.info("Fitting intent classifier: domain=%r", self.domain)
        return super().fit(*args, **kwargs)

    def dump(self, *args, **kwargs):  # pylint: disable=signature-differs
        """Persists the trained intent classification model to disk.

        Logs the domain being saved and delegates to the base class ``dump``.

        Args:
            model_path (str): The location on disk where the model should be stored.
        """
        logger.info("Saving intent classifier: domain=%r", self.domain)
        super().dump(*args, **kwargs)

    def unload(self):
        """Unloads the intent classification model.

        Logs the domain being unloaded and delegates to the base class ``unload``.
        """
        logger.info("Unloading intent classifier: domain=%r", self.domain)
        super().unload()

    def load(self, *args, **kwargs):
        """Loads the trained intent classification model from disk.

        Logs the domain being loaded and delegates to the base class ``load``.

        Args:
            model_path (str): The location on disk where the model is stored.
        """
        logger.info("Loading intent classifier: domain=%r", self.domain)
        super().load(*args, **kwargs)

    def inspect(self, query, intent=None, dynamic_resource=None):
        """Inspects the query.

        Args:
            query (Query): The query to be predicted.
            intent (str): The expected intent label for this query.
            dynamic_resource (dict, optional): A dynamic resource to aid NLP inference.

        Returns:
            (list of lists): 2D list that includes every feature, their value, weight and \
                probability.
        """
        return self._model.inspect(
            example=query, gold_label=intent, dynamic_resource=dynamic_resource
        )

    def _get_queries_from_label_set(self, label_set=DEFAULT_TRAIN_SET_REGEX):
        """Fetches the flattened label set for this classifier's domain.

        Args:
            label_set (str): The label set to fetch; defaults to ``DEFAULT_TRAIN_SET_REGEX``.

        Returns:
            The result of the resource loader's ``get_flattened_label_set`` for this domain.
        """
        return self._resource_loader.get_flattened_label_set(
            domain=self.domain,
            label_set=label_set
        )

    def _get_examples_and_labels(self, queries):
        """Splits a query collection into training examples and intent labels.

        Args:
            queries: An object exposing ``queries()`` and ``intents()`` accessors.

        Returns:
            tuple: ``(queries.queries(), queries.intents())``.
        """
        return (queries.queries(), queries.intents())

    def _get_examples_and_labels_hash(self, queries):
        """Computes a hash over the labeled raw queries for this domain.

        Each query contributes one ``"<domain>###<intent>###<marked-down raw query>"`` string.
        The strings are sorted before hashing so that the hash does not depend on the order
        in which the queries are supplied.

        Args:
            queries: An object exposing ``intents()`` and ``raw_queries()`` accessors.

        Returns:
            The result of the resource loader's ``hash_list`` for the sorted strings.
        """
        hashable_queries = sorted(
            "###".join((self.domain, intent, mark_down(raw_query)))
            for intent, raw_query in zip(queries.intents(), queries.raw_queries())
        )
        return self._resource_loader.hash_list(hashable_queries)