Source code for mrftools.Learner

"""Main learner class for log-linear model parameter learning. """
import copy

from .ConvexBeliefPropagator import ConvexBeliefPropagator
from .opt import *
from .MatrixBeliefPropagator import MatrixBeliefPropagator


[docs]class Learner(object):
    """
    Learner class for log-lienar model parameter learning. This class contains methods for calculating various 
    objective functions and gradients, and implements a subgradient optimization for the variational likelihood.
    """
    def __init__(self, inference_type=MatrixBeliefPropagator):
        """
        Initialize a learner by setting the inference method for computing variational likelihood approximations.
        
        :param inference_type: Inference class for computing feature expectations used in variational likelihood
        """
        self.label_expectations = None
        self.inferred_expectations = None
        self.inference_type = inference_type
        self.num_examples = 0
        self.models = []
        self.conditioned_models = []
        self.conditioned_belief_propagators = []
        self.belief_propagators = []
        self.l1_regularization = 0.00
        self.l2_regularization = 1
        self.weight_dim = None
        self.fully_observed = True
        self.initialization_flag = False
        self.loss_augmented = False
        self.inference_instantiator = None
        self.start_time = 0
        self.max_time = np.inf
        self.display = 'off'

[docs]    def set_regularization(self, l1, l2):
        """
        Set the regularization parameters.
        
        :param l1: l1 regularization parameter
        :param l2: l2 regularization parameter
        :return: 
        """
        self.l1_regularization = l1
        self.l2_regularization = l2

[docs]    def add_data(self, labels, model):
        """
        Add data example to training set. The states variable should be a dictionary containing all the states of the
         unary variables. 
        
        :param labels: dict containing true states of all labeled variables
        :param model: LogLinearModel object containing features for each pairwise and unary potential
        :return: 
        """
        self.models.append(model)
        if self.inference_instantiator:
            # if a custom inference instantiation function is provided, use that instead of default constructor
            bp = self.inference_instantiator(model)
        else:
            bp = self.inference_type(model)

        if self.loss_augmented:
            # if we are using augmented loss for max-margin learning, add loss-augmented potentials to inference
            for (var, state) in labels.items():
                bp.augment_loss(var, state)

        self.belief_propagators.append(bp)

        if self.weight_dim is None:
            self.weight_dim = model.weight_dim
        else:
            assert self.weight_dim == model.weight_dim, "Parameter dimensionality did not match"

        # create inference objects to extract the feature expectations based on either (1) reading the variable states
        # or (2) inferring the latent variables then reading the inferred expectations
        self.conditioned_models.append(model)

        if self.inference_instantiator:
            conditioned_bp = self.inference_instantiator(model)
        else:
            conditioned_bp = self.inference_type(model)

        for (var, state) in labels.items():
            conditioned_bp.condition(var, state)

        for var in model.variables:
            if var not in labels.keys():
                self.fully_observed = False

        self.conditioned_belief_propagators.append(conditioned_bp)

        self.num_examples += 1

    def _set_initialize_every_iter(self, flag):
        """
        Force learner to reinitialize inference objects before each objetive and gradient computation instead of warm
        starting from the previous iteration. Doing so will make learning slower, but may protect against getting
        stuck in a local optimum.
        
        :param flag: Boolean value of whether to initialize
        :return: None:
        """
        self.initialization_flag = flag

[docs]    def do_inference(self, belief_propagators):
        """
        Perform inference on all stored models.
        
        :param belief_propagators: iterable of inference objects
        :return: None
        """
        for bp in belief_propagators:
            if self.initialization_flag:
                bp.initialize_messages()
            bp.infer(display=self.display)

[docs]    def set_inference_truncation(self, bp_iter):
        """
        Set maximum number of iterations for inference. Useful for faster learning or inner-dual learning to stop
        inference before they run to convergence. 
        :param bp_iter: maximum iterations each belief propagator can run for each gradient/objective computation
        :return: NOne
        """
        for bp in self.belief_propagators + self.conditioned_belief_propagators:
            bp.set_max_iter(bp_iter)

[docs]    def get_feature_expectations(self, belief_propagators):
        """
        Run inference and return the marginal in vector form using the order of self.potentials.
        
        :param belief_propagators: iterable of inference objects to use to get feature expectations
        :return: vector of feature expectations
        """
        marginal_sum = 0
        for bp in belief_propagators:
            marginal_sum += np.true_divide(bp.get_feature_expectations(), len(bp.mn.variables))

        return marginal_sum / len(belief_propagators)

[docs]    def get_bethe_entropy(self, belief_propagators):
        """
        Compute the average Bethe entropy of all inference objects
        :param belief_propagators: iterable of inference objects
        :return: average Bethe entropy of all objectives
        """
        bethe = 0
        for bp in belief_propagators:
            bp.compute_beliefs()
            bp.compute_pairwise_beliefs()
            bethe += bp.compute_bethe_entropy()

        bethe = bethe / self.num_examples
        return bethe

[docs]    def subgrad_obj(self, weights, options=None, do_inference=True):
        """
        Compute the variational negative log likelihood. Performs inference on latent variables in the labeled 
        inference objects before calling the EM objective
        
        :param weights: Weight vector containing the same number of entries as all weights for this model
        :param do_inference: Boolean value indicating whether or not to run inference. Defaults to True.
        :return: objective value (float)
        """
        if self.label_expectations is None or not self.fully_observed:
            self.label_expectations = self.calculate_expectations(weights, self.conditioned_belief_propagators,
                                                                  do_inference)
        return self.objective(weights)

[docs]    def subgrad_grad(self, weights, options=None, do_inference=False):
        """
        Compute the gradient of the variational negative log likelihood. 
        
        :param weights: Weight vector containing the same number of entries as all weights for this model
        :param do_inference: Boolean value indicating whether or not to run inference. Defaults to False because 
                            typically the objective function was called immediately before, which does inference.
        :return: gradient with respect to weights
        """
        if self.label_expectations is None or not self.fully_observed:
            self.label_expectations = self.calculate_expectations(weights, self.conditioned_belief_propagators,
                                                                  do_inference)
        return self.gradient(weights)

[docs]    def learn(self, weights, optimizer=ada_grad, callback=None, opt_args=None):
        """
        Fit model parameters my maximizing the variational likelihood
        :param weights: Initial weight vector. Can be used to warm start from a previous solution.
        :param optimizer: gradient-based optimization function, as defined in opt.py
        :param callback: callback function run during each iteration of the optimizer. The function receives the 
                        weights as input. Can be useful for diagnostics, live plotting, storing records, etc.
        :param opt_args: optimization arguments. Usually a dictionary of parameter values
        :return: learned weights
        """
        self.start_time = time.time()
        res = optimizer(self.subgrad_obj, self.subgrad_grad, weights, opt_args, callback=callback)
        new_weights = res

        return new_weights

[docs]    def set_weights(self, weight_vector, belief_propagators):
        """
        Set weights of Markov net from vector using the order in self.potentials.
        :param weight_vector: weight vector containing weights for all potentials
        :param belief_propagators: iterable of belief propagators whose models should be updated with the weights
        :return: None
        """
        for bp in belief_propagators:
            bp.mn.set_weights(weight_vector)

[docs]    def calculate_expectations(self, weights, belief_propagators, should_infer=True):
        """
        Calculate the feature expectations given the provided model weights.
        
        :param weights: weight vector containing weights for all potentials
        :param belief_propagators: iterable of belief propagators whose models should be updated with the weights
        :param should_infer: Boolean value of whether to run inference. This value should usually only be False when
                            inference has already been run for this particular weight vector, i.e., if this function
                            is being called immediately after it has been called with the same weights.
        :return: feature expectation vector
        """
        self.set_weights(weights, belief_propagators)
        if should_infer:
            self.do_inference(belief_propagators)

        return self.get_feature_expectations(belief_propagators)

[docs]    def objective(self, weights, options=None):
        """
        Return the primal regularized negative variational log likelihood 
        :param weights: weight vector containing weights for all potentials
        :param options: Unused (for now) options for objective function
        :return: objective value
        """
        self.inferred_expectations = self.calculate_expectations(weights, self.belief_propagators, True)

        term_p = sum([np.true_divide(x.compute_energy_functional(), len(x.mn.variables)) for x in
                      self.belief_propagators]) / len(self.belief_propagators)

        if not self.fully_observed:
            # recompute energy functional for label distributions only in latent variable case
            self.set_weights(weights, self.conditioned_belief_propagators)
            term_q = sum([np.true_divide(x.compute_energy_functional(), len(x.mn.variables)) for x in
                          self.conditioned_belief_propagators]) / len(self.conditioned_belief_propagators)
        else:
            term_q = np.dot(self.label_expectations, weights)

        self.term_q_p = term_p - term_q

        objective = 0.0
        # add regularization penalties
        objective += self.l1_regularization * np.sum(np.abs(weights))
        objective += 0.5 * self.l2_regularization * weights.dot(weights)
        objective += self.term_q_p

        return objective

[docs]    def gradient(self, weights, options=None):
        """
        Return the gradient of the regularized negative variational log likelihood 
        :param weights: weight vector containing weights for all potentials
        :param options: Unused (for now) options for objective function
        :return: gradient vector
        """
        if self.start_time != 0 and time.time() - self.start_time > self.max_time:
            if self.display == 'full':
                print('more than %d seconds...' % self.max_time)
            grad = np.zeros(len(weights))
            return grad
        else:
            self.inferred_expectations = self.calculate_expectations(weights, self.belief_propagators, False)

            grad = np.zeros(len(weights))

            # add regularization penalties
            grad += self.l1_regularization * np.sign(weights)
            grad += self.l2_regularization * weights

            grad -= np.squeeze(self.label_expectations)
            grad += np.squeeze(self.inferred_expectations)

            return grad

[docs]    def dual_obj(self, weights, options=None):
        """
        Return the dual regularized negative variational log likelihood including Lagrangian penalty terms for 
        local inconsistencies of estimated marginals (i.e., beliefs)
        :param weights: weight vector containing weights for all potentials
        :param options: Unused (for now) options for objective function
        :return: dual objective value
        """
        if self.label_expectations is None or not self.fully_observed:
            self.label_expectations = self.calculate_expectations(weights, self.conditioned_belief_propagators, True)
        self.inferred_expectations = self.calculate_expectations(weights, self.belief_propagators, True)
        term_p = sum(
            [np.true_divide(x.compute_dual_objective(), len(x.mn.variables)) for x in self.belief_propagators]) / len(
            self.belief_propagators)
        if not self.fully_observed:
            # recompute energy functional for label distributions only in latent variable case
            self.set_weights(weights, self.conditioned_belief_propagators)
            term_q = sum([np.true_divide(x.compute_dual_objective(), len(x.mn.variables)) for x in
                          self.conditioned_belief_propagators]) / len(self.conditioned_belief_propagators)
        else:
            term_q = np.dot(self.label_expectations, weights)

        self.term_q_p = term_p - term_q

        objec = 0.0
        # add regularization penalties
        objec += self.l1_regularization * np.sum(np.abs(weights))
        objec += 0.5 * self.l2_regularization * weights.dot(weights)
        objec += self.term_q_p

        return objec
Source code for mrftools.Learner

mrftools

Navigation

Related Topics