Source code for renn.utils

# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for optimization."""

import jax
from jax import flatten_util
import jax.numpy as jnp

import numpy as np
import tqdm

__all__ = [
    'batch_mean', 'norm', 'identity', 'fst', 'snd', 'optimize', 'one_hot',
    'compose'
]


def batch_mean(fun, in_axes):
  """Converts a function to a batched version (maps over multiple inputs).

  This takes a function that returns a scalar (such as a loss function) and
  returns a new function that maps the function over multiple arguments (such
  as over multiple random seeds) and returns the average of the results.

  It is useful for generating a batched version of a loss function, where the
  loss function has stochasticity that depends on a random seed argument.

  Args:
    fun: function, Function to batch.
    in_axes: tuple, Specifies the arguments to fun to batch over. For example,
      in_axes=(None, 0) would batch over the second argument.

  Returns:
    batch_fun: function, computes the average over a batch.
  """
  mapped_fun = jax.vmap(fun, in_axes=in_axes)

  def batch_fun(*args):
    return jnp.mean(mapped_fun(*args))

  return batch_fun
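
# Usage sketch (an addition for illustration, not part of renn): averaging a
# stochastic loss over several PRNG keys with `batch_mean`. The `seeded_loss`
# function and its shapes are hypothetical.
def _example_batch_mean():
  def seeded_loss(params, key):
    noise = jax.random.normal(key, params.shape)
    return jnp.sum((params + 0.1 * noise)**2)

  keys = jax.random.split(jax.random.PRNGKey(0), 8)
  batched_loss = batch_mean(seeded_loss, in_axes=(None, 0))
  return batched_loss(jnp.ones(3), keys)  # Scalar: mean loss over 8 seeds.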

def norm(params, order=2):
  """Computes the (flattened) norm of a pytree."""
  return jnp.linalg.norm(flatten_util.ravel_pytree(params)[0], ord=order)
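
# Usage sketch (added for illustration): `norm` flattens an arbitrary pytree
# before taking the norm, so nested parameter dicts work directly. The
# parameter structure below is hypothetical.
def _example_norm():
  params = {'w': jnp.ones((2, 3)), 'b': jnp.zeros(3)}
  return norm(params), norm(params, order=1)  # L2 and L1 norms of all entries.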

def identity(x):
  """Identity function."""
  return x

def fst(xs):
  """Returns the first element from a list."""
  return xs[0]

def snd(xs):
  """Returns the second element from a list."""
  return xs[1]

def compose(*funcs):
  """Returns a function that is the composition of multiple functions."""

  def wrapper(x):
    for func in reversed(funcs):
      x = func(x)
    return x

  return wrapper
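
# Usage sketch (added for illustration): `compose` applies functions right to
# left, matching mathematical composition, and works with the small helpers
# `identity` and `fst` defined above.
def _example_compose():
  first_then_negate = compose(jnp.negative, fst, identity)
  return first_then_negate([jnp.array(2.0), jnp.array(5.0)])  # -> -2.0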

def optimize(loss_fun, x0, optimizer, steps, stop_tol=-np.inf):
  """Run an optimizer on a given loss function.

  Args:
    loss_fun: Scalar loss function to optimize.
    x0: Initial parameters.
    optimizer: A tuple of optimizer functions (init_opt, update_opt,
      get_params) from a jax.experimental.optimizers instance.
    steps: Iterator over steps.
    stop_tol: Stop if the loss is below this value (Default: -np.inf).

  Returns:
    loss_hist: Array of losses during training.
    final_params: Optimized parameters.
  """
  # Initialize optimizer.
  init_opt, update_opt, get_params = optimizer
  opt_state = init_opt(x0)

  # Loss and gradient.
  value_and_grad = jax.value_and_grad(loss_fun)

  @jax.jit
  def step(k, state):
    params = get_params(state)
    loss, grads = value_and_grad(params)
    return loss, update_opt(k, grads, state)

  # Store loss history.
  loss_hist = []
  for k in steps:
    f, opt_state = step(k, opt_state)
    loss_hist.append(f)

    if f <= stop_tol:
      break

  # Extract final parameters.
  final_params = get_params(opt_state)

  return np.array(loss_hist), final_params
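
# Usage sketch (added for illustration): minimizing a toy quadratic with
# `optimize`. The optimizer triple here comes from
# jax.example_libraries.optimizers (called jax.experimental.optimizers in
# older JAX versions, as the docstring above assumes); the loss and step count
# are arbitrary.
def _example_optimize():
  from jax.example_libraries import optimizers

  quadratic = lambda x: jnp.sum((x - 3.0)**2)
  optimizer = optimizers.adam(step_size=0.1)
  loss_hist, x_final = optimize(quadratic, jnp.zeros(2), optimizer,
                                range(200), stop_tol=1e-6)
  return loss_hist, x_final  # x_final approaches [3., 3.].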

def one_hot(labels, num_classes, dtype=jnp.float32):
  """Creates a one-hot encoding of an array of labels.

  Args:
    labels: array of integers with shape (num_examples,).
    num_classes: int, Total number of classes.
    dtype: optional, jax datatype for the return array (Default: float32).

  Returns:
    one_hot_labels: array with shape (num_examples, num_classes).
  """
  return jnp.array(
      jnp.array(labels)[:, None] == jnp.arange(num_classes), dtype)
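
# Usage sketch (added for illustration): encoding three integer labels over
# four classes with `one_hot`.
def _example_one_hot():
  labels = jnp.array([0, 2, 3])
  return one_hot(labels, 4)  # Shape (3, 4), rows are one-hot float32 vectors.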

def select(sequences, indices):
  """Selects one element from each sequence in a batch of sequences.

  Given an array of shape (number_of_sequences, sequence_length,
  element_dimension), and a 1D array specifying which index of each sequence
  to select, returns a (number_of_sequences, element_dimension)-shaped array
  with the selected elements.

  Args:
    sequences: array with shape (number_of_sequences, sequence_length,
      element_dimension)
    indices: 1D array with length number_of_sequences

  Returns:
    selected_elements: array with shape (number_of_sequences,
      element_dimension)
  """
  assert len(indices) == sequences.shape[0]

  # Shape indices properly.
  indices_shaped = indices[:, jnp.newaxis, jnp.newaxis]

  # Select elements.
  selected_elements = jnp.take_along_axis(sequences, indices_shaped, axis=1)

  # Remove the sequence dimension.
  return jnp.squeeze(selected_elements, axis=1)


def make_loss_function(network_apply_fun, basic_loss_fun, regularization_fun):
  """Builds a loss function from a network, a base loss, and a regularizer.

  Args:
    network_apply_fun: maps (network_params, batched_inputs) -> network_logits
    basic_loss_fun: maps (logits, batched_labels) -> scalar loss value
    regularization_fun: maps network_params -> scalar loss value

  Returns:
    total_loss_fun: maps (network_params, batch) -> scalar loss value
  """

  def total_loss_fun(params, batch):
    """Maps network parameters and a training batch to a loss value.

    Args:
      params: network parameters.
      batch: a dictionary with keys ['inputs', 'index', 'labels']
        'inputs': sequences of inputs with shape
          (batch_size, max_sequence_length)
        'index': 1D array storing the length of each input sequence
        'labels': 1D array storing the label of each input sequence

    Returns:
      loss: scalar loss averaged over the batch.
    """
    all_time_logits = network_apply_fun(params, batch['inputs'])
    end_logits = select(all_time_logits, batch['index'] - 1)
    return basic_loss_fun(end_logits,
                          batch['labels']) + regularization_fun(params)

  return total_loss_fun


def make_acc_fun(network_apply_fun, num_outputs=1):
  """Given a network function and number of outputs, returns an accuracy function."""
  if num_outputs == 1:
    prediction_function = lambda x: (x >= 0.).astype(jnp.int32)
  else:
    prediction_function = lambda x: x.argmax(axis=-1).astype(jnp.int32)

  @jax.jit
  def accuracy_fun(params, batch):
    all_time_logits = network_apply_fun(params, batch['inputs'])
    end_logits = select(all_time_logits, batch['index'] - 1)
    predictions = jnp.squeeze(prediction_function(end_logits))
    accuracies = (batch['labels'] == predictions).astype(jnp.int32)
    return jnp.mean(accuracies)

  return accuracy_fun
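
# Usage sketch (added for illustration): wiring `make_loss_function` and
# `make_acc_fun` to a toy "network". The apply function, batch contents, and
# shapes below are hypothetical stand-ins for a real renn RNN readout that
# returns per-timestep logits of shape (batch_size, sequence_length,
# num_classes).
def _example_loss_and_accuracy():
  num_classes = 3

  def apply_fun(params, inputs):
    # Fake per-timestep logits: (batch_size, sequence_length, num_classes).
    return jnp.einsum('bt,c->btc', inputs.astype(jnp.float32),
                      params['readout'])

  params = {'readout': jnp.arange(num_classes, dtype=jnp.float32)}
  batch = {
      'inputs': jnp.ones((4, 5), dtype=jnp.int32),  # 4 sequences, length 5.
      'index': jnp.array([5, 3, 4, 2]),             # True sequence lengths.
      'labels': jnp.array([0, 1, 2, 1]),
  }

  cross_entropy = lambda logits, labels: -jnp.mean(
      jax.nn.log_softmax(logits)[jnp.arange(labels.shape[0]), labels])
  l2_penalty = lambda p: 1e-4 * norm(p)**2

  loss_fun = make_loss_function(apply_fun, cross_entropy, l2_penalty)
  acc_fun = make_acc_fun(apply_fun, num_outputs=num_classes)
  return loss_fun(params, batch), acc_fun(params, batch)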