Source code for renn.rnn.network

# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Recurrent neural network (RNN) helper functions."""

import functools

import jax
from jax.experimental import stax
import jax.numpy as jnp
import numpy as np

from . import cells
from . import unroll

__all__ = ['build_rnn', 'mse', 'eigsorted', 'timescale']


def build_rnn(num_tokens, emb_size, cell, num_outputs=1):
  """Builds an end-to-end recurrent neural network (RNN) model.

  Args:
    num_tokens: int, Number of different input tokens.
    emb_size: int, Dimensionality of the embedding vectors.
    cell: RNNCell to use as the core update function (see cells.py).
    num_outputs: int, Number of outputs from the readout (Default: 1).

  Returns:
    init_fun: function that takes a PRNGKey and input_shape and returns
      expected shapes and initialized embedding, RNN, and readout parameters.
    apply_fun: function that takes a tuple of network parameters and a batch
      of input tokens and applies the RNN to each sequence in the batch.
    emb_apply: function to just apply the embedding.
    readout_apply: function to just apply the readout.
  """
  emb_init, emb_apply = cells.embedding(num_tokens, emb_size)
  readout_init, readout_apply = stax.Dense(num_outputs)

  def init_fun(key, input_shape):
    """Initializes the components of the RNN.

    Args:
      key: JAX PRNGKey used to initialize the parameters.
      input_shape: tuple representing the input shape, should be
        (batch_size, sequence_length).

    Returns:
      shapes: tuple of shapes after applying the Embedding, RNN Cell, and
        Readout layers.
      network_params: tuple of network parameters, containing the embedding,
        RNN cell, and readout parameters.
    """
    emb_key, cell_key, readout_key = jax.random.split(key, 3)

    # Initialize the Embedding for the input tokens.
    emb_shape, emb_params = emb_init(emb_key, input_shape)

    # The cell is defined for a single update step, which is why we ignore
    # the sequence dimension (emb_shape[1]) here.
    rnn_shape, rnn_params = cell.init(cell_key, (emb_shape[0], emb_shape[2]))

    output_shape, readout_params = readout_init(readout_key, rnn_shape)

    shapes = (emb_shape, rnn_shape, output_shape)
    network_params = (emb_params, rnn_params, readout_params)

    return shapes, network_params

  def apply_fun(network_params, tokens):
    """Applies the RNN to a batch of input sequences.

    Args:
      network_params: tuple of network parameters (see init_fun).
      tokens: batch of inputs, with shape (batch_size, sequence_length).

    Returns:
      outputs: network outputs, at every step along the sequence.
    """
    emb_params, rnn_params, readout_params = network_params

    # Apply the embedding.
    inputs = emb_apply(emb_params, tokens)

    # Run the RNN.
    initial_states = cell.get_initial_state(rnn_params,
                                            batch_size=tokens.shape[0])
    return unroll.unroll_rnn(initial_states, inputs,
                             functools.partial(cell.batch_apply, rnn_params),
                             functools.partial(readout_apply, readout_params))

  return init_fun, apply_fun, emb_apply, readout_apply
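
# A minimal usage sketch for build_rnn, assuming the cells module exposes a
# GRU constructor, `cells.GRU(num_units)`; substitute whichever RNNCell your
# version of renn provides. Wrapped in a function so it does not run at
# import time.
def _demo_build_rnn():
  """Builds a small RNN and applies it to a dummy batch of token sequences."""
  cell = cells.GRU(num_units=64)  # Assumed cell constructor (see cells.py).
  init_fun, apply_fun, _, _ = build_rnn(num_tokens=128, emb_size=32, cell=cell)

  # Initialize parameters for sequences of 100 tokens, in batches of 8.
  key = jax.random.PRNGKey(0)
  _, params = init_fun(key, input_shape=(8, 100))

  # Apply the network; outputs are produced at every step of each sequence.
  tokens = jnp.zeros((8, 100), dtype=jnp.int32)
  return apply_fun(params, tokens)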
def mse(y, yhat):
  """Mean squared error loss, scaled by 1/2 to simplify its gradient."""
  return 0.5 * jnp.mean((y - yhat)**2)
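
# A quick sketch of why the 1/2 factor is there: it cancels the exponent when
# differentiating, so the gradient with respect to yhat is the mean residual
# (yhat - y) / n rather than 2 * (yhat - y) / n.
def _demo_mse_gradient():
  y = jnp.array([1.0, 2.0, 3.0])
  yhat = jnp.array([1.5, 2.0, 2.0])
  loss = mse(y, yhat)  # 0.5 * mean([0.25, 0.0, 1.0]) ~= 0.2083
  grad = jax.grad(mse, argnums=1)(y, yhat)  # (yhat - y) / 3 = [1/6, 0, -1/3]
  return loss, grad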
def eigsorted(jac):
  """Computes sorted eigenvalues and corresponding eigenvectors of a matrix.

  Notes:
    The eigenvectors are stored in the columns of the returned matrices.
    Both right and left eigenvectors are returned, such that J = R E L^T,
    where E is the diagonal matrix of eigenvalues.

  Args:
    jac: numpy array used to compute the eigendecomposition (must be square).

  Returns:
    rights: right eigenvectors, as columns in the returned array.
    eigvals: numpy array of eigenvalues, sorted by decreasing magnitude.
    lefts: left eigenvectors, as columns in the returned array.
  """
  unsorted_eigvals, unsorted_rights = np.linalg.eig(jac)

  # Sort the eigenvalues by decreasing magnitude.
  sorted_indices = np.flipud(np.argsort(np.abs(unsorted_eigvals)))
  eigenvalues = unsorted_eigvals[sorted_indices]
  rights = unsorted_rights[:, sorted_indices]

  # The left eigenvectors are the (pseudo)inverse of the right eigenvectors.
  lefts = np.linalg.pinv(rights).T

  return rights, eigenvalues, lefts
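
# A sanity-check sketch for eigsorted: rebuild the matrix from its
# decomposition (J = R E L^T) and confirm the magnitude ordering. Uses only
# numpy, which is already imported above.
def _demo_eigsorted():
  rng = np.random.default_rng(0)
  jac = rng.standard_normal((4, 4))
  rights, eigvals, lefts = eigsorted(jac)

  # Reconstruction: J should equal R @ diag(E) @ L^T up to numerical error
  # (the result may carry a negligible imaginary part for a real input).
  reconstructed = rights @ np.diag(eigvals) @ lefts.T
  assert np.allclose(jac, reconstructed)

  # Eigenvalues come back sorted by decreasing magnitude.
  assert np.all(np.diff(np.abs(eigvals)) <= 0)
  return eigvals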
def timescale(eigenvalues):
  """Converts eigenvalues into approximate time constants."""
  return -1. / np.log(np.abs(eigenvalues))
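
# Where the formula comes from (a sketch): near a fixed point, a linear mode
# with eigenvalue lam decays as |lam|**t = exp(-t / tau), which solves to
# tau = -1 / log(|lam|). Magnitudes close to 1 give long timescales, and
# magnitudes above 1 give negative values, flagging unstable modes.
def _demo_timescale():
  eigvals = np.array([0.5, 0.9, 0.99])
  return timescale(eigvals)  # ~= [1.44, 9.49, 99.50] steps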