"""
`Autoformer: Decomposition Transformers with Auto-Correlation for Long-Term Series Forecasting
<https://arxiv.org/abs/2106.13008>`_
"""
from typing import Any, Callable, Dict, Optional, Tuple
import tensorflow as tf
from tensorflow.keras.layers import Conv1D, Dense, Dropout, LayerNormalization, ReLU
from tfts.layers.attention_layer import Attention, SelfAttention
from tfts.layers.autoformer_layer import AutoCorrelation, SeriesDecomp
from tfts.layers.dense_layer import FeedForwardNetwork
from tfts.layers.embed_layer import DataEmbedding
from ..layers.util_layer import ShapeLayer
from .base import BaseConfig, BaseModel
[docs]
class Encoder(tf.keras.layers.Layer):
    """Stack of ``EncoderLayer`` blocks followed by a final layer normalization.

    Args:
        kernel_size: Forwarded to every ``EncoderLayer`` as ``kernel_size``.
        hidden_size: Forwarded to every ``EncoderLayer`` as ``d_model``; also the
            last dimension used to build the final norm and the last dimension
            reported by ``compute_output_shape``.
        num_layers: Number of ``EncoderLayer`` instances created in ``build``.
        num_attention_heads: Forwarded to every ``EncoderLayer``.
        attention_probs_dropout_prob: Stored and serialized by ``get_config``;
            it is not passed to the sub-layers built here.
        ffn_intermediate_size: Stored and serialized by ``get_config``; it is
            not passed to the sub-layers built here.
        hidden_dropout_prob: Forwarded to every ``EncoderLayer`` as
            ``dropout_rate``.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(
        self,
        kernel_size: int,
        hidden_size: int,
        num_layers: int,
        num_attention_heads: int,
        attention_probs_dropout_prob: float,
        ffn_intermediate_size: int,
        hidden_dropout_prob: float,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        # Plain hyper-parameters; sub-layers are created lazily in ``build``.
        self.kernel_size = kernel_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_attention_heads = num_attention_heads
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.ffn_intermediate_size = ffn_intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob

    def build(self, input_shape):
        """Create the encoder layer stack and the final normalization layer."""
        super().build(input_shape)

        # Every layer in the stack shares the same hyper-parameters.
        layer_kwargs = {
            "kernel_size": self.kernel_size,
            "d_model": self.hidden_size,
            "num_attention_heads": self.num_attention_heads,
            "dropout_rate": self.hidden_dropout_prob,
        }
        self.layers = [EncoderLayer(**layer_kwargs) for _ in range(self.num_layers)]

        # The final norm is built eagerly with ``hidden_size`` as its last dimension.
        self.norm = LayerNormalization()
        norm_input_shape = [*input_shape[:-1], self.hidden_size]
        self.norm.build(norm_input_shape)

        self.built = True

    def call(self, x: tf.Tensor, mask: Optional[tf.Tensor] = None) -> tf.Tensor:
        """Run the input through every encoder layer, then the final norm.

        Args:
            x: Input tensor of shape [batch_size, time_steps, features]
            mask: Accepted for interface compatibility; it is not forwarded to
                the encoder layers.

        Returns:
            The tensor produced by the last encoder layer, normalized by
            ``self.norm`` when that attribute is not ``None``.
        """
        hidden = x
        for encoder_layer in self.layers:
            hidden = encoder_layer(hidden)
        return hidden if self.norm is None else self.norm(hidden)

    def get_config(self):
        """Return the base layer config extended with this layer's constructor arguments."""
        return {
            **super().get_config(),
            "kernel_size": self.kernel_size,
            "hidden_size": self.hidden_size,
            "num_layers": self.num_layers,
            "num_attention_heads": self.num_attention_heads,
            "attention_probs_dropout_prob": self.attention_probs_dropout_prob,
            "ffn_intermediate_size": self.ffn_intermediate_size,
            "hidden_dropout_prob": self.hidden_dropout_prob,
        }

    def compute_output_shape(self, input_shape):
        """Map a 3-element input shape to ``(batch, time_steps, hidden_size)``."""
        batch_dim, step_dim, _ = input_shape
        return (batch_dim, step_dim, self.hidden_size)
[docs]
class EncoderLayer(tf.keras.layers.Layer):
    """Single Autoformer encoder block: auto-correlation then a dense projection.

    Each of the two sub-layers is followed by dropout, a residual addition and
    a layer normalization.

    Args:
        kernel_size: Passed to the two ``SeriesDecomp`` layers created in ``build``.
        d_model: First argument given to ``AutoCorrelation``.
        num_attention_heads: Second argument given to ``AutoCorrelation``.
        dropout_rate: Rate of the ``Dropout`` layer shared by both sub-layers.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(
        self, kernel_size: int, d_model: int, num_attention_heads: int, dropout_rate: float = 0.1, **kwargs
    ) -> None:
        super().__init__(**kwargs)
        self.kernel_size = kernel_size
        self.d_model = d_model
        self.num_attention_heads = num_attention_heads
        self.dropout_rate = dropout_rate

    def build(self, input_shape: Tuple[Optional[int], ...]) -> None:
        """Create the sub-layers; the dense projection keeps the input's last dimension."""
        super().build(input_shape)
        # NOTE(review): the two decomposition layers are created but never
        # referenced by ``call`` below. They are kept so the attribute set is
        # unchanged — confirm whether they should be wired in.
        self.series_decomp1 = SeriesDecomp(self.kernel_size)
        self.series_decomp2 = SeriesDecomp(self.kernel_size)
        self.autocorrelation = AutoCorrelation(self.d_model, self.num_attention_heads)
        self.drop = Dropout(self.dropout_rate)
        # Projects back to the input feature size so the residual add is shape-compatible.
        self.dense = Dense(input_shape[-1])
        self.norm1 = LayerNormalization()
        self.norm2 = LayerNormalization()
        self.built = True

    def call(self, x: tf.Tensor) -> tf.Tensor:
        """Process input through the encoder layer.

        Args:
            x: Input tensor of shape [batch_size, time_steps, features]

        Returns:
            Tensor after the auto-correlation and dense sub-layers, each with
            dropout, residual addition and layer normalization applied.
        """
        # First sub-layer: auto-correlation called with ``x`` for all three inputs.
        residual = x
        x = self.autocorrelation(x, x, x)
        x = self.drop(x)
        x = x + residual
        x = self.norm1(x)

        # Second sub-layer: dense projection (the same Dropout instance is reused).
        residual = x
        x = self.dense(x)
        x = self.drop(x)
        x = x + residual
        x = self.norm2(x)
        return x

    def get_config(self):
        """Return the base layer config extended with this layer's constructor arguments.

        Without this override the layer cannot be re-created from its config,
        because ``__init__`` takes arguments the base ``Layer`` config does not
        record. ``Encoder.get_config`` follows the same pattern.
        """
        config = super().get_config()
        config.update(
            {
                "kernel_size": self.kernel_size,
                "d_model": self.d_model,
                "num_attention_heads": self.num_attention_heads,
                "dropout_rate": self.dropout_rate,
            }
        )
        return config
[docs]
class Decoder(tf.keras.layers.Layer):
    """Stack of ``DecoderLayer`` blocks followed by a final layer normalization.

    Args:
        kernel_size: Forwarded to every ``DecoderLayer`` as ``kernel_size``.
        hidden_size: Forwarded to every ``DecoderLayer`` as ``d_model``; also the
            last dimension used to build the final norm and the last dimension
            reported by ``compute_output_shape``.
        num_layers: Number of ``DecoderLayer`` instances created in ``build``.
        num_attention_heads: Forwarded to every ``DecoderLayer``.
        attention_probs_dropout_prob: Stored and serialized by ``get_config``;
            it is not passed to the sub-layers built here.
        ffn_intermediate_size: Stored and serialized by ``get_config``; it is
            not passed to the sub-layers built here.
        hidden_dropout_prob: Forwarded to every ``DecoderLayer`` as ``drop_rate``.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(
        self,
        kernel_size: int,
        hidden_size: int,
        num_layers: int,
        num_attention_heads: int,
        attention_probs_dropout_prob: float,
        ffn_intermediate_size: int,
        hidden_dropout_prob: float,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        # Plain hyper-parameters; sub-layers are created lazily in ``build``.
        self.kernel_size = kernel_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_attention_heads = num_attention_heads
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.ffn_intermediate_size = ffn_intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob

    def build(self, input_shape):
        """Create the decoder layer stack and the final normalization layer."""
        super().build(input_shape)

        # Every layer in the stack shares the same hyper-parameters.
        layer_kwargs = {
            "kernel_size": self.kernel_size,
            "d_model": self.hidden_size,
            "num_attention_heads": self.num_attention_heads,
            "drop_rate": self.hidden_dropout_prob,
        }
        self.layers = [DecoderLayer(**layer_kwargs) for _ in range(self.num_layers)]

        # The final norm is built eagerly with ``hidden_size`` as its last dimension.
        self.norm = LayerNormalization()
        norm_input_shape = [*input_shape[:-1], self.hidden_size]
        self.norm.build(norm_input_shape)

        self.built = True

    def call(
        self,
        x: tf.Tensor,
        memory: tf.Tensor,
        x_mask: Optional[tf.Tensor] = None,
        memory_mask: Optional[tf.Tensor] = None,
    ) -> tf.Tensor:
        """Run the input through every decoder layer, then the final norm.

        Args:
            x: Input tensor of shape [batch_size, time_steps, features]
            memory: Memory tensor from encoder; passed unchanged to every layer.
            x_mask: Accepted for interface compatibility; not forwarded to the
                decoder layers.
            memory_mask: Accepted for interface compatibility; not forwarded to
                the decoder layers.

        Returns:
            The tensor produced by the last decoder layer, normalized by
            ``self.norm`` when that attribute is not ``None``.
        """
        hidden = x
        for decoder_layer in self.layers:
            hidden = decoder_layer(hidden, memory)
        return hidden if self.norm is None else self.norm(hidden)

    def get_config(self):
        """Return the base layer config extended with this layer's constructor arguments."""
        return {
            **super().get_config(),
            "kernel_size": self.kernel_size,
            "hidden_size": self.hidden_size,
            "num_layers": self.num_layers,
            "num_attention_heads": self.num_attention_heads,
            "attention_probs_dropout_prob": self.attention_probs_dropout_prob,
            "ffn_intermediate_size": self.ffn_intermediate_size,
            "hidden_dropout_prob": self.hidden_dropout_prob,
        }

    def compute_output_shape(self, input_shape):
        """Map a 3-element input shape to ``(batch, time_steps, hidden_size)``."""
        batch_dim, step_dim, _ = input_shape
        return (batch_dim, step_dim, self.hidden_size)
[docs]
class DecoderLayer(tf.keras.layers.Layer):
    """Single Autoformer decoder block.

    Applies three sub-layers in order: auto-correlation over the decoder
    input, auto-correlation against the encoder memory, and a two-convolution
    feed-forward stage. Each sub-layer ends with a residual addition and a
    layer normalization.

    Args:
        kernel_size: Passed to the ``SeriesDecomp`` layers created in ``build``.
        d_model: First argument given to both ``AutoCorrelation`` layers and the
            number of filters of the first feed-forward convolution.
        num_attention_heads: Second argument given to both ``AutoCorrelation`` layers.
        drop_rate: Rate of the ``Dropout`` layer shared by all sub-layers.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(
        self, kernel_size: int, d_model: int, num_attention_heads: int, drop_rate: float = 0.1, **kwargs
    ) -> None:
        super().__init__(**kwargs)
        self.kernel_size = kernel_size
        self.d_model = d_model
        self.num_attention_heads = num_attention_heads
        self.drop_rate = drop_rate

    def build(self, input_shape: Tuple[Optional[int], ...]) -> None:
        """Create the sub-layers; output projections keep the input's last dimension."""
        # NOTE(review): ``series_decomp1-3``, ``project`` and ``dense1`` are
        # created but not referenced by ``call`` below. They are kept so the
        # attribute set is unchanged — confirm whether they should be wired in.
        self.series_decomp1 = SeriesDecomp(self.kernel_size)
        self.series_decomp2 = SeriesDecomp(self.kernel_size)
        self.series_decomp3 = SeriesDecomp(self.kernel_size)
        self.autocorrelation1 = AutoCorrelation(self.d_model, self.num_attention_heads)
        self.autocorrelation2 = AutoCorrelation(self.d_model, self.num_attention_heads)
        self.conv1 = Conv1D(self.d_model, kernel_size=3, strides=1, padding="same")
        self.project = Conv1D(1, kernel_size=3, strides=1, padding="same")
        self.drop = Dropout(self.drop_rate)
        self.dense1 = Dense(input_shape[-1])
        # Maps back to the input feature size so the residual add is shape-compatible.
        self.conv2 = Conv1D(input_shape[-1], kernel_size=3, strides=1, padding="same")
        self.activation = ReLU()
        self.norm1 = LayerNormalization()
        self.norm2 = LayerNormalization()
        self.norm3 = LayerNormalization()
        super().build(input_shape)

    def call(self, x: tf.Tensor, memory: tf.Tensor) -> tf.Tensor:
        """Process input through the decoder layer.

        Args:
            x: Input tensor of shape [batch_size, time_steps, features]
            memory: Memory tensor from encoder

        Returns:
            Tensor after the three sub-layers, each with a residual addition
            and layer normalization applied.
        """
        # Self-attention sub-layer: auto-correlation called with ``x`` for all three inputs.
        residual = x
        x = self.autocorrelation1(x, x, x)
        x = self.drop(x)
        x = x + residual
        x = self.norm1(x)

        # Cross-attention sub-layer: ``x`` is the first input, ``memory`` the other two.
        residual = x
        x = self.autocorrelation2(x, memory, memory)
        x = self.drop(x)
        x = x + residual
        x = self.norm2(x)

        # Feed-forward sub-layer: conv -> ReLU -> dropout -> conv.
        residual = x
        x = self.conv1(x)
        x = self.activation(x)
        x = self.drop(x)
        x = self.conv2(x)
        x = x + residual
        x = self.norm3(x)
        return x

    def get_config(self):
        """Return the base layer config extended with this layer's constructor arguments.

        Without this override the layer cannot be re-created from its config,
        because ``__init__`` takes arguments the base ``Layer`` config does not
        record. ``Decoder.get_config`` follows the same pattern.
        """
        config = super().get_config()
        config.update(
            {
                "kernel_size": self.kernel_size,
                "d_model": self.d_model,
                "num_attention_heads": self.num_attention_heads,
                "drop_rate": self.drop_rate,
            }
        )
        return config